提交 07be1337 编写于 作者: L Linus Torvalds

Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This has our merge window series of cleanups and fixes.  These target
  a wide range of issues, but do include some important fixes for
  qgroups, O_DIRECT, and fsync handling.  Jeff Mahoney moved around a
  few definitions to make them easier for userland to consume.

  Also whiteout support is included now that issues with overlayfs have
  been cleared up.

  I have one more fix pending for page faults during btrfs_copy_from_user,
  but I wanted to get this bulk out the door first"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
  btrfs: fix memory leak during RAID 5/6 device replacement
  Btrfs: add semaphore to synchronize direct IO writes with fsync
  Btrfs: fix race between block group relocation and nocow writes
  Btrfs: fix race between fsync and direct IO writes for prealloc extents
  Btrfs: fix number of transaction units for renames with whiteout
  Btrfs: pin logs earlier when doing a rename exchange operation
  Btrfs: unpin logs if rename exchange operation fails
  Btrfs: fix inode leak on failure to setup whiteout inode in rename
  btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
  Btrfs: pin log earlier when renaming
  Btrfs: unpin log if rename operation fails
  Btrfs: don't do unnecessary delalloc flushes when relocating
  Btrfs: don't wait for unrelated IO to finish before relocation
  Btrfs: fix empty symlink after creating symlink and fsync parent dir
  Btrfs: fix for incorrect directory entries after fsync log replay
  btrfs: build fixup for qgroup_account_snapshot
  btrfs: qgroup: Fix qgroup accounting when creating snapshot
  Btrfs: fix fspath error deallocation
  btrfs: make find_workspace warn if there are no workspaces
  btrfs: make find_workspace always succeed
  ...
...@@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, ...@@ -1991,7 +1991,7 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
ifp = kmalloc(sizeof(*ifp), GFP_NOFS); ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
if (!ifp) { if (!ifp) {
kfree(fspath); vfree(fspath);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
......
...@@ -196,6 +196,16 @@ struct btrfs_inode { ...@@ -196,6 +196,16 @@ struct btrfs_inode {
struct list_head delayed_iput; struct list_head delayed_iput;
long delayed_iput_count; long delayed_iput_count;
/*
* To avoid races between lockless (i_mutex not held) direct IO writes
* and concurrent fsync requests. Direct IO writes must acquire read
* access on this semaphore for creating an extent map and its
* corresponding ordered extent. The fast fsync path must acquire write
* access on this semaphore before it collects ordered extents and
* extent maps.
*/
struct rw_semaphore dio_sem;
struct inode vfs_inode; struct inode vfs_inode;
}; };
......
...@@ -743,8 +743,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ...@@ -743,8 +743,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
static struct { static struct {
struct list_head idle_ws; struct list_head idle_ws;
spinlock_t ws_lock; spinlock_t ws_lock;
int num_ws; /* Number of free workspaces */
atomic_t alloc_ws; int free_ws;
/* Total number of allocated workspaces */
atomic_t total_ws;
/* Waiters for a free workspace */
wait_queue_head_t ws_wait; wait_queue_head_t ws_wait;
} btrfs_comp_ws[BTRFS_COMPRESS_TYPES]; } btrfs_comp_ws[BTRFS_COMPRESS_TYPES];
...@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void) ...@@ -758,16 +761,34 @@ void __init btrfs_init_compress(void)
int i; int i;
for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
struct list_head *workspace;
INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws); INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws);
spin_lock_init(&btrfs_comp_ws[i].ws_lock); spin_lock_init(&btrfs_comp_ws[i].ws_lock);
atomic_set(&btrfs_comp_ws[i].alloc_ws, 0); atomic_set(&btrfs_comp_ws[i].total_ws, 0);
init_waitqueue_head(&btrfs_comp_ws[i].ws_wait); init_waitqueue_head(&btrfs_comp_ws[i].ws_wait);
/*
* Preallocate one workspace for each compression type so
* we can guarantee forward progress in the worst case
*/
workspace = btrfs_compress_op[i]->alloc_workspace();
if (IS_ERR(workspace)) {
printk(KERN_WARNING
"BTRFS: cannot preallocate compression workspace, will try later");
} else {
atomic_set(&btrfs_comp_ws[i].total_ws, 1);
btrfs_comp_ws[i].free_ws = 1;
list_add(workspace, &btrfs_comp_ws[i].idle_ws);
}
} }
} }
/* /*
* this finds an available workspace or allocates a new one * This finds an available workspace or allocates a new one.
* ERR_PTR is returned if things go bad. * If it's not possible to allocate a new one, waits until there's one.
 * Preallocation guarantees forward progress and we do not return
* errors.
*/ */
static struct list_head *find_workspace(int type) static struct list_head *find_workspace(int type)
{ {
...@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type) ...@@ -777,36 +798,58 @@ static struct list_head *find_workspace(int type)
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws; atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *num_ws = &btrfs_comp_ws[idx].num_ws; int *free_ws = &btrfs_comp_ws[idx].free_ws;
again: again:
spin_lock(ws_lock); spin_lock(ws_lock);
if (!list_empty(idle_ws)) { if (!list_empty(idle_ws)) {
workspace = idle_ws->next; workspace = idle_ws->next;
list_del(workspace); list_del(workspace);
(*num_ws)--; (*free_ws)--;
spin_unlock(ws_lock); spin_unlock(ws_lock);
return workspace; return workspace;
} }
if (atomic_read(alloc_ws) > cpus) { if (atomic_read(total_ws) > cpus) {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
spin_unlock(ws_lock); spin_unlock(ws_lock);
prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(ws_wait, &wait, TASK_UNINTERRUPTIBLE);
if (atomic_read(alloc_ws) > cpus && !*num_ws) if (atomic_read(total_ws) > cpus && !*free_ws)
schedule(); schedule();
finish_wait(ws_wait, &wait); finish_wait(ws_wait, &wait);
goto again; goto again;
} }
atomic_inc(alloc_ws); atomic_inc(total_ws);
spin_unlock(ws_lock); spin_unlock(ws_lock);
workspace = btrfs_compress_op[idx]->alloc_workspace(); workspace = btrfs_compress_op[idx]->alloc_workspace();
if (IS_ERR(workspace)) { if (IS_ERR(workspace)) {
atomic_dec(alloc_ws); atomic_dec(total_ws);
wake_up(ws_wait); wake_up(ws_wait);
/*
* Do not return the error but go back to waiting. There's a
* workspace preallocated for each type and the compression
* time is bounded so we get to a workspace eventually. This
* makes our caller's life easier.
*
* To prevent silent and low-probability deadlocks (when the
* initial preallocation fails), check if there are any
* workspaces at all.
*/
if (atomic_read(total_ws) == 0) {
static DEFINE_RATELIMIT_STATE(_rs,
/* once per minute */ 60 * HZ,
/* no burst */ 1);
if (__ratelimit(&_rs)) {
printk(KERN_WARNING
"no compression workspaces, low memory, retrying");
}
}
goto again;
} }
return workspace; return workspace;
} }
...@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace) ...@@ -820,21 +863,21 @@ static void free_workspace(int type, struct list_head *workspace)
int idx = type - 1; int idx = type - 1;
struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws;
spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock;
atomic_t *alloc_ws = &btrfs_comp_ws[idx].alloc_ws; atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws;
wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait;
int *num_ws = &btrfs_comp_ws[idx].num_ws; int *free_ws = &btrfs_comp_ws[idx].free_ws;
spin_lock(ws_lock); spin_lock(ws_lock);
if (*num_ws < num_online_cpus()) { if (*free_ws < num_online_cpus()) {
list_add(workspace, idle_ws); list_add(workspace, idle_ws);
(*num_ws)++; (*free_ws)++;
spin_unlock(ws_lock); spin_unlock(ws_lock);
goto wake; goto wake;
} }
spin_unlock(ws_lock); spin_unlock(ws_lock);
btrfs_compress_op[idx]->free_workspace(workspace); btrfs_compress_op[idx]->free_workspace(workspace);
atomic_dec(alloc_ws); atomic_dec(total_ws);
wake: wake:
/* /*
* Make sure counter is updated before we wake up waiters. * Make sure counter is updated before we wake up waiters.
...@@ -857,7 +900,7 @@ static void free_workspaces(void) ...@@ -857,7 +900,7 @@ static void free_workspaces(void)
workspace = btrfs_comp_ws[i].idle_ws.next; workspace = btrfs_comp_ws[i].idle_ws.next;
list_del(workspace); list_del(workspace);
btrfs_compress_op[i]->free_workspace(workspace); btrfs_compress_op[i]->free_workspace(workspace);
atomic_dec(&btrfs_comp_ws[i].alloc_ws); atomic_dec(&btrfs_comp_ws[i].total_ws);
} }
} }
} }
...@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping, ...@@ -894,8 +937,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
int ret; int ret;
workspace = find_workspace(type); workspace = find_workspace(type);
if (IS_ERR(workspace))
return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
start, len, pages, start, len, pages,
...@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in, ...@@ -930,8 +971,6 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
int ret; int ret;
workspace = find_workspace(type); workspace = find_workspace(type);
if (IS_ERR(workspace))
return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in, ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
disk_start, disk_start,
...@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, ...@@ -952,8 +991,6 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
int ret; int ret;
workspace = find_workspace(type); workspace = find_workspace(type);
if (IS_ERR(workspace))
return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in, ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
dest_page, start_byte, dest_page, start_byte,
......
...@@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, ...@@ -1011,7 +1011,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
return ret; return ret;
if (refs == 0) { if (refs == 0) {
ret = -EROFS; ret = -EROFS;
btrfs_std_error(root->fs_info, ret, NULL); btrfs_handle_fs_error(root->fs_info, ret, NULL);
return ret; return ret;
} }
} else { } else {
...@@ -1928,7 +1928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ...@@ -1928,7 +1928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
child = read_node_slot(root, mid, 0); child = read_node_slot(root, mid, 0);
if (!child) { if (!child) {
ret = -EROFS; ret = -EROFS;
btrfs_std_error(root->fs_info, ret, NULL); btrfs_handle_fs_error(root->fs_info, ret, NULL);
goto enospc; goto enospc;
} }
...@@ -2031,7 +2031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ...@@ -2031,7 +2031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
*/ */
if (!left) { if (!left) {
ret = -EROFS; ret = -EROFS;
btrfs_std_error(root->fs_info, ret, NULL); btrfs_handle_fs_error(root->fs_info, ret, NULL);
goto enospc; goto enospc;
} }
wret = balance_node_right(trans, root, mid, left); wret = balance_node_right(trans, root, mid, left);
......
此差异已折叠。
...@@ -134,7 +134,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node( ...@@ -134,7 +134,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
/* cached in the btrfs inode and can be accessed */ /* cached in the btrfs inode and can be accessed */
atomic_add(2, &node->refs); atomic_add(2, &node->refs);
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); ret = radix_tree_preload(GFP_NOFS);
if (ret) { if (ret) {
kmem_cache_free(delayed_node_cache, node); kmem_cache_free(delayed_node_cache, node);
return ERR_PTR(ret); return ERR_PTR(ret);
......
...@@ -44,9 +44,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( ...@@ -44,9 +44,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
struct btrfs_fs_info *fs_info, struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev, struct btrfs_device *srcdev,
struct btrfs_device *tgtdev); struct btrfs_device *tgtdev);
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
char *srcdev_name,
struct btrfs_device **device);
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info); static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
static int btrfs_dev_replace_kthread(void *data); static int btrfs_dev_replace_kthread(void *data);
static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info); static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
...@@ -305,8 +302,8 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) ...@@ -305,8 +302,8 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item; dev_replace->cursor_left_last_write_of_item;
} }
int btrfs_dev_replace_start(struct btrfs_root *root, int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name,
struct btrfs_ioctl_dev_replace_args *args) u64 srcdevid, char *srcdev_name, int read_src)
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
...@@ -315,29 +312,16 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -315,29 +312,16 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_device *tgt_device = NULL; struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL; struct btrfs_device *src_device = NULL;
switch (args->start.cont_reading_from_srcdev_mode) {
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
break;
default:
return -EINVAL;
}
if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
args->start.tgtdev_name[0] == '\0')
return -EINVAL;
/* the disk copy procedure reuses the scrub code */ /* the disk copy procedure reuses the scrub code */
mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->volume_mutex);
ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, ret = btrfs_find_device_by_devspec(root, srcdevid,
args->start.srcdev_name, srcdev_name, &src_device);
&src_device);
if (ret) { if (ret) {
mutex_unlock(&fs_info->volume_mutex); mutex_unlock(&fs_info->volume_mutex);
return ret; return ret;
} }
ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, ret = btrfs_init_dev_replace_tgtdev(root, tgtdev_name,
src_device, &tgt_device); src_device, &tgt_device);
mutex_unlock(&fs_info->volume_mutex); mutex_unlock(&fs_info->volume_mutex);
if (ret) if (ret)
...@@ -364,18 +348,17 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -364,18 +348,17 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
break; break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
goto leave; goto leave;
} }
dev_replace->cont_reading_from_srcdev_mode = dev_replace->cont_reading_from_srcdev_mode = read_src;
args->start.cont_reading_from_srcdev_mode;
WARN_ON(!src_device); WARN_ON(!src_device);
dev_replace->srcdev = src_device; dev_replace->srcdev = src_device;
WARN_ON(!tgt_device); WARN_ON(!tgt_device);
dev_replace->tgtdev = tgt_device; dev_replace->tgtdev = tgt_device;
btrfs_info_in_rcu(root->fs_info, btrfs_info_in_rcu(fs_info,
"dev_replace from %s (devid %llu) to %s started", "dev_replace from %s (devid %llu) to %s started",
src_device->missing ? "<missing disk>" : src_device->missing ? "<missing disk>" :
rcu_str_deref(src_device->name), rcu_str_deref(src_device->name),
...@@ -396,14 +379,13 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -396,14 +379,13 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->item_needs_writeback = 1; dev_replace->item_needs_writeback = 1;
atomic64_set(&dev_replace->num_write_errors, 0); atomic64_set(&dev_replace->num_write_errors, 0);
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace, 1); btrfs_dev_replace_unlock(dev_replace, 1);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret) if (ret)
btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); btrfs_err(fs_info, "kobj add dev failed %d\n", ret);
btrfs_wait_ordered_roots(root->fs_info, -1); btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
/* force writing the updated state information to disk */ /* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
...@@ -421,11 +403,9 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -421,11 +403,9 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
btrfs_device_get_total_bytes(src_device), btrfs_device_get_total_bytes(src_device),
&dev_replace->scrub_progress, 0, 1); &dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(root->fs_info, ret); ret = btrfs_dev_replace_finishing(fs_info, ret);
/* don't warn if EINPROGRESS, someone else might be running scrub */
if (ret == -EINPROGRESS) { if (ret == -EINPROGRESS) {
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
ret = 0;
} else { } else {
WARN_ON(ret); WARN_ON(ret);
} }
...@@ -440,6 +420,35 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ...@@ -440,6 +420,35 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
return ret; return ret;
} }
int btrfs_dev_replace_by_ioctl(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args)
{
int ret;
switch (args->start.cont_reading_from_srcdev_mode) {
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
break;
default:
return -EINVAL;
}
if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
args->start.tgtdev_name[0] == '\0')
return -EINVAL;
ret = btrfs_dev_replace_start(root, args->start.tgtdev_name,
args->start.srcdevid,
args->start.srcdev_name,
args->start.cont_reading_from_srcdev_mode);
args->result = ret;
/* don't warn if EINPROGRESS, someone else might be running scrub */
if (ret == BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS)
ret = 0;
return ret;
}
/* /*
 * blocked until all in-flight bios are finished. * blocked until all in-flight bios are finished.
*/ */
...@@ -495,7 +504,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -495,7 +504,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret; return ret;
} }
btrfs_wait_ordered_roots(root->fs_info, -1); btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
...@@ -560,10 +569,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, ...@@ -560,10 +569,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
ASSERT(list_empty(&src_device->resized_list)); ASSERT(list_empty(&src_device->resized_list));
tgt_device->commit_total_bytes = src_device->commit_total_bytes; tgt_device->commit_total_bytes = src_device->commit_total_bytes;
tgt_device->commit_bytes_used = src_device->bytes_used; tgt_device->commit_bytes_used = src_device->bytes_used;
if (fs_info->sb->s_bdev == src_device->bdev)
fs_info->sb->s_bdev = tgt_device->bdev; btrfs_assign_next_active_device(fs_info, src_device, tgt_device);
if (fs_info->fs_devices->latest_bdev == src_device->bdev)
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++; fs_info->fs_devices->rw_devices++;
...@@ -626,25 +634,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( ...@@ -626,25 +634,6 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
write_unlock(&em_tree->lock); write_unlock(&em_tree->lock);
} }
static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid,
char *srcdev_name,
struct btrfs_device **device)
{
int ret;
if (srcdevid) {
ret = 0;
*device = btrfs_find_device(root->fs_info, srcdevid, NULL,
NULL);
if (!*device)
ret = -ENOENT;
} else {
ret = btrfs_find_device_missing_or_by_path(root, srcdev_name,
device);
}
return ret;
}
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args) struct btrfs_ioctl_dev_replace_args *args)
{ {
......
...@@ -25,8 +25,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info); ...@@ -25,8 +25,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info); struct btrfs_fs_info *fs_info);
void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_start(struct btrfs_root *root, int btrfs_dev_replace_by_ioctl(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args); struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name,
u64 srcdevid, char *srcdev_name, int read_src);
void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args); struct btrfs_ioctl_dev_replace_args *args);
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
......
...@@ -1640,7 +1640,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, ...@@ -1640,7 +1640,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
{ {
int ret; int ret;
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); ret = radix_tree_preload(GFP_NOFS);
if (ret) if (ret)
return ret; return ret;
...@@ -2417,7 +2417,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, ...@@ -2417,7 +2417,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */ /* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root); ret = btrfs_recover_log_trees(log_tree_root);
if (ret) { if (ret) {
btrfs_std_error(tree_root->fs_info, ret, btrfs_handle_fs_error(tree_root->fs_info, ret,
"Failed to recover log tree"); "Failed to recover log tree");
free_extent_buffer(log_tree_root->node); free_extent_buffer(log_tree_root->node);
kfree(log_tree_root); kfree(log_tree_root);
...@@ -2517,6 +2517,7 @@ int open_ctree(struct super_block *sb, ...@@ -2517,6 +2517,7 @@ int open_ctree(struct super_block *sb,
int num_backups_tried = 0; int num_backups_tried = 0;
int backup_index = 0; int backup_index = 0;
int max_active; int max_active;
bool cleaner_mutex_locked = false;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL); tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
...@@ -2713,7 +2714,7 @@ int open_ctree(struct super_block *sb, ...@@ -2713,7 +2714,7 @@ int open_ctree(struct super_block *sb,
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/ */
if (btrfs_check_super_csum(bh->b_data)) { if (btrfs_check_super_csum(bh->b_data)) {
printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); btrfs_err(fs_info, "superblock checksum mismatch");
err = -EINVAL; err = -EINVAL;
brelse(bh); brelse(bh);
goto fail_alloc; goto fail_alloc;
...@@ -2733,7 +2734,7 @@ int open_ctree(struct super_block *sb, ...@@ -2733,7 +2734,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: superblock contains fatal errors\n"); btrfs_err(fs_info, "superblock contains fatal errors");
err = -EINVAL; err = -EINVAL;
goto fail_alloc; goto fail_alloc;
} }
...@@ -2768,9 +2769,9 @@ int open_ctree(struct super_block *sb, ...@@ -2768,9 +2769,9 @@ int open_ctree(struct super_block *sb,
features = btrfs_super_incompat_flags(disk_super) & features = btrfs_super_incompat_flags(disk_super) &
~BTRFS_FEATURE_INCOMPAT_SUPP; ~BTRFS_FEATURE_INCOMPAT_SUPP;
if (features) { if (features) {
printk(KERN_ERR "BTRFS: couldn't mount because of " btrfs_err(fs_info,
"unsupported optional features (%Lx).\n", "cannot mount because of unsupported optional features (%llx)",
features); features);
err = -EINVAL; err = -EINVAL;
goto fail_alloc; goto fail_alloc;
} }
...@@ -2781,7 +2782,7 @@ int open_ctree(struct super_block *sb, ...@@ -2781,7 +2782,7 @@ int open_ctree(struct super_block *sb,
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
printk(KERN_INFO "BTRFS: has skinny extents\n"); btrfs_info(fs_info, "has skinny extents");
/* /*
* flag our filesystem as having big metadata blocks if * flag our filesystem as having big metadata blocks if
...@@ -2789,7 +2790,8 @@ int open_ctree(struct super_block *sb, ...@@ -2789,7 +2790,8 @@ int open_ctree(struct super_block *sb,
*/ */
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); btrfs_info(fs_info,
"flagging fs with big metadata feature");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
} }
...@@ -2805,9 +2807,9 @@ int open_ctree(struct super_block *sb, ...@@ -2805,9 +2807,9 @@ int open_ctree(struct super_block *sb,
*/ */
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != nodesize)) { (sectorsize != nodesize)) {
printk(KERN_ERR "BTRFS: unequal leaf/node/sector sizes " btrfs_err(fs_info,
"are not allowed for mixed block groups on %s\n", "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
sb->s_id); nodesize, sectorsize);
goto fail_alloc; goto fail_alloc;
} }
...@@ -2820,8 +2822,8 @@ int open_ctree(struct super_block *sb, ...@@ -2820,8 +2822,8 @@ int open_ctree(struct super_block *sb,
features = btrfs_super_compat_ro_flags(disk_super) & features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP; ~BTRFS_FEATURE_COMPAT_RO_SUPP;
if (!(sb->s_flags & MS_RDONLY) && features) { if (!(sb->s_flags & MS_RDONLY) && features) {
printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " btrfs_err(fs_info,
"unsupported option features (%Lx).\n", "cannot mount read-write because of unsupported optional features (%llx)",
features); features);
err = -EINVAL; err = -EINVAL;
goto fail_alloc; goto fail_alloc;
...@@ -2850,8 +2852,7 @@ int open_ctree(struct super_block *sb, ...@@ -2850,8 +2852,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_sys_array(tree_root); ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->chunk_mutex);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: failed to read the system " btrfs_err(fs_info, "failed to read the system array: %d", ret);
"array on %s\n", sb->s_id);
goto fail_sb_buffer; goto fail_sb_buffer;
} }
...@@ -2865,8 +2866,7 @@ int open_ctree(struct super_block *sb, ...@@ -2865,8 +2866,7 @@ int open_ctree(struct super_block *sb,
generation); generation);
if (IS_ERR(chunk_root->node) || if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) { !extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", btrfs_err(fs_info, "failed to read chunk root");
sb->s_id);
if (!IS_ERR(chunk_root->node)) if (!IS_ERR(chunk_root->node))
free_extent_buffer(chunk_root->node); free_extent_buffer(chunk_root->node);
chunk_root->node = NULL; chunk_root->node = NULL;
...@@ -2880,8 +2880,7 @@ int open_ctree(struct super_block *sb, ...@@ -2880,8 +2880,7 @@ int open_ctree(struct super_block *sb,
ret = btrfs_read_chunk_tree(chunk_root); ret = btrfs_read_chunk_tree(chunk_root);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: failed to read chunk tree on %s\n", btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
sb->s_id);
goto fail_tree_roots; goto fail_tree_roots;
} }
...@@ -2892,8 +2891,7 @@ int open_ctree(struct super_block *sb, ...@@ -2892,8 +2891,7 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_devices, 0); btrfs_close_extra_devices(fs_devices, 0);
if (!fs_devices->latest_bdev) { if (!fs_devices->latest_bdev) {
printk(KERN_ERR "BTRFS: failed to read devices on %s\n", btrfs_err(fs_info, "failed to read devices");
sb->s_id);
goto fail_tree_roots; goto fail_tree_roots;
} }
...@@ -2905,8 +2903,7 @@ int open_ctree(struct super_block *sb, ...@@ -2905,8 +2903,7 @@ int open_ctree(struct super_block *sb,
generation); generation);
if (IS_ERR(tree_root->node) || if (IS_ERR(tree_root->node) ||
!extent_buffer_uptodate(tree_root->node)) { !extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", btrfs_warn(fs_info, "failed to read tree root");
sb->s_id);
if (!IS_ERR(tree_root->node)) if (!IS_ERR(tree_root->node))
free_extent_buffer(tree_root->node); free_extent_buffer(tree_root->node);
tree_root->node = NULL; tree_root->node = NULL;
...@@ -2938,20 +2935,19 @@ int open_ctree(struct super_block *sb, ...@@ -2938,20 +2935,19 @@ int open_ctree(struct super_block *sb,
ret = btrfs_recover_balance(fs_info); ret = btrfs_recover_balance(fs_info);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: failed to recover balance\n"); btrfs_err(fs_info, "failed to recover balance: %d", ret);
goto fail_block_groups; goto fail_block_groups;
} }
ret = btrfs_init_dev_stats(fs_info); ret = btrfs_init_dev_stats(fs_info);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n", btrfs_err(fs_info, "failed to init dev_stats: %d", ret);
ret);
goto fail_block_groups; goto fail_block_groups;
} }
ret = btrfs_init_dev_replace(fs_info); ret = btrfs_init_dev_replace(fs_info);
if (ret) { if (ret) {
pr_err("BTRFS: failed to init dev_replace: %d\n", ret); btrfs_err(fs_info, "failed to init dev_replace: %d", ret);
goto fail_block_groups; goto fail_block_groups;
} }
...@@ -2959,31 +2955,33 @@ int open_ctree(struct super_block *sb, ...@@ -2959,31 +2955,33 @@ int open_ctree(struct super_block *sb,
ret = btrfs_sysfs_add_fsid(fs_devices, NULL); ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
if (ret) { if (ret) {
pr_err("BTRFS: failed to init sysfs fsid interface: %d\n", ret); btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
ret);
goto fail_block_groups; goto fail_block_groups;
} }
ret = btrfs_sysfs_add_device(fs_devices); ret = btrfs_sysfs_add_device(fs_devices);
if (ret) { if (ret) {
pr_err("BTRFS: failed to init sysfs device interface: %d\n", ret); btrfs_err(fs_info, "failed to init sysfs device interface: %d",
ret);
goto fail_fsdev_sysfs; goto fail_fsdev_sysfs;
} }
ret = btrfs_sysfs_add_mounted(fs_info); ret = btrfs_sysfs_add_mounted(fs_info);
if (ret) { if (ret) {
pr_err("BTRFS: failed to init sysfs interface: %d\n", ret); btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
goto fail_fsdev_sysfs; goto fail_fsdev_sysfs;
} }
ret = btrfs_init_space_info(fs_info); ret = btrfs_init_space_info(fs_info);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret); btrfs_err(fs_info, "failed to initialize space info: %d", ret);
goto fail_sysfs; goto fail_sysfs;
} }
ret = btrfs_read_block_groups(fs_info->extent_root); ret = btrfs_read_block_groups(fs_info->extent_root);
if (ret) { if (ret) {
printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); btrfs_err(fs_info, "failed to read block groups: %d", ret);
goto fail_sysfs; goto fail_sysfs;
} }
fs_info->num_tolerated_disk_barrier_failures = fs_info->num_tolerated_disk_barrier_failures =
...@@ -2991,12 +2989,20 @@ int open_ctree(struct super_block *sb, ...@@ -2991,12 +2989,20 @@ int open_ctree(struct super_block *sb,
if (fs_info->fs_devices->missing_devices > if (fs_info->fs_devices->missing_devices >
fs_info->num_tolerated_disk_barrier_failures && fs_info->num_tolerated_disk_barrier_failures &&
!(sb->s_flags & MS_RDONLY)) { !(sb->s_flags & MS_RDONLY)) {
pr_warn("BTRFS: missing devices(%llu) exceeds the limit(%d), writeable mount is not allowed\n", btrfs_warn(fs_info,
"missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed",
fs_info->fs_devices->missing_devices, fs_info->fs_devices->missing_devices,
fs_info->num_tolerated_disk_barrier_failures); fs_info->num_tolerated_disk_barrier_failures);
goto fail_sysfs; goto fail_sysfs;
} }
/*
* Hold the cleaner_mutex thread here so that we don't block
* for a long time on btrfs_recover_relocation. cleaner_kthread
* will wait for us to finish mounting the filesystem.
*/
mutex_lock(&fs_info->cleaner_mutex);
cleaner_mutex_locked = true;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner"); "btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread)) if (IS_ERR(fs_info->cleaner_kthread))
...@@ -3011,8 +3017,7 @@ int open_ctree(struct super_block *sb, ...@@ -3011,8 +3017,7 @@ int open_ctree(struct super_block *sb,
if (!btrfs_test_opt(tree_root, SSD) && if (!btrfs_test_opt(tree_root, SSD) &&
!btrfs_test_opt(tree_root, NOSSD) && !btrfs_test_opt(tree_root, NOSSD) &&
!fs_info->fs_devices->rotating) { !fs_info->fs_devices->rotating) {
printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD " btrfs_info(fs_info, "detected SSD devices, enabling SSD mode");
"mode\n");
btrfs_set_opt(fs_info->mount_opt, SSD); btrfs_set_opt(fs_info->mount_opt, SSD);
} }
...@@ -3030,8 +3035,9 @@ int open_ctree(struct super_block *sb, ...@@ -3030,8 +3035,9 @@ int open_ctree(struct super_block *sb,
1 : 0, 1 : 0,
fs_info->check_integrity_print_mask); fs_info->check_integrity_print_mask);
if (ret) if (ret)
printk(KERN_WARNING "BTRFS: failed to initialize" btrfs_warn(fs_info,
" integrity check module %s\n", sb->s_id); "failed to initialize integrity check module: %d",
ret);
} }
#endif #endif
ret = btrfs_read_qgroup_config(fs_info); ret = btrfs_read_qgroup_config(fs_info);
...@@ -3056,17 +3062,17 @@ int open_ctree(struct super_block *sb, ...@@ -3056,17 +3062,17 @@ int open_ctree(struct super_block *sb,
ret = btrfs_cleanup_fs_roots(fs_info); ret = btrfs_cleanup_fs_roots(fs_info);
if (ret) if (ret)
goto fail_qgroup; goto fail_qgroup;
/* We locked cleaner_mutex before creating cleaner_kthread. */
mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(tree_root); ret = btrfs_recover_relocation(tree_root);
mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) { if (ret < 0) {
printk(KERN_WARNING btrfs_warn(fs_info, "failed to recover relocation: %d",
"BTRFS: failed to recover relocation\n"); ret);
err = -EINVAL; err = -EINVAL;
goto fail_qgroup; goto fail_qgroup;
} }
} }
mutex_unlock(&fs_info->cleaner_mutex);
cleaner_mutex_locked = false;
location.objectid = BTRFS_FS_TREE_OBJECTID; location.objectid = BTRFS_FS_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY; location.type = BTRFS_ROOT_ITEM_KEY;
...@@ -3083,11 +3089,11 @@ int open_ctree(struct super_block *sb, ...@@ -3083,11 +3089,11 @@ int open_ctree(struct super_block *sb,
if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
pr_info("BTRFS: creating free space tree\n"); btrfs_info(fs_info, "creating free space tree");
ret = btrfs_create_free_space_tree(fs_info); ret = btrfs_create_free_space_tree(fs_info);
if (ret) { if (ret) {
pr_warn("BTRFS: failed to create free space tree %d\n", btrfs_warn(fs_info,
ret); "failed to create free space tree: %d", ret);
close_ctree(tree_root); close_ctree(tree_root);
return ret; return ret;
} }
...@@ -3104,14 +3110,14 @@ int open_ctree(struct super_block *sb, ...@@ -3104,14 +3110,14 @@ int open_ctree(struct super_block *sb,
ret = btrfs_resume_balance_async(fs_info); ret = btrfs_resume_balance_async(fs_info);
if (ret) { if (ret) {
printk(KERN_WARNING "BTRFS: failed to resume balance\n"); btrfs_warn(fs_info, "failed to resume balance: %d", ret);
close_ctree(tree_root); close_ctree(tree_root);
return ret; return ret;
} }
ret = btrfs_resume_dev_replace_async(fs_info); ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) { if (ret) {
pr_warn("BTRFS: failed to resume dev_replace\n"); btrfs_warn(fs_info, "failed to resume device replace: %d", ret);
close_ctree(tree_root); close_ctree(tree_root);
return ret; return ret;
} }
...@@ -3120,33 +3126,33 @@ int open_ctree(struct super_block *sb, ...@@ -3120,33 +3126,33 @@ int open_ctree(struct super_block *sb,
if (btrfs_test_opt(tree_root, CLEAR_CACHE) && if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
pr_info("BTRFS: clearing free space tree\n"); btrfs_info(fs_info, "clearing free space tree");
ret = btrfs_clear_free_space_tree(fs_info); ret = btrfs_clear_free_space_tree(fs_info);
if (ret) { if (ret) {
pr_warn("BTRFS: failed to clear free space tree %d\n", btrfs_warn(fs_info,
ret); "failed to clear free space tree: %d", ret);
close_ctree(tree_root); close_ctree(tree_root);
return ret; return ret;
} }
} }
if (!fs_info->uuid_root) { if (!fs_info->uuid_root) {
pr_info("BTRFS: creating UUID tree\n"); btrfs_info(fs_info, "creating UUID tree");
ret = btrfs_create_uuid_tree(fs_info); ret = btrfs_create_uuid_tree(fs_info);
if (ret) { if (ret) {
pr_warn("BTRFS: failed to create the UUID tree %d\n", btrfs_warn(fs_info,
ret); "failed to create the UUID tree: %d", ret);
close_ctree(tree_root); close_ctree(tree_root);
return ret; return ret;
} }
} else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) || } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
fs_info->generation != fs_info->generation !=
btrfs_super_uuid_tree_generation(disk_super)) { btrfs_super_uuid_tree_generation(disk_super)) {
pr_info("BTRFS: checking UUID tree\n"); btrfs_info(fs_info, "checking UUID tree");
ret = btrfs_check_uuid_tree(fs_info); ret = btrfs_check_uuid_tree(fs_info);
if (ret) { if (ret) {
pr_warn("BTRFS: failed to check the UUID tree %d\n", btrfs_warn(fs_info,
ret); "failed to check the UUID tree: %d", ret);
close_ctree(tree_root); close_ctree(tree_root);
return ret; return ret;
} }
...@@ -3180,6 +3186,10 @@ int open_ctree(struct super_block *sb, ...@@ -3180,6 +3186,10 @@ int open_ctree(struct super_block *sb,
filemap_write_and_wait(fs_info->btree_inode->i_mapping); filemap_write_and_wait(fs_info->btree_inode->i_mapping);
fail_sysfs: fail_sysfs:
if (cleaner_mutex_locked) {
mutex_unlock(&fs_info->cleaner_mutex);
cleaner_mutex_locked = false;
}
btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_mounted(fs_info);
fail_fsdev_sysfs: fail_fsdev_sysfs:
...@@ -3646,7 +3656,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) ...@@ -3646,7 +3656,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (ret) { if (ret) {
mutex_unlock( mutex_unlock(
&root->fs_info->fs_devices->device_list_mutex); &root->fs_info->fs_devices->device_list_mutex);
btrfs_std_error(root->fs_info, ret, btrfs_handle_fs_error(root->fs_info, ret,
"errors while submitting device barriers."); "errors while submitting device barriers.");
return ret; return ret;
} }
...@@ -3686,7 +3696,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) ...@@ -3686,7 +3696,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
/* FUA is masked off if unsupported and can't be the reason */ /* FUA is masked off if unsupported and can't be the reason */
btrfs_std_error(root->fs_info, -EIO, btrfs_handle_fs_error(root->fs_info, -EIO,
"%d errors while writing supers", total_errors); "%d errors while writing supers", total_errors);
return -EIO; return -EIO;
} }
...@@ -3704,7 +3714,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) ...@@ -3704,7 +3714,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
} }
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (total_errors > max_errors) { if (total_errors > max_errors) {
btrfs_std_error(root->fs_info, -EIO, btrfs_handle_fs_error(root->fs_info, -EIO,
"%d errors while writing supers", total_errors); "%d errors while writing supers", total_errors);
return -EIO; return -EIO;
} }
......
...@@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) ...@@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
return readonly; return readonly;
} }
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct btrfs_block_group_cache *bg;
bool ret = true;
bg = btrfs_lookup_block_group(fs_info, bytenr);
if (!bg)
return false;
spin_lock(&bg->lock);
if (bg->ro)
ret = false;
else
atomic_inc(&bg->nocow_writers);
spin_unlock(&bg->lock);
/* no put on block group, done by btrfs_dec_nocow_writers */
if (!ret)
btrfs_put_block_group(bg);
return ret;
}
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
struct btrfs_block_group_cache *bg;
bg = btrfs_lookup_block_group(fs_info, bytenr);
ASSERT(bg);
if (atomic_dec_and_test(&bg->nocow_writers))
wake_up_atomic_t(&bg->nocow_writers);
/*
* Once for our lookup and once for the lookup done by a previous call
* to btrfs_inc_nocow_writers()
*/
btrfs_put_block_group(bg);
btrfs_put_block_group(bg);
}
static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a)
{
schedule();
return 0;
}
void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
{
wait_on_atomic_t(&bg->nocow_writers,
btrfs_wait_nocow_writers_atomic_t,
TASK_UNINTERRUPTIBLE);
}
static const char *alloc_name(u64 flags) static const char *alloc_name(u64 flags)
{ {
switch (flags) { switch (flags) {
...@@ -4141,7 +4194,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes) ...@@ -4141,7 +4194,7 @@ int btrfs_alloc_data_chunk_ondemand(struct inode *inode, u64 bytes)
if (need_commit > 0) { if (need_commit > 0) {
btrfs_start_delalloc_roots(fs_info, 0, -1); btrfs_start_delalloc_roots(fs_info, 0, -1);
btrfs_wait_ordered_roots(fs_info, -1); btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
} }
trans = btrfs_join_transaction(root); trans = btrfs_join_transaction(root);
...@@ -4583,7 +4636,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, ...@@ -4583,7 +4636,8 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
*/ */
btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); btrfs_start_delalloc_roots(root->fs_info, 0, nr_items);
if (!current->journal_info) if (!current->journal_info)
btrfs_wait_ordered_roots(root->fs_info, nr_items); btrfs_wait_ordered_roots(root->fs_info, nr_items,
0, (u64)-1);
} }
} }
...@@ -4620,7 +4674,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, ...@@ -4620,7 +4674,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
/* Calc the number of the pages we need flush for space reservation */ /* Calc the number of the pages we need flush for space reservation */
items = calc_reclaim_items_nr(root, to_reclaim); items = calc_reclaim_items_nr(root, to_reclaim);
to_reclaim = items * EXTENT_SIZE_PER_ITEM; to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
trans = (struct btrfs_trans_handle *)current->journal_info; trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv; block_rsv = &root->fs_info->delalloc_block_rsv;
...@@ -4632,7 +4686,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, ...@@ -4632,7 +4686,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
if (trans) if (trans)
return; return;
if (wait_ordered) if (wait_ordered)
btrfs_wait_ordered_roots(root->fs_info, items); btrfs_wait_ordered_roots(root->fs_info, items,
0, (u64)-1);
return; return;
} }
...@@ -4671,7 +4726,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, ...@@ -4671,7 +4726,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops++; loops++;
if (wait_ordered && !trans) { if (wait_ordered && !trans) {
btrfs_wait_ordered_roots(root->fs_info, items); btrfs_wait_ordered_roots(root->fs_info, items,
0, (u64)-1);
} else { } else {
time_left = schedule_timeout_killable(1); time_left = schedule_timeout_killable(1);
if (time_left) if (time_left)
...@@ -6172,6 +6228,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log, ...@@ -6172,6 +6228,57 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
return 0; return 0;
} }
static void
btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
{
atomic_inc(&bg->reservations);
}
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start)
{
struct btrfs_block_group_cache *bg;
bg = btrfs_lookup_block_group(fs_info, start);
ASSERT(bg);
if (atomic_dec_and_test(&bg->reservations))
wake_up_atomic_t(&bg->reservations);
btrfs_put_block_group(bg);
}
static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a)
{
schedule();
return 0;
}
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
{
struct btrfs_space_info *space_info = bg->space_info;
ASSERT(bg->ro);
if (!(bg->flags & BTRFS_BLOCK_GROUP_DATA))
return;
/*
* Our block group is read only but before we set it to read only,
* some task might have had allocated an extent from it already, but it
* has not yet created a respective ordered extent (and added it to a
* root's list of ordered extents).
* Therefore wait for any task currently allocating extents, since the
* block group's reservations counter is incremented while a read lock
* on the groups' semaphore is held and decremented after releasing
* the read access on that semaphore and creating the ordered extent.
*/
down_write(&space_info->groups_sem);
up_write(&space_info->groups_sem);
wait_on_atomic_t(&bg->reservations,
btrfs_wait_bg_reservations_atomic_t,
TASK_UNINTERRUPTIBLE);
}
/** /**
* btrfs_update_reserved_bytes - update the block_group and space info counters * btrfs_update_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating * @cache: The cache we are manipulating
...@@ -7025,36 +7132,35 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group, ...@@ -7025,36 +7132,35 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
int delalloc) int delalloc)
{ {
struct btrfs_block_group_cache *used_bg = NULL; struct btrfs_block_group_cache *used_bg = NULL;
bool locked = false;
again:
spin_lock(&cluster->refill_lock); spin_lock(&cluster->refill_lock);
if (locked) { while (1) {
if (used_bg == cluster->block_group) used_bg = cluster->block_group;
if (!used_bg)
return NULL;
if (used_bg == block_group)
return used_bg; return used_bg;
up_read(&used_bg->data_rwsem); btrfs_get_block_group(used_bg);
btrfs_put_block_group(used_bg);
}
used_bg = cluster->block_group; if (!delalloc)
if (!used_bg) return used_bg;
return NULL;
if (used_bg == block_group) if (down_read_trylock(&used_bg->data_rwsem))
return used_bg; return used_bg;
btrfs_get_block_group(used_bg); spin_unlock(&cluster->refill_lock);
if (!delalloc) down_read(&used_bg->data_rwsem);
return used_bg;
if (down_read_trylock(&used_bg->data_rwsem)) spin_lock(&cluster->refill_lock);
return used_bg; if (used_bg == cluster->block_group)
return used_bg;
spin_unlock(&cluster->refill_lock); up_read(&used_bg->data_rwsem);
down_read(&used_bg->data_rwsem); btrfs_put_block_group(used_bg);
locked = true; }
goto again;
} }
static inline void static inline void
...@@ -7431,6 +7537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, ...@@ -7431,6 +7537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
btrfs_add_free_space(block_group, offset, num_bytes); btrfs_add_free_space(block_group, offset, num_bytes);
goto loop; goto loop;
} }
btrfs_inc_block_group_reservations(block_group);
/* we are all good, lets return */ /* we are all good, lets return */
ins->objectid = search_start; ins->objectid = search_start;
...@@ -7612,8 +7719,10 @@ int btrfs_reserve_extent(struct btrfs_root *root, ...@@ -7612,8 +7719,10 @@ int btrfs_reserve_extent(struct btrfs_root *root,
WARN_ON(num_bytes < root->sectorsize); WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
flags, delalloc); flags, delalloc);
if (!ret && !is_data) {
if (ret == -ENOSPC) { btrfs_dec_block_group_reservations(root->fs_info,
ins->objectid);
} else if (ret == -ENOSPC) {
if (!final_tried && ins->offset) { if (!final_tried && ins->offset) {
num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize); num_bytes = round_down(num_bytes, root->sectorsize);
...@@ -9058,7 +9167,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, ...@@ -9058,7 +9167,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
if (!for_reloc && root_dropped == false) if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root); btrfs_add_dead_root(root);
if (err && err != -EAGAIN) if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err, NULL); btrfs_handle_fs_error(root->fs_info, err, NULL);
return err; return err;
} }
......
...@@ -3200,14 +3200,10 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, ...@@ -3200,14 +3200,10 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret; return ret;
} }
static noinline void update_nr_written(struct page *page, static void update_nr_written(struct page *page, struct writeback_control *wbc,
struct writeback_control *wbc, unsigned long nr_written)
unsigned long nr_written)
{ {
wbc->nr_to_write -= nr_written; wbc->nr_to_write -= nr_written;
if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
page->mapping->writeback_index = page->index + nr_written;
} }
/* /*
...@@ -3368,6 +3364,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, ...@@ -3368,6 +3364,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
while (cur <= end) { while (cur <= end) {
u64 em_end; u64 em_end;
unsigned long max_nr;
if (cur >= i_size) { if (cur >= i_size) {
if (tree->ops && tree->ops->writepage_end_io_hook) if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, cur, tree->ops->writepage_end_io_hook(page, cur,
...@@ -3423,32 +3421,23 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, ...@@ -3423,32 +3421,23 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
continue; continue;
} }
if (tree->ops && tree->ops->writepage_io_hook) { max_nr = (i_size >> PAGE_SHIFT) + 1;
ret = tree->ops->writepage_io_hook(page, cur,
cur + iosize - 1); set_range_writeback(tree, cur, cur + iosize - 1);
} else { if (!PageWriteback(page)) {
ret = 0; btrfs_err(BTRFS_I(inode)->root->fs_info,
"page %lu not writeback, cur %llu end %llu",
page->index, cur, end);
} }
if (ret) {
SetPageError(page);
} else {
unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1;
set_range_writeback(tree, cur, cur + iosize - 1); ret = submit_extent_page(write_flags, tree, wbc, page,
if (!PageWriteback(page)) { sector, iosize, pg_offset,
btrfs_err(BTRFS_I(inode)->root->fs_info, bdev, &epd->bio, max_nr,
"page %lu not writeback, cur %llu end %llu", end_bio_extent_writepage,
page->index, cur, end); 0, 0, 0, false);
} if (ret)
SetPageError(page);
ret = submit_extent_page(write_flags, tree, wbc, page,
sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
0, 0, 0, false);
if (ret)
SetPageError(page);
}
cur = cur + iosize; cur = cur + iosize;
pg_offset += iosize; pg_offset += iosize;
nr++; nr++;
...@@ -3920,12 +3909,13 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -3920,12 +3909,13 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int ret = 0; int ret = 0;
int done = 0; int done = 0;
int err = 0;
int nr_to_write_done = 0; int nr_to_write_done = 0;
struct pagevec pvec; struct pagevec pvec;
int nr_pages; int nr_pages;
pgoff_t index; pgoff_t index;
pgoff_t end; /* Inclusive */ pgoff_t end; /* Inclusive */
pgoff_t done_index;
int range_whole = 0;
int scanned = 0; int scanned = 0;
int tag; int tag;
...@@ -3948,6 +3938,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -3948,6 +3938,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
} else { } else {
index = wbc->range_start >> PAGE_SHIFT; index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
scanned = 1; scanned = 1;
} }
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
...@@ -3957,6 +3949,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -3957,6 +3949,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
retry: retry:
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end); tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && !nr_to_write_done && (index <= end) && while (!done && !nr_to_write_done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
...@@ -3966,6 +3959,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -3966,6 +3959,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i]; struct page *page = pvec.pages[i];
done_index = page->index;
/* /*
* At this point we hold neither mapping->tree_lock nor * At this point we hold neither mapping->tree_lock nor
* lock on the page itself: the page may be truncated or * lock on the page itself: the page may be truncated or
...@@ -4007,8 +4001,20 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -4007,8 +4001,20 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
unlock_page(page); unlock_page(page);
ret = 0; ret = 0;
} }
if (!err && ret < 0) if (ret < 0) {
err = ret; /*
* done_index is set past this page,
* so media errors will not choke
* background writeout for the entire
* file. This has consequences for
* range_cyclic semantics (ie. it may
* not be suitable for data integrity
* writeout).
*/
done_index = page->index + 1;
done = 1;
break;
}
/* /*
* the filesystem may choose to bump up nr_to_write. * the filesystem may choose to bump up nr_to_write.
...@@ -4020,7 +4026,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -4020,7 +4026,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
pagevec_release(&pvec); pagevec_release(&pvec);
cond_resched(); cond_resched();
} }
if (!scanned && !done && !err) { if (!scanned && !done) {
/* /*
* We hit the last page and there is more work to be done: wrap * We hit the last page and there is more work to be done: wrap
* back to the start of the file * back to the start of the file
...@@ -4029,8 +4035,12 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, ...@@ -4029,8 +4035,12 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
index = 0; index = 0;
goto retry; goto retry;
} }
if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
mapping->writeback_index = done_index;
btrfs_add_delayed_iput(inode); btrfs_add_delayed_iput(inode);
return err; return ret;
} }
static void flush_epd_write_bio(struct extent_page_data *epd) static void flush_epd_write_bio(struct extent_page_data *epd)
...@@ -4822,7 +4832,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, ...@@ -4822,7 +4832,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
return NULL; return NULL;
eb->fs_info = fs_info; eb->fs_info = fs_info;
again: again:
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); ret = radix_tree_preload(GFP_NOFS);
if (ret) if (ret)
goto free_eb; goto free_eb;
spin_lock(&fs_info->buffer_lock); spin_lock(&fs_info->buffer_lock);
...@@ -4923,7 +4933,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, ...@@ -4923,7 +4933,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (uptodate) if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
again: again:
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); ret = radix_tree_preload(GFP_NOFS);
if (ret) if (ret)
goto free_eb; goto free_eb;
......
...@@ -71,7 +71,6 @@ struct extent_io_ops { ...@@ -71,7 +71,6 @@ struct extent_io_ops {
u64 start, u64 end, int *page_started, u64 start, u64 end, int *page_started,
unsigned long *nr_written); unsigned long *nr_written);
int (*writepage_start_hook)(struct page *page, u64 start, u64 end); int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
extent_submit_bio_hook_t *submit_bio_hook; extent_submit_bio_hook_t *submit_bio_hook;
int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset, int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
size_t size, struct bio *bio, size_t size, struct bio *bio,
......
...@@ -1696,7 +1696,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1696,7 +1696,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
btrfs_end_write_no_snapshoting(root); btrfs_end_write_no_snapshoting(root);
btrfs_delalloc_release_metadata(inode, release_bytes); btrfs_delalloc_release_metadata(inode, release_bytes);
} else { } else {
btrfs_delalloc_release_space(inode, pos, release_bytes); btrfs_delalloc_release_space(inode,
round_down(pos, root->sectorsize),
release_bytes);
} }
} }
...@@ -2952,7 +2954,7 @@ const struct file_operations btrfs_file_operations = { ...@@ -2952,7 +2954,7 @@ const struct file_operations btrfs_file_operations = {
.fallocate = btrfs_fallocate, .fallocate = btrfs_fallocate,
.unlocked_ioctl = btrfs_ioctl, .unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_ioctl, .compat_ioctl = btrfs_compat_ioctl,
#endif #endif
.copy_file_range = btrfs_copy_file_range, .copy_file_range = btrfs_copy_file_range,
.clone_file_range = btrfs_clone_file_range, .clone_file_range = btrfs_clone_file_range,
......
...@@ -157,7 +157,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, ...@@ -157,7 +157,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
*/ */
if (!btrfs_find_name_in_ext_backref(path, ref_objectid, if (!btrfs_find_name_in_ext_backref(path, ref_objectid,
name, name_len, &extref)) { name, name_len, &extref)) {
btrfs_std_error(root->fs_info, -ENOENT, NULL); btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
ret = -EROFS; ret = -EROFS;
goto out; goto out;
} }
......
此差异已折叠。
...@@ -125,10 +125,10 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags) ...@@ -125,10 +125,10 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
if (flags & BTRFS_INODE_NODATACOW) if (flags & BTRFS_INODE_NODATACOW)
iflags |= FS_NOCOW_FL; iflags |= FS_NOCOW_FL;
if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) if (flags & BTRFS_INODE_NOCOMPRESS)
iflags |= FS_COMPR_FL;
else if (flags & BTRFS_INODE_NOCOMPRESS)
iflags |= FS_NOCOMP_FL; iflags |= FS_NOCOMP_FL;
else if (flags & BTRFS_INODE_COMPRESS)
iflags |= FS_COMPR_FL;
return iflags; return iflags;
} }
...@@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir, ...@@ -439,7 +439,7 @@ static noinline int create_subvol(struct inode *dir,
{ {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
struct btrfs_key key; struct btrfs_key key;
struct btrfs_root_item root_item; struct btrfs_root_item *root_item;
struct btrfs_inode_item *inode_item; struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf; struct extent_buffer *leaf;
struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *root = BTRFS_I(dir)->root;
...@@ -455,16 +455,22 @@ static noinline int create_subvol(struct inode *dir, ...@@ -455,16 +455,22 @@ static noinline int create_subvol(struct inode *dir,
u64 qgroup_reserved; u64 qgroup_reserved;
uuid_le new_uuid; uuid_le new_uuid;
root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
if (!root_item)
return -ENOMEM;
ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
if (ret) if (ret)
return ret; goto fail_free;
/* /*
* Don't create subvolume whose level is not zero. Or qgroup will be * Don't create subvolume whose level is not zero. Or qgroup will be
* screwed up since it assume subvolme qgroup's level to be 0. * screwed up since it assume subvolme qgroup's level to be 0.
*/ */
if (btrfs_qgroup_level(objectid)) if (btrfs_qgroup_level(objectid)) {
return -ENOSPC; ret = -ENOSPC;
goto fail_free;
}
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/* /*
...@@ -474,14 +480,14 @@ static noinline int create_subvol(struct inode *dir, ...@@ -474,14 +480,14 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
8, &qgroup_reserved, false); 8, &qgroup_reserved, false);
if (ret) if (ret)
return ret; goto fail_free;
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);
btrfs_subvolume_release_metadata(root, &block_rsv, btrfs_subvolume_release_metadata(root, &block_rsv,
qgroup_reserved); qgroup_reserved);
return ret; goto fail_free;
} }
trans->block_rsv = &block_rsv; trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size; trans->bytes_reserved = block_rsv.size;
...@@ -509,47 +515,45 @@ static noinline int create_subvol(struct inode *dir, ...@@ -509,47 +515,45 @@ static noinline int create_subvol(struct inode *dir,
BTRFS_UUID_SIZE); BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf); btrfs_mark_buffer_dirty(leaf);
memset(&root_item, 0, sizeof(root_item)); inode_item = &root_item->inode;
inode_item = &root_item.inode;
btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_generation(inode_item, 1);
btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_size(inode_item, 3);
btrfs_set_stack_inode_nlink(inode_item, 1); btrfs_set_stack_inode_nlink(inode_item, 1);
btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); btrfs_set_stack_inode_nbytes(inode_item, root->nodesize);
btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
btrfs_set_root_flags(&root_item, 0); btrfs_set_root_flags(root_item, 0);
btrfs_set_root_limit(&root_item, 0); btrfs_set_root_limit(root_item, 0);
btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
btrfs_set_root_bytenr(&root_item, leaf->start); btrfs_set_root_bytenr(root_item, leaf->start);
btrfs_set_root_generation(&root_item, trans->transid); btrfs_set_root_generation(root_item, trans->transid);
btrfs_set_root_level(&root_item, 0); btrfs_set_root_level(root_item, 0);
btrfs_set_root_refs(&root_item, 1); btrfs_set_root_refs(root_item, 1);
btrfs_set_root_used(&root_item, leaf->len); btrfs_set_root_used(root_item, leaf->len);
btrfs_set_root_last_snapshot(&root_item, 0); btrfs_set_root_last_snapshot(root_item, 0);
btrfs_set_root_generation_v2(&root_item, btrfs_set_root_generation_v2(root_item,
btrfs_root_generation(&root_item)); btrfs_root_generation(root_item));
uuid_le_gen(&new_uuid); uuid_le_gen(&new_uuid);
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
root_item.ctime = root_item.otime; root_item->ctime = root_item->otime;
btrfs_set_root_ctransid(&root_item, trans->transid); btrfs_set_root_ctransid(root_item, trans->transid);
btrfs_set_root_otransid(&root_item, trans->transid); btrfs_set_root_otransid(root_item, trans->transid);
btrfs_tree_unlock(leaf); btrfs_tree_unlock(leaf);
free_extent_buffer(leaf); free_extent_buffer(leaf);
leaf = NULL; leaf = NULL;
btrfs_set_root_dirid(&root_item, new_dirid); btrfs_set_root_dirid(root_item, new_dirid);
key.objectid = objectid; key.objectid = objectid;
key.offset = 0; key.offset = 0;
key.type = BTRFS_ROOT_ITEM_KEY; key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
&root_item); root_item);
if (ret) if (ret)
goto fail; goto fail;
...@@ -601,12 +605,13 @@ static noinline int create_subvol(struct inode *dir, ...@@ -601,12 +605,13 @@ static noinline int create_subvol(struct inode *dir,
BUG_ON(ret); BUG_ON(ret);
ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
root_item.uuid, BTRFS_UUID_KEY_SUBVOL, root_item->uuid, BTRFS_UUID_KEY_SUBVOL,
objectid); objectid);
if (ret) if (ret)
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
fail: fail:
kfree(root_item);
trans->block_rsv = NULL; trans->block_rsv = NULL;
trans->bytes_reserved = 0; trans->bytes_reserved = 0;
btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
...@@ -629,6 +634,10 @@ static noinline int create_subvol(struct inode *dir, ...@@ -629,6 +634,10 @@ static noinline int create_subvol(struct inode *dir,
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
} }
return ret; return ret;
fail_free:
kfree(root_item);
return ret;
} }
static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root) static void btrfs_wait_for_no_snapshoting_writes(struct btrfs_root *root)
...@@ -681,7 +690,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ...@@ -681,7 +690,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret) if (ret)
goto dec_and_free; goto dec_and_free;
btrfs_wait_ordered_extents(root, -1); btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
btrfs_init_block_rsv(&pending_snapshot->block_rsv, btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP); BTRFS_BLOCK_RSV_TEMP);
...@@ -2671,10 +2680,10 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) ...@@ -2671,10 +2680,10 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
return ret; return ret;
} }
static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
{ {
struct btrfs_root *root = BTRFS_I(file_inode(file))->root; struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
struct btrfs_ioctl_vol_args *vol_args; struct btrfs_ioctl_vol_args_v2 *vol_args;
int ret; int ret;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
...@@ -2690,7 +2699,9 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ...@@ -2690,7 +2699,9 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
goto err_drop; goto err_drop;
} }
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; /* Check for compatibility reject unknown flags */
if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
return -EOPNOTSUPP;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) { 1)) {
...@@ -2699,13 +2710,23 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ...@@ -2699,13 +2710,23 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
} }
mutex_lock(&root->fs_info->volume_mutex); mutex_lock(&root->fs_info->volume_mutex);
ret = btrfs_rm_device(root, vol_args->name); if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
ret = btrfs_rm_device(root, NULL, vol_args->devid);
} else {
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name, 0);
}
mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&root->fs_info->volume_mutex);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
if (!ret) if (!ret) {
btrfs_info(root->fs_info, "disk deleted %s",vol_args->name); if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
btrfs_info(root->fs_info, "device deleted: id %llu",
vol_args->devid);
else
btrfs_info(root->fs_info, "device deleted: %s",
vol_args->name);
}
out: out:
kfree(vol_args); kfree(vol_args);
err_drop: err_drop:
...@@ -2713,6 +2734,47 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) ...@@ -2713,6 +2734,47 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
return ret; return ret;
} }
static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
struct btrfs_ioctl_vol_args *vol_args;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
ret = mnt_want_write_file(file);
if (ret)
return ret;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out_drop_write;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
mutex_lock(&root->fs_info->volume_mutex);
ret = btrfs_rm_device(root, vol_args->name, 0);
mutex_unlock(&root->fs_info->volume_mutex);
if (!ret)
btrfs_info(root->fs_info, "disk deleted %s",vol_args->name);
kfree(vol_args);
out:
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
out_drop_write:
mnt_drop_write_file(file);
return ret;
}
static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
{ {
struct btrfs_ioctl_fs_info_args *fi_args; struct btrfs_ioctl_fs_info_args *fi_args;
...@@ -3472,13 +3534,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -3472,13 +3534,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u64 last_dest_end = destoff; u64 last_dest_end = destoff;
ret = -ENOMEM; ret = -ENOMEM;
buf = vmalloc(root->nodesize); buf = kmalloc(root->nodesize, GFP_KERNEL | __GFP_NOWARN);
if (!buf) if (!buf) {
return ret; buf = vmalloc(root->nodesize);
if (!buf)
return ret;
}
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) { if (!path) {
vfree(buf); kvfree(buf);
return ret; return ret;
} }
...@@ -3779,7 +3844,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -3779,7 +3844,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
out: out:
btrfs_free_path(path); btrfs_free_path(path);
vfree(buf); kvfree(buf);
return ret; return ret;
} }
...@@ -4380,7 +4445,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) ...@@ -4380,7 +4445,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
1)) { 1)) {
ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else { } else {
ret = btrfs_dev_replace_start(root, p); ret = btrfs_dev_replace_by_ioctl(root, p);
atomic_set( atomic_set(
&root->fs_info->mutually_exclusive_operation_running, &root->fs_info->mutually_exclusive_operation_running,
0); 0);
...@@ -4851,8 +4916,8 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) ...@@ -4851,8 +4916,8 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
/* update qgroup status and info */ /* update qgroup status and info */
err = btrfs_run_qgroups(trans, root->fs_info); err = btrfs_run_qgroups(trans, root->fs_info);
if (err < 0) if (err < 0)
btrfs_std_error(root->fs_info, ret, btrfs_handle_fs_error(root->fs_info, err,
"failed to update qgroup status and info\n"); "failed to update qgroup status and info");
err = btrfs_end_transaction(trans, root); err = btrfs_end_transaction(trans, root);
if (err && !ret) if (err && !ret)
ret = err; ret = err;
...@@ -5398,9 +5463,15 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) ...@@ -5398,9 +5463,15 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
if (ret) if (ret)
return ret; return ret;
ret = mnt_want_write_file(file);
if (ret)
return ret;
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) if (IS_ERR(trans)) {
return PTR_ERR(trans); ret = PTR_ERR(trans);
goto out_drop_write;
}
spin_lock(&root->fs_info->super_lock); spin_lock(&root->fs_info->super_lock);
newflags = btrfs_super_compat_flags(super_block); newflags = btrfs_super_compat_flags(super_block);
...@@ -5419,7 +5490,11 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) ...@@ -5419,7 +5490,11 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
btrfs_set_super_incompat_flags(super_block, newflags); btrfs_set_super_incompat_flags(super_block, newflags);
spin_unlock(&root->fs_info->super_lock); spin_unlock(&root->fs_info->super_lock);
return btrfs_commit_transaction(trans, root); ret = btrfs_commit_transaction(trans, root);
out_drop_write:
mnt_drop_write_file(file);
return ret;
} }
long btrfs_ioctl(struct file *file, unsigned int long btrfs_ioctl(struct file *file, unsigned int
...@@ -5463,6 +5538,8 @@ long btrfs_ioctl(struct file *file, unsigned int ...@@ -5463,6 +5538,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_add_dev(root, argp); return btrfs_ioctl_add_dev(root, argp);
case BTRFS_IOC_RM_DEV: case BTRFS_IOC_RM_DEV:
return btrfs_ioctl_rm_dev(file, argp); return btrfs_ioctl_rm_dev(file, argp);
case BTRFS_IOC_RM_DEV_V2:
return btrfs_ioctl_rm_dev_v2(file, argp);
case BTRFS_IOC_FS_INFO: case BTRFS_IOC_FS_INFO:
return btrfs_ioctl_fs_info(root, argp); return btrfs_ioctl_fs_info(root, argp);
case BTRFS_IOC_DEV_INFO: case BTRFS_IOC_DEV_INFO:
...@@ -5556,3 +5633,24 @@ long btrfs_ioctl(struct file *file, unsigned int ...@@ -5556,3 +5633,24 @@ long btrfs_ioctl(struct file *file, unsigned int
return -ENOTTY; return -ENOTTY;
} }
#ifdef CONFIG_COMPAT
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
case FS_IOC32_GETFLAGS:
cmd = FS_IOC_GETFLAGS;
break;
case FS_IOC32_SETFLAGS:
cmd = FS_IOC_SETFLAGS;
break;
case FS_IOC32_GETVERSION:
cmd = FS_IOC_GETVERSION;
break;
default:
return -ENOIOCTLCMD;
}
return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
...@@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work) ...@@ -661,14 +661,15 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing * wait for all the ordered extents in a root. This is done when balancing
* space between drives. * space between drives.
*/ */
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
const u64 range_start, const u64 range_len)
{ {
struct list_head splice, works; LIST_HEAD(splice);
LIST_HEAD(skipped);
LIST_HEAD(works);
struct btrfs_ordered_extent *ordered, *next; struct btrfs_ordered_extent *ordered, *next;
int count = 0; int count = 0;
const u64 range_end = range_start + range_len;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
mutex_lock(&root->ordered_extent_mutex); mutex_lock(&root->ordered_extent_mutex);
spin_lock(&root->ordered_extent_lock); spin_lock(&root->ordered_extent_lock);
...@@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) ...@@ -676,6 +677,14 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
while (!list_empty(&splice) && nr) { while (!list_empty(&splice) && nr) {
ordered = list_first_entry(&splice, struct btrfs_ordered_extent, ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
root_extent_list); root_extent_list);
if (range_end <= ordered->start ||
ordered->start + ordered->disk_len <= range_start) {
list_move_tail(&ordered->root_extent_list, &skipped);
cond_resched_lock(&root->ordered_extent_lock);
continue;
}
list_move_tail(&ordered->root_extent_list, list_move_tail(&ordered->root_extent_list,
&root->ordered_extents); &root->ordered_extents);
atomic_inc(&ordered->refs); atomic_inc(&ordered->refs);
...@@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) ...@@ -694,6 +703,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
nr--; nr--;
count++; count++;
} }
list_splice_tail(&skipped, &root->ordered_extents);
list_splice_tail(&splice, &root->ordered_extents); list_splice_tail(&splice, &root->ordered_extents);
spin_unlock(&root->ordered_extent_lock); spin_unlock(&root->ordered_extent_lock);
...@@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) ...@@ -708,7 +718,8 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
return count; return count;
} }
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len)
{ {
struct btrfs_root *root; struct btrfs_root *root;
struct list_head splice; struct list_head splice;
...@@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) ...@@ -728,7 +739,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
&fs_info->ordered_roots); &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock); spin_unlock(&fs_info->ordered_root_lock);
done = btrfs_wait_ordered_extents(root, nr); done = btrfs_wait_ordered_extents(root, nr,
range_start, range_len);
btrfs_put_fs_root(root); btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock); spin_lock(&fs_info->ordered_root_lock);
......
...@@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, ...@@ -197,8 +197,10 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered); struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len); u32 *sum, int len);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); const u64 range_start, const u64 range_len);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len);
void btrfs_get_logged_extents(struct inode *inode, void btrfs_get_logged_extents(struct inode *inode,
struct list_head *logged_list, struct list_head *logged_list,
const loff_t start, const loff_t start,
......
...@@ -2418,7 +2418,7 @@ void merge_reloc_roots(struct reloc_control *rc) ...@@ -2418,7 +2418,7 @@ void merge_reloc_roots(struct reloc_control *rc)
} }
out: out:
if (ret) { if (ret) {
btrfs_std_error(root->fs_info, ret, NULL); btrfs_handle_fs_error(root->fs_info, ret, NULL);
if (!list_empty(&reloc_roots)) if (!list_empty(&reloc_roots))
free_reloc_roots(&reloc_roots); free_reloc_roots(&reloc_roots);
...@@ -4254,12 +4254,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) ...@@ -4254,12 +4254,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
rc->block_group->key.objectid, rc->block_group->flags); rc->block_group->key.objectid, rc->block_group->flags);
ret = btrfs_start_delalloc_roots(fs_info, 0, -1); btrfs_wait_block_group_reservations(rc->block_group);
if (ret < 0) { btrfs_wait_nocow_writers(rc->block_group);
err = ret; btrfs_wait_ordered_roots(fs_info, -1,
goto out; rc->block_group->key.objectid,
} rc->block_group->key.offset);
btrfs_wait_ordered_roots(fs_info, -1);
while (1) { while (1) {
mutex_lock(&fs_info->cleaner_mutex); mutex_lock(&fs_info->cleaner_mutex);
......
...@@ -284,7 +284,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) ...@@ -284,7 +284,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
trans = btrfs_join_transaction(tree_root); trans = btrfs_join_transaction(tree_root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
err = PTR_ERR(trans); err = PTR_ERR(trans);
btrfs_std_error(tree_root->fs_info, err, btrfs_handle_fs_error(tree_root->fs_info, err,
"Failed to start trans to delete " "Failed to start trans to delete "
"orphan item"); "orphan item");
break; break;
...@@ -293,7 +293,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) ...@@ -293,7 +293,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
root_key.objectid); root_key.objectid);
btrfs_end_transaction(trans, tree_root); btrfs_end_transaction(trans, tree_root);
if (err) { if (err) {
btrfs_std_error(tree_root->fs_info, err, btrfs_handle_fs_error(tree_root->fs_info, err,
"Failed to delete root orphan " "Failed to delete root orphan "
"item"); "item");
break; break;
......
...@@ -1350,7 +1350,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, ...@@ -1350,7 +1350,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
recover->bbio = bbio; recover->bbio = bbio;
recover->map_length = mapped_length; recover->map_length = mapped_length;
BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS); nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
...@@ -2127,6 +2127,8 @@ static void scrub_missing_raid56_end_io(struct bio *bio) ...@@ -2127,6 +2127,8 @@ static void scrub_missing_raid56_end_io(struct bio *bio)
if (bio->bi_error) if (bio->bi_error)
sblock->no_io_error_seen = 0; sblock->no_io_error_seen = 0;
bio_put(bio);
btrfs_queue_work(fs_info->scrub_workers, &sblock->work); btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
} }
...@@ -2860,7 +2862,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx, ...@@ -2860,7 +2862,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
int extent_mirror_num; int extent_mirror_num;
int stop_loop = 0; int stop_loop = 0;
nsectors = map->stripe_len / root->sectorsize; nsectors = div_u64(map->stripe_len, root->sectorsize);
bitmap_len = scrub_calc_parity_bitmap_len(nsectors); bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len, sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
GFP_NOFS); GFP_NOFS);
...@@ -3070,7 +3072,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -3070,7 +3072,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int slot; int slot;
u64 nstripes; u64 nstripes;
struct extent_buffer *l; struct extent_buffer *l;
struct btrfs_key key;
u64 physical; u64 physical;
u64 logical; u64 logical;
u64 logic_end; u64 logic_end;
...@@ -3079,7 +3080,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -3079,7 +3080,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
int mirror_num; int mirror_num;
struct reada_control *reada1; struct reada_control *reada1;
struct reada_control *reada2; struct reada_control *reada2;
struct btrfs_key key_start; struct btrfs_key key;
struct btrfs_key key_end; struct btrfs_key key_end;
u64 increment = map->stripe_len; u64 increment = map->stripe_len;
u64 offset; u64 offset;
...@@ -3158,21 +3159,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ...@@ -3158,21 +3159,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
scrub_blocked_if_needed(fs_info); scrub_blocked_if_needed(fs_info);
/* FIXME it might be better to start readahead at commit root */ /* FIXME it might be better to start readahead at commit root */
key_start.objectid = logical; key.objectid = logical;
key_start.type = BTRFS_EXTENT_ITEM_KEY; key.type = BTRFS_EXTENT_ITEM_KEY;
key_start.offset = (u64)0; key.offset = (u64)0;
key_end.objectid = logic_end; key_end.objectid = logic_end;
key_end.type = BTRFS_METADATA_ITEM_KEY; key_end.type = BTRFS_METADATA_ITEM_KEY;
key_end.offset = (u64)-1; key_end.offset = (u64)-1;
reada1 = btrfs_reada_add(root, &key_start, &key_end); reada1 = btrfs_reada_add(root, &key, &key_end);
key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key_start.type = BTRFS_EXTENT_CSUM_KEY; key.type = BTRFS_EXTENT_CSUM_KEY;
key_start.offset = logical; key.offset = logical;
key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key_end.type = BTRFS_EXTENT_CSUM_KEY; key_end.type = BTRFS_EXTENT_CSUM_KEY;
key_end.offset = logic_end; key_end.offset = logic_end;
reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); reada2 = btrfs_reada_add(csum_root, &key, &key_end);
if (!IS_ERR(reada1)) if (!IS_ERR(reada1))
btrfs_reada_wait(reada1); btrfs_reada_wait(reada1);
......
...@@ -5939,6 +5939,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ...@@ -5939,6 +5939,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
u32 i; u32 i;
u64 *clone_sources_tmp = NULL; u64 *clone_sources_tmp = NULL;
int clone_sources_to_rollback = 0; int clone_sources_to_rollback = 0;
unsigned alloc_size;
int sort_clone_roots = 0; int sort_clone_roots = 0;
int index; int index;
...@@ -5978,6 +5979,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ...@@ -5978,6 +5979,12 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out; goto out;
} }
if (arg->clone_sources_count >
ULLONG_MAX / sizeof(*arg->clone_sources)) {
ret = -EINVAL;
goto out;
}
if (!access_ok(VERIFY_READ, arg->clone_sources, if (!access_ok(VERIFY_READ, arg->clone_sources,
sizeof(*arg->clone_sources) * sizeof(*arg->clone_sources) *
arg->clone_sources_count)) { arg->clone_sources_count)) {
...@@ -6022,40 +6029,53 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ...@@ -6022,40 +6029,53 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
sctx->clone_roots_cnt = arg->clone_sources_count; sctx->clone_roots_cnt = arg->clone_sources_count;
sctx->send_max_size = BTRFS_SEND_BUF_SIZE; sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
sctx->send_buf = vmalloc(sctx->send_max_size); sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN);
if (!sctx->send_buf) { if (!sctx->send_buf) {
ret = -ENOMEM; sctx->send_buf = vmalloc(sctx->send_max_size);
goto out; if (!sctx->send_buf) {
ret = -ENOMEM;
goto out;
}
} }
sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN);
if (!sctx->read_buf) { if (!sctx->read_buf) {
ret = -ENOMEM; sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
goto out; if (!sctx->read_buf) {
ret = -ENOMEM;
goto out;
}
} }
sctx->pending_dir_moves = RB_ROOT; sctx->pending_dir_moves = RB_ROOT;
sctx->waiting_dir_moves = RB_ROOT; sctx->waiting_dir_moves = RB_ROOT;
sctx->orphan_dirs = RB_ROOT; sctx->orphan_dirs = RB_ROOT;
sctx->clone_roots = vzalloc(sizeof(struct clone_root) * alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
(arg->clone_sources_count + 1));
sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
if (!sctx->clone_roots) { if (!sctx->clone_roots) {
ret = -ENOMEM; sctx->clone_roots = vzalloc(alloc_size);
goto out; if (!sctx->clone_roots) {
ret = -ENOMEM;
goto out;
}
} }
alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
if (arg->clone_sources_count) { if (arg->clone_sources_count) {
clone_sources_tmp = vmalloc(arg->clone_sources_count * clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
sizeof(*arg->clone_sources));
if (!clone_sources_tmp) { if (!clone_sources_tmp) {
ret = -ENOMEM; clone_sources_tmp = vmalloc(alloc_size);
goto out; if (!clone_sources_tmp) {
ret = -ENOMEM;
goto out;
}
} }
ret = copy_from_user(clone_sources_tmp, arg->clone_sources, ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
arg->clone_sources_count * alloc_size);
sizeof(*arg->clone_sources));
if (ret) { if (ret) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
...@@ -6089,7 +6109,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ...@@ -6089,7 +6109,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
sctx->clone_roots[i].root = clone_root; sctx->clone_roots[i].root = clone_root;
clone_sources_to_rollback = i + 1; clone_sources_to_rollback = i + 1;
} }
vfree(clone_sources_tmp); kvfree(clone_sources_tmp);
clone_sources_tmp = NULL; clone_sources_tmp = NULL;
} }
...@@ -6207,15 +6227,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) ...@@ -6207,15 +6227,15 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
btrfs_root_dec_send_in_progress(sctx->parent_root); btrfs_root_dec_send_in_progress(sctx->parent_root);
kfree(arg); kfree(arg);
vfree(clone_sources_tmp); kvfree(clone_sources_tmp);
if (sctx) { if (sctx) {
if (sctx->send_filp) if (sctx->send_filp)
fput(sctx->send_filp); fput(sctx->send_filp);
vfree(sctx->clone_roots); kvfree(sctx->clone_roots);
vfree(sctx->send_buf); kvfree(sctx->send_buf);
vfree(sctx->read_buf); kvfree(sctx->read_buf);
name_cache_free(sctx); name_cache_free(sctx);
......
...@@ -97,15 +97,6 @@ const char *btrfs_decode_error(int errno) ...@@ -97,15 +97,6 @@ const char *btrfs_decode_error(int errno)
return errstr; return errstr;
} }
static void save_error_info(struct btrfs_fs_info *fs_info)
{
/*
* today we only save the error info into ram. Long term we'll
* also send it down to the disk
*/
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
}
/* btrfs handle error by forcing the filesystem readonly */ /* btrfs handle error by forcing the filesystem readonly */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info) static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{ {
...@@ -131,11 +122,11 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) ...@@ -131,11 +122,11 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
} }
/* /*
* __btrfs_std_error decodes expected errors from the caller and * __btrfs_handle_fs_error decodes expected errors from the caller and
* invokes the approciate error response. * invokes the approciate error response.
*/ */
__cold __cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...) unsigned int line, int errno, const char *fmt, ...)
{ {
struct super_block *sb = fs_info->sb; struct super_block *sb = fs_info->sb;
...@@ -170,8 +161,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, ...@@ -170,8 +161,13 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
} }
#endif #endif
/*
* Today we only save the error info to memory. Long term we'll
* also send it down to the disk
*/
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
/* Don't go through full error handling during mount */ /* Don't go through full error handling during mount */
save_error_info(fs_info);
if (sb->s_flags & MS_BORN) if (sb->s_flags & MS_BORN)
btrfs_handle_error(fs_info); btrfs_handle_error(fs_info);
} }
...@@ -252,7 +248,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, ...@@ -252,7 +248,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
/* Wake up anybody who may be waiting on this transaction */ /* Wake up anybody who may be waiting on this transaction */
wake_up(&root->fs_info->transaction_wait); wake_up(&root->fs_info->transaction_wait);
wake_up(&root->fs_info->transaction_blocked_wait); wake_up(&root->fs_info->transaction_blocked_wait);
__btrfs_std_error(root->fs_info, function, line, errno, NULL); __btrfs_handle_fs_error(root->fs_info, function, line, errno, NULL);
} }
/* /*
* __btrfs_panic decodes unexpected, fatal errors from the caller, * __btrfs_panic decodes unexpected, fatal errors from the caller,
...@@ -1160,7 +1156,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) ...@@ -1160,7 +1156,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0; return 0;
} }
btrfs_wait_ordered_roots(fs_info, -1); btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
trans = btrfs_attach_transaction_barrier(root); trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
...@@ -1488,10 +1484,10 @@ static int setup_security_options(struct btrfs_fs_info *fs_info, ...@@ -1488,10 +1484,10 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts)); memcpy(&fs_info->security_opts, sec_opts, sizeof(*sec_opts));
} else { } else {
/* /*
* Since SELinux(the only one supports security_mnt_opts) does * Since SELinux (the only one supporting security_mnt_opts)
* NOT support changing context during remount/mount same sb, * does NOT support changing context during remount/mount of
* This must be the same or part of the same security options, * the same sb, this must be the same or part of the same
* just free it. * security options, just free it.
*/ */
security_free_mnt_opts(sec_opts); security_free_mnt_opts(sec_opts);
} }
...@@ -1669,8 +1665,8 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, ...@@ -1669,8 +1665,8 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
unsigned long old_opts) unsigned long old_opts)
{ {
/* /*
* We need cleanup all defragable inodes if the autodefragment is * We need to cleanup all defragable inodes if the autodefragment is
* close or the fs is R/O. * close or the filesystem is read only.
*/ */
if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
...@@ -2051,9 +2047,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -2051,9 +2047,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
int ret; int ret;
u64 thresh = 0; u64 thresh = 0;
int mixed = 0;
/* /*
* holding chunk_muext to avoid allocating new chunks, holding * holding chunk_mutex to avoid allocating new chunks, holding
* device_list_mutex to avoid the device being removed * device_list_mutex to avoid the device being removed
*/ */
rcu_read_lock(); rcu_read_lock();
...@@ -2076,8 +2073,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -2076,8 +2073,17 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
} }
} }
} }
if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
total_free_meta += found->disk_total - found->disk_used; /*
* Metadata in mixed block goup profiles are accounted in data
*/
if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA)
mixed = 1;
else
total_free_meta += found->disk_total -
found->disk_used;
}
total_used += found->disk_used; total_used += found->disk_used;
} }
...@@ -2090,7 +2096,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -2090,7 +2096,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
/* Account global block reserve as used, it's in logical size already */ /* Account global block reserve as used, it's in logical size already */
spin_lock(&block_rsv->lock); spin_lock(&block_rsv->lock);
buf->f_bfree -= block_rsv->size >> bits; /* Mixed block groups accounting is not byte-accurate, avoid overflow */
if (buf->f_bfree >= block_rsv->size >> bits)
buf->f_bfree -= block_rsv->size >> bits;
else
buf->f_bfree = 0;
spin_unlock(&block_rsv->lock); spin_unlock(&block_rsv->lock);
buf->f_bavail = div_u64(total_free_data, factor); buf->f_bavail = div_u64(total_free_data, factor);
...@@ -2115,7 +2125,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -2115,7 +2125,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
*/ */
thresh = 4 * 1024 * 1024; thresh = 4 * 1024 * 1024;
if (total_free_meta - thresh < block_rsv->size) if (!mixed && total_free_meta - thresh < block_rsv->size)
buf->f_bavail = 0; buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC; buf->f_type = BTRFS_SUPER_MAGIC;
......
...@@ -120,6 +120,9 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj, ...@@ -120,6 +120,9 @@ static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
if (!fs_info) if (!fs_info)
return -EPERM; return -EPERM;
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = kstrtoul(skip_spaces(buf), 0, &val); ret = kstrtoul(skip_spaces(buf), 0, &val);
if (ret) if (ret)
return ret; return ret;
...@@ -364,7 +367,13 @@ static ssize_t btrfs_label_show(struct kobject *kobj, ...@@ -364,7 +367,13 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
{ {
struct btrfs_fs_info *fs_info = to_fs_info(kobj); struct btrfs_fs_info *fs_info = to_fs_info(kobj);
char *label = fs_info->super_copy->label; char *label = fs_info->super_copy->label;
return snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label); ssize_t ret;
spin_lock(&fs_info->super_lock);
ret = snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
spin_unlock(&fs_info->super_lock);
return ret;
} }
static ssize_t btrfs_label_store(struct kobject *kobj, static ssize_t btrfs_label_store(struct kobject *kobj,
...@@ -374,6 +383,9 @@ static ssize_t btrfs_label_store(struct kobject *kobj, ...@@ -374,6 +383,9 @@ static ssize_t btrfs_label_store(struct kobject *kobj,
struct btrfs_fs_info *fs_info = to_fs_info(kobj); struct btrfs_fs_info *fs_info = to_fs_info(kobj);
size_t p_len; size_t p_len;
if (!fs_info)
return -EPERM;
if (fs_info->sb->s_flags & MS_RDONLY) if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS; return -EROFS;
......
...@@ -311,10 +311,11 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type) ...@@ -311,10 +311,11 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
* when the transaction commits * when the transaction commits
*/ */
static int record_root_in_trans(struct btrfs_trans_handle *trans, static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root) struct btrfs_root *root,
int force)
{ {
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && if ((test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
root->last_trans < trans->transid) { root->last_trans < trans->transid) || force) {
WARN_ON(root == root->fs_info->extent_root); WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node); WARN_ON(root->commit_root != root->node);
...@@ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, ...@@ -331,7 +332,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
smp_wmb(); smp_wmb();
spin_lock(&root->fs_info->fs_roots_radix_lock); spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) { if (root->last_trans == trans->transid && !force) {
spin_unlock(&root->fs_info->fs_roots_radix_lock); spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0; return 0;
} }
...@@ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, ...@@ -402,7 +403,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
return 0; return 0;
mutex_lock(&root->fs_info->reloc_mutex); mutex_lock(&root->fs_info->reloc_mutex);
record_root_in_trans(trans, root); record_root_in_trans(trans, root, 0);
mutex_unlock(&root->fs_info->reloc_mutex); mutex_unlock(&root->fs_info->reloc_mutex);
return 0; return 0;
...@@ -1310,6 +1311,97 @@ int btrfs_defrag_root(struct btrfs_root *root) ...@@ -1310,6 +1311,97 @@ int btrfs_defrag_root(struct btrfs_root *root)
return ret; return ret;
} }
/* Bisesctability fixup, remove in 4.8 */
#ifndef btrfs_std_error
#define btrfs_std_error btrfs_handle_fs_error
#endif
/*
* Do all special snapshot related qgroup dirty hack.
*
* Will do all needed qgroup inherit and dirty hack like switch commit
* roots inside one transaction and write all btree into disk, to make
* qgroup works.
*/
static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *src,
struct btrfs_root *parent,
struct btrfs_qgroup_inherit *inherit,
u64 dst_objectid)
{
struct btrfs_fs_info *fs_info = src->fs_info;
int ret;
/*
* Save some performance in the case that qgroups are not
* enabled. If this check races with the ioctl, rescan will
* kick in anyway.
*/
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_enabled) {
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return 0;
}
mutex_unlock(&fs_info->qgroup_ioctl_lock);
/*
* We are going to commit transaction, see btrfs_commit_transaction()
* comment for reason locking tree_log_mutex
*/
mutex_lock(&fs_info->tree_log_mutex);
ret = commit_fs_roots(trans, src);
if (ret)
goto out;
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
if (ret < 0)
goto out;
ret = btrfs_qgroup_account_extents(trans, fs_info);
if (ret < 0)
goto out;
/* Now qgroup are all updated, we can inherit it to new qgroups */
ret = btrfs_qgroup_inherit(trans, fs_info,
src->root_key.objectid, dst_objectid,
inherit);
if (ret < 0)
goto out;
/*
* Now we do a simplified commit transaction, which will:
* 1) commit all subvolume and extent tree
* To ensure all subvolume and extent tree have a valid
* commit_root to accounting later insert_dir_item()
* 2) write all btree blocks onto disk
* This is to make sure later btree modification will be cowed
* Or commit_root can be populated and cause wrong qgroup numbers
* In this simplified commit, we don't really care about other trees
* like chunk and root tree, as they won't affect qgroup.
* And we don't write super to avoid half committed status.
*/
ret = commit_cowonly_roots(trans, src);
if (ret)
goto out;
switch_commit_roots(trans->transaction, fs_info);
ret = btrfs_write_and_wait_transaction(trans, src);
if (ret)
btrfs_std_error(fs_info, ret,
"Error while writing out transaction for qgroup");
out:
mutex_unlock(&fs_info->tree_log_mutex);
/*
* Force parent root to be updated, as we recorded it before so its
* last_trans == cur_transid.
* Or it won't be committed again onto disk after later
* insert_dir_item()
*/
if (!ret)
record_root_in_trans(trans, parent, 1);
return ret;
}
/* /*
* new snapshots need to be created at a very specific time in the * new snapshots need to be created at a very specific time in the
* transaction commit. This does the actual creation. * transaction commit. This does the actual creation.
...@@ -1383,7 +1475,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ...@@ -1383,7 +1475,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
dentry = pending->dentry; dentry = pending->dentry;
parent_inode = pending->dir; parent_inode = pending->dir;
parent_root = BTRFS_I(parent_inode)->root; parent_root = BTRFS_I(parent_inode)->root;
record_root_in_trans(trans, parent_root); record_root_in_trans(trans, parent_root, 0);
cur_time = current_fs_time(parent_inode->i_sb); cur_time = current_fs_time(parent_inode->i_sb);
...@@ -1420,7 +1512,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ...@@ -1420,7 +1512,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail; goto fail;
} }
record_root_in_trans(trans, root); record_root_in_trans(trans, root, 0);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid); btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item); btrfs_check_and_init_root_item(new_root_item);
...@@ -1516,6 +1608,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ...@@ -1516,6 +1608,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail; goto fail;
} }
/*
* Do special qgroup accounting for snapshot, as we do some qgroup
* snapshot hack to do fast snapshot.
* To co-operate with that hack, we do hack again.
* Or snapshot will be greatly slowed down by a subtree qgroup rescan
*/
ret = qgroup_account_snapshot(trans, root, parent_root,
pending->inherit, objectid);
if (ret < 0)
goto fail;
ret = btrfs_insert_dir_item(trans, parent_root, ret = btrfs_insert_dir_item(trans, parent_root,
dentry->d_name.name, dentry->d_name.len, dentry->d_name.name, dentry->d_name.len,
parent_inode, &key, parent_inode, &key,
...@@ -1559,23 +1662,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ...@@ -1559,23 +1662,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail; goto fail;
} }
/*
* account qgroup counters before qgroup_inherit()
*/
ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
if (ret)
goto fail;
ret = btrfs_qgroup_account_extents(trans, fs_info);
if (ret)
goto fail;
ret = btrfs_qgroup_inherit(trans, fs_info,
root->root_key.objectid,
objectid, pending->inherit);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto fail;
}
fail: fail:
pending->error = ret; pending->error = ret;
dir_item_existed: dir_item_existed:
...@@ -1821,7 +1907,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) ...@@ -1821,7 +1907,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{ {
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
btrfs_wait_ordered_roots(fs_info, -1); btrfs_wait_ordered_roots(fs_info, -1, 0, (u64)-1);
} }
static inline void static inline void
...@@ -2145,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -2145,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = btrfs_write_and_wait_transaction(trans, root); ret = btrfs_write_and_wait_transaction(trans, root);
if (ret) { if (ret) {
btrfs_std_error(root->fs_info, ret, btrfs_handle_fs_error(root->fs_info, ret,
"Error while writing out transaction"); "Error while writing out transaction");
mutex_unlock(&root->fs_info->tree_log_mutex); mutex_unlock(&root->fs_info->tree_log_mutex);
goto scrub_continue; goto scrub_continue;
......
...@@ -4141,6 +4141,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, ...@@ -4141,6 +4141,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&extents); INIT_LIST_HEAD(&extents);
down_write(&BTRFS_I(inode)->dio_sem);
write_lock(&tree->lock); write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed; test_gen = root->fs_info->last_trans_committed;
...@@ -4169,13 +4170,20 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, ...@@ -4169,13 +4170,20 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
} }
list_sort(NULL, &extents, extent_cmp); list_sort(NULL, &extents, extent_cmp);
btrfs_get_logged_extents(inode, logged_list, start, end);
/* /*
* Collect any new ordered extents within the range. This is to * Some ordered extents started by fsync might have completed
* prevent logging file extent items without waiting for the disk * before we could collect them into the list logged_list, which
* location they point to being written. We do this only to deal * means they're gone, not in our logged_list nor in the inode's
* with races against concurrent lockless direct IO writes. * ordered tree. We want the application/user space to know an
* error happened while attempting to persist file data so that
* it can take proper action. If such error happened, we leave
* without writing to the log tree and the fsync must report the
* file data write error and not commit the current transaction.
*/ */
btrfs_get_logged_extents(inode, logged_list, start, end); ret = btrfs_inode_check_errors(inode);
if (ret)
ctx->io_err = ret;
process: process:
while (!list_empty(&extents)) { while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list); em = list_entry(extents.next, struct extent_map, list);
...@@ -4202,6 +4210,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, ...@@ -4202,6 +4210,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
} }
WARN_ON(!list_empty(&extents)); WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock); write_unlock(&tree->lock);
up_write(&BTRFS_I(inode)->dio_sem);
btrfs_release_path(path); btrfs_release_path(path);
return ret; return ret;
...@@ -4622,23 +4631,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ...@@ -4622,23 +4631,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
mutex_lock(&BTRFS_I(inode)->log_mutex); mutex_lock(&BTRFS_I(inode)->log_mutex);
/*
* Collect ordered extents only if we are logging data. This is to
* ensure a subsequent request to log this inode in LOG_INODE_ALL mode
* will process the ordered extents if they still exists at the time,
* because when we collect them we test and set for the flag
* BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the
* same ordered extents. The consequence for the LOG_INODE_ALL log mode
* not processing the ordered extents is that we end up logging the
* corresponding file extent items, based on the extent maps in the
* inode's extent_map_tree's modified_list, without logging the
* respective checksums (since the may still be only attached to the
* ordered extents and have not been inserted in the csum tree by
* btrfs_finish_ordered_io() yet).
*/
if (inode_only == LOG_INODE_ALL)
btrfs_get_logged_extents(inode, &logged_list, start, end);
/* /*
* a brute force approach to making sure we get the most uptodate * a brute force approach to making sure we get the most uptodate
* copies of everything. * copies of everything.
...@@ -4846,21 +4838,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ...@@ -4846,21 +4838,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
goto out_unlock; goto out_unlock;
} }
if (fast_search) { if (fast_search) {
/*
* Some ordered extents started by fsync might have completed
* before we collected the ordered extents in logged_list, which
* means they're gone, not in our logged_list nor in the inode's
* ordered tree. We want the application/user space to know an
* error happened while attempting to persist file data so that
* it can take proper action. If such error happened, we leave
* without writing to the log tree and the fsync must report the
* file data write error and not commit the current transaction.
*/
err = btrfs_inode_check_errors(inode);
if (err) {
ctx->io_err = err;
goto out_unlock;
}
ret = btrfs_log_changed_extents(trans, root, inode, dst_path, ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
&logged_list, ctx, start, end); &logged_list, ctx, start, end);
if (ret) { if (ret) {
...@@ -5158,7 +5135,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, ...@@ -5158,7 +5135,7 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
} }
ctx->log_new_dentries = false; ctx->log_new_dentries = false;
if (type == BTRFS_FT_DIR) if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
log_mode = LOG_INODE_ALL; log_mode = LOG_INODE_ALL;
btrfs_release_path(path); btrfs_release_path(path);
ret = btrfs_log_inode(trans, root, di_inode, ret = btrfs_log_inode(trans, root, di_inode,
...@@ -5278,11 +5255,16 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ...@@ -5278,11 +5255,16 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
if (IS_ERR(dir_inode)) if (IS_ERR(dir_inode))
continue; continue;
if (ctx)
ctx->log_new_dentries = false;
ret = btrfs_log_inode(trans, root, dir_inode, ret = btrfs_log_inode(trans, root, dir_inode,
LOG_INODE_ALL, 0, LLONG_MAX, ctx); LOG_INODE_ALL, 0, LLONG_MAX, ctx);
if (!ret && if (!ret &&
btrfs_must_commit_transaction(trans, dir_inode)) btrfs_must_commit_transaction(trans, dir_inode))
ret = 1; ret = 1;
if (!ret && ctx && ctx->log_new_dentries)
ret = log_new_dir_dentries(trans, root,
dir_inode, ctx);
iput(dir_inode); iput(dir_inode);
if (ret) if (ret)
goto out; goto out;
...@@ -5519,7 +5501,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ...@@ -5519,7 +5501,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
ret = walk_log_tree(trans, log_root_tree, &wc); ret = walk_log_tree(trans, log_root_tree, &wc);
if (ret) { if (ret) {
btrfs_std_error(fs_info, ret, "Failed to pin buffers while " btrfs_handle_fs_error(fs_info, ret, "Failed to pin buffers while "
"recovering log root tree."); "recovering log root tree.");
goto error; goto error;
} }
...@@ -5533,7 +5515,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ...@@ -5533,7 +5515,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
if (ret < 0) { if (ret < 0) {
btrfs_std_error(fs_info, ret, btrfs_handle_fs_error(fs_info, ret,
"Couldn't find tree log root."); "Couldn't find tree log root.");
goto error; goto error;
} }
...@@ -5551,7 +5533,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ...@@ -5551,7 +5533,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
log = btrfs_read_fs_root(log_root_tree, &found_key); log = btrfs_read_fs_root(log_root_tree, &found_key);
if (IS_ERR(log)) { if (IS_ERR(log)) {
ret = PTR_ERR(log); ret = PTR_ERR(log);
btrfs_std_error(fs_info, ret, btrfs_handle_fs_error(fs_info, ret,
"Couldn't read tree log root."); "Couldn't read tree log root.");
goto error; goto error;
} }
...@@ -5566,7 +5548,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) ...@@ -5566,7 +5548,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
free_extent_buffer(log->node); free_extent_buffer(log->node);
free_extent_buffer(log->commit_root); free_extent_buffer(log->commit_root);
kfree(log); kfree(log);
btrfs_std_error(fs_info, ret, "Couldn't read target root " btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root "
"for tree log recovery."); "for tree log recovery.");
goto error; goto error;
} }
...@@ -5652,11 +5634,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, ...@@ -5652,11 +5634,9 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* into the file. When the file is logged we check it and * into the file. When the file is logged we check it and
* don't log the parents if the file is fully on disk. * don't log the parents if the file is fully on disk.
*/ */
if (S_ISREG(inode->i_mode)) { mutex_lock(&BTRFS_I(inode)->log_mutex);
mutex_lock(&BTRFS_I(inode)->log_mutex); BTRFS_I(inode)->last_unlink_trans = trans->transid;
BTRFS_I(inode)->last_unlink_trans = trans->transid; mutex_unlock(&BTRFS_I(inode)->log_mutex);
mutex_unlock(&BTRFS_I(inode)->log_mutex);
}
/* /*
* if this directory was already logged any new * if this directory was already logged any new
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册