提交 7e33fd99 编写于 作者: J Josef Bacik 提交者: Chris Mason

Btrfs: don't take the chunk_mutex/dev_list mutex in statfs V2

Our gluster boxes get several thousand statfs() calls per second, which begins
to suck hardcore with all of the lock contention on the chunk mutex and dev list
mutex.  We don't really need to hold these things; if we have transient
weirdness with statfs() because of the chunk allocator, we don't care, so remove
this locking.

We still need the dev_list lock if you mount with -o alloc_start however, which
is a good argument for nuking that thing from orbit, but that's a patch for
another day.  Thanks,
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
上级 633c0aad
...@@ -1644,8 +1644,20 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) ...@@ -1644,8 +1644,20 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
int i = 0, nr_devices; int i = 0, nr_devices;
int ret; int ret;
/*
* We aren't under the device list lock, so this is racey-ish, but good
* enough for our purposes.
*/
nr_devices = fs_info->fs_devices->open_devices; nr_devices = fs_info->fs_devices->open_devices;
BUG_ON(!nr_devices); if (!nr_devices) {
smp_mb();
nr_devices = fs_info->fs_devices->open_devices;
ASSERT(nr_devices);
if (!nr_devices) {
*free_bytes = 0;
return 0;
}
}
devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
GFP_NOFS); GFP_NOFS);
...@@ -1670,11 +1682,17 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) ...@@ -1670,11 +1682,17 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
else else
min_stripe_size = BTRFS_STRIPE_LEN; min_stripe_size = BTRFS_STRIPE_LEN;
list_for_each_entry(device, &fs_devices->devices, dev_list) { if (fs_info->alloc_start)
mutex_lock(&fs_devices->device_list_mutex);
rcu_read_lock();
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (!device->in_fs_metadata || !device->bdev || if (!device->in_fs_metadata || !device->bdev ||
device->is_tgtdev_for_dev_replace) device->is_tgtdev_for_dev_replace)
continue; continue;
if (i >= nr_devices)
break;
avail_space = device->total_bytes - device->bytes_used; avail_space = device->total_bytes - device->bytes_used;
/* align with stripe_len */ /* align with stripe_len */
...@@ -1689,24 +1707,32 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) ...@@ -1689,24 +1707,32 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
skip_space = 1024 * 1024; skip_space = 1024 * 1024;
/* user can set the offset in fs_info->alloc_start. */ /* user can set the offset in fs_info->alloc_start. */
if (fs_info->alloc_start + BTRFS_STRIPE_LEN <= if (fs_info->alloc_start &&
device->total_bytes) fs_info->alloc_start + BTRFS_STRIPE_LEN <=
device->total_bytes) {
rcu_read_unlock();
skip_space = max(fs_info->alloc_start, skip_space); skip_space = max(fs_info->alloc_start, skip_space);
/* /*
* btrfs can not use the free space in [0, skip_space - 1], * btrfs can not use the free space in
* we must subtract it from the total. In order to implement * [0, skip_space - 1], we must subtract it from the
* it, we account the used space in this range first. * total. In order to implement it, we account the used
*/ * space in this range first.
ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1, */
&used_space); ret = btrfs_account_dev_extents_size(device, 0,
if (ret) { skip_space - 1,
kfree(devices_info); &used_space);
return ret; if (ret) {
} kfree(devices_info);
mutex_unlock(&fs_devices->device_list_mutex);
return ret;
}
/* calc the free space in [0, skip_space - 1] */ rcu_read_lock();
skip_space -= used_space;
/* calc the free space in [0, skip_space - 1] */
skip_space -= used_space;
}
/* /*
* we can use the free space in [0, skip_space - 1], subtract * we can use the free space in [0, skip_space - 1], subtract
...@@ -1725,6 +1751,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) ...@@ -1725,6 +1751,9 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
i++; i++;
} }
rcu_read_unlock();
if (fs_info->alloc_start)
mutex_unlock(&fs_devices->device_list_mutex);
nr_devices = i; nr_devices = i;
...@@ -1787,8 +1816,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1787,8 +1816,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* holding chunk_muext to avoid allocating new chunks, holding * holding chunk_muext to avoid allocating new chunks, holding
* device_list_mutex to avoid the device being removed * device_list_mutex to avoid the device being removed
*/ */
mutex_lock(&fs_info->fs_devices->device_list_mutex);
mutex_lock(&fs_info->chunk_mutex);
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(found, head, list) { list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
...@@ -1826,15 +1853,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -1826,15 +1853,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = total_free_data; buf->f_bavail = total_free_data;
ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
if (ret) { if (ret)
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return ret; return ret;
}
buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail += div_u64(total_free_data, factor);
buf->f_bavail = buf->f_bavail >> bits; buf->f_bavail = buf->f_bavail >> bits;
mutex_unlock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
buf->f_type = BTRFS_SUPER_MAGIC; buf->f_type = BTRFS_SUPER_MAGIC;
buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_bsize = dentry->d_sb->s_blocksize;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册