Commit aa2dfb37 authored by Chris Mason

Merge branch 'allocator' of...

Merge branch 'allocator' of git://git.kernel.org/pub/scm/linux/kernel/git/arne/btrfs-unstable-arne into inode_numbers
Signed-off-by: Chris Mason <chris.mason@oracle.com>
@@ -914,6 +914,32 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
         return 0;
 }
 
+/* Used to sort the devices by max_avail(descending sort) */
+static int btrfs_cmp_device_free_bytes(const void *dev_info1,
+                                       const void *dev_info2)
+{
+        if (((struct btrfs_device_info *)dev_info1)->max_avail >
+            ((struct btrfs_device_info *)dev_info2)->max_avail)
+                return -1;
+        else if (((struct btrfs_device_info *)dev_info1)->max_avail <
+                 ((struct btrfs_device_info *)dev_info2)->max_avail)
+                return 1;
+        else
+                return 0;
+}
+
+/*
+ * sort the devices by max_avail, in which max free extent size of each device
+ * is stored.(Descending Sort)
+ */
+static inline void btrfs_descending_sort_devices(
+                                        struct btrfs_device_info *devices,
+                                        size_t nr_devices)
+{
+        sort(devices, nr_devices, sizeof(struct btrfs_device_info),
+             btrfs_cmp_device_free_bytes, NULL);
+}
+
 /*
  * The helper to calc the free space on the devices that can be used to store
  * file data.
...
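The two helpers above give btrfs a reusable descending sort over struct btrfs_device_info. As a quick illustration of the comparator contract (a negative return sorts the first argument earlier), here is a minimal userspace sketch built on libc's qsort(), which takes the same comparator shape as the kernel's sort(); the struct and the values are invented for the demo:

#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct btrfs_device_info; only the sort key matters here */
struct dev_info {
        unsigned long long max_avail;
};

/* same contract as btrfs_cmp_device_free_bytes: negative sorts 'a' first */
static int cmp_desc(const void *a, const void *b)
{
        const struct dev_info *da = a, *db = b;

        if (da->max_avail > db->max_avail)
                return -1;
        if (da->max_avail < db->max_avail)
                return 1;
        return 0;
}

int main(void)
{
        struct dev_info devs[] = { {64}, {512}, {128} };
        size_t n = sizeof(devs) / sizeof(devs[0]);

        qsort(devs, n, sizeof(devs[0]), cmp_desc);

        /* prints "512 128 64": largest free extent first */
        for (size_t i = 0; i < n; i++)
                printf("%llu ", devs[i].max_avail);
        printf("\n");
        return 0;
}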
@@ -805,10 +805,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
         /* we don't want to overwrite the superblock on the drive,
          * so we make sure to start at an offset of at least 1MB
          */
-        search_start = 1024 * 1024;
-
-        if (root->fs_info->alloc_start + num_bytes <= search_end)
-                search_start = max(root->fs_info->alloc_start, search_start);
+        search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
 
         max_hole_start = search_start;
         max_hole_size = 0;
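The replacement folds the old three-line clamp into one expression: the search now always starts at whichever is larger, the alloc_start mount offset or 1MiB, so the superblock area is never handed out. The 1024ull spelling matters because the kernel's max() macro type-checks its arguments and fs_info->alloc_start is a u64. A small userspace sketch of the clamp, with invented values:

#include <stdio.h>

int main(void)
{
        unsigned long long alloc_start = 0;     /* default mount, no alloc_start= */
        unsigned long long start;

        /* same clamp as the new line, with max() spelled out */
        start = alloc_start > 1024ull * 1024 ? alloc_start : 1024ull * 1024;
        printf("%llu\n", start);        /* 1048576: skips the superblock area */

        alloc_start = 16ull * 1024 * 1024;      /* e.g. mounted with alloc_start=16M */
        start = alloc_start > 1024ull * 1024 ? alloc_start : 1024ull * 1024;
        printf("%llu\n", start);        /* 16777216 */
        return 0;
}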
@@ -2227,275 +2224,204 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
         return 0;
 }
 
-static noinline u64 chunk_bytes_by_type(u64 type, u64 calc_size,
-                                        int num_stripes, int sub_stripes)
-{
-        if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
-                return calc_size;
-        else if (type & BTRFS_BLOCK_GROUP_RAID10)
-                return calc_size * (num_stripes / sub_stripes);
-        else
-                return calc_size * num_stripes;
-}
-
-/* Used to sort the devices by max_avail(descending sort) */
-int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
-{
-        if (((struct btrfs_device_info *)dev_info1)->max_avail >
-            ((struct btrfs_device_info *)dev_info2)->max_avail)
-                return -1;
-        else if (((struct btrfs_device_info *)dev_info1)->max_avail <
-                 ((struct btrfs_device_info *)dev_info2)->max_avail)
-                return 1;
-        else
-                return 0;
-}
-
-static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
-                                 int *num_stripes, int *min_stripes,
-                                 int *sub_stripes)
-{
-        *num_stripes = 1;
-        *min_stripes = 1;
-        *sub_stripes = 0;
-
-        if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
-                *num_stripes = fs_devices->rw_devices;
-                *min_stripes = 2;
-        }
-        if (type & (BTRFS_BLOCK_GROUP_DUP)) {
-                *num_stripes = 2;
-                *min_stripes = 2;
-        }
-        if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
-                if (fs_devices->rw_devices < 2)
-                        return -ENOSPC;
-                *num_stripes = 2;
-                *min_stripes = 2;
-        }
-        if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-                *num_stripes = fs_devices->rw_devices;
-                if (*num_stripes < 4)
-                        return -ENOSPC;
-                *num_stripes &= ~(u32)1;
-                *sub_stripes = 2;
-                *min_stripes = 4;
-        }
-
-        return 0;
-}
-
-static u64 __btrfs_calc_stripe_size(struct btrfs_fs_devices *fs_devices,
-                                    u64 proposed_size, u64 type,
-                                    int num_stripes, int small_stripe)
-{
-        int min_stripe_size = 1 * 1024 * 1024;
-        u64 calc_size = proposed_size;
-        u64 max_chunk_size = calc_size;
-        int ncopies = 1;
-
-        if (type & (BTRFS_BLOCK_GROUP_RAID1 |
-                    BTRFS_BLOCK_GROUP_DUP |
-                    BTRFS_BLOCK_GROUP_RAID10))
-                ncopies = 2;
-
-        if (type & BTRFS_BLOCK_GROUP_DATA) {
-                max_chunk_size = 10 * calc_size;
-                min_stripe_size = 64 * 1024 * 1024;
-        } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
-                max_chunk_size = 256 * 1024 * 1024;
-                min_stripe_size = 32 * 1024 * 1024;
-        } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
-                calc_size = 8 * 1024 * 1024;
-                max_chunk_size = calc_size * 2;
-                min_stripe_size = 1 * 1024 * 1024;
-        }
+/*
+ * sort the devices in descending order by max_avail, total_avail
+ */
+static int btrfs_cmp_device_info(const void *a, const void *b)
+{
+        const struct btrfs_device_info *di_a = a;
+        const struct btrfs_device_info *di_b = b;
+
+        if (di_a->max_avail > di_b->max_avail)
+                return -1;
+        if (di_a->max_avail < di_b->max_avail)
+                return 1;
+        if (di_a->total_avail > di_b->total_avail)
+                return -1;
+        if (di_a->total_avail < di_b->total_avail)
+                return 1;
+        return 0;
+}
+
+static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *extent_root,
+                               struct map_lookup **map_ret,
+                               u64 *num_bytes_out, u64 *stripe_size_out,
+                               u64 start, u64 type)
+{
+        struct btrfs_fs_info *info = extent_root->fs_info;
+        struct btrfs_fs_devices *fs_devices = info->fs_devices;
+        struct list_head *cur;
+        struct map_lookup *map = NULL;
+        struct extent_map_tree *em_tree;
+        struct extent_map *em;
+        struct btrfs_device_info *devices_info = NULL;
+        u64 total_avail;
+        int num_stripes;        /* total number of stripes to allocate */
+        int sub_stripes;        /* sub_stripes info for map */
+        int dev_stripes;        /* stripes per dev */
+        int devs_max;           /* max devs to use */
+        int devs_min;           /* min devs needed */
+        int devs_increment;     /* ndevs has to be a multiple of this */
+        int ncopies;            /* how many copies to data has */
+        int ret;
+        u64 max_stripe_size;
+        u64 max_chunk_size;
+        u64 stripe_size;
+        u64 num_bytes;
+        int ndevs;
+        int i;
+        int j;
+
+        if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
+            (type & BTRFS_BLOCK_GROUP_DUP)) {
+                WARN_ON(1);
+                type &= ~BTRFS_BLOCK_GROUP_DUP;
+        }
+
+        if (list_empty(&fs_devices->alloc_list))
+                return -ENOSPC;
+
+        sub_stripes = 1;
+        dev_stripes = 1;
+        devs_increment = 1;
+        ncopies = 1;
+        devs_max = 0;   /* 0 == as many as possible */
+        devs_min = 1;
+
+        /*
+         * define the properties of each RAID type.
+         * FIXME: move this to a global table and use it in all RAID
+         * calculation code
+         */
+        if (type & (BTRFS_BLOCK_GROUP_DUP)) {
+                dev_stripes = 2;
+                ncopies = 2;
+                devs_max = 1;
+        } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
+                devs_min = 2;
+        } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
+                devs_increment = 2;
+                ncopies = 2;
+                devs_max = 2;
+                devs_min = 2;
+        } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
+                sub_stripes = 2;
+                devs_increment = 2;
+                ncopies = 2;
+                devs_min = 4;
+        } else {
+                devs_max = 1;
+        }
+
+        if (type & BTRFS_BLOCK_GROUP_DATA) {
+                max_stripe_size = 1024 * 1024 * 1024;
+                max_chunk_size = 10 * max_stripe_size;
+        } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+                max_stripe_size = 256 * 1024 * 1024;
+                max_chunk_size = max_stripe_size;
+        } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+                max_stripe_size = 8 * 1024 * 1024;
+                max_chunk_size = 2 * max_stripe_size;
+        } else {
+                printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
+                       type);
+                BUG_ON(1);
+        }
 
         /* we don't want a chunk larger than 10% of writeable space */
         max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
                              max_chunk_size);
 
-        if (calc_size * num_stripes > max_chunk_size * ncopies) {
-                calc_size = max_chunk_size * ncopies;
-                do_div(calc_size, num_stripes);
-                do_div(calc_size, BTRFS_STRIPE_LEN);
-                calc_size *= BTRFS_STRIPE_LEN;
-        }
-
-        /* we don't want tiny stripes */
-        if (!small_stripe)
-                calc_size = max_t(u64, min_stripe_size, calc_size);
-
-        /*
-         * we're about to do_div by the BTRFS_STRIPE_LEN so lets make sure
-         * we end up with something bigger than a stripe
-         */
-        calc_size = max_t(u64, calc_size, BTRFS_STRIPE_LEN);
-
-        do_div(calc_size, BTRFS_STRIPE_LEN);
-        calc_size *= BTRFS_STRIPE_LEN;
-
-        return calc_size;
-}
-
-static struct map_lookup *__shrink_map_lookup_stripes(struct map_lookup *map,
-                                                      int num_stripes)
-{
-        struct map_lookup *new;
-        size_t len = map_lookup_size(num_stripes);
-
-        BUG_ON(map->num_stripes < num_stripes);
-
-        if (map->num_stripes == num_stripes)
-                return map;
-
-        new = kmalloc(len, GFP_NOFS);
-        if (!new) {
-                /* just change map->num_stripes */
-                map->num_stripes = num_stripes;
-                return map;
-        }
-
-        memcpy(new, map, len);
-        new->num_stripes = num_stripes;
-        kfree(map);
-        return new;
-}
-
-/*
- * helper to allocate device space from btrfs_device_info, in which we stored
- * max free space information of every device. It is used when we can not
- * allocate chunks by default size.
- *
- * By this helper, we can allocate a new chunk as larger as possible.
- */
-static int __btrfs_alloc_tiny_space(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_devices *fs_devices,
-                                    struct btrfs_device_info *devices,
-                                    int nr_device, u64 type,
-                                    struct map_lookup **map_lookup,
-                                    int min_stripes, u64 *stripe_size)
-{
-        int i, index, sort_again = 0;
-        int min_devices = min_stripes;
-        u64 max_avail, min_free;
-        struct map_lookup *map = *map_lookup;
-        int ret;
-
-        if (nr_device < min_stripes)
-                return -ENOSPC;
-
-        btrfs_descending_sort_devices(devices, nr_device);
-
-        max_avail = devices[0].max_avail;
-        if (!max_avail)
-                return -ENOSPC;
-
-        for (i = 0; i < nr_device; i++) {
-                /*
-                 * if dev_offset = 0, it means the free space of this device
-                 * is less than what we need, and we didn't search max avail
-                 * extent on this device, so do it now.
-                 */
-                if (!devices[i].dev_offset) {
-                        ret = find_free_dev_extent(trans, devices[i].dev,
-                                                   max_avail,
-                                                   &devices[i].dev_offset,
-                                                   &devices[i].max_avail);
-                        if (ret != 0 && ret != -ENOSPC)
-                                return ret;
-                        sort_again = 1;
-                }
-        }
-
-        /* we update the max avail free extent of each devices, sort again */
-        if (sort_again)
-                btrfs_descending_sort_devices(devices, nr_device);
-
-        if (type & BTRFS_BLOCK_GROUP_DUP)
-                min_devices = 1;
-
-        if (!devices[min_devices - 1].max_avail)
-                return -ENOSPC;
-
-        max_avail = devices[min_devices - 1].max_avail;
-        if (type & BTRFS_BLOCK_GROUP_DUP)
-                do_div(max_avail, 2);
-
-        max_avail = __btrfs_calc_stripe_size(fs_devices, max_avail, type,
-                                             min_stripes, 1);
-        if (type & BTRFS_BLOCK_GROUP_DUP)
-                min_free = max_avail * 2;
-        else
-                min_free = max_avail;
-
-        if (min_free > devices[min_devices - 1].max_avail)
-                return -ENOSPC;
-
-        map = __shrink_map_lookup_stripes(map, min_stripes);
-        *stripe_size = max_avail;
-
-        index = 0;
-        for (i = 0; i < min_stripes; i++) {
-                map->stripes[i].dev = devices[index].dev;
-                map->stripes[i].physical = devices[index].dev_offset;
-                if (type & BTRFS_BLOCK_GROUP_DUP) {
-                        i++;
-                        map->stripes[i].dev = devices[index].dev;
-                        map->stripes[i].physical = devices[index].dev_offset +
-                                                   max_avail;
-                }
-                index++;
-        }
-        *map_lookup = map;
-
-        return 0;
-}
-
-static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *extent_root,
-                               struct map_lookup **map_ret,
-                               u64 *num_bytes, u64 *stripe_size,
-                               u64 start, u64 type)
-{
-        struct btrfs_fs_info *info = extent_root->fs_info;
-        struct btrfs_device *device = NULL;
-        struct btrfs_fs_devices *fs_devices = info->fs_devices;
-        struct list_head *cur;
-        struct map_lookup *map;
-        struct extent_map_tree *em_tree;
-        struct extent_map *em;
-        struct btrfs_device_info *devices_info;
-        struct list_head private_devs;
-        u64 calc_size = 1024 * 1024 * 1024;
-        u64 min_free;
-        u64 avail;
-        u64 dev_offset;
-        int num_stripes;
-        int min_stripes;
-        int sub_stripes;
-        int min_devices;        /* the min number of devices we need */
-        int i;
-        int ret;
-        int index;
-
-        if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
-            (type & BTRFS_BLOCK_GROUP_DUP)) {
-                WARN_ON(1);
-                type &= ~BTRFS_BLOCK_GROUP_DUP;
-        }
-
-        if (list_empty(&fs_devices->alloc_list))
-                return -ENOSPC;
-
-        ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
-                                    &min_stripes, &sub_stripes);
-        if (ret)
-                return ret;
-
-        devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
-                               GFP_NOFS);
-        if (!devices_info)
-                return -ENOMEM;
-
+        devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+                               GFP_NOFS);
+        if (!devices_info)
+                return -ENOMEM;
+
+        cur = fs_devices->alloc_list.next;
+
+        /*
+         * in the first pass through the devices list, we gather information
+         * about the available holes on each device.
+         */
+        ndevs = 0;
+        while (cur != &fs_devices->alloc_list) {
+                struct btrfs_device *device;
+                u64 max_avail;
+                u64 dev_offset;
+
+                device = list_entry(cur, struct btrfs_device, dev_alloc_list);
+
+                cur = cur->next;
+
+                if (!device->writeable) {
+                        printk(KERN_ERR
+                               "btrfs: read-only device in alloc_list\n");
+                        WARN_ON(1);
+                        continue;
+                }
+
+                if (!device->in_fs_metadata)
+                        continue;
+
+                if (device->total_bytes > device->bytes_used)
+                        total_avail = device->total_bytes - device->bytes_used;
+                else
+                        total_avail = 0;
+                /* avail is off by max(alloc_start, 1MB), but that is the same
+                 * for all devices, so it doesn't hurt the sorting later on
+                 */
+
+                ret = find_free_dev_extent(trans, device,
+                                           max_stripe_size * dev_stripes,
+                                           &dev_offset, &max_avail);
+                if (ret && ret != -ENOSPC)
+                        goto error;
+
+                if (ret == 0)
+                        max_avail = max_stripe_size * dev_stripes;
+
+                if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
+                        continue;
+
+                devices_info[ndevs].dev_offset = dev_offset;
+                devices_info[ndevs].max_avail = max_avail;
+                devices_info[ndevs].total_avail = total_avail;
+                devices_info[ndevs].dev = device;
+                ++ndevs;
+        }
+
+        /*
+         * now sort the devices by hole size / available space
+         */
+        sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+             btrfs_cmp_device_info, NULL);
+
+        /* round down to number of usable stripes */
+        ndevs -= ndevs % devs_increment;
+
+        if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
+                ret = -ENOSPC;
+                goto error;
+        }
+
+        if (devs_max && ndevs > devs_max)
+                ndevs = devs_max;
+        /*
+         * the primary goal is to maximize the number of stripes, so use as many
+         * devices as possible, even if the stripes are not maximum sized.
+         */
+        stripe_size = devices_info[ndevs-1].max_avail;
+        num_stripes = ndevs * dev_stripes;
+
+        if (stripe_size * num_stripes > max_chunk_size * ncopies) {
+                stripe_size = max_chunk_size * ncopies;
+                do_div(stripe_size, num_stripes);
+        }
+
+        do_div(stripe_size, dev_stripes);
+        do_div(stripe_size, BTRFS_STRIPE_LEN);
+        stripe_size *= BTRFS_STRIPE_LEN;
+
         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
         if (!map) {
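It may help to replay the new sizing logic with concrete numbers before the stripe map is filled in below. This standalone sketch reproduces only the arithmetic of the hunk above for a hypothetical RAID10 allocation over five devices; BTRFS_STRIPE_LEN mirrors the 64KiB stripe unit of this era, the device sizes are invented, and the 10%-of-writeable-space cap is omitted for brevity:

#include <stdio.h>

#define BTRFS_STRIPE_LEN (64ULL * 1024) /* stripe unit used by the allocator */

int main(void)
{
        /* RAID10 parameters from the property table in __btrfs_alloc_chunk */
        int dev_stripes = 1, sub_stripes = 2, devs_increment = 2;
        int ncopies = 2, devs_min = 4;

        /* pretend 5 devices survived the hole scan, already sorted by
         * max_avail (descending); the values are invented */
        unsigned long long max_avail[] = {
                900ULL << 20, 800ULL << 20, 700ULL << 20,
                650ULL << 20, 300ULL << 20,
        };
        int ndevs = 5;

        ndevs -= ndevs % devs_increment;        /* 5 -> 4: stripes come in pairs */
        if (ndevs < devs_increment * sub_stripes || ndevs < devs_min)
                return 1;                       /* would be -ENOSPC */

        int num_stripes = ndevs * dev_stripes;  /* 4 stripes */

        /* the smallest chosen hole caps the per-device stripe size */
        unsigned long long stripe_size = max_avail[ndevs - 1];  /* 650MiB */

        /* round down to a multiple of BTRFS_STRIPE_LEN */
        stripe_size /= dev_stripes;
        stripe_size = stripe_size / BTRFS_STRIPE_LEN * BTRFS_STRIPE_LEN;

        /* usable bytes: 4 stripes carrying 2 copies -> 2 * 650MiB of data */
        unsigned long long num_bytes = stripe_size * (num_stripes / ncopies);

        printf("stripes=%d stripe_size=%lluM chunk=%lluM\n",
               num_stripes, stripe_size >> 20, num_bytes >> 20);
        return 0;
}

On the real RAID10 path the four stripes hold two mirrored copies of a two-way stripe; the sketch only reproduces the size arithmetic.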
@@ -2504,85 +2430,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         }
         map->num_stripes = num_stripes;
 
-        cur = fs_devices->alloc_list.next;
-        index = 0;
-        i = 0;
-
-        calc_size = __btrfs_calc_stripe_size(fs_devices, calc_size, type,
-                                             num_stripes, 0);
-
-        if (type & BTRFS_BLOCK_GROUP_DUP) {
-                min_free = calc_size * 2;
-                min_devices = 1;
-        } else {
-                min_free = calc_size;
-                min_devices = min_stripes;
-        }
-
-        INIT_LIST_HEAD(&private_devs);
-        while (index < num_stripes) {
-                device = list_entry(cur, struct btrfs_device, dev_alloc_list);
-                BUG_ON(!device->writeable);
-                if (device->total_bytes > device->bytes_used)
-                        avail = device->total_bytes - device->bytes_used;
-                else
-                        avail = 0;
-                cur = cur->next;
-
-                if (device->in_fs_metadata && avail >= min_free) {
-                        ret = find_free_dev_extent(trans, device, min_free,
-                                                   &devices_info[i].dev_offset,
-                                                   &devices_info[i].max_avail);
-                        if (ret == 0) {
-                                list_move_tail(&device->dev_alloc_list,
-                                               &private_devs);
-                                map->stripes[index].dev = device;
-                                map->stripes[index].physical =
-                                                devices_info[i].dev_offset;
-                                index++;
-                                if (type & BTRFS_BLOCK_GROUP_DUP) {
-                                        map->stripes[index].dev = device;
-                                        map->stripes[index].physical =
-                                                devices_info[i].dev_offset +
-                                                calc_size;
-                                        index++;
-                                }
-                        } else if (ret != -ENOSPC)
-                                goto error;
-
-                        devices_info[i].dev = device;
-                        i++;
-                } else if (device->in_fs_metadata &&
-                           avail >= BTRFS_STRIPE_LEN) {
-                        devices_info[i].dev = device;
-                        devices_info[i].max_avail = avail;
-                        i++;
-                }
-
-                if (cur == &fs_devices->alloc_list)
-                        break;
-        }
-
-        list_splice(&private_devs, &fs_devices->alloc_list);
-        if (index < num_stripes) {
-                if (index >= min_stripes) {
-                        num_stripes = index;
-                        if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-                                num_stripes /= sub_stripes;
-                                num_stripes *= sub_stripes;
-                        }
-
-                        map = __shrink_map_lookup_stripes(map, num_stripes);
-                } else if (i >= min_devices) {
-                        ret = __btrfs_alloc_tiny_space(trans, fs_devices,
-                                                       devices_info, i, type,
-                                                       &map, min_stripes,
-                                                       &calc_size);
-                        if (ret)
-                                goto error;
-                } else {
-                        ret = -ENOSPC;
-                        goto error;
-                }
-        }
+        for (i = 0; i < ndevs; ++i) {
+                for (j = 0; j < dev_stripes; ++j) {
+                        int s = i * dev_stripes + j;
+                        map->stripes[s].dev = devices_info[i].dev;
+                        map->stripes[s].physical = devices_info[i].dev_offset +
+                                                   j * stripe_size;
+                }
+        }
 
         map->sector_size = extent_root->sectorsize;
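The new placement loop indexes stripes as s = i * dev_stripes + j, so multiple stripes on one device are laid out stripe_size apart. A tiny standalone sketch of that addressing for the DUP case (dev_stripes = 2 on a single device), with invented offsets:

#include <stdio.h>

int main(void)
{
        int ndevs = 1, dev_stripes = 2;         /* DUP: both copies on one dev */
        unsigned long long dev_offset = 1 << 20;        /* from the hole scan */
        unsigned long long stripe_size = 8 << 20;

        for (int i = 0; i < ndevs; ++i) {
                for (int j = 0; j < dev_stripes; ++j) {
                        int s = i * dev_stripes + j;
                        /* stripe 0 @ 1048576, stripe 1 @ 9437184 */
                        printf("stripe %d -> dev %d @ %llu\n",
                               s, i, dev_offset + j * stripe_size);
                }
        }
        return 0;
}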
@@ -2593,11 +2446,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         map->sub_stripes = sub_stripes;
 
         *map_ret = map;
-        *stripe_size = calc_size;
-        *num_bytes = chunk_bytes_by_type(type, calc_size,
-                                         map->num_stripes, sub_stripes);
+        num_bytes = stripe_size * (num_stripes / ncopies);
+
+        *stripe_size_out = stripe_size;
+        *num_bytes_out = num_bytes;
 
-        trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+        trace_btrfs_chunk_alloc(info->chunk_root, map, start, num_bytes);
 
         em = alloc_extent_map();
         if (!em) {
@@ -2606,7 +2460,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         }
         em->bdev = (struct block_device *)map;
         em->start = start;
-        em->len = *num_bytes;
+        em->len = num_bytes;
         em->block_start = 0;
         em->block_len = em->len;
@@ -2619,20 +2473,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         ret = btrfs_make_block_group(trans, extent_root, 0, type,
                                      BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                     start, *num_bytes);
+                                     start, num_bytes);
         BUG_ON(ret);
 
-        index = 0;
-        while (index < map->num_stripes) {
-                device = map->stripes[index].dev;
-                dev_offset = map->stripes[index].physical;
+        for (i = 0; i < map->num_stripes; ++i) {
+                struct btrfs_device *device;
+                u64 dev_offset;
+
+                device = map->stripes[i].dev;
+                dev_offset = map->stripes[i].physical;
 
                 ret = btrfs_alloc_dev_extent(trans, device,
                                 info->chunk_root->root_key.objectid,
                                 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                start, dev_offset, calc_size);
+                                start, dev_offset, stripe_size);
                 BUG_ON(ret);
-                index++;
         }
 
         kfree(devices_info);
...
@@ -144,6 +144,7 @@ struct btrfs_device_info {
         struct btrfs_device *dev;
         u64 dev_offset;
         u64 max_avail;
+        u64 total_avail;
 };
 
 struct map_lookup {
@@ -157,21 +158,6 @@ struct map_lookup {
         struct btrfs_bio_stripe stripes[];
 };
 
-/* Used to sort the devices by max_avail(descending sort) */
-int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
-
-/*
- * sort the devices by max_avail, in which max free extent size of each device
- * is stored.(Descending Sort)
- */
-static inline void btrfs_descending_sort_devices(
-                                        struct btrfs_device_info *devices,
-                                        size_t nr_devices)
-{
-        sort(devices, nr_devices, sizeof(struct btrfs_device_info),
-             btrfs_cmp_device_free_bytes, NULL);
-}
-
 int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
                                    u64 end, u64 *length);
...
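For completeness, the ordering that the new total_avail field enables: max_avail decides first, and total_avail only breaks ties, which is what steers allocation toward emptier devices over time. A userspace re-statement of the btrfs_cmp_device_info ordering, with invented values:

#include <stdio.h>
#include <stdlib.h>

/* stand-in for the extended btrfs_device_info */
struct info { unsigned long long max_avail, total_avail; };

/* same ordering as btrfs_cmp_device_info: max_avail first,
 * total_avail as the tiebreaker, both descending */
static int cmp(const void *pa, const void *pb)
{
        const struct info *a = pa, *b = pb;

        if (a->max_avail != b->max_avail)
                return a->max_avail > b->max_avail ? -1 : 1;
        if (a->total_avail != b->total_avail)
                return a->total_avail > b->total_avail ? -1 : 1;
        return 0;
}

int main(void)
{
        /* two devices with identical largest holes: the emptier one wins */
        struct info devs[] = { {100, 200}, {100, 900} };

        qsort(devs, 2, sizeof(devs[0]), cmp);
        printf("first: total_avail=%llu\n", devs[0].total_avail);  /* 900 */
        return 0;
}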