提交 18d758a2 编写于 作者: Q Qu Wenruo 提交者: David Sterba

btrfs: replace btrfs_io_context::raid_map with a fixed u64 value

In btrfs_io_context structure, we have a pointer raid_map, which
indicates the logical bytenr for each stripe.

But since we always call sort_parity_stripes(), the resulting
raid_map[] is always sorted, thus raid_map[0] is always the logical
bytenr of the full stripe.

So why do we waste space and time (for sorting) on raid_map?

This patch replaces btrfs_io_context::raid_map with a single u64
number, full_stripe_logical, by:

- Replacing btrfs_io_context::raid_map with full_stripe_logical

- Replacing call sites using raid_map[0] to use full_stripe_logical

- Replacing call sites using raid_map[i] with stripe index comparisons
  against nr_data_stripes.

The benefits are:

- Less memory wasted on raid_map
  It's sizeof(u64) * num_stripes vs sizeof(u64).
  It'll always save at least one u64, and the benefit grows larger with
  num_stripes.

- No more weird alloc_btrfs_io_context() behavior
  As there is only one fixed size + one variable length array.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
上级 1faf3885
...@@ -202,7 +202,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) ...@@ -202,7 +202,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
*/ */
static int rbio_bucket(struct btrfs_raid_bio *rbio) static int rbio_bucket(struct btrfs_raid_bio *rbio)
{ {
u64 num = rbio->bioc->raid_map[0]; u64 num = rbio->bioc->full_stripe_logical;
/* /*
* we shift down quite a bit. We're using byte * we shift down quite a bit. We're using byte
...@@ -567,7 +567,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, ...@@ -567,7 +567,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
test_bit(RBIO_CACHE_BIT, &cur->flags)) test_bit(RBIO_CACHE_BIT, &cur->flags))
return 0; return 0;
if (last->bioc->raid_map[0] != cur->bioc->raid_map[0]) if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical)
return 0; return 0;
/* we can't merge with different operations */ /* we can't merge with different operations */
...@@ -661,7 +661,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) ...@@ -661,7 +661,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
spin_lock(&h->lock); spin_lock(&h->lock);
list_for_each_entry(cur, &h->hash_list, hash_list) { list_for_each_entry(cur, &h->hash_list, hash_list) {
if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0]) if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical)
continue; continue;
spin_lock(&cur->bio_list_lock); spin_lock(&cur->bio_list_lock);
...@@ -1113,7 +1113,7 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio) ...@@ -1113,7 +1113,7 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
struct bio_vec bvec; struct bio_vec bvec;
struct bvec_iter iter; struct bvec_iter iter;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->raid_map[0]; rbio->bioc->full_stripe_logical;
bio_for_each_segment(bvec, bio, iter) { bio_for_each_segment(bvec, bio, iter) {
u32 bvec_offset; u32 bvec_offset;
...@@ -1337,7 +1337,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio) ...@@ -1337,7 +1337,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
{ {
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->raid_map[0]; rbio->bioc->full_stripe_logical;
int total_nr_sector = offset >> fs_info->sectorsize_bits; int total_nr_sector = offset >> fs_info->sectorsize_bits;
ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors); ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors);
...@@ -1614,7 +1614,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio) ...@@ -1614,7 +1614,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
{ {
const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT; const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
const u64 full_stripe_start = rbio->bioc->raid_map[0]; const u64 full_stripe_start = rbio->bioc->full_stripe_logical;
const u32 orig_len = orig_bio->bi_iter.bi_size; const u32 orig_len = orig_bio->bi_iter.bi_size;
const u32 sectorsize = fs_info->sectorsize; const u32 sectorsize = fs_info->sectorsize;
u64 cur_logical; u64 cur_logical;
...@@ -1801,9 +1801,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, ...@@ -1801,9 +1801,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* here due to a crc mismatch and we can't give them the * here due to a crc mismatch and we can't give them the
* data they want. * data they want.
*/ */
if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) { if (failb == rbio->real_stripes - 1) {
if (rbio->bioc->raid_map[faila] == if (faila == rbio->real_stripes - 2)
RAID5_P_STRIPE)
/* /*
* Only P and Q are corrupted. * Only P and Q are corrupted.
* We only care about data stripes recovery, * We only care about data stripes recovery,
...@@ -1817,7 +1816,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, ...@@ -1817,7 +1816,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
goto pstripe; goto pstripe;
} }
if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) { if (failb == rbio->real_stripes - 2) {
raid6_datap_recov(rbio->real_stripes, sectorsize, raid6_datap_recov(rbio->real_stripes, sectorsize,
faila, pointers); faila, pointers);
} else { } else {
...@@ -2080,8 +2079,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio) ...@@ -2080,8 +2079,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
{ {
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, struct btrfs_root *csum_root = btrfs_csum_root(fs_info,
rbio->bioc->raid_map[0]); rbio->bioc->full_stripe_logical);
const u64 start = rbio->bioc->raid_map[0]; const u64 start = rbio->bioc->full_stripe_logical;
const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << const u32 len = (rbio->nr_data * rbio->stripe_nsectors) <<
fs_info->sectorsize_bits; fs_info->sectorsize_bits;
int ret; int ret;
...@@ -2129,7 +2128,7 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio) ...@@ -2129,7 +2128,7 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
*/ */
btrfs_warn_rl(fs_info, btrfs_warn_rl(fs_info,
"sub-stripe write for full stripe %llu is not safe, failed to get csum: %d", "sub-stripe write for full stripe %llu is not safe, failed to get csum: %d",
rbio->bioc->raid_map[0], ret); rbio->bioc->full_stripe_logical, ret);
no_csum: no_csum:
kfree(rbio->csum_buf); kfree(rbio->csum_buf);
bitmap_free(rbio->csum_bitmap); bitmap_free(rbio->csum_bitmap);
...@@ -2385,10 +2384,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, ...@@ -2385,10 +2384,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
int stripe_offset; int stripe_offset;
int index; int index;
ASSERT(logical >= rbio->bioc->raid_map[0]); ASSERT(logical >= rbio->bioc->full_stripe_logical);
ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] + ASSERT(logical + sectorsize <= rbio->bioc->full_stripe_logical +
BTRFS_STRIPE_LEN * rbio->nr_data); BTRFS_STRIPE_LEN * rbio->nr_data);
stripe_offset = (int)(logical - rbio->bioc->raid_map[0]); stripe_offset = (int)(logical - rbio->bioc->full_stripe_logical);
index = stripe_offset / sectorsize; index = stripe_offset / sectorsize;
rbio->bio_sectors[index].page = page; rbio->bio_sectors[index].page = page;
rbio->bio_sectors[index].pgoff = pgoff; rbio->bio_sectors[index].pgoff = pgoff;
......
...@@ -1430,7 +1430,7 @@ static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc) ...@@ -1430,7 +1430,7 @@ static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
} }
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
u64 *raid_map, u64 full_stripe_logical,
int nstripes, int mirror, int nstripes, int mirror,
int *stripe_index, int *stripe_index,
u64 *stripe_offset) u64 *stripe_offset)
...@@ -1438,19 +1438,22 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, ...@@ -1438,19 +1438,22 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
int i; int i;
if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
nstripes - 1 : nstripes - 2;
/* RAID5/6 */ /* RAID5/6 */
for (i = 0; i < nstripes; i++) { for (i = 0; i < nr_data_stripes; i++) {
if (raid_map[i] == RAID6_Q_STRIPE || const u64 data_stripe_start = full_stripe_logical +
raid_map[i] == RAID5_P_STRIPE) (i * BTRFS_STRIPE_LEN);
continue;
if (logical >= raid_map[i] && if (logical >= data_stripe_start &&
logical < raid_map[i] + BTRFS_STRIPE_LEN) logical < data_stripe_start + BTRFS_STRIPE_LEN)
break; break;
} }
*stripe_index = i; *stripe_index = i;
*stripe_offset = logical - raid_map[i]; *stripe_offset = (logical - full_stripe_logical) &
BTRFS_STRIPE_LEN_MASK;
} else { } else {
/* The other RAID type */ /* The other RAID type */
*stripe_index = mirror; *stripe_index = mirror;
...@@ -1538,7 +1541,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, ...@@ -1538,7 +1541,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
scrub_stripe_index_and_offset(logical, scrub_stripe_index_and_offset(logical,
bioc->map_type, bioc->map_type,
bioc->raid_map, bioc->full_stripe_logical,
bioc->num_stripes - bioc->num_stripes -
bioc->replace_nr_stripes, bioc->replace_nr_stripes,
mirror_index, mirror_index,
...@@ -2398,7 +2401,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) ...@@ -2398,7 +2401,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
btrfs_bio_counter_inc_blocked(fs_info); btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&length, &bioc); &length, &bioc);
if (ret || !bioc || !bioc->raid_map) if (ret || !bioc)
goto bioc_out; goto bioc_out;
if (WARN_ON(!sctx->is_dev_replace || if (WARN_ON(!sctx->is_dev_replace ||
...@@ -3007,7 +3010,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) ...@@ -3007,7 +3010,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
btrfs_bio_counter_inc_blocked(fs_info); btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start, ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
&length, &bioc); &length, &bioc);
if (ret || !bioc || !bioc->raid_map) if (ret || !bioc)
goto bioc_out; goto bioc_out;
bio = bio_alloc(NULL, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS); bio = bio_alloc(NULL, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
......
...@@ -5894,25 +5894,6 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, ...@@ -5894,25 +5894,6 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
return preferred_mirror; return preferred_mirror;
} }
/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes)
{
int i;
int again = 1;
while (again) {
again = 0;
for (i = 0; i < num_stripes - 1; i++) {
/* Swap if parity is on a smaller index */
if (bioc->raid_map[i] > bioc->raid_map[i + 1]) {
swap(bioc->stripes[i], bioc->stripes[i + 1]);
swap(bioc->raid_map[i], bioc->raid_map[i + 1]);
again = 1;
}
}
}
}
static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
u16 total_stripes) u16 total_stripes)
{ {
...@@ -5922,12 +5903,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ ...@@ -5922,12 +5903,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
/* The size of btrfs_io_context */ /* The size of btrfs_io_context */
sizeof(struct btrfs_io_context) + sizeof(struct btrfs_io_context) +
/* Plus the variable array for the stripes */ /* Plus the variable array for the stripes */
sizeof(struct btrfs_io_stripe) * (total_stripes) + sizeof(struct btrfs_io_stripe) * (total_stripes),
/*
* Plus the raid_map, which includes both the tgt dev
* and the stripes.
*/
sizeof(u64) * (total_stripes),
GFP_NOFS); GFP_NOFS);
if (!bioc) if (!bioc)
...@@ -5936,8 +5912,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ ...@@ -5936,8 +5912,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
refcount_set(&bioc->refs, 1); refcount_set(&bioc->refs, 1);
bioc->fs_info = fs_info; bioc->fs_info = fs_info;
bioc->raid_map = (u64 *)(bioc->stripes + total_stripes);
bioc->replace_stripe_src = -1; bioc->replace_stripe_src = -1;
bioc->full_stripe_logical = (u64)-1;
return bioc; return bioc;
} }
...@@ -6541,33 +6517,39 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, ...@@ -6541,33 +6517,39 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
} }
bioc->map_type = map->type; bioc->map_type = map->type;
for (i = 0; i < num_stripes; i++) { /*
set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset, * For RAID56 full map, we need to make sure the stripes[] follows the
stripe_nr); * rule that data stripes are all ordered, then followed with P and Q
stripe_index++; * (if we have).
} *
* It's still mostly the same as other profiles, just with extra rotation.
/* Build raid_map */ */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
(need_full_stripe(op) || mirror_num > 1)) { (need_full_stripe(op) || mirror_num > 1)) {
u64 tmp; /*
unsigned rot; * For RAID56 @stripe_nr is already the number of full stripes
* before us, which is also the rotation value (needs to modulo
/* Work out the disk rotation on this stripe-set */ * with num_stripes).
rot = stripe_nr % num_stripes; *
* In this case, we just add @stripe_nr with @i, then do the
/* Fill in the logical address of each stripe */ * modulo, to reduce one modulo call.
tmp = stripe_nr * data_stripes; */
for (i = 0; i < data_stripes; i++) bioc->full_stripe_logical = em->start +
bioc->raid_map[(i + rot) % num_stripes] = ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
em->start + ((tmp + i) << BTRFS_STRIPE_LEN_SHIFT); for (i = 0; i < num_stripes; i++)
set_io_stripe(&bioc->stripes[i], map,
bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE; (i + stripe_nr) % num_stripes,
if (map->type & BTRFS_BLOCK_GROUP_RAID6) stripe_offset, stripe_nr);
bioc->raid_map[(i + rot + 1) % num_stripes] = } else {
RAID6_Q_STRIPE; /*
* For all other non-RAID56 profiles, just copy the target
sort_parity_stripes(bioc, num_stripes); * stripe into the bioc.
*/
for (i = 0; i < num_stripes; i++) {
set_io_stripe(&bioc->stripes[i], map, stripe_index,
stripe_offset, stripe_nr);
stripe_index++;
}
} }
if (need_full_stripe(op)) if (need_full_stripe(op))
......
...@@ -460,11 +460,22 @@ struct btrfs_io_context { ...@@ -460,11 +460,22 @@ struct btrfs_io_context {
u16 replace_nr_stripes; u16 replace_nr_stripes;
s16 replace_stripe_src; s16 replace_stripe_src;
/* /*
* logical block numbers for the start of each stripe * Logical bytenr of the full stripe start, only for RAID56 cases.
* The last one or two are p/q. These are sorted, *
* so raid_map[0] is the start of our full stripe * When this value is set to other than (u64)-1, the stripes[] should
* follow this pattern:
*
* (real_stripes = num_stripes - replace_nr_stripes)
* (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1))
*
* stripes[0]: The first data stripe
* stripes[1]: The second data stripe
* ...
* stripes[data_stripes - 1]: The last data stripe
* stripes[data_stripes]: The P stripe
* stripes[data_stripes + 1]: The Q stripe (only for RAID6).
*/ */
u64 *raid_map; u64 full_stripe_logical;
struct btrfs_io_stripe stripes[]; struct btrfs_io_stripe stripes[];
}; };
......
...@@ -2422,7 +2422,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio, ...@@ -2422,7 +2422,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
), ),
TP_fast_assign_btrfs(rbio->bioc->fs_info, TP_fast_assign_btrfs(rbio->bioc->fs_info,
__entry->full_stripe = rbio->bioc->raid_map[0]; __entry->full_stripe = rbio->bioc->full_stripe_logical;
__entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; __entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
__entry->len = bio->bi_iter.bi_size; __entry->len = bio->bi_iter.bi_size;
__entry->opf = bio_op(bio); __entry->opf = bio_op(bio);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册