From c3a62baf21ad691aae04c2f091debe667439537e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Aug 2022 10:03:24 +0200 Subject: [PATCH] btrfs: use chained bios when cloning The stripes_pending in the btrfs_io_context counts number of inflight low-level bios for an upper btrfs_bio. For reads this is generally one as reads are never cloned, while for writes we can trivially use the bio remaining mechanisms that is used for chained bios. To be able to make use of that mechanism, split out a separate trivial end_io handler for the cloned bios that does a minimal amount of error tracking and which then calls bio_endio on the original bio to transfer control to that, with the remaining counter making sure it is completed last. This then allows to merge btrfs_end_bioc into the original bio bi_end_io handler. To make this all work all error handling needs to happen through the bi_end_io handler, which requires a small amount of reshuffling in submit_stripe_bio so that the bio is cloned already by the time the suitability of the device is checked. This reduces the size of the btrfs_io_context and prepares splitting the btrfs_bio at the stripe boundary. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 94 ++++++++++++++++++++++++---------------------- fs/btrfs/volumes.h | 1 - 2 files changed, 50 insertions(+), 45 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1d01347c844f..44c7529c70c3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5898,7 +5898,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ sizeof(u64) * (total_stripes), GFP_NOFS|__GFP_NOFAIL); - atomic_set(&bioc->error, 0); refcount_set(&bioc->refs, 1); bioc->fs_info = fs_info; @@ -6653,7 +6652,21 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size) bio_trim(bio, offset >> 9, size >> 9); bbio->iter = bio->bi_iter; return bio; +} + +static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev) +{ + if (!dev || !dev->bdev) + return; + if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET) + return; + if (btrfs_op(bio) == BTRFS_MAP_WRITE) + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); + if (!(bio->bi_opf & REQ_RAHEAD)) + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); + if (bio->bi_opf & REQ_PREFLUSH) + btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS); } static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc) @@ -6671,62 +6684,54 @@ static void btrfs_end_bio_work(struct work_struct *work) bio_endio(&bbio->bio); } -static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async) +static void btrfs_end_bio(struct bio *bio) { - struct bio *orig_bio = bioc->orig_bio; - struct btrfs_bio *bbio = btrfs_bio(orig_bio); + struct btrfs_io_stripe *stripe = bio->bi_private; + struct btrfs_io_context *bioc = stripe->bioc; + struct btrfs_bio *bbio = btrfs_bio(bio); btrfs_bio_counter_dec(bioc->fs_info); + if (bio->bi_status) { + atomic_inc(&bioc->error); + btrfs_log_dev_io_error(bio, stripe->dev); + } + bbio->mirror_num = bioc->mirror_num; - orig_bio->bi_private = bioc->private; - orig_bio->bi_end_io = bioc->end_io; + bio->bi_end_io = bioc->end_io; + bio->bi_private = bioc->private; /* * Only send an error to the higher layers if it is beyond the tolerance * threshold. */ if (atomic_read(&bioc->error) > bioc->max_errors) - orig_bio->bi_status = BLK_STS_IOERR; + bio->bi_status = BLK_STS_IOERR; else - orig_bio->bi_status = BLK_STS_OK; + bio->bi_status = BLK_STS_OK; - if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) { + if (btrfs_op(bio) == BTRFS_MAP_READ) { INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work); queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work); } else { - bio_endio(orig_bio); + bio_endio(bio); } btrfs_put_bioc(bioc); } -static void btrfs_end_bio(struct bio *bio) +static void btrfs_clone_write_end_io(struct bio *bio) { struct btrfs_io_stripe *stripe = bio->bi_private; - struct btrfs_io_context *bioc = stripe->bioc; if (bio->bi_status) { - atomic_inc(&bioc->error); - if (bio->bi_status == BLK_STS_IOERR || - bio->bi_status == BLK_STS_TARGET) { - if (btrfs_op(bio) == BTRFS_MAP_WRITE) - btrfs_dev_stat_inc_and_print(stripe->dev, - BTRFS_DEV_STAT_WRITE_ERRS); - else if (!(bio->bi_opf & REQ_RAHEAD)) - btrfs_dev_stat_inc_and_print(stripe->dev, - BTRFS_DEV_STAT_READ_ERRS); - if (bio->bi_opf & REQ_PREFLUSH) - btrfs_dev_stat_inc_and_print(stripe->dev, - BTRFS_DEV_STAT_FLUSH_ERRS); - } + atomic_inc(&stripe->bioc->error); + btrfs_log_dev_io_error(bio, stripe->dev); } - if (bio != bioc->orig_bio) - bio_put(bio); - - if (atomic_dec_and_test(&bioc->stripes_pending)) - btrfs_end_bioc(bioc, true); + /* Pass on control to the original bio this one was cloned from */ + bio_endio(stripe->bioc->orig_bio); + bio_put(bio); } static void submit_stripe_bio(struct btrfs_io_context *bioc, @@ -6737,28 +6742,30 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc, u64 physical = bioc->stripes[dev_nr].physical; struct bio *bio; - if (!dev || !dev->bdev || - test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || - (btrfs_op(orig_bio) == BTRFS_MAP_WRITE && - !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { - atomic_inc(&bioc->error); - if (atomic_dec_and_test(&bioc->stripes_pending)) - btrfs_end_bioc(bioc, false); - return; - } - if (clone) { - bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set); + bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set); + bio_inc_remaining(orig_bio); + bio->bi_end_io = btrfs_clone_write_end_io; } else { bio = orig_bio; - bio_set_dev(bio, dev->bdev); btrfs_bio(bio)->device = dev; + bio->bi_end_io = btrfs_end_bio; } bioc->stripes[dev_nr].bioc = bioc; bio->bi_private = &bioc->stripes[dev_nr]; - bio->bi_end_io = btrfs_end_bio; bio->bi_iter.bi_sector = physical >> 9; + + if (!dev || !dev->bdev || + test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) || + (btrfs_op(bio) == BTRFS_MAP_WRITE && + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) { + bio_io_error(bio); + return; + } + + bio_set_dev(bio, dev->bdev); + /* * For zone append writing, bi_sector must point the beginning of the * zone @@ -6807,7 +6814,6 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror bioc->orig_bio = bio; bioc->private = bio->bi_private; bioc->end_io = bio->bi_end_io; - atomic_set(&bioc->stripes_pending, total_devs); if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) && ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 149fb5d522d4..56fc01373b0f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -457,7 +457,6 @@ struct btrfs_discard_stripe { */ struct btrfs_io_context { refcount_t refs; - atomic_t stripes_pending; struct btrfs_fs_info *fs_info; u64 map_type; /* get from map_lookup->type */ bio_end_io_t *end_io; -- GitLab