diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index c48b7a0992f641e25181010ff671cad554a0ff06..baba435692d2c4034239302574abbf7961b7a46a 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -63,138 +63,6 @@ struct sector_ptr { unsigned int uptodate:8; }; -enum btrfs_rbio_ops { - BTRFS_RBIO_WRITE, - BTRFS_RBIO_READ_REBUILD, - BTRFS_RBIO_PARITY_SCRUB, - BTRFS_RBIO_REBUILD_MISSING, -}; - -struct btrfs_raid_bio { - struct btrfs_io_context *bioc; - - /* while we're doing rmw on a stripe - * we put it into a hash table so we can - * lock the stripe and merge more rbios - * into it. - */ - struct list_head hash_list; - - /* - * LRU list for the stripe cache - */ - struct list_head stripe_cache; - - /* - * for scheduling work in the helper threads - */ - struct work_struct work; - - /* - * bio list and bio_list_lock are used - * to add more bios into the stripe - * in hopes of avoiding the full rmw - */ - struct bio_list bio_list; - spinlock_t bio_list_lock; - - /* also protected by the bio_list_lock, the - * plug list is used by the plugging code - * to collect partial bios while plugged. The - * stripe locking code also uses it to hand off - * the stripe lock to the next pending IO - */ - struct list_head plug_list; - - /* - * flags that tell us if it is safe to - * merge with this bio - */ - unsigned long flags; - - /* - * set if we're doing a parity rebuild - * for a read from higher up, which is handled - * differently from a parity rebuild as part of - * rmw - */ - enum btrfs_rbio_ops operation; - - /* Size of each individual stripe on disk */ - u32 stripe_len; - - /* How many pages there are for the full stripe including P/Q */ - u16 nr_pages; - - /* How many sectors there are for the full stripe including P/Q */ - u16 nr_sectors; - - /* Number of data stripes (no p/q) */ - u8 nr_data; - - /* Number of all stripes (including P/Q) */ - u8 real_stripes; - - /* How many pages there are for each stripe */ - u8 stripe_npages; - - /* How many sectors there are for each stripe */ - u8 stripe_nsectors; - - /* First bad stripe, -1 means no corruption */ - s8 faila; - - /* Second bad stripe (for RAID6 use) */ - s8 failb; - - /* Stripe number that we're scrubbing */ - u8 scrubp; - - /* - * size of all the bios in the bio_list. This - * helps us decide if the rbio maps to a full - * stripe or not - */ - int bio_list_bytes; - - int generic_bio_cnt; - - refcount_t refs; - - atomic_t stripes_pending; - - atomic_t error; - - /* Bitmap to record which horizontal stripe has data */ - unsigned long dbitmap; - - /* Allocated with stripe_nsectors-many bits for finish_*() calls */ - unsigned long finish_pbitmap; - - /* - * these are two arrays of pointers. We allocate the - * rbio big enough to hold them both and setup their - * locations when the rbio is allocated - */ - - /* pointers to pages that we allocated for - * reading/writing stripes directly from the disk (including P/Q) - */ - struct page **stripe_pages; - - /* Pointers to the sectors in the bio_list, for faster lookup */ - struct sector_ptr *bio_sectors; - - /* - * For subpage support, we need to map each sector to above - * stripe_pages. - */ - struct sector_ptr *stripe_sectors; - - /* allocated with real_stripes-many pointers for finish_*() calls */ - void **finish_pointers; -}; - static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); static noinline void finish_rmw(struct btrfs_raid_bio *rbio); static void rmw_work(struct work_struct *work); @@ -1275,6 +1143,34 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) spin_unlock_irq(&rbio->bio_list_lock); } +static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio, + struct raid56_bio_trace_info *trace_info) +{ + const struct btrfs_io_context *bioc = rbio->bioc; + int i; + + ASSERT(bioc); + + /* We rely on bio->bi_bdev to find the stripe number. */ + if (!bio->bi_bdev) + goto not_found; + + for (i = 0; i < bioc->num_stripes; i++) { + if (bio->bi_bdev != bioc->stripes[i].dev->bdev) + continue; + trace_info->stripe_nr = i; + trace_info->devid = bioc->stripes[i].dev->devid; + trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - + bioc->stripes[i].physical; + return; + } + +not_found: + trace_info->devid = -1; + trace_info->offset = -1; + trace_info->stripe_nr = -1; +} + /* * this is called from one of two situations. We either * have a full stripe from the higher layers, or we've read all @@ -1440,6 +1336,12 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio) while ((bio = bio_list_pop(&bio_list))) { bio->bi_end_io = raid_write_end_io; + if (trace_raid56_write_stripe_enabled()) { + struct raid56_bio_trace_info trace_info = { 0 }; + + bio_get_trace_info(rbio, bio, &trace_info); + trace_raid56_write_stripe(rbio, bio, &trace_info); + } submit_bio(bio); } return; @@ -1701,6 +1603,12 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + if (trace_raid56_read_partial_enabled()) { + struct raid56_bio_trace_info trace_info = { 0 }; + + bio_get_trace_info(rbio, bio, &trace_info); + trace_raid56_read_partial(rbio, bio, &trace_info); + } submit_bio(bio); } /* the actual write will happen once the reads are done */ @@ -2274,6 +2182,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + if (trace_raid56_scrub_read_recover_enabled()) { + struct raid56_bio_trace_info trace_info = { 0 }; + + bio_get_trace_info(rbio, bio, &trace_info); + trace_raid56_scrub_read_recover(rbio, bio, &trace_info); + } submit_bio(bio); } @@ -2643,6 +2557,12 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, while ((bio = bio_list_pop(&bio_list))) { bio->bi_end_io = raid_write_end_io; + if (trace_raid56_scrub_write_stripe_enabled()) { + struct raid56_bio_trace_info trace_info = { 0 }; + + bio_get_trace_info(rbio, bio, &trace_info); + trace_raid56_scrub_write_stripe(rbio, bio, &trace_info); + } submit_bio(bio); } return; @@ -2822,6 +2742,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + if (trace_raid56_scrub_read_enabled()) { + struct raid56_bio_trace_info trace_info = { 0 }; + + bio_get_trace_info(rbio, bio, &trace_info); + trace_raid56_scrub_read(rbio, bio, &trace_info); + } submit_bio(bio); } /* the actual write will happen once the reads are done */ diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index aaad08aefd7d085a984a8e1458bffcf52153fc33..3badde24dcbf06d9062f42024dbbad7c4e1b7cf5 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -7,6 +7,152 @@ #ifndef BTRFS_RAID56_H #define BTRFS_RAID56_H +#include +#include "volumes.h" + +enum btrfs_rbio_ops { + BTRFS_RBIO_WRITE, + BTRFS_RBIO_READ_REBUILD, + BTRFS_RBIO_PARITY_SCRUB, + BTRFS_RBIO_REBUILD_MISSING, +}; + +struct btrfs_raid_bio { + struct btrfs_io_context *bioc; + + /* + * While we're doing RMW on a stripe we put it into a hash table so we + * can lock the stripe and merge more rbios into it. + */ + struct list_head hash_list; + + /* LRU list for the stripe cache */ + struct list_head stripe_cache; + + /* For scheduling work in the helper threads */ + struct work_struct work; + + /* + * bio_list and bio_list_lock are used to add more bios into the stripe + * in hopes of avoiding the full RMW + */ + struct bio_list bio_list; + spinlock_t bio_list_lock; + + /* + * Also protected by the bio_list_lock, the plug list is used by the + * plugging code to collect partial bios while plugged. The stripe + * locking code also uses it to hand off the stripe lock to the next + * pending IO. + */ + struct list_head plug_list; + + /* Flags that tell us if it is safe to merge with this bio. */ + unsigned long flags; + + /* + * Set if we're doing a parity rebuild for a read from higher up, which + * is handled differently from a parity rebuild as part of RMW. + */ + enum btrfs_rbio_ops operation; + + /* Size of each individual stripe on disk */ + u32 stripe_len; + + /* How many pages there are for the full stripe including P/Q */ + u16 nr_pages; + + /* How many sectors there are for the full stripe including P/Q */ + u16 nr_sectors; + + /* Number of data stripes (no p/q) */ + u8 nr_data; + + /* Numer of all stripes (including P/Q) */ + u8 real_stripes; + + /* How many pages there are for each stripe */ + u8 stripe_npages; + + /* How many sectors there are for each stripe */ + u8 stripe_nsectors; + + /* First bad stripe, -1 means no corruption */ + s8 faila; + + /* Second bad stripe (for RAID6 use) */ + s8 failb; + + /* Stripe number that we're scrubbing */ + u8 scrubp; + + /* + * Size of all the bios in the bio_list. This helps us decide if the + * rbio maps to a full stripe or not. + */ + int bio_list_bytes; + + int generic_bio_cnt; + + refcount_t refs; + + atomic_t stripes_pending; + + atomic_t error; + + /* Bitmap to record which horizontal stripe has data */ + unsigned long dbitmap; + + /* Allocated with stripe_nsectors-many bits for finish_*() calls */ + unsigned long finish_pbitmap; + + /* + * These are two arrays of pointers. We allocate the rbio big enough + * to hold them both and setup their locations when the rbio is + * allocated. + */ + + /* + * Pointers to pages that we allocated for reading/writing stripes + * directly from the disk (including P/Q). + */ + struct page **stripe_pages; + + /* Pointers to the sectors in the bio_list, for faster lookup */ + struct sector_ptr *bio_sectors; + + /* + * For subpage support, we need to map each sector to above + * stripe_pages. + */ + struct sector_ptr *stripe_sectors; + + /* Allocated with real_stripes-many pointers for finish_*() calls */ + void **finish_pointers; +}; + +/* + * For trace event usage only. Records useful debug info for each bio submitted + * by RAID56 to each physical device. + * + * No matter signed or not, (-1) is always the one indicating we can not grab + * the proper stripe number. + */ +struct raid56_bio_trace_info { + u64 devid; + + /* The offset inside the stripe. (<= STRIPE_LEN) */ + u32 offset; + + /* + * Stripe number. + * 0 is the first data stripe, and nr_data for P stripe, + * nr_data + 1 for Q stripe. + * >= real_stripes for + */ + u8 stripe_nr; +}; + static inline int nr_parity_stripes(const struct map_lookup *map) { if (map->type & BTRFS_BLOCK_GROUP_RAID5) @@ -21,13 +167,13 @@ static inline int nr_data_stripes(const struct map_lookup *map) { return map->num_stripes - nr_parity_stripes(map); } + #define RAID5_P_STRIPE ((u64)-2) #define RAID6_Q_STRIPE ((u64)-1) #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \ ((x) == RAID6_Q_STRIPE)) -struct btrfs_raid_bio; struct btrfs_device; int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 24b86061c5df30bd70612f4fca0ecc8d98f30187..8539ee2dc79f4af49037dca3afa4c586a4c7062d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -48,6 +48,7 @@ #include "block-group.h" #include "discard.h" #include "qgroup.h" +#include "raid56.h" #define CREATE_TRACE_POINTS #include diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 9ae94ef3e270be20d740799318703369a021f7a9..29fa8ea2cc0f6c8a1883e719111a0026f04d1911 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -30,6 +30,8 @@ struct btrfs_qgroup; struct extent_io_tree; struct prelim_ref; struct btrfs_space_info; +struct btrfs_raid_bio; +struct raid56_bio_trace_info; #define show_ref_type(type) \ __print_symbolic(type, \ @@ -2258,6 +2260,98 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DECLARE_EVENT_CLASS(btrfs_raid56_bio, + + TP_PROTO(const struct btrfs_raid_bio *rbio, + const struct bio *bio, + const struct raid56_bio_trace_info *trace_info), + + TP_ARGS(rbio, bio, trace_info), + + TP_STRUCT__entry_btrfs( + __field( u64, full_stripe ) + __field( u64, physical ) + __field( u64, devid ) + __field( u32, offset ) + __field( u32, len ) + __field( u8, opf ) + __field( u8, total_stripes ) + __field( u8, real_stripes ) + __field( u8, nr_data ) + __field( u8, stripe_nr ) + ), + + TP_fast_assign_btrfs(rbio->bioc->fs_info, + __entry->full_stripe = rbio->bioc->raid_map[0]; + __entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; + __entry->len = bio->bi_iter.bi_size; + __entry->opf = bio_op(bio); + __entry->devid = trace_info->devid; + __entry->offset = trace_info->offset; + __entry->stripe_nr = trace_info->stripe_nr; + __entry->total_stripes = rbio->bioc->num_stripes; + __entry->real_stripes = rbio->real_stripes; + __entry->nr_data = rbio->nr_data; + ), + /* + * For type output, we need to output things like "DATA1" + * (the first data stripe), "DATA2" (the second data stripe), + * "PQ1" (P stripe),"PQ2" (Q stripe), "REPLACE0" (replace target device). + */ + TP_printk_btrfs( +"full_stripe=%llu devid=%lld type=%s%d offset=%d opf=0x%x physical=%llu len=%u", + __entry->full_stripe, __entry->devid, + (__entry->stripe_nr < __entry->nr_data) ? "DATA" : + ((__entry->stripe_nr < __entry->real_stripes) ? "PQ" : + "REPLACE"), + (__entry->stripe_nr < __entry->nr_data) ? + (__entry->stripe_nr + 1) : + ((__entry->stripe_nr < __entry->real_stripes) ? + (__entry->stripe_nr - __entry->nr_data + 1) : 0), + __entry->offset, __entry->opf, __entry->physical, __entry->len) +); + +DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial, + TP_PROTO(const struct btrfs_raid_bio *rbio, + const struct bio *bio, + const struct raid56_bio_trace_info *trace_info), + + TP_ARGS(rbio, bio, trace_info) +); + +DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe, + TP_PROTO(const struct btrfs_raid_bio *rbio, + const struct bio *bio, + const struct raid56_bio_trace_info *trace_info), + + TP_ARGS(rbio, bio, trace_info) +); + + +DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe, + TP_PROTO(const struct btrfs_raid_bio *rbio, + const struct bio *bio, + const struct raid56_bio_trace_info *trace_info), + + TP_ARGS(rbio, bio, trace_info) +); + +DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read, + TP_PROTO(const struct btrfs_raid_bio *rbio, + const struct bio *bio, + const struct raid56_bio_trace_info *trace_info), + + TP_ARGS(rbio, bio, trace_info) +); + +DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover, + TP_PROTO(const struct btrfs_raid_bio *rbio, + const struct bio *bio, + const struct raid56_bio_trace_info *trace_info), + + TP_ARGS(rbio, bio, trace_info) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */