dm: fix dm-raid crash if md_handle_request() splits bio

Commit ca522482 ("dm: pass NULL bdev to bio_alloc_clone") introduced the optimization to _not_ perform bio_associate_blkg()'s relatively costly work when DM core clones its bio. But in doing so it exposed the possibility for DM's cloned bio to alter DM target behavior (e.g. crash) if a target were to issue IO without first calling bio_set_dev(). The DM raid target can trigger an MD crash due to its need to split the DM bio that is passed to md_handle_request(). The split will recurse to submit_bio_noacct() using a bio with an uninitialized ->bi_blkg. This NULL bio->bi_blkg causes blk_throtl_bio() to dereference a NULL blkg_to_tg(bio->bi_blkg). Fix this in DM core by adding a new 'needs_bio_set_dev' target flag that will make alloc_tio() call bio_set_dev() on behalf of the target. dm-raid is the only target that requires this flag. bio_set_dev() initializes the DM cloned bio's ->bi_blkg, using bio_associate_blkg, before passing the bio to md_handle_request(). Long-term fix would be to audit and refactor MD code to rely on DM to split its bio, using dm_accept_partial_bio(), but there are MD raid personalities (e.g. raid1 and raid10) whose implementation are tightly coupled to handling the bio splitting inline. Fixes: ca522482 ("dm: pass NULL bdev to bio_alloc_clone") Cc: stable@vger.kernel.org Signed-off-by: N Mike Snitzer <snitzer@kernel.org>

dm: fix dm-raid crash if md_handle_request() splits bio
Commit ca522482 ("dm: pass NULL bdev to bio_alloc_clone") introduced the optimization to _not_ perform bio_associate_blkg()'s relatively costly work when DM core clones its bio. But in doing so it exposed the possibility for DM's cloned bio to alter DM target behavior (e.g. crash) if a target were to issue IO without first calling bio_set_dev(). The DM raid target can trigger an MD crash due to its need to split the DM bio that is passed to md_handle_request(). The split will recurse to submit_bio_noacct() using a bio with an uninitialized ->bi_blkg. This NULL bio->bi_blkg causes blk_throtl_bio() to dereference a NULL blkg_to_tg(bio->bi_blkg). Fix this in DM core by adding a new 'needs_bio_set_dev' target flag that will make alloc_tio() call bio_set_dev() on behalf of the target. dm-raid is the only target that requires this flag. bio_set_dev() initializes the DM cloned bio's ->bi_blkg, using bio_associate_blkg, before passing the bio to md_handle_request(). Long-term fix would be to audit and refactor MD code to rely on DM to split its bio, using dm_accept_partial_bio(), but there are MD raid personalities (e.g. raid1 and raid10) whose implementation are tightly coupled to handling the bio splitting inline. Fixes: ca522482 ("dm: pass NULL bdev to bio_alloc_clone") Cc: stable@vger.kernel.org Signed-off-by: N Mike Snitzer <snitzer@kernel.org>
9dd1cd32 · Mike Snitzer · 7dad24db · 9dd1cd32 · 9dd1cd32 · 9dd1cd32
4 changed file
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3095,6 +3095,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	INIT_WORK(&rs->md.event_work, do_table_event);
 	ti->private = rs;
 	ti->num_flush_bios = 1;
+	ti->needs_bio_set_dev = true;

 	/* Restore any requested new layout for conversion decision */
 	rs_config_restore(rs, &rs_layout);

--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -574,9 +574,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
 	struct bio *clone;

 	clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
-	/* Set default bdev, but target must bio_set_dev() before issuing IO */
-	clone->bi_bdev = md->disk->part0;
-
 	tio = clone_to_tio(clone);
 	tio->flags = 0;
 	dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -609,6 +606,7 @@ static void free_io(struct dm_io *io)
 static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 			     unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
 {
+	struct mapped_device *md = ci->io->md;
 	struct dm_target_io *tio;
 	struct bio *clone;

@@ -618,14 +616,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		/* alloc_io() already initialized embedded clone */
 		clone = &tio->clone;
 	} else {
-		struct mapped_device *md = ci->io->md;
-
 		clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
 					&md->mempools->bs);
 		if (!clone)
 			return NULL;
-		/* Set default bdev, but target must bio_set_dev() before issuing IO */
-		clone->bi_bdev = md->disk->part0;

 		/* REQ_DM_POLL_LIST shouldn't be inherited */
 		clone->bi_opf &= ~REQ_DM_POLL_LIST;
@@ -641,6 +635,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 	tio->len_ptr = len;
 	tio->old_sector = 0;

+	/* Set default bdev, but target must bio_set_dev() before issuing IO */
+	clone->bi_bdev = md->disk->part0;
+	if (unlikely(ti->needs_bio_set_dev))
+		bio_set_dev(clone, md->disk->part0);
+
 	if (len) {
 		clone->bi_iter.bi_size = to_bytes(*len);
 		if (bio_integrity(clone))

--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -373,6 +373,12 @@ struct dm_target {
 	 * after returning DM_MAPIO_SUBMITTED from its map function.
 	 */
 	bool accounts_remapped_io:1;
+
+	/*
+	 * Set if the target will submit the DM bio without first calling
+	 * bio_set_dev(). NOTE: ideally a target should _not_ need this.
+	 */
+	bool needs_bio_set_dev:1;
 };

 void *dm_per_bio_data(struct bio *bio, size_t data_size);

--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -286,9 +286,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)

 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	46
+#define DM_VERSION_MINOR	47
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2022-02-22)"
+#define DM_VERSION_EXTRA	"-ioctl (2022-07-28)"

 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */