Merge tag 'block-5.16-2021-11-13' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Set of fixes that should go into this merge window:

   - ioctl vs read data race fixes (Shin'ichiro)
   - blkcg use-after-free fix (Laibin)
   - Last piece of the puzzle for add_disk() error handling, enable __must_check for its callers (Luis)
   - Request allocation fixes (Ming)
   - Misc fixes (me)"

* tag 'block-5.16-2021-11-13' of git://git.kernel.dk/linux-block:
  blk-mq: fix filesystem I/O request allocation
  blkcg: Remove extra blkcg_bio_issue_init
  block: Hold invalidate_lock in BLKRESETZONE ioctl
  blk-mq: rename blk_attempt_bio_merge
  blk-mq: don't grab ->q_usage_counter in blk_mq_sched_bio_merge
  block: fix kerneldoc for disk_register_independent_access_ranges()
  block: add __must_check for *add_disk*() callers
  block: use enum type for blk_mq_alloc_data->rq_flags
  block: Hold invalidate_lock in BLKZEROOUT ioctl
  block: Hold invalidate_lock in BLKDISCARD ioctl

Merge tag 'block-5.16-2021-11-13' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Set of fixes that should go into this merge window:

   - ioctl vs read data race fixes (Shin'ichiro)
   - blkcg use-after-free fix (Laibin)
   - Last piece of the puzzle for add_disk() error handling, enable __must_check for its callers (Luis)
   - Request allocation fixes (Ming)
   - Misc fixes (me)"

* tag 'block-5.16-2021-11-13' of git://git.kernel.dk/linux-block:
  blk-mq: fix filesystem I/O request allocation
  blkcg: Remove extra blkcg_bio_issue_init
  block: Hold invalidate_lock in BLKRESETZONE ioctl
  blk-mq: rename blk_attempt_bio_merge
  blk-mq: don't grab ->q_usage_counter in blk_mq_sched_bio_merge
  block: fix kerneldoc for disk_register_independent_access_ranges()
  block: add __must_check for *add_disk*() callers
  block: use enum type for blk_mq_alloc_data->rq_flags
  block: Hold invalidate_lock in BLKZEROOUT ioctl
  block: Hold invalidate_lock in BLKDISCARD ioctl
f44c7dbd · Linus Torvalds · 2b7196a2 · b637108a · f44c7dbd · f44c7dbd
9 changed files
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -809,10 +809,8 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
 	if (unlikely(!current->io_context))
 		create_task_io_context(current, GFP_ATOMIC, q->node);

-	if (blk_throtl_bio(bio)) {
-		blkcg_bio_issue_init(bio);
+	if (blk_throtl_bio(bio))
 		return false;
-	}

 	blk_cgroup_bio_start(bio);
 	blkcg_bio_issue_init(bio);

--- a/block/blk-ia-ranges.c
+++ b/block/blk-ia-ranges.c
@@ -104,8 +104,8 @@ static struct kobj_type blk_ia_ranges_ktype = {
 };

 /**
- * disk_register_ia_ranges - register with sysfs a set of independent
- *			    access ranges
+ * disk_register_independent_access_ranges - register with sysfs a set of
+ *		independent access ranges
 * @disk:	Target disk
 * @new_iars:	New set of independent access ranges
 *

--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -370,9 +370,6 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 	bool ret = false;
 	enum hctx_type type;

-	if (bio_queue_enter(bio))
-		return false;
-
 	if (e && e->type->ops.bio_merge) {
 		ret = e->type->ops.bio_merge(q, bio, nr_segs);
 		goto out_put;
@@ -397,7 +394,6 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,

 	spin_unlock(&ctx->lock);
 out_put:
-	blk_queue_exit(q);
 	return ret;
 }


--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2495,8 +2495,9 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
 	return BLK_MAX_REQUEST_COUNT;
 }

-static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
-				  unsigned int nr_segs, bool *same_queue_rq)
+static bool blk_mq_attempt_bio_merge(struct request_queue *q,
+				     struct bio *bio, unsigned int nr_segs,
+				     bool *same_queue_rq)
 {
 	if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
 		if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
@@ -2520,12 +2521,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	};
 	struct request *rq;

-	if (unlikely(bio_queue_enter(bio)))
+	if (blk_mq_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
 		return NULL;
-	if (unlikely(!submit_bio_checks(bio)))
-		goto put_exit;
-	if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
-		goto put_exit;

 	rq_qos_throttle(q, bio);

@@ -2542,26 +2539,44 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
-put_exit:
-	blk_queue_exit(q);
+
 	return NULL;
 }

+static inline bool blk_mq_can_use_cached_rq(struct request *rq,
+		struct bio *bio)
+{
+	if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
+		return false;
+
+	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+		return false;
+
+	return true;
+}
+
 static inline struct request *blk_mq_get_request(struct request_queue *q,
 						 struct blk_plug *plug,
 						 struct bio *bio,
 						 unsigned int nsegs,
 						 bool *same_queue_rq)
 {
+	struct request *rq;
+	bool checked = false;
+
 	if (plug) {
-		struct request *rq;

 		rq = rq_list_peek(&plug->cached_rq);
 		if (rq && rq->q == q) {
 			if (unlikely(!submit_bio_checks(bio)))
 				return NULL;
-			if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+			if (blk_mq_attempt_bio_merge(q, bio, nsegs,
+						same_queue_rq))
 				return NULL;
+			checked = true;
+			if (!blk_mq_can_use_cached_rq(rq, bio))
+				goto fallback;
+			rq->cmd_flags = bio->bi_opf;
 			plug->cached_rq = rq_list_next(rq);
 			INIT_LIST_HEAD(&rq->queuelist);
 			rq_qos_throttle(q, bio);
@@ -2569,7 +2584,15 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
 		}
 	}

-	return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+fallback:
+	if (unlikely(bio_queue_enter(bio)))
+		return NULL;
+	if (!checked && !submit_bio_checks(bio))
+		return NULL;
+	rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+	if (!rq)
+		blk_queue_exit(q);
+	return rq;
 }

 /**

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -89,15 +89,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
 	return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
 }

-/*
- * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
- * @q: request queue
- * @flags: request command flags
- * @ctx: software queue cpu ctx
- */
-static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
-						     unsigned int flags,
-						     struct blk_mq_ctx *ctx)
+static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
 {
 	enum hctx_type type = HCTX_TYPE_DEFAULT;

@@ -108,8 +100,20 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
 		type = HCTX_TYPE_POLL;
 	else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
 		type = HCTX_TYPE_READ;
-	
-	return ctx->hctxs[type];
+	return type;
+}
+
+/*
+ * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
+ * @q: request queue
+ * @flags: request command flags
+ * @ctx: software queue cpu ctx
+ */
+static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
+						     unsigned int flags,
+						     struct blk_mq_ctx *ctx)
+{
+	return ctx->hctxs[blk_mq_get_hctx_type(flags)];
 }

 /*
@@ -149,7 +153,7 @@ struct blk_mq_alloc_data {
 	blk_mq_req_flags_t flags;
 	unsigned int shallow_depth;
 	unsigned int cmd_flags;
-	unsigned int rq_flags;
+	req_flags_t rq_flags;

 	/* allocate multiple requests/tags in one go */
 	unsigned int nr_tags;

--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -429,9 +429,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 		op = REQ_OP_ZONE_RESET;

 		/* Invalidate the page cache, including dirty pages. */
+		filemap_invalidate_lock(bdev->bd_inode->i_mapping);
 		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
 		if (ret)
-			return ret;
+			goto fail;
 		break;
 	case BLKOPENZONE:
 		op = REQ_OP_ZONE_OPEN;
@@ -449,15 +450,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
 			       GFP_KERNEL);

-	/*
-	 * Invalidate the page cache again for zone reset: writes can only be
-	 * direct for zoned devices so concurrent writes would not add any page
-	 * to the page cache after/during reset. The page cache may be filled
-	 * again due to concurrent reads though and dropping the pages for
-	 * these is fine.
-	 */
-	if (!ret && cmd == BLKRESETZONE)
-		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+fail:
+	if (cmd == BLKRESETZONE)
+		filemap_invalidate_unlock(bdev->bd_inode->i_mapping);

 	return ret;
 }

--- a/block/genhd.c
+++ b/block/genhd.c
@@ -394,8 +394,8 @@ static void disk_scan_partitions(struct gendisk *disk)
 * This function registers the partitioning information in @disk
 * with the kernel.
 */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-		     const struct attribute_group **groups)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups)

 {
 	struct device *ddev = disk_to_dev(disk);
@@ -544,7 +544,7 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
 out_free_ext_minor:
 	if (disk->major == BLOCK_EXT_MAJOR)
 		blk_free_ext_minor(disk->first_minor);
-	return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
+	return ret;
 }
 EXPORT_SYMBOL(device_add_disk);


--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -113,6 +113,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 	uint64_t range[2];
 	uint64_t start, len;
 	struct request_queue *q = bdev_get_queue(bdev);
+	struct inode *inode = bdev->bd_inode;
 	int err;

 	if (!(mode & FMODE_WRITE))
@@ -135,12 +136,17 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 	if (start + len > bdev_nr_bytes(bdev))
 		return -EINVAL;

+	filemap_invalidate_lock(inode->i_mapping);
 	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
 	if (err)
-		return err;
+		goto fail;

-	return blkdev_issue_discard(bdev, start >> 9, len >> 9,
-				    GFP_KERNEL, flags);
+	err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
+				   GFP_KERNEL, flags);
+
+fail:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return err;
 }

 static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
@@ -148,6 +154,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 {
 	uint64_t range[2];
 	uint64_t start, end, len;
+	struct inode *inode = bdev->bd_inode;
 	int err;

 	if (!(mode & FMODE_WRITE))
@@ -170,12 +177,17 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 		return -EINVAL;

 	/* Invalidate the page cache, including dirty pages */
+	filemap_invalidate_lock(inode->i_mapping);
 	err = truncate_bdev_range(bdev, mode, start, end);
 	if (err)
-		return err;
+		goto fail;
+
+	err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
+				   BLKDEV_ZERO_NOUNMAP);

-	return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			BLKDEV_ZERO_NOUNMAP);
+fail:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return err;
 }

 static int put_ushort(unsigned short __user *argp, unsigned short val)

--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -205,9 +205,9 @@ static inline dev_t disk_devt(struct gendisk *disk)
 void disk_uevent(struct gendisk *disk, enum kobject_action action);

 /* block/genhd.c */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-		const struct attribute_group **groups);
-static inline int add_disk(struct gendisk *disk)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups);
+static inline int __must_check add_disk(struct gendisk *disk)
 {
 	return device_add_disk(NULL, disk, NULL);
 }