提交 f44c7dbd 编写于 作者: L Linus Torvalds

Merge tag 'block-5.16-2021-11-13' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Set of fixes that should go into this merge window:

   - ioctl vs read data race fixes (Shin'ichiro)

   - blkcg use-after-free fix (Laibin)

   - Last piece of the puzzle for add_disk() error handling, enable
     __must_check for (Luis)

   - Request allocation fixes (Ming)

   - Misc fixes (me)"

* tag 'block-5.16-2021-11-13' of git://git.kernel.dk/linux-block:
  blk-mq: fix filesystem I/O request allocation
  blkcg: Remove extra blkcg_bio_issue_init
  block: Hold invalidate_lock in BLKRESETZONE ioctl
  blk-mq: rename blk_attempt_bio_merge
  blk-mq: don't grab ->q_usage_counter in blk_mq_sched_bio_merge
  block: fix kerneldoc for disk_register_independent_access__ranges()
  block: add __must_check for *add_disk*() callers
  block: use enum type for blk_mq_alloc_data->rq_flags
  block: Hold invalidate_lock in BLKZEROOUT ioctl
  block: Hold invalidate_lock in BLKDISCARD ioctl
......@@ -809,10 +809,8 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
if (unlikely(!current->io_context))
create_task_io_context(current, GFP_ATOMIC, q->node);
if (blk_throtl_bio(bio)) {
blkcg_bio_issue_init(bio);
if (blk_throtl_bio(bio))
return false;
}
blk_cgroup_bio_start(bio);
blkcg_bio_issue_init(bio);
......
......@@ -104,8 +104,8 @@ static struct kobj_type blk_ia_ranges_ktype = {
};
/**
* disk_register_ia_ranges - register with sysfs a set of independent
* access ranges
* disk_register_independent_access_ranges - register with sysfs a set of
* independent access ranges
* @disk: Target disk
* @new_iars: New set of independent access ranges
*
......
......@@ -370,9 +370,6 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
bool ret = false;
enum hctx_type type;
if (bio_queue_enter(bio))
return false;
if (e && e->type->ops.bio_merge) {
ret = e->type->ops.bio_merge(q, bio, nr_segs);
goto out_put;
......@@ -397,7 +394,6 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
spin_unlock(&ctx->lock);
out_put:
blk_queue_exit(q);
return ret;
}
......
......@@ -2495,8 +2495,9 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
return BLK_MAX_REQUEST_COUNT;
}
static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, bool *same_queue_rq)
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs,
bool *same_queue_rq)
{
if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
......@@ -2520,12 +2521,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
};
struct request *rq;
if (unlikely(bio_queue_enter(bio)))
if (blk_mq_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
return NULL;
if (unlikely(!submit_bio_checks(bio)))
goto put_exit;
if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
goto put_exit;
rq_qos_throttle(q, bio);
......@@ -2542,26 +2539,44 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
rq_qos_cleanup(q, bio);
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
put_exit:
blk_queue_exit(q);
return NULL;
}
static inline bool blk_mq_can_use_cached_rq(struct request *rq,
struct bio *bio)
{
if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
return false;
if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
return false;
return true;
}
static inline struct request *blk_mq_get_request(struct request_queue *q,
struct blk_plug *plug,
struct bio *bio,
unsigned int nsegs,
bool *same_queue_rq)
{
struct request *rq;
bool checked = false;
if (plug) {
struct request *rq;
rq = rq_list_peek(&plug->cached_rq);
if (rq && rq->q == q) {
if (unlikely(!submit_bio_checks(bio)))
return NULL;
if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
if (blk_mq_attempt_bio_merge(q, bio, nsegs,
same_queue_rq))
return NULL;
checked = true;
if (!blk_mq_can_use_cached_rq(rq, bio))
goto fallback;
rq->cmd_flags = bio->bi_opf;
plug->cached_rq = rq_list_next(rq);
INIT_LIST_HEAD(&rq->queuelist);
rq_qos_throttle(q, bio);
......@@ -2569,7 +2584,15 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
}
}
return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
fallback:
if (unlikely(bio_queue_enter(bio)))
return NULL;
if (!checked && !submit_bio_checks(bio))
return NULL;
rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
if (!rq)
blk_queue_exit(q);
return rq;
}
/**
......
......@@ -89,15 +89,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
}
/*
* blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
* @q: request queue
* @flags: request command flags
* @ctx: software queue cpu ctx
*/
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
unsigned int flags,
struct blk_mq_ctx *ctx)
static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
{
enum hctx_type type = HCTX_TYPE_DEFAULT;
......@@ -108,8 +100,20 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
type = HCTX_TYPE_POLL;
else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
type = HCTX_TYPE_READ;
return ctx->hctxs[type];
return type;
}
/*
* blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
* @q: request queue
* @flags: request command flags
* @ctx: software queue cpu ctx
*/
static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
unsigned int flags,
struct blk_mq_ctx *ctx)
{
return ctx->hctxs[blk_mq_get_hctx_type(flags)];
}
/*
......@@ -149,7 +153,7 @@ struct blk_mq_alloc_data {
blk_mq_req_flags_t flags;
unsigned int shallow_depth;
unsigned int cmd_flags;
unsigned int rq_flags;
req_flags_t rq_flags;
/* allocate multiple requests/tags in one go */
unsigned int nr_tags;
......
......@@ -429,9 +429,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
op = REQ_OP_ZONE_RESET;
/* Invalidate the page cache, including dirty pages. */
filemap_invalidate_lock(bdev->bd_inode->i_mapping);
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
return ret;
goto fail;
break;
case BLKOPENZONE:
op = REQ_OP_ZONE_OPEN;
......@@ -449,15 +450,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
/*
* Invalidate the page cache again for zone reset: writes can only be
* direct for zoned devices so concurrent writes would not add any page
* to the page cache after/during reset. The page cache may be filled
* again due to concurrent reads though and dropping the pages for
* these is fine.
*/
if (!ret && cmd == BLKRESETZONE)
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
fail:
if (cmd == BLKRESETZONE)
filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
return ret;
}
......
......@@ -394,8 +394,8 @@ static void disk_scan_partitions(struct gendisk *disk)
* This function registers the partitioning information in @disk
* with the kernel.
*/
int device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups)
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups)
{
struct device *ddev = disk_to_dev(disk);
......@@ -544,7 +544,7 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
out_free_ext_minor:
if (disk->major == BLOCK_EXT_MAJOR)
blk_free_ext_minor(disk->first_minor);
return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
return ret;
}
EXPORT_SYMBOL(device_add_disk);
......
......@@ -113,6 +113,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
uint64_t range[2];
uint64_t start, len;
struct request_queue *q = bdev_get_queue(bdev);
struct inode *inode = bdev->bd_inode;
int err;
if (!(mode & FMODE_WRITE))
......@@ -135,12 +136,17 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
if (start + len > bdev_nr_bytes(bdev))
return -EINVAL;
filemap_invalidate_lock(inode->i_mapping);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
return err;
goto fail;
return blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags);
err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, flags);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
}
static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
......@@ -148,6 +154,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
{
uint64_t range[2];
uint64_t start, end, len;
struct inode *inode = bdev->bd_inode;
int err;
if (!(mode & FMODE_WRITE))
......@@ -170,12 +177,17 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
return -EINVAL;
/* Invalidate the page cache, including dirty pages */
filemap_invalidate_lock(inode->i_mapping);
err = truncate_bdev_range(bdev, mode, start, end);
if (err)
return err;
goto fail;
err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP);
return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
}
static int put_ushort(unsigned short __user *argp, unsigned short val)
......
......@@ -205,9 +205,9 @@ static inline dev_t disk_devt(struct gendisk *disk)
void disk_uevent(struct gendisk *disk, enum kobject_action action);
/* block/genhd.c */
int device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups);
static inline int add_disk(struct gendisk *disk)
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups);
static inline int __must_check add_disk(struct gendisk *disk)
{
return device_add_disk(NULL, disk, NULL);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册