提交 8f4ae0f6 编写于 作者: L Linus Torvalds

Merge tag 'block-5.13-2021-05-14' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - Fix for shared tag set exit (Bart)

 - Correct ioctl range for zoned ioctls (Damien)

 - Removed dead/unused function (Lin)

 - Fix perf regression for shared tags (Ming)

 - Fix out-of-bounds issue with kyber and preemption (Omar)

 - BFQ merge fix (Paolo)

 - Two error handling fixes for nbd (Sun)

 - Fix weight update in blk-iocost (Tejun)

 - NVMe pull request (Christoph):
      - correct the check for using the inline bio in nvmet (Chaitanya
        Kulkarni)
      - demote unsupported command warnings (Chaitanya Kulkarni)
      - fix corruption due to double initializing ANA state (me, Hou Pu)
      - reset ns->file when open fails (Daniel Wagner)
      - fix a NULL deref when SEND is completed with error in nvmet-rdma
        (Michal Kalderon)

 - Fix kernel-doc warning (Bart)

* tag 'block-5.13-2021-05-14' of git://git.kernel.dk/linux-block:
  block/partitions/efi.c: Fix the efi_partition() kernel-doc header
  blk-mq: Swap two calls in blk_mq_exit_queue()
  blk-mq: plug request for shared sbitmap
  nvmet: use new ana_log_size instead the old one
  nvmet: seset ns->file when open fails
  nbd: share nbd_put and return by goto put_nbd
  nbd: Fix NULL pointer in flush_workqueue
  blkdev.h: remove unused codes blk_account_rq
  block, bfq: avoid circular stable merges
  blk-iocost: fix weight updates of inner active iocgs
  nvmet: demote fabrics cmd parse err msg to debug
  nvmet: use helper to remove the duplicate code
  nvmet: demote discovery cmd parse err msg to debug
  nvmet-rdma: Fix NULL deref when SEND is completed with error
  nvmet: fix inline bio check for passthru
  nvmet: fix inline bio check for bdev-ns
  nvme-multipath: fix double initialization of ANA state
  kyber: fix out of bounds access when preempted
  block: uapi: fix comment about block device ioctl
......@@ -372,9 +372,38 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
return bic->bfqq[is_sync];
}
static void bfq_put_stable_ref(struct bfq_queue *bfqq);
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
{
/*
* If bfqq != NULL, then a non-stable queue merge between
* bic->bfqq and bfqq is happening here. This causes troubles
* in the following case: bic->bfqq has also been scheduled
* for a possible stable merge with bic->stable_merge_bfqq,
* and bic->stable_merge_bfqq == bfqq happens to
* hold. Troubles occur because bfqq may then undergo a split,
* thereby becoming eligible for a stable merge. Yet, if
* bic->stable_merge_bfqq points exactly to bfqq, then bfqq
* would be stably merged with itself. To avoid this anomaly,
* we cancel the stable merge if
* bic->stable_merge_bfqq == bfqq.
*/
bic->bfqq[is_sync] = bfqq;
if (bfqq && bic->stable_merge_bfqq == bfqq) {
/*
* Actually, these same instructions are executed also
* in bfq_setup_cooperator, in case of abort or actual
* execution of a stable merge. We could avoid
* repeating these instructions there too, but if we
* did so, we would nest even more complexity in this
* function.
*/
bfq_put_stable_ref(bic->stable_merge_bfqq);
bic->stable_merge_bfqq = NULL;
}
}
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
......@@ -2263,10 +2292,9 @@ static void bfq_remove_request(struct request_queue *q,
}
static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data;
struct request *free = NULL;
/*
......@@ -2631,8 +2659,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
struct bfq_queue *bfqq);
static void bfq_put_stable_ref(struct bfq_queue *bfqq);
/*
* Attempt to schedule a merge of bfqq with the currently in-service
* queue or with a close queue among the scheduled queues. Return
......
......@@ -1069,7 +1069,17 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
lockdep_assert_held(&ioc->lock);
/*
* For an active leaf node, its inuse shouldn't be zero or exceed
* @active. An active internal node's inuse is solely determined by the
* inuse to active ratio of its children regardless of @inuse.
*/
if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
iocg->child_active_sum);
} else {
inuse = clamp_t(u32, inuse, 1, active);
}
iocg->last_inuse = iocg->inuse;
if (save)
......@@ -1086,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
/* update the level sums */
parent->child_active_sum += (s32)(active - child->active);
parent->child_inuse_sum += (s32)(inuse - child->inuse);
/* apply the udpates */
/* apply the updates */
child->active = active;
child->inuse = inuse;
......
......@@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
struct blk_mq_ctx *ctx;
struct blk_mq_hw_ctx *hctx;
bool ret = false;
enum hctx_type type;
if (e && e->type->ops.bio_merge)
return e->type->ops.bio_merge(hctx, bio, nr_segs);
return e->type->ops.bio_merge(q, bio, nr_segs);
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
type = hctx->type;
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
list_empty_careful(&ctx->rq_lists[type]))
......
......@@ -2232,8 +2232,9 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
/* Bypass scheduler for flush requests */
blk_insert_flush(rq);
blk_mq_run_hw_queue(data.hctx, true);
} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
!blk_queue_nonrot(q))) {
} else if (plug && (q->nr_hw_queues == 1 ||
blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) ||
q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
/*
* Use plugging if we have a ->commit_rqs() hook as well, as
* we know the driver uses bd->last in a smart fashion.
......@@ -3287,8 +3288,10 @@ void blk_mq_exit_queue(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
blk_mq_del_queue_tag_set(q);
/* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
/* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */
blk_mq_del_queue_tag_set(q);
}
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
......
......@@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
}
}
static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
struct kyber_hctx_data *khd = hctx->sched_data;
struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
struct list_head *rq_list = &kcq->rq_list[sched_domain];
......
......@@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
return ELEVATOR_NO_MERGE;
}
static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
struct request *free = NULL;
bool ret;
......
......@@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out)
}
/**
* efi_partition(struct parsed_partitions *state)
* efi_partition - scan for GPT partitions
* @state: disk parsed partitions
*
* Description: called from check.c, if the disk contains GPT
......
......@@ -1980,6 +1980,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
* config ref and try to destroy the workqueue from inside the work
* queue.
*/
if (nbd->recv_workq)
flush_workqueue(nbd->recv_workq);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
......@@ -2014,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
mutex_unlock(&nbd_index_mutex);
if (!refcount_inc_not_zero(&nbd->config_refs)) {
nbd_put(nbd);
return 0;
}
if (!refcount_inc_not_zero(&nbd->config_refs))
goto put_nbd;
nbd_disconnect_and_put(nbd);
nbd_config_put(nbd);
put_nbd:
nbd_put(nbd);
return 0;
}
......
......@@ -2901,7 +2901,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
ret = nvme_mpath_init(ctrl, id);
ret = nvme_mpath_init_identify(ctrl, id);
if (ret < 0)
goto out_free;
......@@ -4364,6 +4364,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
nvme_mpath_init_ctrl(ctrl);
return 0;
out_free_name:
......
......@@ -781,9 +781,18 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
put_disk(head->disk);
}
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
{
int error;
mutex_init(&ctrl->ana_lock);
timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
INIT_WORK(&ctrl->ana_work, nvme_ana_work);
}
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
size_t ana_log_size;
int error = 0;
/* check if multipath is enabled and we have the capability */
if (!multipath || !ctrl->subsys ||
......@@ -795,37 +804,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
mutex_init(&ctrl->ana_lock);
timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
ctrl->max_namespaces * sizeof(__le32);
if (ana_log_size > max_transfer_size) {
dev_err(ctrl->device,
"ANA log page size (%zd) larger than MDTS (%d).\n",
ctrl->ana_log_size,
ctrl->max_hw_sectors << SECTOR_SHIFT);
"ANA log page size (%zd) larger than MDTS (%zd).\n",
ana_log_size, max_transfer_size);
dev_err(ctrl->device, "disabling ANA support.\n");
return 0;
goto out_uninit;
}
INIT_WORK(&ctrl->ana_work, nvme_ana_work);
if (ana_log_size > ctrl->ana_log_size) {
nvme_mpath_stop(ctrl);
kfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
if (!ctrl->ana_log_buf) {
error = -ENOMEM;
goto out;
ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
if (!ctrl->ana_log_buf)
return -ENOMEM;
}
ctrl->ana_log_size = ana_log_size;
error = nvme_read_ana_log(ctrl);
if (error)
goto out_free_ana_log_buf;
goto out_uninit;
return 0;
out_free_ana_log_buf:
kfree(ctrl->ana_log_buf);
ctrl->ana_log_buf = NULL;
out:
out_uninit:
nvme_mpath_uninit(ctrl);
return error;
}
......
......@@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
......@@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
static inline void nvme_trace_bio_complete(struct request *req)
{
}
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
{
}
static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
struct nvme_id_ctrl *id)
{
if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
......
......@@ -975,10 +975,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
return 0;
default:
return nvmet_report_invalid_opcode(req);
}
pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
......@@ -379,7 +379,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
req->execute = nvmet_execute_disc_identify;
return 0;
default:
pr_err("unhandled cmd %d\n", cmd->common.opcode);
pr_debug("unhandled cmd %d\n", cmd->common.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
......
......@@ -94,7 +94,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
req->execute = nvmet_execute_prop_get;
break;
default:
pr_err("received unknown capsule type 0x%x\n",
pr_debug("received unknown capsule type 0x%x\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
......@@ -284,13 +284,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
struct nvme_command *cmd = req->cmd;
if (!nvme_is_fabrics(cmd)) {
pr_err("invalid command 0x%x on unconnected queue.\n",
pr_debug("invalid command 0x%x on unconnected queue.\n",
cmd->fabrics.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
pr_err("invalid capsule type 0x%x on unconnected queue.\n",
pr_debug("invalid capsule type 0x%x on unconnected queue.\n",
cmd->fabrics.fctype);
req->error_loc = offsetof(struct nvmf_common_command, fctype);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
......
......@@ -258,7 +258,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
if (nvmet_use_inline_bvec(req)) {
bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
......
......@@ -49,9 +49,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
ns->file = filp_open(ns->device_path, flags, 0);
if (IS_ERR(ns->file)) {
pr_err("failed to open file %s: (%ld)\n",
ns->device_path, PTR_ERR(ns->file));
return PTR_ERR(ns->file);
ret = PTR_ERR(ns->file);
pr_err("failed to open file %s: (%d)\n",
ns->device_path, ret);
ns->file = NULL;
return ret;
}
ret = nvmet_file_ns_revalidate(ns);
......
......@@ -616,4 +616,10 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba)
return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT);
}
static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
{
return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN &&
req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
}
#endif /* _NVMET_H */
......@@ -194,7 +194,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
if (req->sg_cnt > BIO_MAX_VECS)
return -EINVAL;
if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
if (nvmet_use_inline_bvec(req)) {
bio = &req->p.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
......
......@@ -700,7 +700,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
struct nvmet_rdma_queue *queue = wc->qp->qp_context;
nvmet_rdma_release_rsp(rsp);
......@@ -786,7 +786,7 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
struct nvmet_rdma_queue *queue = wc->qp->qp_context;
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u16 status;
......
......@@ -676,11 +676,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
static inline bool blk_account_rq(struct request *rq)
{
return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
}
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
......
......@@ -34,7 +34,7 @@ struct elevator_mq_ops {
void (*depth_updated)(struct blk_mq_hw_ctx *);
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
......
......@@ -185,7 +185,7 @@ struct fsxattr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
/*
* A jump here: 130-131 are reserved for zoned block devices
* A jump here: 130-136 are reserved for zoned block devices
* (see uapi/linux/blkzoned.h)
*/
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册