diff --git a/MAINTAINERS b/MAINTAINERS
index 634f1d0357671e12b4a1359459ed6f82e2779371..82a9308ecb9348a7b6a59d7f1d6415fafbab1a75 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11227,7 +11227,7 @@ F:	drivers/video/fbdev/riva/
 F:	drivers/video/fbdev/nvidia/
 
 NVM EXPRESS DRIVER
-M:	Keith Busch
+M:	Keith Busch
 M:	Jens Axboe
 M:	Christoph Hellwig
 M:	Sagi Grimberg
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7da80f3753156d90af62e1eab21b9c351d4efe30..1b7c2afd84cbb5f4971cb2ef8fba6a48a518da4c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1257,9 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		return 0;
 	}
 
-	effects |= nvme_known_admin_effects(opcode);
 	if (ctrl->effects)
 		effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+	effects |= nvme_known_admin_effects(opcode);
 
 	/*
 	 * For simplicity, IO to all namespaces is quiesced even if the command
@@ -1361,9 +1361,14 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
 {
 #ifdef CONFIG_NVME_MULTIPATH
 	if (disk->fops == &nvme_ns_head_ops) {
+		struct nvme_ns *ns;
+
 		*head = disk->private_data;
 		*srcu_idx = srcu_read_lock(&(*head)->srcu);
-		return nvme_find_path(*head);
+		ns = nvme_find_path(*head);
+		if (!ns)
+			srcu_read_unlock(&(*head)->srcu, *srcu_idx);
+		return ns;
 	}
 #endif
 	*head = NULL;
@@ -1377,42 +1382,56 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
 		srcu_read_unlock(&head->srcu, idx);
 }
 
-static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg)
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+		unsigned int cmd, unsigned long arg)
 {
+	struct nvme_ns_head *head = NULL;
+	void __user *argp = (void __user *)arg;
+	struct nvme_ns *ns;
+	int srcu_idx, ret;
+
+	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+	if (unlikely(!ns))
+		return -EWOULDBLOCK;
+
+	/*
+	 * Handle ioctls that apply to the controller instead of the namespace
+	 * separately and drop the ns SRCU reference early. This avoids a
+	 * deadlock when deleting namespaces using the passthrough interface.
+	 */
+	if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
+		struct nvme_ctrl *ctrl = ns->ctrl;
+
+		nvme_get_ctrl(ns->ctrl);
+		nvme_put_ns_from_disk(head, srcu_idx);
+
+		if (cmd == NVME_IOCTL_ADMIN_CMD)
+			ret = nvme_user_cmd(ctrl, NULL, argp);
+		else
+			ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+
+		nvme_put_ctrl(ctrl);
+		return ret;
+	}
+
 	switch (cmd) {
 	case NVME_IOCTL_ID:
 		force_successful_syscall_return();
-		return ns->head->ns_id;
-	case NVME_IOCTL_ADMIN_CMD:
-		return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+		ret = ns->head->ns_id;
+		break;
 	case NVME_IOCTL_IO_CMD:
-		return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+		ret = nvme_user_cmd(ns->ctrl, ns, argp);
+		break;
 	case NVME_IOCTL_SUBMIT_IO:
-		return nvme_submit_io(ns, (void __user *)arg);
+		ret = nvme_submit_io(ns, argp);
+		break;
 	default:
-#ifdef CONFIG_NVM
 		if (ns->ndev)
-			return nvme_nvm_ioctl(ns, cmd, arg);
-#endif
-		if (is_sed_ioctl(cmd))
-			return sed_ioctl(ns->ctrl->opal_dev, cmd,
-					(void __user *) arg);
-		return -ENOTTY;
+			ret = nvme_nvm_ioctl(ns, cmd, arg);
+		else
+			ret = -ENOTTY;
 	}
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
-		unsigned int cmd, unsigned long arg)
-{
-	struct nvme_ns_head *head = NULL;
-	struct nvme_ns *ns;
-	int srcu_idx, ret;
-
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
-	if (unlikely(!ns))
-		ret = -EWOULDBLOCK;
-	else
-		ret = nvme_ns_ioctl(ns, cmd, arg);
+
 	nvme_put_ns_from_disk(head, srcu_idx);
 	return ret;
 }
@@ -2557,6 +2576,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 
 	ctrl->oacs = le16_to_cpu(id->oacs);
 	ctrl->oncs = le16_to_cpu(id->oncs);
+	ctrl->mtfa = le16_to_cpu(id->mtfa);
 	ctrl->oaes = le32_to_cpu(id->oaes);
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
@@ -3681,6 +3701,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 {
+	dev_pm_qos_hide_latency_tolerance(ctrl->device);
 	cdev_device_del(&ctrl->cdev, ctrl->device);
 }
 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -3880,6 +3901,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	down_read(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_sync_queue(ns->queue);
+	up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
 /*
  * Check we didn't inadvertently grow the command structure sizes:
  */
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5ee75b5ff83f416ca1d0c1e242a7140241a47800..55553d293a98ae5069702dfb7898eb8aa7ed4bc7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -441,6 +441,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
 void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2a8708c9ac18fa62b85c7333f0950b11dcf212d0..f562154551ce71ee0eb222617de5f2411a99ee29 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -464,7 +464,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 		 * affinity), so use the regular blk-mq cpu mapping
 		 */
 		map->queue_offset = qoff;
-		if (i != HCTX_TYPE_POLL)
+		if (i != HCTX_TYPE_POLL && offset)
 			blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
 		else
 			blk_mq_map_queues(map);
@@ -1257,7 +1257,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
-	bool shutdown = false;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1294,17 +1293,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
-	case NVME_CTRL_DELETING:
-		shutdown = true;
-		/* fall through */
 	case NVME_CTRL_CONNECTING:
-	case NVME_CTRL_RESETTING:
+		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+		/* fall through */
+	case NVME_CTRL_DELETING:
 		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
-		nvme_dev_disable(dev, shutdown);
+		nvme_dev_disable(dev, true);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_DONE;
+	case NVME_CTRL_RESETTING:
+		return BLK_EH_RESET_TIMER;
 	default:
 		break;
 	}
@@ -2376,7 +2376,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-	bool dead = true;
+	bool dead = true, freeze = false;
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	mutex_lock(&dev->shutdown_lock);
@@ -2384,8 +2384,10 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 		if (dev->ctrl.state == NVME_CTRL_LIVE ||
-		    dev->ctrl.state == NVME_CTRL_RESETTING)
+		    dev->ctrl.state == NVME_CTRL_RESETTING) {
+			freeze = true;
 			nvme_start_freeze(&dev->ctrl);
+		}
 		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
 			pdev->error_state != pci_channel_io_normal);
 	}
@@ -2394,10 +2396,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * Give the controller a chance to complete all entered requests if
 	 * doing a safe shutdown.
 	 */
-	if (!dead) {
-		if (shutdown)
-			nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
-	}
+	if (!dead && shutdown && freeze)
+		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
 
 	nvme_stop_queues(&dev->ctrl);
 
@@ -2492,6 +2492,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
+	nvme_sync_queues(&dev->ctrl);
 
 	mutex_lock(&dev->shutdown_lock);
 	result = nvme_pci_enable(dev);
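
As an illustrative aside (not part of the patch), the controller-ioctl path that the reworked nvme_ioctl() takes can be exercised from user space through the kernel's NVMe passthrough interface. The sketch below is a minimal user-space program, assuming a namespace block device at /dev/nvme0n1; it issues an Identify Controller admin command (opcode 06h, CNS 01h), which reaches the NVME_IOCTL_ADMIN_CMD branch after the driver has taken a controller reference and dropped the namespace SRCU lock.

/*
 * Minimal sketch, not part of the patch: exercise the NVME_IOCTL_ADMIN_CMD
 * branch of the reworked nvme_ioctl() from user space.  The device node
 * /dev/nvme0n1 is an assumption; adjust it for the system under test.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	struct nvme_admin_cmd cmd;
	char id[4096];
	int fd = open("/dev/nvme0n1", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opcode = 0x06;		/* Identify */
	cmd.cdw10 = 1;			/* CNS 01h: Identify Controller */
	cmd.addr = (__u64)(uintptr_t)id;
	cmd.data_len = sizeof(id);

	/* Served by nvme_ioctl(), which calls nvme_user_cmd(ctrl, NULL, argp) after the SRCU drop. */
	if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd) < 0)
		perror("NVME_IOCTL_ADMIN_CMD");
	return 0;
}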