提交 80a0d644 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Small collection of fixes that would be nice to have in -rc1. This
  contains:

   - NVMe pull request form Christoph, mostly with fixes for nvme-pci,
     host memory buffer in particular.

   - Error handling fixup for cgwb_create(), in case allocation of 'wb'
     fails. From Christophe Jaillet.

   - Ensure that trace_block_getrq() gets the 'dev' in an appropriate
     fashion, to avoid a potential NULL deref. From Greg Thelen.

   - Regression fix for dm-mq with blk-mq, fixing a problem with
     stacking IO schedulers. From me.

   - string.h fixup, fixing an issue with memcpy_and_pad(). This
     original change came in through an NVMe dependency, which is why
     I'm including it here. From Martin Wilck.

   - Fix potential int overflow in __blkdev_sectors_to_bio_pages(), from
     Mikulas.

   - MBR enable fix for sed-opal, from Scott"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: directly insert blk-mq request from blk_insert_cloned_request()
  mm/backing-dev.c: fix an error handling path in 'cgwb_create()'
  string.h: un-fortify memcpy_and_pad
  nvme-pci: implement the HMB entry number and size limitations
  nvme-pci: propagate (some) errors from host memory buffer setup
  nvme-pci: use appropriate initial chunk size for HMB allocation
  nvme-pci: fix host memory buffer allocation fallback
  nvme: fix lightnvm check
  block: fix integer overflow in __blkdev_sectors_to_bio_pages()
  block: sed-opal: Set MBRDone on S3 resume path if TPER is MBREnabled
  block: tolerate tracing of NULL bio
......@@ -2342,7 +2342,12 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
if (q->mq_ops) {
if (blk_queue_io_stat(q))
blk_account_io_start(rq, true);
blk_mq_sched_insert_request(rq, false, true, false, false);
/*
* Since we have a scheduler attached on the top device,
* bypass a potential scheduler on the bottom device for
* insert.
*/
blk_mq_request_bypass_insert(rq);
return BLK_STS_OK;
}
......
......@@ -269,9 +269,9 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
*/
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
sector_t bytes = (nr_sects << 9) + PAGE_SIZE - 1;
sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
return min(bytes >> PAGE_SHIFT, (sector_t)BIO_MAX_PAGES);
return min(pages, (sector_t)BIO_MAX_PAGES);
}
/**
......
......@@ -1401,6 +1401,22 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
blk_mq_hctx_mark_pending(hctx, ctx);
}
/*
* Should only be used carefully, when the caller knows we want to
* bypass a potential IO scheduler on the target device.
*/
void blk_mq_request_bypass_insert(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
spin_lock(&hctx->lock);
list_add_tail(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);
blk_mq_run_hw_queue(hctx, false);
}
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list)
......
......@@ -54,6 +54,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head);
void blk_mq_request_bypass_insert(struct request *rq);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
......
......@@ -46,6 +46,7 @@ enum opal_response_token {
#define GENERIC_HOST_SESSION_NUM 0x41
#define TPER_SYNC_SUPPORTED 0x01
#define MBR_ENABLED_MASK 0x10
#define TINY_ATOM_DATA_MASK 0x3F
#define TINY_ATOM_SIGNED 0x40
......
......@@ -80,6 +80,7 @@ struct parsed_resp {
struct opal_dev {
bool supported;
bool mbr_enabled;
void *data;
sec_send_recv *send_recv;
......@@ -283,6 +284,14 @@ static bool check_tper(const void *data)
return true;
}
static bool check_mbrenabled(const void *data)
{
const struct d0_locking_features *lfeat = data;
u8 sup_feat = lfeat->supported_features;
return !!(sup_feat & MBR_ENABLED_MASK);
}
static bool check_sum(const void *data)
{
const struct d0_single_user_mode *sum = data;
......@@ -417,6 +426,7 @@ static int opal_discovery0_end(struct opal_dev *dev)
u32 hlen = be32_to_cpu(hdr->length);
print_buffer(dev->resp, hlen);
dev->mbr_enabled = false;
if (hlen > IO_BUFFER_LENGTH - sizeof(*hdr)) {
pr_debug("Discovery length overflows buffer (%zu+%u)/%u\n",
......@@ -442,6 +452,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
check_geometry(dev, body);
break;
case FC_LOCKING:
dev->mbr_enabled = check_mbrenabled(body->features);
break;
case FC_ENTERPRISE:
case FC_DATASTORE:
/* some ignored properties */
......@@ -2190,6 +2202,21 @@ static int __opal_lock_unlock(struct opal_dev *dev,
return next(dev);
}
static int __opal_set_mbr_done(struct opal_dev *dev, struct opal_key *key)
{
u8 mbr_done_tf = 1;
const struct opal_step mbrdone_step [] = {
{ opal_discovery0, },
{ start_admin1LSP_opal_session, key },
{ set_mbr_done, &mbr_done_tf },
{ end_opal_session, },
{ NULL, }
};
dev->steps = mbrdone_step;
return next(dev);
}
static int opal_lock_unlock(struct opal_dev *dev,
struct opal_lock_unlock *lk_unlk)
{
......@@ -2345,6 +2372,11 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
suspend->unlk.session.sum);
was_failure = true;
}
if (dev->mbr_enabled) {
ret = __opal_set_mbr_done(dev, &suspend->unlk.session.opal_key);
if (ret)
pr_debug("Failed to set MBR Done in S3 resume\n");
}
}
mutex_unlock(&dev->dev_lock);
return was_failure;
......
......@@ -1897,6 +1897,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->cntlid = le16_to_cpu(id->cntlid);
ctrl->hmpre = le32_to_cpu(id->hmpre);
ctrl->hmmin = le32_to_cpu(id->hmmin);
ctrl->hmminds = le32_to_cpu(id->hmminds);
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
kfree(id);
......@@ -2377,10 +2379,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid);
if (nvme_nvm_ns_supported(ns, id) &&
nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__);
goto out_free_id;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
if (nvme_nvm_register(ns, disk_name, node)) {
dev_warn(ctrl->device, "LightNVM init failure\n");
goto out_free_id;
}
}
disk = alloc_disk_node(0, node);
......
......@@ -955,29 +955,3 @@ void nvme_nvm_unregister_sysfs(struct nvme_ns *ns)
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvm_dev_attr_group);
}
/* move to shared place when used in multiple places. */
#define PCI_VENDOR_ID_CNEX 0x1d1d
#define PCI_DEVICE_ID_CNEX_WL 0x2807
#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
/* XXX: this is poking into PCI structures from generic code! */
struct pci_dev *pdev = to_pci_dev(ctrl->dev);
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
pdev->device == PCI_DEVICE_ID_CNEX_QEMU &&
id->vs[0] == 0x1)
return 1;
/* CNEX Labs - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
pdev->device == PCI_DEVICE_ID_CNEX_WL &&
id->vs[0] == 0x1)
return 1;
return 0;
}
......@@ -75,6 +75,11 @@ enum nvme_quirks {
* The deepest sleep state should not be used.
*/
NVME_QUIRK_NO_DEEPEST_PS = (1 << 5),
/*
* Supports the LighNVM command set if indicated in vs[1].
*/
NVME_QUIRK_LIGHTNVM = (1 << 6),
};
/*
......@@ -176,8 +181,11 @@ struct nvme_ctrl {
u64 ps_max_latency_us;
bool apst_enabled;
/* PCIe only: */
u32 hmpre;
u32 hmmin;
u32 hmminds;
u16 hmmaxd;
/* Fabrics only */
u16 sqsize;
......@@ -320,7 +328,6 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
#ifdef CONFIG_NVM
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id);
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
......@@ -339,10 +346,6 @@ static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
return 0;
}
static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {};
static inline int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
return 0;
}
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
unsigned long arg)
{
......
......@@ -1612,21 +1612,23 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
dev->host_mem_descs = NULL;
}
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
u32 chunk_size)
{
struct nvme_host_mem_buf_desc *descs;
u32 chunk_size, max_entries, len;
u32 max_entries, len;
dma_addr_t descs_dma;
int i = 0;
void **bufs;
u64 size = 0, tmp;
/* start big and work our way down */
chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
retry:
tmp = (preferred + chunk_size - 1);
do_div(tmp, chunk_size);
max_entries = tmp;
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;
descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
&descs_dma, GFP_KERNEL);
if (!descs)
......@@ -1650,15 +1652,9 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
i++;
}
if (!size || (min && size < min)) {
dev_warn(dev->ctrl.device,
"failed to allocate host memory buffer.\n");
if (!size)
goto out_free_bufs;
}
dev_info(dev->ctrl.device,
"allocated %lld MiB host memory buffer.\n",
size >> ilog2(SZ_1M));
dev->nr_host_mem_descs = i;
dev->host_mem_size = size;
dev->host_mem_descs = descs;
......@@ -1679,21 +1675,35 @@ static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
descs_dma);
out:
/* try a smaller chunk size if we failed early */
if (chunk_size >= PAGE_SIZE * 2 && (i == 0 || size < min)) {
chunk_size /= 2;
goto retry;
}
dev->host_mem_descs = NULL;
return -ENOMEM;
}
static void nvme_setup_host_mem(struct nvme_dev *dev)
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
u32 chunk_size;
/* start big and work our way down */
for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
chunk_size /= 2) {
if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
if (!min || dev->host_mem_size >= min)
return 0;
nvme_free_host_mem(dev);
}
}
return -ENOMEM;
}
static int nvme_setup_host_mem(struct nvme_dev *dev)
{
u64 max = (u64)max_host_mem_size_mb * SZ_1M;
u64 preferred = (u64)dev->ctrl.hmpre * 4096;
u64 min = (u64)dev->ctrl.hmmin * 4096;
u32 enable_bits = NVME_HOST_MEM_ENABLE;
int ret = 0;
preferred = min(preferred, max);
if (min > max) {
......@@ -1701,7 +1711,7 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
"min host memory (%lld MiB) above limit (%d MiB).\n",
min >> ilog2(SZ_1M), max_host_mem_size_mb);
nvme_free_host_mem(dev);
return;
return 0;
}
/*
......@@ -1715,12 +1725,21 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
}
if (!dev->host_mem_descs) {
if (nvme_alloc_host_mem(dev, min, preferred))
return;
if (nvme_alloc_host_mem(dev, min, preferred)) {
dev_warn(dev->ctrl.device,
"failed to allocate host memory buffer.\n");
return 0; /* controller must work without HMB */
}
dev_info(dev->ctrl.device,
"allocated %lld MiB host memory buffer.\n",
dev->host_mem_size >> ilog2(SZ_1M));
}
if (nvme_set_host_mem(dev, enable_bits))
ret = nvme_set_host_mem(dev, enable_bits);
if (ret)
nvme_free_host_mem(dev);
return ret;
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
......@@ -2164,8 +2183,11 @@ static void nvme_reset_work(struct work_struct *work)
"unable to allocate dma for dbbuf\n");
}
if (dev->ctrl.hmpre)
nvme_setup_host_mem(dev);
if (dev->ctrl.hmpre) {
result = nvme_setup_host_mem(dev);
if (result < 0)
goto out;
}
result = nvme_setup_io_queues(dev);
if (result)
......@@ -2497,6 +2519,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
......
......@@ -226,7 +226,9 @@ struct nvme_id_ctrl {
__le16 mntmt;
__le16 mxtmt;
__le32 sanicap;
__u8 rsvd332[180];
__le32 hmminds;
__le16 hmmaxd;
__u8 rsvd338[174];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
......
......@@ -434,20 +434,9 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
* @count: The number of bytes to copy
* @pad: Character to use for padding if space is left in destination.
*/
__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len,
const void *src, size_t count, int pad)
static inline void memcpy_and_pad(void *dest, size_t dest_len,
const void *src, size_t count, int pad)
{
size_t dest_size = __builtin_object_size(dest, 0);
size_t src_size = __builtin_object_size(src, 0);
if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) {
if (dest_size < dest_len && dest_size < count)
__write_overflow();
else if (src_size < dest_len && src_size < count)
__read_overflow3();
}
if (dest_size < dest_len)
fortify_panic(__func__);
if (dest_len > count) {
memcpy(dest, src, count);
memset(dest + count, pad, dest_len - count);
......
......@@ -397,7 +397,6 @@ DECLARE_EVENT_CLASS(block_get_rq,
TP_fast_assign(
__entry->dev = bio ? bio_dev(bio) : 0;
__entry->dev = bio_dev(bio);
__entry->sector = bio ? bio->bi_iter.bi_sector : 0;
__entry->nr_sector = bio ? bio_sectors(bio) : 0;
blk_fill_rwbs(__entry->rwbs,
......@@ -414,7 +413,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
/**
* block_getrq - get a free request entry in queue for block IO operations
* @q: queue for operations
* @bio: pending block IO operation
* @bio: pending block IO operation (can be %NULL)
* @rw: low bit indicates a read (%0) or a write (%1)
*
* A request struct for queue @q has been allocated to handle the
......@@ -430,7 +429,7 @@ DEFINE_EVENT(block_get_rq, block_getrq,
/**
* block_sleeprq - waiting to get a free request entry in queue for block IO operation
* @q: queue for operation
* @bio: pending block IO operation
* @bio: pending block IO operation (can be %NULL)
* @rw: low bit indicates a read (%0) or a write (%1)
*
* In the case where a request struct cannot be provided for queue @q
......
......@@ -569,8 +569,10 @@ static int cgwb_create(struct backing_dev_info *bdi,
/* need to create a new one */
wb = kmalloc(sizeof(*wb), gfp);
if (!wb)
return -ENOMEM;
if (!wb) {
ret = -ENOMEM;
goto out_put;
}
ret = wb_init(wb, bdi, blkcg_css->id, gfp);
if (ret)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册