提交 a07b4970 编写于 作者: C Christoph Hellwig 提交者: Jens Axboe

nvmet: add a generic NVMe target

This patch introduces an implementation of NVMe subsystems,
controllers and discovery service which allows to export
NVMe namespaces across fabrics such as Ethernet, FC etc.

The implementation conforms to the NVMe 1.2.1 specification
and interoperates with NVMe over fabrics host implementations.

Configuration works using configfs, and is best performed using
the nvmetcli tool from http://git.infradead.org/users/hch/nvmetcli.git,
which also has a detailed explanation of the required steps in the
README file.
Signed-off-by: NArmen Baloyan <armenx.baloyan@intel.com>
Signed-off-by: NAnthony Knapp <anthony.j.knapp@intel.com>
Signed-off-by: NJay Freyensee <james.p.freyensee@intel.com>
Signed-off-by: NMing Lin <ming.l@ssi.samsung.com>
Signed-off-by: NSagi Grimberg <sagi@grimberg.me>
Signed-off-by: NChristoph Hellwig <hch@lst.de>
Reviewed-by: NSteve Wise <swise@opengridcomputing.com>
Signed-off-by: NJens Axboe <axboe@fb.com>
上级 9645c1a2
......@@ -8153,6 +8153,13 @@ S: Supported
F: drivers/nvme/host/
F: include/linux/nvme.h
NVM EXPRESS TARGET DRIVER
M: Christoph Hellwig <hch@lst.de>
M: Sagi Grimberg <sagi@grimberg.me>
L: linux-nvme@lists.infradead.org
S: Supported
F: drivers/nvme/target/
NVMEM FRAMEWORK
M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
M: Maxime Ripard <maxime.ripard@free-electrons.com>
......
source "drivers/nvme/host/Kconfig"
source "drivers/nvme/target/Kconfig"
obj-y += host/
obj-y += target/
config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
help
This enables target side support for the NVMe protocol, that is
it allows the Linux kernel to implement NVMe subsystems and
controllers and export Linux block devices as NVMe namespaces.
You need to select at least one of the transports below to make this
functionality useful.
To configure the NVMe target you probably want to use the nvmetcli
tool from http://git.infradead.org/users/hch/nvmetcli.git.
If unsure, say N.
obj-$(CONFIG_NVME_TARGET) += nvmet.o
nvmet-y += core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
discovery.o
/*
* NVMe admin command implementation.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <generated/utsrelease.h>
#include "nvmet.h"
/*
 * Return the transfer length in bytes requested by a Get Log Page command.
 *
 * The dword count is split across the NUMDU (upper 16 bits) and NUMDL
 * (lower 16 bits) fields of the command.
 */
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
	u32 upper = le16_to_cpu(cmd->get_log_page.numdu);
	u32 lower = le16_to_cpu(cmd->get_log_page.numdl);
	u32 dwords = (upper << 16) + lower;

	/* NUMD is a 0's based value */
	dwords += 1;

	return dwords * sizeof(u32);
}
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
size_t data_len = nvmet_get_log_page_len(req->cmd);
void *buf;
u16 status = 0;
buf = kzalloc(data_len, GFP_KERNEL);
if (!buf) {
status = NVME_SC_INTERNAL;
goto out;
}
switch (req->cmd->get_log_page.lid) {
case 0x01:
/*
* We currently never set the More bit in the status field,
* so all error log entries are invalid and can be zeroed out.
* This is called a minum viable implementation (TM) of this
* mandatory log page.
*/
break;
case 0x02:
/*
* XXX: fill out actual smart log
*
* We might have a hard time coming up with useful values for
* many of the fields, and even when we have useful data
* available (e.g. units or commands read/written) those aren't
* persistent over power loss.
*/
break;
case 0x03:
/*
* We only support a single firmware slot which always is
* active, so we can zero out the whole firmware slot log and
* still claim to fully implement this mandatory log page.
*/
break;
default:
BUG();
}
status = nvmet_copy_to_sgl(req, 0, buf, data_len);
kfree(buf);
out:
nvmet_req_complete(req, status);
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u64 serial;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
goto out;
}
/* XXX: figure out how to assign real vendors IDs. */
id->vid = 0;
id->ssvid = 0;
/* generate a random serial number as our controllers are ephemeral: */
get_random_bytes(&serial, sizeof(serial));
memset(id->sn, ' ', sizeof(id->sn));
snprintf(id->sn, sizeof(id->sn), "%llx", serial);
memset(id->mn, ' ', sizeof(id->mn));
strncpy((char *)id->mn, "Linux", sizeof(id->mn));
memset(id->fr, ' ', sizeof(id->fr));
strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
id->rab = 6;
/*
* XXX: figure out how we can assign a IEEE OUI, but until then
* the safest is to leave it as zeroes.
*/
/* we support multiple ports and multiples hosts: */
id->mic = (1 << 0) | (1 << 1);
/* no limit on data transfer sizes for now */
id->mdts = 0;
id->cntlid = cpu_to_le16(ctrl->cntlid);
id->ver = cpu_to_le32(ctrl->subsys->ver);
/* XXX: figure out what to do about RTD3R/RTD3 */
id->oaes = cpu_to_le32(1 << 8);
id->ctratt = cpu_to_le32(1 << 0);
id->oacs = 0;
/*
* We don't really have a practical limit on the number of abort
* comands. But we don't do anything useful for abort either, so
* no point in allowing more abort commands than the spec requires.
*/
id->acl = 3;
id->aerl = NVMET_ASYNC_EVENTS - 1;
/* first slot is read-only, only one slot supported */
id->frmw = (1 << 0) | (1 << 1);
id->lpa = (1 << 0) | (1 << 2);
id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
id->npss = 0;
/* We support keep-alive timeout in granularity of seconds */
id->kas = cpu_to_le16(NVMET_KAS);
id->sqes = (0x6 << 4) | 0x6;
id->cqes = (0x4 << 4) | 0x4;
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM);
/* XXX: don't report vwc if the underlying device is write through */
id->vwc = NVME_CTRL_VWC_PRESENT;
/*
* We can't support atomic writes bigger than a LBA without support
* from the backend device.
*/
id->awun = 0;
id->awupf = 0;
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls)
id->sgls |= cpu_to_le32(1 << 2);
if (ctrl->ops->sqe_inline_size)
id->sgls |= cpu_to_le32(1 << 20);
strcpy(id->subnqn, ctrl->subsys->subsysnqn);
/* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
ctrl->ops->sqe_inline_size) / 16);
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
id->msdbd = ctrl->ops->msdbd;
/*
* Meh, we don't really support any power state. Fake up the same
* values that qemu does.
*/
id->psd[0].max_power = cpu_to_le16(0x9c4);
id->psd[0].entry_lat = cpu_to_le32(0x10);
id->psd[0].exit_lat = cpu_to_le32(0x4);
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
kfree(id);
out:
nvmet_req_complete(req, status);
}
/*
 * Execute an Identify Namespace command: look up the namespace named by the
 * command, describe it in an identify namespace structure and copy that to
 * the host.
 */
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
	struct nvme_id_ns *id_ns;
	struct nvmet_ns *ns;
	u16 status;

	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
	if (!ns) {
		nvmet_req_complete(req, NVME_SC_INVALID_NS | NVME_SC_DNR);
		return;
	}

	id_ns = kzalloc(sizeof(*id_ns), GFP_KERNEL);
	if (!id_ns) {
		status = NVME_SC_INTERNAL;
		goto put_ns;
	}

	/*
	 * nuse = ncap = nsze isn't always true, but we have no way to find
	 * that out from the underlying device.
	 */
	id_ns->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
	id_ns->nuse = id_ns->nsze;
	id_ns->ncap = id_ns->nuse;

	/*
	 * We just provide a single LBA format that matches what the
	 * underlying device reports.
	 */
	id_ns->nlbaf = 0;
	id_ns->flbas = 0;
	id_ns->lbaf[0].ds = ns->blksize_shift;

	/*
	 * Our namespace might always be shared.  Not just with other
	 * controllers, but also with any other user of the block device.
	 */
	id_ns->nmic = (1 << 0);

	memcpy(&id_ns->nguid, &ns->nguid, sizeof(uuid_le));

	status = nvmet_copy_to_sgl(req, 0, id_ns, sizeof(*id_ns));
	kfree(id_ns);

put_ns:
	nvmet_put_namespace(ns);
	nvmet_req_complete(req, status);
}
static void nvmet_execute_identify_nslist(struct nvmet_req *req)
{
static const int buf_size = 4096;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
__le32 *list;
u16 status = 0;
int i = 0;
list = kzalloc(buf_size, GFP_KERNEL);
if (!list) {
status = NVME_SC_INTERNAL;
goto out;
}
rcu_read_lock();
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
if (ns->nsid <= min_nsid)
continue;
list[i++] = cpu_to_le32(ns->nsid);
if (i == buf_size / sizeof(__le32))
break;
}
rcu_read_unlock();
status = nvmet_copy_to_sgl(req, 0, list, buf_size);
kfree(list);
out:
nvmet_req_complete(req, status);
}
/*
* A "mimimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
* do a useful abort, so don't bother even with waiting for the command
* to be exectuted and return immediately telling the command to abort
* wasn't found.
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
}
/*
 * Execute a Set Features command.
 *
 * Only the Number of Queues and Keep Alive Timer features are handled; any
 * other feature identifier is rejected with Invalid Field.
 */
static void nvmet_execute_set_features(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
	u32 val32;
	u16 status = 0;

	/*
	 * The feature identifier occupies the low 8 bits of CDW10.  Masking
	 * with 0xf would make e.g. feature 0x1f alias the keep alive timer.
	 */
	switch (cdw10 & 0xff) {
	case NVME_FEAT_NUM_QUEUES:
		nvmet_set_result(req,
			(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
		break;
	case NVME_FEAT_KATO:
		/*
		 * KATO is the full 32-bit CDW11 (cdw10[1]), in milliseconds;
		 * the controller keeps it in seconds, rounded up.
		 */
		val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
		req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
		nvmet_set_result(req, req->sq->ctrl->kato);
		break;
	default:
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		break;
	}

	nvmet_req_complete(req, status);
}
/*
 * Execute a Get Features command.
 *
 * Reports the Volatile Write Cache, Number of Queues and Keep Alive Timer
 * features; any other feature identifier is rejected with Invalid Field.
 */
static void nvmet_execute_get_features(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]);
	u16 status = 0;

	/*
	 * The feature identifier occupies the low 8 bits of CDW10.  Masking
	 * with 0xf would let unrelated identifiers alias the ones below.
	 */
	switch (cdw10 & 0xff) {
	/*
	 * These features are mandatory in the spec, but we don't
	 * have a useful way to implement them.  We'll eventually
	 * need to come up with some fake values for these.
	 */
#if 0
	case NVME_FEAT_ARBITRATION:
		break;
	case NVME_FEAT_POWER_MGMT:
		break;
	case NVME_FEAT_TEMP_THRESH:
		break;
	case NVME_FEAT_ERR_RECOVERY:
		break;
	case NVME_FEAT_IRQ_COALESCE:
		break;
	case NVME_FEAT_IRQ_CONFIG:
		break;
	case NVME_FEAT_WRITE_ATOMIC:
		break;
	case NVME_FEAT_ASYNC_EVENT:
		break;
#endif
	case NVME_FEAT_VOLATILE_WC:
		nvmet_set_result(req, 1);
		break;
	case NVME_FEAT_NUM_QUEUES:
		nvmet_set_result(req,
			(subsys->max_qid-1) | ((subsys->max_qid-1) << 16));
		break;
	case NVME_FEAT_KATO:
		/* kato is kept in seconds, the host expects milliseconds */
		nvmet_set_result(req, req->sq->ctrl->kato * 1000);
		break;
	default:
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		break;
	}

	nvmet_req_complete(req, status);
}
static void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
mutex_lock(&ctrl->lock);
if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
mutex_unlock(&ctrl->lock);
nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_SC_DNR);
return;
}
ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
mutex_unlock(&ctrl->lock);
schedule_work(&ctrl->async_event_work);
}
/*
 * Execute a Keep Alive command: re-arm the controller's keep-alive delayed
 * work for another kato seconds and complete the request successfully.
 */
static void nvmet_execute_keep_alive(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	unsigned long timeout;

	pr_debug("ctrl %d update keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	timeout = ctrl->kato * HZ;
	mod_delayed_work(system_wq, &ctrl->ka_work, timeout);

	nvmet_req_complete(req, 0);
}
int nvmet_parse_admin_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
req->ns = NULL;
if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
pr_err("nvmet: got admin cmd %d while CC.EN == 0\n",
cmd->common.opcode);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
pr_err("nvmet: got admin cmd %d while CSTS.RDY == 0\n",
cmd->common.opcode);
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
}
switch (cmd->common.opcode) {
case nvme_admin_get_log_page:
req->data_len = nvmet_get_log_page_len(cmd);
switch (cmd->get_log_page.lid) {
case 0x01:
case 0x02:
case 0x03:
req->execute = nvmet_execute_get_log_page;
return 0;
}
break;
case nvme_admin_identify:
req->data_len = 4096;
switch (le32_to_cpu(cmd->identify.cns)) {
case 0x00:
req->execute = nvmet_execute_identify_ns;
return 0;
case 0x01:
req->execute = nvmet_execute_identify_ctrl;
return 0;
case 0x02:
req->execute = nvmet_execute_identify_nslist;
return 0;
}
break;
case nvme_admin_abort_cmd:
req->execute = nvmet_execute_abort;
req->data_len = 0;
return 0;
case nvme_admin_set_features:
req->execute = nvmet_execute_set_features;
req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_get_features;
req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
req->data_len = 0;
return 0;
}
pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
此差异已折叠。
此差异已折叠。
/*
* Discovery service for the NVMe over Fabrics target.
* Copyright (C) 2016 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/slab.h>
#include <generated/utsrelease.h>
#include "nvmet.h"
/* The discovery subsystem; allocated by nvmet_init_discovery(). */
struct nvmet_subsys *nvmet_disc_subsys;
/*
 * Generation counter reported in the discovery log page header; bumped
 * whenever a referral is enabled or disabled.
 */
u64 nvmet_genctr;
/*
 * Link the referral @port into the referrals list of @parent, mark it
 * enabled and bump the discovery generation counter.  Does nothing if the
 * port is already linked into a list.
 */
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port)
{
	down_write(&nvmet_config_sem);
	if (!list_empty(&port->entry))
		goto unlock;

	list_add_tail(&port->entry, &parent->referrals);
	port->enabled = true;
	nvmet_genctr++;
unlock:
	up_write(&nvmet_config_sem);
}
/*
 * Mark the referral @port disabled, unlink it from the list it is on and
 * bump the discovery generation counter.  Does nothing if the port is not
 * linked into a list.
 */
void nvmet_referral_disable(struct nvmet_port *port)
{
	down_write(&nvmet_config_sem);
	if (list_empty(&port->entry))
		goto unlock;

	port->enabled = false;
	list_del_init(&port->entry);
	nvmet_genctr++;
unlock:
	up_write(&nvmet_config_sem);
}
/*
 * Fill discovery log page entry number @numrec of @hdr with the address
 * information of @port, advertising the subsystem @subsys_nqn of NQN type
 * @type.
 */
static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
		struct nvmet_port *port, char *subsys_nqn, u8 type, u32 numrec)
{
	struct nvmf_disc_rsp_page_entry *e = &hdr->entries[numrec];

	e->trtype = port->disc_addr.trtype;
	e->adrfam = port->disc_addr.adrfam;
	e->treq = port->disc_addr.treq;
	e->portid = port->disc_addr.portid;
	/* we support only dynamic controllers */
	e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
	e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
	e->nqntype = type;
	memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
	memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
	memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
	/*
	 * subsys_nqn is a NUL-terminated string that is usually much shorter
	 * than NVMF_NQN_SIZE.  A fixed-size memcpy() would read past its end,
	 * so copy at most the string itself; strncpy() zero-fills the rest.
	 */
	strncpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
}
/*
 * Execute a Get Log Page command for the discovery log page.
 *
 * One entry is generated for every subsystem on the request's port that the
 * connecting host is allowed to see, followed by one entry per referral.
 * Entries that do not fit into the length requested by the host are still
 * counted in numrec but are not written out.
 */
static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
{
	const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmf_disc_rsp_page_hdr *hdr;
	size_t data_len = nvmet_get_log_page_len(req->cmd);
	size_t alloc_len = max(data_len, sizeof(*hdr));
	int residual_len = data_len - sizeof(*hdr);
	struct nvmet_subsys_link *link;
	struct nvmet_port *referral;
	u32 numrec = 0;
	u16 status;

	/*
	 * Make sure we're passing at least a buffer of response header size.
	 * If host provided data len is less than the header size, only the
	 * number of bytes requested by host will be sent to host.
	 */
	hdr = kzalloc(alloc_len, GFP_KERNEL);
	if (!hdr) {
		nvmet_req_complete(req, NVME_SC_INTERNAL);
		return;
	}

	down_read(&nvmet_config_sem);

	list_for_each_entry(link, &req->port->subsystems, entry) {
		if (nvmet_host_allowed(req, link->subsys, ctrl->hostnqn)) {
			if (residual_len >= entry_size) {
				nvmet_format_discovery_entry(hdr, req->port,
						link->subsys->subsysnqn,
						NVME_NQN_NVME, numrec);
				residual_len -= entry_size;
			}
			numrec++;
		}
	}

	list_for_each_entry(referral, &req->port->referrals, entry) {
		if (residual_len >= entry_size) {
			nvmet_format_discovery_entry(hdr, referral,
					NVME_DISC_SUBSYS_NAME,
					NVME_NQN_DISC, numrec);
			residual_len -= entry_size;
		}
		numrec++;
	}

	hdr->genctr = cpu_to_le64(nvmet_genctr);
	hdr->numrec = cpu_to_le64(numrec);
	hdr->recfmt = cpu_to_le16(0);

	up_read(&nvmet_config_sem);

	status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
	kfree(hdr);
	nvmet_req_complete(req, status);
}
static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
goto out;
}
memset(id->fr, ' ', sizeof(id->fr));
strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
/* no limit on data transfer sizes for now */
id->mdts = 0;
id->cntlid = cpu_to_le16(ctrl->cntlid);
id->ver = cpu_to_le32(ctrl->subsys->ver);
id->lpa = (1 << 2);
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls)
id->sgls |= cpu_to_le32(1 << 2);
if (ctrl->ops->sqe_inline_size)
id->sgls |= cpu_to_le32(1 << 20);
strcpy(id->subnqn, ctrl->subsys->subsysnqn);
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
kfree(id);
out:
nvmet_req_complete(req, status);
}
/*
 * Parse a command received on a discovery controller: select the execute
 * handler and the expected data length for the request.
 *
 * Only Get Log Page for the discovery log and Identify Controller are
 * supported.  Returns 0 when a handler was installed, or an NVMe status
 * code otherwise.
 */
int nvmet_parse_discovery_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	req->ns = NULL;

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("nvmet: got cmd %d while not ready\n",
				cmd->common.opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}

	/* every path through the switch returns, nothing follows it */
	switch (cmd->common.opcode) {
	case nvme_admin_get_log_page:
		req->data_len = nvmet_get_log_page_len(cmd);

		switch (cmd->get_log_page.lid) {
		case NVME_LOG_DISC:
			req->execute = nvmet_execute_get_disc_log_page;
			return 0;
		default:
			pr_err("nvmet: unsupported get_log_page lid %d\n",
				cmd->get_log_page.lid);
			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
	case nvme_admin_identify:
		req->data_len = 4096;
		switch (le32_to_cpu(cmd->identify.cns)) {
		case 0x01:
			req->execute =
				nvmet_execute_identify_disc_ctrl;
			return 0;
		default:
			pr_err("nvmet: unsupported identify cns %d\n",
				le32_to_cpu(cmd->identify.cns));
			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
	default:
		pr_err("nvmet: unsupported cmd %d\n",
				cmd->common.opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}
/*
 * Allocate the discovery subsystem at module init time.
 *
 * Returns 0 on success or -ENOMEM if the subsystem could not be allocated.
 */
int __init nvmet_init_discovery(void)
{
	struct nvmet_subsys *subsys;

	subsys = nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC);
	nvmet_disc_subsys = subsys;

	return subsys ? 0 : -ENOMEM;
}
/* Drop the reference on the discovery subsystem held in nvmet_disc_subsys. */
void nvmet_exit_discovery(void)
{
nvmet_subsys_put(nvmet_disc_subsys);
}
/*
* NVMe Fabrics command implementation.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include "nvmet.h"
/*
 * Execute a Property Set fabrics command.
 *
 * Only a write to the CC register with attrib bit 0 clear is accepted;
 * anything else is rejected with Invalid Field.
 */
static void nvmet_execute_prop_set(struct nvmet_req *req)
{
	u16 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	u64 value;

	if (req->cmd->prop_set.attrib & 1)
		goto complete;

	value = le64_to_cpu(req->cmd->prop_set.value);

	if (le32_to_cpu(req->cmd->prop_set.offset) == NVME_REG_CC) {
		nvmet_update_cc(req->sq->ctrl, value);
		status = 0;
	}

complete:
	nvmet_req_complete(req, status);
}
static void nvmet_execute_prop_get(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
u16 status = 0;
u64 val = 0;
if (req->cmd->prop_get.attrib & 1) {
switch (le32_to_cpu(req->cmd->prop_get.offset)) {
case NVME_REG_CAP:
val = ctrl->cap;
break;
default:
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
} else {
switch (le32_to_cpu(req->cmd->prop_get.offset)) {
case NVME_REG_VS:
val = ctrl->subsys->ver;
break;
case NVME_REG_CC:
val = ctrl->cc;
break;
case NVME_REG_CSTS:
val = ctrl->csts;
break;
default:
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
}
req->rsp->result64 = cpu_to_le64(val);
nvmet_req_complete(req, status);
}
/*
 * Parse a fabrics command received on a connected queue: select the
 * Property Set or Property Get handler.
 *
 * Returns 0 when a handler was installed, or Invalid Opcode for any other
 * fabrics command type.
 */
int nvmet_parse_fabrics_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	req->ns = NULL;

	switch (cmd->fabrics.fctype) {
	case nvme_fabrics_type_property_set:
		req->execute = nvmet_execute_prop_set;
		break;
	case nvme_fabrics_type_property_get:
		req->execute = nvmet_execute_prop_get;
		break;
	default:
		pr_err("received unknown capsule type 0x%x\n",
			cmd->fabrics.fctype);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}

	/* neither property command transfers data */
	req->data_len = 0;
	return 0;
}
/*
 * Bind the submission queue of @req to @ctrl and set up the completion and
 * submission queue with the queue ID and size from the connect command.
 *
 * Returns 0 on success, or Connect Controller Busy if the submission queue
 * already has a controller attached.
 */
static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
{
	struct nvmf_connect_command *connect = &req->cmd->connect;
	u16 qid = le16_to_cpu(connect->qid);
	u16 sqsize = le16_to_cpu(connect->sqsize);

	/* atomically claim the queue; a non-NULL old value means it's taken */
	if (cmpxchg(&req->sq->ctrl, NULL, ctrl)) {
		pr_warn("queue already connected!\n");
		return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
	}

	nvmet_cq_setup(ctrl, req->cq, qid, sqsize);
	nvmet_sq_setup(ctrl, req->sq, qid, sqsize);
	return 0;
}
/*
 * Execute a Connect command for the admin queue (qid 0): validate the
 * connect parameters, allocate a new controller for the requested
 * subsystem/host pair and install the admin queue on it.
 *
 * On success the controller ID is returned to the host in the completion
 * result.
 */
static void nvmet_execute_admin_connect(struct nvmet_req *req)
{
	struct nvmf_connect_command *c = &req->cmd->connect;
	struct nvmf_connect_data *d;
	struct nvmet_ctrl *ctrl = NULL;
	u16 status = 0;

	d = kmap(sg_page(req->sg)) + req->sg->offset;

	/* zero out initial completion result, assign values as needed */
	req->rsp->result = 0;

	if (c->recfmt != 0) {
		pr_warn("invalid connect version (%d).\n",
			le16_to_cpu(c->recfmt));
		status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
		goto out;
	}

	/* only dynamically assigned controller IDs (0xffff) are accepted */
	if (unlikely(d->cntlid != cpu_to_le16(0xffff))) {
		/* cntlid is little endian on the wire: convert before logging */
		pr_warn("connect attempt for invalid controller ID %#x\n",
			le16_to_cpu(d->cntlid));
		status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
		req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid);
		goto out;
	}

	status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
			le32_to_cpu(c->kato), &ctrl);
	if (status)
		goto out;

	status = nvmet_install_queue(ctrl, req);
	if (status) {
		nvmet_ctrl_put(ctrl);
		goto out;
	}

	pr_info("creating controller %d for NQN %s.\n",
			ctrl->cntlid, ctrl->hostnqn);
	req->rsp->result16 = cpu_to_le16(ctrl->cntlid);

out:
	kunmap(sg_page(req->sg));
	nvmet_req_complete(req, status);
}
/*
 * Execute a Connect command for an I/O queue: look up the existing
 * controller named in the connect data, validate the queue ID and install
 * the queue on that controller.
 */
static void nvmet_execute_io_connect(struct nvmet_req *req)
{
	struct nvmf_connect_command *connect = &req->cmd->connect;
	struct nvmf_connect_data *data;
	struct nvmet_ctrl *ctrl = NULL;
	u16 qid = le16_to_cpu(connect->qid);
	u16 status = 0;

	data = kmap(sg_page(req->sg)) + req->sg->offset;

	/* zero out initial completion result, assign values as needed */
	req->rsp->result = 0;

	if (connect->recfmt != 0) {
		pr_warn("invalid connect version (%d).\n",
			le16_to_cpu(connect->recfmt));
		status = NVME_SC_CONNECT_FORMAT | NVME_SC_DNR;
		goto out;
	}

	status = nvmet_ctrl_find_get(data->subsysnqn, data->hostnqn,
			le16_to_cpu(data->cntlid),
			req, &ctrl);
	if (status)
		goto out;

	if (unlikely(qid > ctrl->subsys->max_qid)) {
		pr_warn("invalid queue id (%d)\n", qid);
		status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
		req->rsp->result = IPO_IATTR_CONNECT_SQE(qid);
		goto out_ctrl_put;
	}

	status = nvmet_install_queue(ctrl, req);
	if (status) {
		/* pass back cntlid that had the issue of installing queue */
		req->rsp->result16 = cpu_to_le16(ctrl->cntlid);
		goto out_ctrl_put;
	}

	pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid);
	goto out;

out_ctrl_put:
	/* error after the lookup succeeded: drop the controller reference */
	nvmet_ctrl_put(ctrl);
out:
	kunmap(sg_page(req->sg));
	nvmet_req_complete(req, status);
}
/*
 * Parse a command received on a queue that is not connected yet.  Only the
 * fabrics Connect command is valid here; qid 0 selects the admin connect
 * handler, any other qid the I/O connect handler.
 *
 * Returns 0 when a handler was installed, or Invalid Opcode otherwise.
 */
int nvmet_parse_connect_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	req->ns = NULL;

	if (req->cmd->common.opcode != nvme_fabrics_command) {
		pr_err("invalid command 0x%x on unconnected queue.\n",
			cmd->fabrics.opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
	if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
		pr_err("invalid capsule type 0x%x on unconnected queue.\n",
			cmd->fabrics.fctype);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}

	req->data_len = sizeof(struct nvmf_connect_data);
	req->execute = cmd->connect.qid == 0 ?
			nvmet_execute_admin_connect :
			nvmet_execute_io_connect;
	return 0;
}
/*
* NVMe I/O command implementation.
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/module.h>
#include "nvmet.h"
static void nvmet_bio_done(struct bio *bio)
{
struct nvmet_req *req = bio->bi_private;
nvmet_req_complete(req,
bio->bi_error ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
if (bio != &req->inline_bio)
bio_put(bio);
}
static inline u32 nvmet_rw_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
/*
 * Initialize the bio embedded in the request and point it at the request's
 * embedded bio_vec array of NVMET_MAX_INLINE_BIOVEC entries.
 */
static void nvmet_inline_bio_init(struct nvmet_req *req)
{
	struct bio *inline_bio = &req->inline_bio;

	bio_init(inline_bio);
	inline_bio->bi_io_vec = req->inline_bvec;
	inline_bio->bi_max_vecs = NVMET_MAX_INLINE_BIOVEC;
}
/*
 * Execute an NVMe read or write command by mapping the request's
 * scatterlist onto one or more bios aimed at the namespace's block device.
 *
 * The first bio is the request's embedded inline bio, whose end_io handler
 * (nvmet_bio_done) completes the request.
 */
static void nvmet_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
struct scatterlist *sg;
struct bio *bio;
sector_t sector;
blk_qc_t cookie;
int op, op_flags = 0, i;
/* nothing to transfer: complete successfully right away */
if (!req->sg_cnt) {
nvmet_req_complete(req, 0);
return;
}
if (req->cmd->rw.opcode == nvme_cmd_write) {
op = REQ_OP_WRITE;
/* honour the Force Unit Access bit of the command */
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
op_flags |= REQ_FUA;
} else {
op = REQ_OP_READ;
}
/* convert the starting LBA into 512-byte sectors */
sector = le64_to_cpu(req->cmd->rw.slba);
sector <<= (req->ns->blksize_shift - 9);
nvmet_inline_bio_init(req);
bio = &req->inline_bio;
bio->bi_bdev = req->ns->bdev;
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
bio_set_op_attrs(bio, op, op_flags);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
/*
 * If the current bio cannot take the whole segment, allocate a new
 * bio starting at the current sector, chain it to the previous one,
 * submit the previous one and retry the segment on the new bio.
 */
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio->bi_bdev = req->ns->bdev;
bio->bi_iter.bi_sector = sector;
bio_set_op_attrs(bio, op, op_flags);
bio_chain(bio, prev);
cookie = submit_bio(prev);
}
sector += sg->length >> 9;
/* sg_cnt tracks the segments still to be added, sizing later bios */
sg_cnt--;
}
/* submit the last (or only) bio and poll the queue with its cookie */
cookie = submit_bio(bio);
blk_poll(bdev_get_queue(req->ns->bdev), cookie);
}
/*
 * Execute an NVMe flush command by submitting the request's inline bio as
 * a flush write to the namespace's block device.  The request is completed
 * from nvmet_bio_done().
 */
static void nvmet_execute_flush(struct nvmet_req *req)
{
	struct bio *flush_bio = &req->inline_bio;

	nvmet_inline_bio_init(req);

	flush_bio->bi_bdev = req->ns->bdev;
	flush_bio->bi_end_io = nvmet_bio_done;
	flush_bio->bi_private = req;
	bio_set_op_attrs(flush_bio, REQ_OP_WRITE, WRITE_FLUSH);

	submit_bio(flush_bio);
}
/*
 * Queue a discard for one DSM @range of namespace @ns onto the bio chain
 * in @bio.  The starting LBA and block count are converted to 512-byte
 * sectors first.
 *
 * Returns 0 on success or an internal error status if the block layer
 * refused the discard.
 */
static u16 nvmet_discard_range(struct nvmet_ns *ns,
		struct nvme_dsm_range *range, struct bio **bio)
{
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, 0, bio);

	return ret ? NVME_SC_INTERNAL | NVME_SC_DNR : 0;
}
/*
 * Execute the deallocate part of a Dataset Management command: read each
 * range descriptor from the request's data and queue a discard for it.
 *
 * If a discard bio chain was built the request is completed from
 * nvmet_bio_done(); after an error the chain is ended with -EIO instead of
 * being submitted.  Without any bio the request is completed directly.
 */
static void nvmet_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	/* dsm.nr is 0's based, hence the inclusive bound */
	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (!status)
			status = nvmet_discard_range(req->ns, &range, &bio);
		if (status)
			break;
	}

	if (!bio) {
		nvmet_req_complete(req, status);
		return;
	}

	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;

	if (!status) {
		submit_bio(bio);
		return;
	}

	bio->bi_error = -EIO;
	bio_endio(bio);
}
/*
 * Execute a Dataset Management command.  Only an attributes value of
 * exactly NVME_DSMGMT_AD (deallocate) does any work; every other value is
 * completed successfully without action.
 */
static void nvmet_execute_dsm(struct nvmet_req *req)
{
	if (le32_to_cpu(req->cmd->dsm.attributes) == NVME_DSMGMT_AD) {
		nvmet_execute_discard(req);
		return;
	}

	/* NVME_DSMGMT_IDR, NVME_DSMGMT_IDW and the rest: not supported yet */
	nvmet_req_complete(req, 0);
}
/*
 * Parse an I/O command: look up the target namespace and select the execute
 * handler and the expected data length for the request.
 *
 * Returns 0 when a handler was installed, or an NVMe status code when the
 * controller is not enabled/ready, the namespace does not exist or the
 * opcode is not supported.
 */
int nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("nvmet: got io cmd %d while CC.EN == 0\n",
				cmd->common.opcode);
		req->ns = NULL;
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("nvmet: got io cmd %d while CSTS.RDY == 0\n",
				cmd->common.opcode);
		req->ns = NULL;
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (!req->ns)
		return NVME_SC_INVALID_NS | NVME_SC_DNR;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_execute_rw;
		req->data_len = nvmet_rw_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_execute_flush;
		req->data_len = 0;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_execute_dsm;
		/*
		 * The number of ranges is a 0's based value and the discard
		 * handler reads nr + 1 descriptors, so the transfer must
		 * cover all of them.
		 */
		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
			sizeof(struct nvme_dsm_range);
		return 0;
	default:
		pr_err("nvmet: unhandled cmd %d\n", cmd->common.opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}
/*
* Copyright (c) 2015-2016 HGST, a Western Digital Company.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _NVMET_H
#define _NVMET_H
#include <linux/dma-mapping.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/kref.h>
#include <linux/percpu-refcount.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/nvme.h>
#include <linux/configfs.h>
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
/* Number of Asynchronous Event Request commands a controller can hold. */
#define NVMET_ASYNC_EVENTS 4
/* Number of error log entries advertised in Identify Controller (ELPE). */
#define NVMET_ERROR_LOG_SLOTS 128
/*
 * Helper macros for the completion result when the NVMe error is
 * NVME_SC_CONNECT_INVALID_PARAM.  The result carries the byte offset of the
 * offending field.  For the connect data variant, bit 16 (the IATTR bit) is
 * set as well, which means the offset refers to the data section of
 * connect() rather than to the connect command itself.
 */
#define IPO_IATTR_CONNECT_DATA(x) \
(cpu_to_le32((1 << 16) | (offsetof(struct nvmf_connect_data, x))))
#define IPO_IATTR_CONNECT_SQE(x) \
(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
/* A namespace exported by a subsystem, backed by a block device. */
struct nvmet_ns {
/* entry in the owning subsystem's namespaces list */
struct list_head dev_link;
struct percpu_ref ref;
/* block device the namespace I/O is submitted to */
struct block_device *bdev;
/* namespace ID */
u32 nsid;
/* log2 of the block size; reported as the LBA data size */
u32 blksize_shift;
/* size in bytes (shifted by blksize_shift to obtain the block count) */
loff_t size;
/* namespace GUID copied into the Identify Namespace data */
u8 nguid[16];
struct nvmet_subsys *subsys;
const char *device_path;
struct config_group device_group;
/* configfs group; see to_nvmet_ns() */
struct config_group group;
struct completion disable_done;
};
/* Map a configfs item back to the nvmet_ns that embeds its group. */
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
{
	struct config_group *grp = to_config_group(item);

	return container_of(grp, struct nvmet_ns, group);
}
static inline bool nvmet_ns_enabled(struct nvmet_ns *ns)
{
return !list_empty_careful(&ns->dev_link);
}
/* Completion queue state. */
struct nvmet_cq {
/* queue ID */
u16 qid;
/* queue size */
u16 size;
};
/* Submission queue state. */
struct nvmet_sq {
/* owning controller; NULL until a connect command installs the queue */
struct nvmet_ctrl *ctrl;
struct percpu_ref ref;
/* queue ID */
u16 qid;
/* queue size */
u16 size;
struct completion free_done;
};
/**
 * struct nvmet_port - Common structure to keep port
 * information for the target.
 * @entry: List head for holding a list of these elements.
 * @disc_addr: Address information is stored in a format defined
 * for a discovery log page entry.
 * @group: ConfigFS group for this element's folder.
 * @subsys_group: ConfigFS group (presumably for the subsystems folder).
 * @subsystems: List of nvmet_subsys_link entries for the subsystems
 * reachable through this port.
 * @referrals_group: ConfigFS group (presumably for the referrals folder).
 * @referrals: List of referral ports, linked through their @entry.
 * @priv: Private data for the transport.
 * @enabled: Set when a referral is enabled and cleared when it is disabled.
 */
struct nvmet_port {
struct list_head entry;
struct nvmf_disc_rsp_page_entry disc_addr;
struct config_group group;
struct config_group subsys_group;
struct list_head subsystems;
struct config_group referrals_group;
struct list_head referrals;
void *priv;
bool enabled;
};
/*
 * Translate a configfs item into the nvmet_port that embeds it through the
 * port's 'group' member.
 */
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
{
	struct config_group *grp = to_config_group(item);

	return container_of(grp, struct nvmet_port, group);
}
/*
 * Per-controller state.
 *
 * The structure is reference counted through 'ref' (a kref) and is linked
 * into a list through 'subsys_entry'.  'cqs' and 'sqs' are arrays of
 * pointers to queue descriptors.
 *
 * NOTE(review): the register-like fields (cap, cc, csts) and 'kato' are
 * presumably the NVMe controller properties and the keep-alive timeout of the
 * same names -- confirm units and semantics in the implementation.
 */
struct nvmet_ctrl {
/* subsystem this controller belongs to */
struct nvmet_subsys *subsys;
struct nvmet_cq **cqs;
struct nvmet_sq **sqs;
struct mutex lock;
u64 cap;
u32 cc;
u32 csts;
u16 cntlid;
u32 kato;
/* fixed-size array with NVMET_ASYNC_EVENTS request slots */
struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
/* NOTE(review): presumably the number of used slots above -- confirm */
unsigned int nr_async_event_cmds;
struct list_head async_events;
struct work_struct async_event_work;
struct list_head subsys_entry;
struct kref ref;
struct delayed_work ka_work;
struct work_struct fatal_err_work;
/* transport operations table */
struct nvmet_fabrics_ops *ops;
/* NQN strings stored in fixed NVMF_NQN_FIELD_LEN-byte buffers */
char subsysnqn[NVMF_NQN_FIELD_LEN];
char hostnqn[NVMF_NQN_FIELD_LEN];
};
/*
 * Per-subsystem state.
 *
 * The structure is reference counted through 'ref' (a kref).  Two of the
 * embedded configfs groups have helpers in this header: to_subsys() maps
 * 'group' back to the subsystem and namespaces_to_subsys() does the same for
 * 'namespaces_group'.
 *
 * NOTE(review): which lock protects which list is not visible from this
 * header -- confirm before relying on it.
 */
struct nvmet_subsys {
enum nvme_subsys_type type;
struct mutex lock;
struct kref ref;
struct list_head namespaces;
unsigned int max_nsid;
struct list_head ctrls;
/* id allocator; NOTE(review): presumably hands out cntlid values -- confirm */
struct ida cntlid_ida;
struct list_head hosts;
bool allow_any_host;
u16 max_qid;
u64 ver;
/* subsystem NQN, held by pointer (unlike the fixed buffers in nvmet_ctrl) */
char *subsysnqn;
struct config_group group;
struct config_group namespaces_group;
struct config_group allowed_hosts_group;
};
/*
 * Translate a configfs item into the nvmet_subsys that embeds it through the
 * subsystem's top-level 'group' member.
 */
static inline struct nvmet_subsys *to_subsys(struct config_item *item)
{
	struct config_group *grp = to_config_group(item);

	return container_of(grp, struct nvmet_subsys, group);
}
/*
 * Same idea as to_subsys(), but for an item that is the subsystem's
 * 'namespaces_group' rather than its top-level group.
 */
static inline struct nvmet_subsys *namespaces_to_subsys(
		struct config_item *item)
{
	struct config_group *grp = to_config_group(item);

	return container_of(grp, struct nvmet_subsys, namespaces_group);
}
/*
 * A host entry.  It consists solely of a configfs group; the host's name is
 * the name of that group's config item (see nvmet_host_name()).
 */
struct nvmet_host {
struct config_group group;
};
/*
 * Translate a configfs item into the nvmet_host that embeds it through the
 * host's 'group' member.
 */
static inline struct nvmet_host *to_host(struct config_item *item)
{
	struct config_group *grp = to_config_group(item);

	return container_of(grp, struct nvmet_host, group);
}
/*
 * Return the name of @host, which is the name of the config item inside the
 * host's configfs group.
 */
static inline char *nvmet_host_name(struct nvmet_host *host)
{
	struct config_item *item = &host->group.cg_item;

	return config_item_name(item);
}
/*
 * List node that refers to a host by pointer.
 * NOTE(review): presumably linked into nvmet_subsys::hosts -- confirm.
 */
struct nvmet_host_link {
struct list_head entry;
struct nvmet_host *host;
};
/*
 * List node that refers to a subsystem by pointer.
 * NOTE(review): presumably linked into nvmet_port::subsystems -- confirm.
 */
struct nvmet_subsys_link {
struct list_head entry;
struct nvmet_subsys *subsys;
};
/* Forward declaration: the callbacks below take a request pointer. */
struct nvmet_req;
/*
 * Transport operations table.
 *
 * A table of this type is what nvmet_register_transport() and
 * nvmet_unregister_transport() take, what nvmet_req_init() is handed for each
 * request, and what struct nvmet_ctrl and struct nvmet_req point to through
 * their 'ops' members.
 *
 * NOTE(review): the meaning of type, sqe_inline_size and msdbd is not visible
 * from this header -- confirm against the core code and the transports.
 */
struct nvmet_fabrics_ops {
struct module *owner;
unsigned int type;
unsigned int sqe_inline_size;
unsigned int msdbd;
/* single-bit flag */
bool has_keyed_sgls : 1;
/* callbacks implemented by the transport */
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
};
/* Number of bio_vec entries embedded in every struct nvmet_req. */
#define NVMET_MAX_INLINE_BIOVEC 8
/*
 * One in-flight request.
 *
 * 'cmd' and 'rsp' point at the command and its completion entry; the
 * nvmet_set_status() and nvmet_set_result() helpers write into 'rsp', and
 * nvmet_data_dir() inspects 'cmd'.  The structure embeds a bio plus
 * NVMET_MAX_INLINE_BIOVEC bio_vec entries.
 *
 * NOTE(review): who owns 'cmd', 'rsp' and 'sg', and when 'execute' is called,
 * is not visible from this header -- confirm in the core and the transports.
 */
struct nvmet_req {
struct nvme_command *cmd;
struct nvme_completion *rsp;
struct nvmet_sq *sq;
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
struct bio inline_bio;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
/* NOTE(review): presumably the number of entries in 'sg' -- confirm */
int sg_cnt;
size_t data_len;
struct nvmet_port *port;
/* per-request handler taking the request itself */
void (*execute)(struct nvmet_req *req);
struct nvmet_fabrics_ops *ops;
};
/*
 * Store @status in the completion entry of @req.  The value is shifted left
 * by one bit and converted to little-endian before being written, so bit 0 of
 * the stored field is always left clear by this helper.
 */
static inline void nvmet_set_status(struct nvmet_req *req, u16 status)
{
	struct nvme_completion *cqe = req->rsp;

	cqe->status = cpu_to_le16(status << 1);
}
/*
 * Store @result, converted to little-endian, in the result field of the
 * completion entry of @req.
 */
static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
{
	struct nvme_completion *cqe = req->rsp;

	cqe->result = cpu_to_le32(result);
}
/*
 * Pick the DMA direction for the data of @req.
 *
 * The target sits on the other end of the transfer: an NVMe write command
 * means data arrives here (a DMA read for us, DMA_FROM_DEVICE), and anything
 * else means data leaves (DMA_TO_DEVICE).
 */
static inline enum dma_data_direction
nvmet_data_dir(struct nvmet_req *req)
{
	if (nvme_is_write(req->cmd))
		return DMA_FROM_DEVICE;

	return DMA_TO_DEVICE;
}
/*
 * One queued asynchronous event: a list node plus three one-byte values
 * (event type, event info and log page).
 * NOTE(review): presumably linked into nvmet_ctrl::async_events -- confirm.
 */
struct nvmet_async_event {
struct list_head entry;
u8 event_type;
u8 event_info;
u8 log_page;
};
/*
 * Command parsers, one per command class named in the function.  Each takes
 * the request and returns an int.
 * NOTE(review): the return convention is not visible from this header.
 */
int nvmet_parse_connect_cmd(struct nvmet_req *req);
int nvmet_parse_io_cmd(struct nvmet_req *req);
int nvmet_parse_admin_cmd(struct nvmet_req *req);
int nvmet_parse_discovery_cmd(struct nvmet_req *req);
int nvmet_parse_fabrics_cmd(struct nvmet_req *req);
/*
 * Request life cycle: initialise a request against a CQ/SQ pair and an ops
 * table (returns bool), and complete it with a 16-bit status.
 */
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
/* Queue set-up and tear-down; the setup calls take the queue id and size. */
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
u16 size);
void nvmet_sq_destroy(struct nvmet_sq *sq);
int nvmet_sq_init(struct nvmet_sq *sq);
/*
 * Controller handling.  nvmet_alloc_ctrl() and nvmet_ctrl_find_get() return a
 * u16 and pass the controller back through their last (out-pointer) argument.
 * NOTE(review): the u16 is presumably an NVMe status code -- confirm.
 */
void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
struct nvmet_req *req, struct nvmet_ctrl **ret);
void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
/* Subsystem allocation and reference drop. */
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
enum nvme_subsys_type type);
void nvmet_subsys_put(struct nvmet_subsys *subsys);
/*
 * Namespace handling.  Note that nvmet_find_namespace() takes the namespace
 * id as a __le32, whereas nvmet_ns_alloc() takes it as a u32.
 */
struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid);
void nvmet_put_namespace(struct nvmet_ns *ns);
int nvmet_ns_enable(struct nvmet_ns *ns);
void nvmet_ns_disable(struct nvmet_ns *ns);
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_ns_free(struct nvmet_ns *ns);
/* Transport registration, keyed by the transport's ops table. */
int nvmet_register_transport(struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops);
/* Port and referral enable/disable. */
int nvmet_enable_port(struct nvmet_port *port);
void nvmet_disable_port(struct nvmet_port *port);
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port);
void nvmet_referral_disable(struct nvmet_port *port);
/*
 * Copy @len bytes between a flat buffer and the request's SGL, starting at
 * offset @off.  Both return a u16.
 * NOTE(review): presumably an NVMe status code, and @off presumably counts
 * bytes into the SGL -- confirm.
 */
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
size_t len);
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
size_t len);
/* Returns a 32-bit length derived from a command. */
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
/*
 * Target-wide limits.
 * NOTE(review): none of these are used inside this header; units and exact
 * meaning (e.g. whether NVMET_DISC_KATO is in seconds, and what granularity
 * NVMET_KAS expresses) should be confirmed at the use sites.
 */
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 64
/* defined as an alias of NVMET_QUEUE_SIZE */
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
#define NVMET_KAS 10
#define NVMET_DISC_KATO 120
/* Set-up and tear-down of the configfs and discovery parts. */
int __init nvmet_init_configfs(void);
void __exit nvmet_exit_configfs(void);
int __init nvmet_init_discovery(void);
void nvmet_exit_discovery(void);
/*
 * Globals defined elsewhere and only declared here.
 * NOTE(review): what nvmet_config_sem protects and when nvmet_genctr changes
 * is not visible from this header -- confirm in the defining file.
 */
extern struct nvmet_subsys *nvmet_disc_subsys;
extern u64 nvmet_genctr;
extern struct rw_semaphore nvmet_config_sem;
/* Check a host NQN against a subsystem for a given request; returns bool. */
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);
#endif /* _NVMET_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册