提交 a479335b 编写于 作者: D Dmitry Fomichev 提交者: Klaus Jensen

hw/block/nvme: Support Zoned Namespace Command Set

The emulation code has been changed to advertise NVM Command Set when
"zoned" device property is not set (default) and Zoned Namespace
Command Set otherwise.

Define values and structures that are needed to support Zoned
Namespace Command Set (NVMe TP 4053) in PCI NVMe controller emulator.
Define trace events where needed in newly introduced code.

In order to improve scalability, all open, closed and full zones
are organized in separate linked lists. Consequently, almost all
zone operations don't require scanning of the entire zone array
(which potentially can be quite large) - it is only necessary to
enumerate one or more zone lists.

Handlers for three new NVMe commands introduced in Zoned Namespace
Command Set specification are added, namely for Zone Management
Receive, Zone Management Send and Zone Append.

Device initialization code has been extended to create a proper
configuration for zoned operation using device properties.

Read/Write command handler is modified to only allow writes at the
write pointer if the namespace is zoned. For Zone Append command,
writes implicitly happen at the write pointer and the starting write
pointer value is returned as the result of the command. Write Zeroes
handler is modified to add zoned checks that are identical to those
done as a part of Write flow.

Subsequent commits in this series add ZDE support and checks for
active and open zone limits.
Signed-off-by: NNiklas Cassel <niklas.cassel@wdc.com>
Signed-off-by: NHans Holmberg <hans.holmberg@wdc.com>
Signed-off-by: NAjay Joshi <ajay.joshi@wdc.com>
Signed-off-by: NChaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: NMatias Bjorling <matias.bjorling@wdc.com>
Signed-off-by: NAravind Ramesh <aravind.ramesh@wdc.com>
Signed-off-by: NShin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: NAdam Manzanares <adam.manzanares@wdc.com>
Signed-off-by: NDmitry Fomichev <dmitry.fomichev@wdc.com>
Reviewed-by: NNiklas Cassel <Niklas.Cassel@wdc.com>
Reviewed-by: NKeith Busch <kbusch@kernel.org>
Signed-off-by: NKlaus Jensen <k.jensen@samsung.com>
上级 e9ba46ee
......@@ -25,6 +25,7 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"
#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"
......@@ -95,6 +96,147 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
return 0;
}
static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
{
uint64_t zone_size, zone_cap;
uint32_t lbasz = ns->blkconf.logical_block_size;
/* Make sure that the values of ZNS properties are sane */
if (ns->params.zone_size_bs) {
zone_size = ns->params.zone_size_bs;
} else {
zone_size = NVME_DEFAULT_ZONE_SIZE;
}
if (ns->params.zone_cap_bs) {
zone_cap = ns->params.zone_cap_bs;
} else {
zone_cap = zone_size;
}
if (zone_cap > zone_size) {
error_setg(errp, "zone capacity %"PRIu64"B exceeds "
"zone size %"PRIu64"B", zone_cap, zone_size);
return -1;
}
if (zone_size < lbasz) {
error_setg(errp, "zone size %"PRIu64"B too small, "
"must be at least %"PRIu32"B", zone_size, lbasz);
return -1;
}
if (zone_cap < lbasz) {
error_setg(errp, "zone capacity %"PRIu64"B too small, "
"must be at least %"PRIu32"B", zone_cap, lbasz);
return -1;
}
/*
* Save the main zone geometry values to avoid
* calculating them later again.
*/
ns->zone_size = zone_size / lbasz;
ns->zone_capacity = zone_cap / lbasz;
ns->num_zones = ns->size / lbasz / ns->zone_size;
return 0;
}
static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
{
uint64_t start = 0, zone_size = ns->zone_size;
uint64_t capacity = ns->num_zones * zone_size;
NvmeZone *zone;
int i;
ns->zone_array = g_new0(NvmeZone, ns->num_zones);
QTAILQ_INIT(&ns->exp_open_zones);
QTAILQ_INIT(&ns->imp_open_zones);
QTAILQ_INIT(&ns->closed_zones);
QTAILQ_INIT(&ns->full_zones);
zone = ns->zone_array;
for (i = 0; i < ns->num_zones; i++, zone++) {
if (start + zone_size > capacity) {
zone_size = capacity - start;
}
zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
zone->d.za = 0;
zone->d.zcap = ns->zone_capacity;
zone->d.zslba = start;
zone->d.wp = start;
zone->w_ptr = start;
start += zone_size;
}
ns->zone_size_log2 = 0;
if (is_power_of_2(ns->zone_size)) {
ns->zone_size_log2 = 63 - clz64(ns->zone_size);
}
}
static void nvme_ns_init_zoned(NvmeCtrl *n, NvmeNamespace *ns, int lba_index)
{
NvmeIdNsZoned *id_ns_z;
nvme_ns_zoned_init_state(ns);
id_ns_z = g_malloc0(sizeof(NvmeIdNsZoned));
/* MAR/MOR are zeroes-based, 0xffffffff means no limit */
id_ns_z->mar = 0xffffffff;
id_ns_z->mor = 0xffffffff;
id_ns_z->zoc = 0;
id_ns_z->ozcs = ns->params.cross_zone_read ? 0x01 : 0x00;
id_ns_z->lbafe[lba_index].zsze = cpu_to_le64(ns->zone_size);
id_ns_z->lbafe[lba_index].zdes = 0;
ns->csi = NVME_CSI_ZONED;
ns->id_ns.nsze = cpu_to_le64(ns->num_zones * ns->zone_size);
ns->id_ns.ncap = ns->id_ns.nsze;
ns->id_ns.nuse = ns->id_ns.ncap;
ns->id_ns_zoned = id_ns_z;
}
static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
{
uint8_t state;
zone->w_ptr = zone->d.wp;
state = nvme_get_zone_state(zone);
if (zone->d.wp != zone->d.zslba) {
if (state != NVME_ZONE_STATE_CLOSED) {
trace_pci_nvme_clear_ns_close(state, zone->d.zslba);
nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
}
QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
} else {
trace_pci_nvme_clear_ns_reset(state, zone->d.zslba);
nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
}
}
/*
* Close all the zones that are currently open.
*/
static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
{
NvmeZone *zone, *next;
QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
nvme_clear_zone(ns, zone);
}
QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
nvme_clear_zone(ns, zone);
}
QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
nvme_clear_zone(ns, zone);
}
}
static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
{
if (!ns->blkconf.blk) {
......@@ -118,6 +260,12 @@ int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
if (nvme_ns_init(ns, errp)) {
return -1;
}
if (ns->params.zoned) {
if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
return -1;
}
nvme_ns_init_zoned(n, ns, 0);
}
if (nvme_register_namespace(n, ns, errp)) {
return -1;
......@@ -134,6 +282,17 @@ void nvme_ns_drain(NvmeNamespace *ns)
void nvme_ns_shutdown(NvmeNamespace *ns)
{
blk_flush(ns->blkconf.blk);
if (ns->params.zoned) {
nvme_zoned_ns_shutdown(ns);
}
}
void nvme_ns_cleanup(NvmeNamespace *ns)
{
if (ns->params.zoned) {
g_free(ns->id_ns_zoned);
g_free(ns->zone_array);
}
}
static void nvme_ns_realize(DeviceState *dev, Error **errp)
......@@ -154,6 +313,13 @@ static Property nvme_ns_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
NVME_DEFAULT_ZONE_SIZE),
DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
0),
DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
params.cross_zone_read, false),
DEFINE_PROP_END_OF_LIST(),
};
......
......@@ -19,9 +19,20 @@
#define NVME_NS(obj) \
OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)
typedef struct NvmeZone {
NvmeZoneDescr d;
uint64_t w_ptr;
QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;
typedef struct NvmeNamespaceParams {
uint32_t nsid;
QemuUUID uuid;
bool zoned;
bool cross_zone_read;
uint64_t zone_size_bs;
uint64_t zone_cap_bs;
} NvmeNamespaceParams;
typedef struct NvmeNamespace {
......@@ -33,6 +44,17 @@ typedef struct NvmeNamespace {
const uint32_t *iocs;
uint8_t csi;
NvmeIdNsZoned *id_ns_zoned;
NvmeZone *zone_array;
QTAILQ_HEAD(, NvmeZone) exp_open_zones;
QTAILQ_HEAD(, NvmeZone) imp_open_zones;
QTAILQ_HEAD(, NvmeZone) closed_zones;
QTAILQ_HEAD(, NvmeZone) full_zones;
uint32_t num_zones;
uint64_t zone_size;
uint64_t zone_capacity;
uint32_t zone_size_log2;
NvmeNamespaceParams params;
struct {
......@@ -74,8 +96,38 @@ static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
typedef struct NvmeCtrl NvmeCtrl;
static inline enum NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
return zone->d.zs >> 4;
}
static inline void nvme_set_zone_state(NvmeZone *zone, enum NvmeZoneState state)
{
zone->d.zs = state << 4;
}
static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
return zone->d.zslba + ns->zone_size;
}
static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
return zone->d.zslba + zone->d.zcap;
}
static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
uint8_t st = nvme_get_zone_state(zone);
return st != NVME_ZONE_STATE_FULL &&
st != NVME_ZONE_STATE_READ_ONLY &&
st != NVME_ZONE_STATE_OFFLINE;
}
int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);
#endif /* NVME_NS_H */
此差异已折叠。
......@@ -6,6 +6,9 @@
#define NVME_MAX_NAMESPACES 256
#define NVME_DEFAULT_ZONE_SIZE (128 * MiB)
#define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
typedef struct NvmeParams {
char *serial;
uint32_t num_queues; /* deprecated since 5.1 */
......@@ -16,6 +19,7 @@ typedef struct NvmeParams {
uint32_t aer_max_queued;
uint8_t mdts;
bool use_intel_id;
uint32_t zasl_bs;
} NvmeParams;
typedef struct NvmeAsyncEvent {
......@@ -149,6 +153,8 @@ typedef struct NvmeCtrl {
QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
int aer_queued;
uint8_t zasl;
NvmeNamespace namespace;
NvmeNamespace *namespaces[NVME_MAX_NAMESPACES];
NvmeSQueue **sq;
......
......@@ -95,6 +95,14 @@ pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded"
pci_nvme_mmio_stopped(void) "cleared controller enable bit"
pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
pci_nvme_clear_ns_close(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Closed state"
pci_nvme_clear_ns_reset(uint32_t state, uint64_t slba) "zone state=%"PRIu32", slba=%"PRIu64" transitioned to Empty state"
# nvme traces for error conditions
pci_nvme_err_mdts(uint16_t cid, size_t len) "cid %"PRIu16" len %zu"
......@@ -113,6 +121,13 @@ pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8""
pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8""
pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64""
pci_nvme_err_invalid_log_page_offset(uint64_t ofs, uint64_t size) "must be <= %"PRIu64", got %"PRIu64""
pci_nvme_err_unaligned_zone_cmd(uint8_t action, uint64_t slba, uint64_t zslba) "unaligned zone op 0x%"PRIx32", got slba=%"PRIu64", zslba=%"PRIu64""
pci_nvme_err_invalid_zone_state_transition(uint8_t action, uint64_t slba, uint8_t attrs) "action=0x%"PRIx8", slba=%"PRIu64", attrs=0x%"PRIx32""
pci_nvme_err_write_not_at_wp(uint64_t slba, uint64_t zone, uint64_t wp) "writing at slba=%"PRIu64", zone=%"PRIu64", but wp=%"PRIu64""
pci_nvme_err_append_not_at_start(uint64_t slba, uint64_t zone) "appending at slba=%"PRIu64", but zone=%"PRIu64""
pci_nvme_err_zone_write_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
pci_nvme_err_zone_read_not_ok(uint64_t slba, uint32_t nlb, uint16_t status) "slba=%"PRIu64", nlb=%"PRIu32", status=0x%"PRIx16""
pci_nvme_err_append_too_large(uint64_t slba, uint32_t nlb, uint8_t zasl) "slba=%"PRIu64", nlb=%"PRIu32", zasl=%"PRIu8""
pci_nvme_err_invalid_iocsci(uint32_t idx) "unsupported command set combination index %"PRIu32""
pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16""
pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16""
......@@ -146,7 +161,9 @@ pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_
pci_nvme_err_startfail_css(uint8_t css) "nvme_start_ctrl failed because invalid command set selected:%u"
pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero"
pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero"
pci_nvme_err_startfail_zasl_too_small(uint32_t zasl, uint32_t pagesz) "nvme_start_ctrl failed because zone append size limit %"PRIu32" is too small, needs to be >= %"PRIu32""
pci_nvme_err_startfail(void) "setting controller enable bit failed"
pci_nvme_err_invalid_mgmt_action(uint8_t action) "action=0x%"PRIx8""
# Traces for undefined behavior
pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册