nvme.h 13.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * QEMU NVM Express
 *
 * Copyright (c) 2012 Intel Corporation
 * Copyright (c) 2021 Minwoo Im
 * Copyright (c) 2021 Samsung Electronics Co., Ltd.
 *
 * Authors:
 *   Keith Busch            <kbusch@kernel.org>
 *   Klaus Jensen           <k.jensen@samsung.com>
 *   Gollu Appalanaidu      <anaidu.gollu@samsung.com>
 *   Dmitry Fomichev        <dmitry.fomichev@wdc.com>
 *   Minwoo Im              <minwoo.im.dev@gmail.com>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

18 19
#ifndef HW_NVME_NVME_H
#define HW_NVME_NVME_H
20

21
#include "qemu/uuid.h"
22
#include "hw/pci/pci.h"
23 24 25
#include "hw/block/block.h"

#include "block/nvme.h"
26

27 28
#define NVME_MAX_CONTROLLERS 32
#define NVME_MAX_NAMESPACES  256
29
#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
30

31 32
QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1);

33 34 35
typedef struct NvmeCtrl NvmeCtrl;
typedef struct NvmeNamespace NvmeNamespace;

36 37 38 39 40 41 42
#define TYPE_NVME_BUS "nvme-bus"
OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)

typedef struct NvmeBus {
    BusState parent_bus;
} NvmeBus;

43 44 45 46 47 48
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
    OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)

typedef struct NvmeSubsystem {
    DeviceState parent_obj;
49
    NvmeBus     bus;
50
    uint8_t     subnqn[256];
51
    char        *serial;
52 53 54 55 56 57 58 59 60 61

    NvmeCtrl      *ctrls[NVME_MAX_CONTROLLERS];
    NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1];

    struct {
        char *nqn;
    } params;
} NvmeSubsystem;

int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp);
62
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n);
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98

static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
                                         uint32_t cntlid)
{
    if (!subsys || cntlid >= NVME_MAX_CONTROLLERS) {
        return NULL;
    }

    return subsys->ctrls[cntlid];
}

static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
                                            uint32_t nsid)
{
    if (!subsys || !nsid || nsid > NVME_MAX_NAMESPACES) {
        return NULL;
    }

    return subsys->namespaces[nsid];
}

#define TYPE_NVME_NS "nvme-ns"
#define NVME_NS(obj) \
    OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS)

typedef struct NvmeZone {
    NvmeZoneDescr   d;
    uint64_t        w_ptr;
    QTAILQ_ENTRY(NvmeZone) entry;
} NvmeZone;

typedef struct NvmeNamespaceParams {
    bool     detached;
    bool     shared;
    uint32_t nsid;
    QemuUUID uuid;
99
    uint64_t eui64;
100
    bool     eui64_default;
101 102 103 104 105

    uint16_t ms;
    uint8_t  mset;
    uint8_t  pi;
    uint8_t  pil;
N
Naveen Nagar 已提交
106
    uint8_t  pif;
107 108 109 110 111 112 113 114 115 116 117 118

    uint16_t mssrl;
    uint32_t mcl;
    uint8_t  msrc;

    bool     zoned;
    bool     cross_zone_read;
    uint64_t zone_size_bs;
    uint64_t zone_cap_bs;
    uint32_t max_active_zones;
    uint32_t max_open_zones;
    uint32_t zd_extension_size;
119 120 121 122

    uint32_t numzrwa;
    uint64_t zrwas;
    uint64_t zrwafg;
123 124 125 126 127 128 129
} NvmeNamespaceParams;

typedef struct NvmeNamespace {
    DeviceState  parent_obj;
    BlockConf    blkconf;
    int32_t      bootindex;
    int64_t      size;
130
    int64_t      moff;
131
    NvmeIdNs     id_ns;
N
Naveen Nagar 已提交
132
    NvmeIdNsNvm  id_ns_nvm;
133
    NvmeLBAF     lbaf;
134
    unsigned int nlbaf;
135
    size_t       lbasz;
136 137 138 139
    const uint32_t *iocs;
    uint8_t      csi;
    uint16_t     status;
    int          attached;
N
Naveen Nagar 已提交
140
    uint8_t      pif;
141

142 143 144 145 146 147
    struct {
        uint16_t zrwas;
        uint16_t zrwafg;
        uint32_t numzrwa;
    } zns;

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
    QTAILQ_ENTRY(NvmeNamespace) entry;

    NvmeIdNsZoned   *id_ns_zoned;
    NvmeZone        *zone_array;
    QTAILQ_HEAD(, NvmeZone) exp_open_zones;
    QTAILQ_HEAD(, NvmeZone) imp_open_zones;
    QTAILQ_HEAD(, NvmeZone) closed_zones;
    QTAILQ_HEAD(, NvmeZone) full_zones;
    uint32_t        num_zones;
    uint64_t        zone_size;
    uint64_t        zone_capacity;
    uint32_t        zone_size_log2;
    uint8_t         *zd_extensions;
    int32_t         nr_open_zones;
    int32_t         nr_active_zones;

    NvmeNamespaceParams params;

    struct {
        uint32_t err_rec;
    } features;
} NvmeNamespace;

static inline uint32_t nvme_nsid(NvmeNamespace *ns)
{
    if (ns) {
        return ns->params.nsid;
    }

    return 0;
}

static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
{
182
    return lba << ns->lbaf.ds;
183 184 185 186
}

static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba)
{
187
    return ns->lbaf.ms * lba;
188 189
}

190 191 192 193 194
static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba)
{
    return ns->moff + nvme_m2b(ns, lba);
}

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
static inline bool nvme_ns_ext(NvmeNamespace *ns)
{
    return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas);
}

static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone)
{
    return zone->d.zs >> 4;
}

static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state)
{
    zone->d.zs = state << 4;
}

static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone)
{
    return zone->d.zslba + ns->zone_size;
}

static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone)
{
    return zone->d.zslba + zone->d.zcap;
}

static inline bool nvme_wp_is_valid(NvmeZone *zone)
{
    uint8_t st = nvme_get_zone_state(zone);

    return st != NVME_ZONE_STATE_FULL &&
           st != NVME_ZONE_STATE_READ_ONLY &&
           st != NVME_ZONE_STATE_OFFLINE;
}

static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
                                             uint32_t zone_idx)
{
    return &ns->zd_extensions[zone_idx * ns->params.zd_extension_size];
}

static inline void nvme_aor_inc_open(NvmeNamespace *ns)
{
    assert(ns->nr_open_zones >= 0);
    if (ns->params.max_open_zones) {
        ns->nr_open_zones++;
        assert(ns->nr_open_zones <= ns->params.max_open_zones);
    }
}

static inline void nvme_aor_dec_open(NvmeNamespace *ns)
{
    if (ns->params.max_open_zones) {
        assert(ns->nr_open_zones > 0);
        ns->nr_open_zones--;
    }
    assert(ns->nr_open_zones >= 0);
}

static inline void nvme_aor_inc_active(NvmeNamespace *ns)
{
    assert(ns->nr_active_zones >= 0);
    if (ns->params.max_active_zones) {
        ns->nr_active_zones++;
        assert(ns->nr_active_zones <= ns->params.max_active_zones);
    }
}

static inline void nvme_aor_dec_active(NvmeNamespace *ns)
{
    if (ns->params.max_active_zones) {
        assert(ns->nr_active_zones > 0);
        ns->nr_active_zones--;
        assert(ns->nr_active_zones >= ns->nr_open_zones);
    }
    assert(ns->nr_active_zones >= 0);
}

void nvme_ns_init_format(NvmeNamespace *ns);
273
int nvme_ns_setup(NvmeNamespace *ns, Error **errp);
274 275 276
void nvme_ns_drain(NvmeNamespace *ns);
void nvme_ns_shutdown(NvmeNamespace *ns);
void nvme_ns_cleanup(NvmeNamespace *ns);
277

278
typedef struct NvmeAsyncEvent {
279
    QTAILQ_ENTRY(NvmeAsyncEvent) entry;
280 281 282
    NvmeAerResult result;
} NvmeAsyncEvent;

283 284 285 286 287 288 289 290 291 292 293 294 295 296
enum {
    NVME_SG_ALLOC = 1 << 0,
    NVME_SG_DMA   = 1 << 1,
};

typedef struct NvmeSg {
    int flags;

    union {
        QEMUSGList   qsg;
        QEMUIOVector iov;
    };
} NvmeSg;

297 298 299 300 301
typedef enum NvmeTxDirection {
    NVME_TX_DIRECTION_TO_DEVICE   = 0,
    NVME_TX_DIRECTION_FROM_DEVICE = 1,
} NvmeTxDirection;

302 303
typedef struct NvmeRequest {
    struct NvmeSQueue       *sq;
304
    struct NvmeNamespace    *ns;
305
    BlockAIOCB              *aiocb;
306
    uint16_t                status;
307
    void                    *opaque;
308
    NvmeCqe                 cqe;
309
    NvmeCmd                 cmd;
310
    BlockAcctCookie         acct;
311
    NvmeSg                  sg;
312 313 314
    QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;

315 316 317 318 319 320 321 322 323
typedef struct NvmeBounceContext {
    NvmeRequest *req;

    struct {
        QEMUIOVector iov;
        uint8_t *bounce;
    } data, mdata;
} NvmeBounceContext;

324 325 326 327 328 329 330 331 332 333 334 335 336
static inline const char *nvme_adm_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_ADM_CMD_DELETE_SQ:        return "NVME_ADM_CMD_DELETE_SQ";
    case NVME_ADM_CMD_CREATE_SQ:        return "NVME_ADM_CMD_CREATE_SQ";
    case NVME_ADM_CMD_GET_LOG_PAGE:     return "NVME_ADM_CMD_GET_LOG_PAGE";
    case NVME_ADM_CMD_DELETE_CQ:        return "NVME_ADM_CMD_DELETE_CQ";
    case NVME_ADM_CMD_CREATE_CQ:        return "NVME_ADM_CMD_CREATE_CQ";
    case NVME_ADM_CMD_IDENTIFY:         return "NVME_ADM_CMD_IDENTIFY";
    case NVME_ADM_CMD_ABORT:            return "NVME_ADM_CMD_ABORT";
    case NVME_ADM_CMD_SET_FEATURES:     return "NVME_ADM_CMD_SET_FEATURES";
    case NVME_ADM_CMD_GET_FEATURES:     return "NVME_ADM_CMD_GET_FEATURES";
    case NVME_ADM_CMD_ASYNC_EV_REQ:     return "NVME_ADM_CMD_ASYNC_EV_REQ";
337
    case NVME_ADM_CMD_NS_ATTACHMENT:    return "NVME_ADM_CMD_NS_ATTACHMENT";
338
    case NVME_ADM_CMD_FORMAT_NVM:       return "NVME_ADM_CMD_FORMAT_NVM";
339 340 341 342 343 344 345 346 347 348
    default:                            return "NVME_ADM_CMD_UNKNOWN";
    }
}

static inline const char *nvme_io_opc_str(uint8_t opc)
{
    switch (opc) {
    case NVME_CMD_FLUSH:            return "NVME_NVM_CMD_FLUSH";
    case NVME_CMD_WRITE:            return "NVME_NVM_CMD_WRITE";
    case NVME_CMD_READ:             return "NVME_NVM_CMD_READ";
349
    case NVME_CMD_COMPARE:          return "NVME_NVM_CMD_COMPARE";
350
    case NVME_CMD_WRITE_ZEROES:     return "NVME_NVM_CMD_WRITE_ZEROES";
351
    case NVME_CMD_DSM:              return "NVME_NVM_CMD_DSM";
352
    case NVME_CMD_VERIFY:           return "NVME_NVM_CMD_VERIFY";
353
    case NVME_CMD_COPY:             return "NVME_NVM_CMD_COPY";
354 355 356
    case NVME_CMD_ZONE_MGMT_SEND:   return "NVME_ZONED_CMD_MGMT_SEND";
    case NVME_CMD_ZONE_MGMT_RECV:   return "NVME_ZONED_CMD_MGMT_RECV";
    case NVME_CMD_ZONE_APPEND:      return "NVME_ZONED_CMD_ZONE_APPEND";
357 358 359 360
    default:                        return "NVME_NVM_CMD_UNKNOWN";
    }
}

361 362 363 364 365 366 367 368 369 370
typedef struct NvmeSQueue {
    struct NvmeCtrl *ctrl;
    uint16_t    sqid;
    uint16_t    cqid;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    size;
    uint64_t    dma_addr;
    QEMUTimer   *timer;
    NvmeRequest *io_req;
371 372
    QTAILQ_HEAD(, NvmeRequest) req_list;
    QTAILQ_HEAD(, NvmeRequest) out_req_list;
373 374 375 376 377 378 379 380 381 382 383 384 385 386
    QTAILQ_ENTRY(NvmeSQueue) entry;
} NvmeSQueue;

typedef struct NvmeCQueue {
    struct NvmeCtrl *ctrl;
    uint8_t     phase;
    uint16_t    cqid;
    uint16_t    irq_enabled;
    uint32_t    head;
    uint32_t    tail;
    uint32_t    vector;
    uint32_t    size;
    uint64_t    dma_addr;
    QEMUTimer   *timer;
387 388
    QTAILQ_HEAD(, NvmeSQueue) sq_list;
    QTAILQ_HEAD(, NvmeRequest) req_list;
389 390 391 392 393 394
} NvmeCQueue;

#define TYPE_NVME "nvme"
#define NVME(obj) \
        OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME)

395 396 397 398 399 400 401 402 403 404 405 406
typedef struct NvmeParams {
    char     *serial;
    uint32_t num_queues; /* deprecated since 5.1 */
    uint32_t max_ioqpairs;
    uint16_t msix_qsize;
    uint32_t cmb_size_mb;
    uint8_t  aerl;
    uint32_t aer_max_queued;
    uint8_t  mdts;
    uint8_t  vsl;
    bool     use_intel_id;
    uint8_t  zasl;
407
    bool     auto_transition_zones;
408 409
    bool     legacy_cmb;
} NvmeParams;
410

411 412
typedef struct NvmeCtrl {
    PCIDevice    parent_obj;
413
    MemoryRegion bar0;
414 415
    MemoryRegion iomem;
    NvmeBar      bar;
416
    NvmeParams   params;
417
    NvmeBus      bus;
418

419
    uint16_t    cntlid;
420
    bool        qs_created;
A
Anton Blanchard 已提交
421
    uint32_t    page_size;
422 423 424 425 426 427
    uint16_t    page_bits;
    uint16_t    max_prp_ents;
    uint16_t    cqe_size;
    uint16_t    sqe_size;
    uint32_t    reg_size;
    uint32_t    max_q_ents;
428
    uint8_t     outstanding_aers;
429
    uint32_t    irq_status;
430
    int         cq_pending;
431 432
    uint64_t    host_timestamp;                 /* Timestamp sent by the host */
    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
433 434
    uint64_t    starttime_ms;
    uint16_t    temperature;
435
    uint8_t     smart_critical_warning;
436

437 438 439 440 441 442 443
    struct {
        MemoryRegion mem;
        uint8_t      *buf;
        bool         cmse;
        hwaddr       cba;
    } cmb;

444 445 446 447 448
    struct {
        HostMemoryBackend *dev;
        bool              cmse;
        hwaddr            cba;
    } pmr;
449

450 451 452 453 454
    uint8_t     aer_mask;
    NvmeRequest **aer_reqs;
    QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue;
    int         aer_queued;

455 456
    uint32_t    dmrsl;

457 458 459 460
    /* Namespace ID is started with 1 so bitmap should be 1-based */
#define NVME_CHANGED_NSID_SIZE  (NVME_MAX_NAMESPACES + 1)
    DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);

461 462
    NvmeSubsystem   *subsys;

463
    NvmeNamespace   namespace;
464
    NvmeNamespace   *namespaces[NVME_MAX_NAMESPACES + 1];
465 466 467 468 469
    NvmeSQueue      **sq;
    NvmeCQueue      **cq;
    NvmeSQueue      admin_sq;
    NvmeCQueue      admin_cq;
    NvmeIdCtrl      id_ctrl;
470 471 472 473 474 475

    struct {
        struct {
            uint16_t temp_thresh_hi;
            uint16_t temp_thresh_low;
        };
476 477 478

        uint32_t                async_config;
        NvmeHostBehaviorSupport hbs;
479
    } features;
480 481
} NvmeCtrl;

482
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
483
{
484
    if (!nsid || nsid > NVME_MAX_NAMESPACES) {
485 486 487
        return NULL;
    }

488
    return n->namespaces[nsid];
489 490
}

491 492 493 494 495 496 497 498 499 500 501 502 503 504
static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;

    return n->cq[sq->cqid];
}

static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
{
    NvmeSQueue *sq = req->sq;
    return sq->ctrl;
}

505 506 507 508 509 510 511 512 513
static inline uint16_t nvme_cid(NvmeRequest *req)
{
    if (!req) {
        return 0xffff;
    }

    return le16_to_cpu(req->cqe.cid);
}

514
void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
515
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
516
                          NvmeTxDirection dir, NvmeRequest *req);
517
uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
518 519 520 521
                           NvmeTxDirection dir, NvmeRequest *req);
void nvme_rw_complete_cb(void *opaque, int ret);
uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
                       NvmeCmd *cmd);
522

523
#endif /* HW_NVME_NVME_H */