提交 a9aa31cd 编写于 作者: L Linus Torvalds

Merge branch 'for-4.4/drivers' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Here are the block driver changes for 4.4.  This pull request
  contains:

   - NVMe:
        - Refactor and moving of code to prepare for proper target
          support. From Christoph and Jay.

        - 32-bit nvme warning fix from Arnd.

        - Error initialization fix from me.

        - Proper namespace removal and reference counting support from
          Keith.

        - Device resume fix on IO failure, also from Keith.

        - Dependency fix from Keith, now that nvme isn't under the
          umbrella of the block anymore.

        - Target location and maintainers update from Jay.

   - From Ming Lei, the long awaited DIO/AIO support for loop.

   - Enable BD-RE writeable opens, from Georgios"

* 'for-4.4/drivers' of git://git.kernel.dk/linux-block: (24 commits)
  Update target repo for nvme patch contributions
  NVMe: initialize error to '0'
  nvme: use an integer value to Linux errno values
  nvme: fix 32-bit build warning
  NVMe: Add explicit block config dependency
  nvme: include <linux/types.h> in <linux/nvme.h>
  nvme: move to a new drivers/nvme/host directory
  nvme.h: add missing nvme_id_ctrl endianness annotations
  nvme: move hardware structures out of the uapi version of nvme.h
  nvme: add a local nvme.h header
  nvme: properly handle partially initialized queues in nvme_create_io_queues
  nvme: merge nvme_dev_start, nvme_dev_resume and nvme_async_probe
  nvme: factor reset code into a common helper
  nvme: merge nvme_dev_reset into nvme_reset_failed_dev
  nvme: delete dev from dev_list in nvme_reset
  NVMe: Simplify device resume on io queue failure
  NVMe: Namespace removal simplifications
  NVMe: Reference count open namespaces
  cdrom: Random writing support for BD-RE media
  block: loop: support DIO & AIO
  ...
......@@ -7486,11 +7486,13 @@ F: drivers/video/fbdev/riva/
F: drivers/video/fbdev/nvidia/
NVM EXPRESS DRIVER
M: Matthew Wilcox <willy@linux.intel.com>
M: Keith Busch <keith.busch@intel.com>
M: Jens Axboe <axboe@fb.com>
L: linux-nvme@lists.infradead.org
T: git git://git.infradead.org/users/willy/linux-nvme.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
W: https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/
S: Supported
F: drivers/block/nvme*
F: drivers/nvme/host/
F: include/linux/nvme.h
NVMEM FRAMEWORK
......
......@@ -18,6 +18,8 @@ source "drivers/pnp/Kconfig"
source "drivers/block/Kconfig"
source "drivers/nvme/Kconfig"
# misc before ide - BLK_DEV_SGIIOC4 depends on SGI_IOC4
source "drivers/misc/Kconfig"
......
......@@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/
obj-y += macintosh/
obj-$(CONFIG_IDE) += ide/
obj-$(CONFIG_SCSI) += scsi/
obj-y += nvme/
obj-$(CONFIG_ATA) += ata/
obj-$(CONFIG_TARGET_CORE) += target/
obj-$(CONFIG_MTD) += mtd/
......
......@@ -310,17 +310,6 @@ config BLK_DEV_NBD
If unsure, say N.
config BLK_DEV_NVME
tristate "NVM Express block device"
depends on PCI
---help---
The NVM Express driver is for solid state drives directly
connected to the PCI or PCI Express bus. If you know you
don't have one of these, it is safe to answer N.
To compile this driver as a module, choose M here: the
module will be called nvme.
config BLK_DEV_SKD
tristate "STEC S1120 Block Driver"
depends on PCI
......
......@@ -22,7 +22,6 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o
obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o
obj-$(CONFIG_MG_DISK) += mg_disk.o
obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_BLK_DEV_SKD) += skd.o
obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o
......@@ -44,6 +43,5 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
obj-$(CONFIG_ZRAM) += zram/
nvme-y := nvme-core.o nvme-scsi.o
skd-y := skd_main.o
swim_mod-y := swim.o swim_asm.o
......@@ -164,6 +164,62 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
return get_size(lo->lo_offset, lo->lo_sizelimit, file);
}
/*
 * Re-evaluate whether the loop device may use direct I/O against its
 * backing file and, if the answer changed, switch modes.
 *
 * @lo:  loop device to update
 * @dio: true if the caller would like direct I/O to be used
 *
 * Direct I/O is only enabled when it was requested AND all of the
 * following hold:
 *   - the loop queue's logical block size is not smaller than that of
 *     the block device backing the file's superblock (if there is one),
 *   - lo_offset is aligned to that logical block size,
 *   - the backing mapping provides a ->direct_IO operation,
 *   - no transfer function is installed on the loop device.
 *
 * TODO: these conditions may be loosened in the future, and direct I/O
 * may then be switched at runtime, because most requests issued by sane
 * applications should be PAGE_SIZE aligned.
 */
static void __loop_update_dio(struct loop_device *lo, bool dio)
{
	struct file *backing = lo->lo_backing_file;
	struct address_space *mapping = backing->f_mapping;
	struct inode *host = mapping->host;
	unsigned short sb_bsize = 0;
	unsigned dio_align = 0;
	bool use_dio = false;

	/* Without a backing block device there is no alignment constraint. */
	if (host->i_sb->s_bdev) {
		sb_bsize = bdev_logical_block_size(host->i_sb->s_bdev);
		dio_align = sb_bsize - 1;
	}

	if (dio &&
	    queue_logical_block_size(lo->lo_queue) >= sb_bsize &&
	    !(lo->lo_offset & dio_align) &&
	    mapping->a_ops->direct_IO &&
	    !lo->transfer)
		use_dio = true;

	/* Nothing to do when the effective mode is unchanged. */
	if (lo->use_dio == use_dio)
		return;

	/* Flush dirty pages before changing the direct I/O mode. */
	vfs_fsync(backing, 0);

	/*
	 * LO_FLAGS_DIRECT_IO is handled like LO_FLAGS_READ_ONLY: both are
	 * set from the kernel, and losetup picks up the new value through
	 * ioctl(LOOP_GET_STATUS).  The queue is frozen while the mode and
	 * the flag are changed.
	 */
	blk_mq_freeze_queue(lo->lo_queue);
	lo->use_dio = use_dio;
	if (use_dio)
		lo->lo_flags |= LO_FLAGS_DIRECT_IO;
	else
		lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
	blk_mq_unfreeze_queue(lo->lo_queue);
}
static int
figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
{
......@@ -389,6 +445,89 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
return ret;
}
static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
{
if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE))
return;
if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
struct bio *bio = cmd->rq->bio;
bio_advance(bio, bytes);
zero_fill_bio(bio);
}
}
/*
 * Completion callback installed as ki_complete on the command's kiocb.
 *
 * @iocb: kiocb embedded in the struct loop_cmd being completed
 * @ret:  bytes transferred, or a negative error
 * @ret2: unused
 *
 * Pads a short read with zeroes, then completes the block request with
 * 0 on success (ret >= 0) or -EIO on any failure.
 */
static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
{
	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
	struct request *rq = cmd->rq;

	handle_partial_read(cmd, ret);

	/* Any negative result is reported to the block layer as -EIO. */
	blk_mq_complete_request(rq, ret < 0 ? -EIO : 0);
}
/*
 * Submit one loop request to the backing file through the kiocb
 * read_iter/write_iter interface with IOCB_DIRECT set.
 *
 * @lo:  loop device
 * @cmd: loop command wrapping the request
 * @pos: byte offset in the backing file
 * @rw:  WRITE for a write, otherwise a read
 *
 * Always returns 0; the outcome of the I/O is delivered through
 * lo_rw_aio_complete(), either from the file's own completion path
 * (when the submission returns -EIOCBQUEUED) or directly from here.
 */
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
		     loff_t pos, bool rw)
{
	struct file *file = lo->lo_backing_file;
	struct kiocb *iocb = &cmd->iocb;
	struct bio *bio = cmd->rq->bio;
	struct bio_vec *bvec;
	struct iov_iter iter;
	int ret;

	/* Merging is disabled on the loop queue, so expect a single bio. */
	WARN_ON(cmd->rq->bio != cmd->rq->biotail);

	/* Build a bvec iterator covering the whole request. */
	bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
		      bio_segments(bio), blk_rq_bytes(cmd->rq));

	iocb->ki_pos = pos;
	iocb->ki_filp = file;
	iocb->ki_complete = lo_rw_aio_complete;
	iocb->ki_flags = IOCB_DIRECT;

	if (rw == WRITE)
		ret = file->f_op->write_iter(iocb, &iter);
	else
		ret = file->f_op->read_iter(iocb, &iter);

	/* Not queued asynchronously: run the completion ourselves. */
	if (ret != -EIOCBQUEUED)
		iocb->ki_complete(iocb, ret, 0);

	return 0;
}
/*
 * Service a request that needs no transfer function.
 *
 * @lo:  loop device
 * @rq:  block request
 * @pos: byte offset in the backing file
 * @rw:  WRITE for a write, otherwise a read
 *
 * Requests flagged use_aio go through lo_rw_aio(); everything else
 * falls back to lo_write_simple()/lo_read_simple().
 */
static inline int lo_rw_simple(struct loop_device *lo,
			       struct request *rq, loff_t pos, bool rw)
{
	struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);

	if (cmd->use_aio)
		return lo_rw_aio(lo, cmd, pos, rw);

	/*
	 * Ideally lo_write_simple() and lo_read_simple() would be replaced
	 * by a submit-style function such as lo_rw_aio().  The blocker is
	 * that lo_read_simple() must call flush_dcache_page() after the
	 * kernel has written into the page, which is awkward in a function
	 * that submits all segments of the request at once.  Direct read
	 * I/O does not need flush_dcache_page().
	 */
	return rw == WRITE ? lo_write_simple(lo, rq, pos)
			   : lo_read_simple(lo, rq, pos);
}
static int do_req_filebacked(struct loop_device *lo, struct request *rq)
{
loff_t pos;
......@@ -404,13 +543,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
else if (lo->transfer)
ret = lo_write_transfer(lo, rq, pos);
else
ret = lo_write_simple(lo, rq, pos);
ret = lo_rw_simple(lo, rq, pos, WRITE);
} else {
if (lo->transfer)
ret = lo_read_transfer(lo, rq, pos);
else
ret = lo_read_simple(lo, rq, pos);
ret = lo_rw_simple(lo, rq, pos, READ);
}
return ret;
......@@ -421,6 +560,12 @@ struct switch_request {
struct completion wait;
};
static inline void loop_update_dio(struct loop_device *lo)
{
__loop_update_dio(lo, io_is_direct(lo->lo_backing_file) |
lo->use_dio);
}
/*
* Do the actual switch; called from the BIO completion routine
*/
......@@ -441,6 +586,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
loop_update_dio(lo);
}
/*
......@@ -627,11 +773,19 @@ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf)
return sprintf(buf, "%s\n", partscan ? "1" : "0");
}
/*
 * sysfs "dio" attribute: prints "1" when LO_FLAGS_DIRECT_IO is set in
 * lo_flags and "0" otherwise, followed by a newline.
 */
static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf)
{
	const char *state = (lo->lo_flags & LO_FLAGS_DIRECT_IO) ? "1" : "0";

	return sprintf(buf, "%s\n", state);
}
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
LOOP_ATTR_RO(partscan);
LOOP_ATTR_RO(dio);
static struct attribute *loop_attrs[] = {
&loop_attr_backing_file.attr,
......@@ -639,6 +793,7 @@ static struct attribute *loop_attrs[] = {
&loop_attr_sizelimit.attr,
&loop_attr_autoclear.attr,
&loop_attr_partscan.attr,
&loop_attr_dio.attr,
NULL,
};
......@@ -688,6 +843,23 @@ static void loop_config_discard(struct loop_device *lo)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
/*
 * Tear down the per-device kthread worker: flush the work already
 * queued on it, then stop the worker task.
 */
static void loop_unprepare_queue(struct loop_device *lo)
{
	struct kthread_worker *worker = &lo->worker;

	flush_kthread_worker(worker);
	kthread_stop(lo->worker_task);
}
/*
 * Set up the per-device kthread worker and start its task, named
 * "loop<N>" after the device number.
 *
 * Returns 0 on success, or -ENOMEM if the worker task could not be
 * started (lo->worker_task then holds the error pointer).
 */
static int loop_prepare_queue(struct loop_device *lo)
{
	struct task_struct *task;

	init_kthread_worker(&lo->worker);

	task = kthread_run(kthread_worker_fn,
			   &lo->worker, "loop%d", lo->lo_number);
	lo->worker_task = task;
	if (IS_ERR(task))
		return -ENOMEM;

	/* Run the worker at the highest nice priority. */
	set_user_nice(task, MIN_NICE);
	return 0;
}
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
......@@ -745,17 +917,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
goto out_putf;
error = -ENOMEM;
lo->wq = alloc_workqueue("kloopd%d",
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
lo->lo_number);
if (!lo->wq)
error = loop_prepare_queue(lo);
if (error)
goto out_putf;
error = 0;
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = false;
lo->lo_blocksize = lo_blocksize;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
......@@ -769,6 +939,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_flush(lo->lo_queue, REQ_FLUSH);
loop_update_dio(lo);
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
......@@ -903,8 +1074,7 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
destroy_workqueue(lo->wq);
lo->wq = NULL;
loop_unprepare_queue(lo);
mutex_unlock(&lo->lo_ctl_mutex);
/*
* Need not hold lo_ctl_mutex to fput backing file.
......@@ -988,6 +1158,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
lo->lo_key_owner = uid;
}
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
return 0;
}
......@@ -1138,6 +1311,20 @@ static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
{
int error = -ENXIO;
if (lo->lo_state != Lo_bound)
goto out;
__loop_update_dio(lo, !!arg);
if (lo->use_dio == !!arg)
return 0;
error = -EINVAL;
out:
return error;
}
static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
......@@ -1181,6 +1368,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_capacity(lo, bdev);
break;
case LOOP_SET_DIRECT_IO:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
err = loop_set_dio(lo, arg);
break;
default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
......@@ -1461,23 +1653,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (lo->lo_state != Lo_bound)
return -EIO;
if (cmd->rq->cmd_flags & REQ_WRITE) {
struct loop_device *lo = cmd->rq->q->queuedata;
bool need_sched = true;
spin_lock_irq(&lo->lo_lock);
if (lo->write_started)
need_sched = false;
else
lo->write_started = true;
list_add_tail(&cmd->list, &lo->write_cmd_head);
spin_unlock_irq(&lo->lo_lock);
if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH |
REQ_DISCARD)))
cmd->use_aio = true;
else
cmd->use_aio = false;
if (need_sched)
queue_work(lo->wq, &lo->write_work);
} else {
queue_work(lo->wq, &cmd->read_work);
}
queue_kthread_work(&lo->worker, &cmd->work);
return BLK_MQ_RQ_QUEUE_OK;
}
......@@ -1495,38 +1677,15 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
ret = do_req_filebacked(lo, cmd->rq);
failed:
blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
/* complete non-aio request */
if (!cmd->use_aio || ret)
blk_mq_complete_request(cmd->rq, ret ? -EIO : 0);
}
static void loop_queue_write_work(struct work_struct *work)
{
struct loop_device *lo =
container_of(work, struct loop_device, write_work);
LIST_HEAD(cmd_list);
spin_lock_irq(&lo->lo_lock);
repeat:
list_splice_init(&lo->write_cmd_head, &cmd_list);
spin_unlock_irq(&lo->lo_lock);
while (!list_empty(&cmd_list)) {
struct loop_cmd *cmd = list_first_entry(&cmd_list,
struct loop_cmd, list);
list_del_init(&cmd->list);
loop_handle_cmd(cmd);
}
spin_lock_irq(&lo->lo_lock);
if (!list_empty(&lo->write_cmd_head))
goto repeat;
lo->write_started = false;
spin_unlock_irq(&lo->lo_lock);
}
static void loop_queue_read_work(struct work_struct *work)
static void loop_queue_work(struct kthread_work *work)
{
struct loop_cmd *cmd =
container_of(work, struct loop_cmd, read_work);
container_of(work, struct loop_cmd, work);
loop_handle_cmd(cmd);
}
......@@ -1538,7 +1697,7 @@ static int loop_init_request(void *data, struct request *rq,
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
cmd->rq = rq;
INIT_WORK(&cmd->read_work, loop_queue_read_work);
init_kthread_work(&cmd->work, loop_queue_work);
return 0;
}
......@@ -1594,8 +1753,11 @@ static int loop_add(struct loop_device **l, int i)
}
lo->lo_queue->queuedata = lo;
INIT_LIST_HEAD(&lo->write_cmd_head);
INIT_WORK(&lo->write_work, loop_queue_write_work);
/*
* It doesn't make sense to enable merge because the I/O
* submitted to backing file is handled page by page.
*/
queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue);
disk = lo->lo_disk = alloc_disk(1 << part_shift);
if (!disk)
......
......@@ -14,7 +14,7 @@
#include <linux/blk-mq.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <uapi/linux/loop.h>
/* Possible states of device */
......@@ -54,12 +54,11 @@ struct loop_device {
gfp_t old_gfp_mask;
spinlock_t lo_lock;
struct workqueue_struct *wq;
struct list_head write_cmd_head;
struct work_struct write_work;
bool write_started;
int lo_state;
struct mutex lo_ctl_mutex;
struct kthread_worker worker;
struct task_struct *worker_task;
bool use_dio;
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
......@@ -67,9 +66,11 @@ struct loop_device {
};
struct loop_cmd {
struct work_struct read_work;
struct kthread_work work;
struct request *rq;
struct list_head list;
bool use_aio; /* use AIO interface to handle I/O */
struct kiocb iocb;
};
/* Support for loadable transfer modules */
......
......@@ -885,6 +885,7 @@ static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi)
switch (cdi->mmc3_profile) {
case 0x12: /* DVD-RAM */
case 0x1A: /* DVD+RW */
case 0x43: /* BD-RE */
return 0;
default:
return 1;
......
source "drivers/nvme/host/Kconfig"
config BLK_DEV_NVME
tristate "NVM Express block device"
depends on PCI && BLOCK
---help---
The NVM Express driver is for solid state drives directly
connected to the PCI or PCI Express bus. If you know you
don't have one of these, it is safe to answer N.
To compile this driver as a module, choose M here: the
module will be called nvme.
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
nvme-y += pci.o scsi.o
/*
* Copyright (c) 2011-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _NVME_H
#define _NVME_H
#include <linux/nvme.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct nvme_dev {
/* List linkage; __nvme_reset() unlinks it before queueing reset_work */
struct list_head node;
struct nvme_queue **queues;
struct request_queue *admin_q;
/* nvme_free_dev() frees this tag set when tagset.tags is non-NULL */
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
/* Reference taken with get_device(&pdev->dev) in nvme_probe() */
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
int instance;
unsigned queue_count;
/* nvme_probe_work() treats a value below 2 as "no working I/O queue" */
unsigned online_queues;
unsigned max_qid;
int q_depth;
u32 db_stride;
u32 ctrl_config;
struct msix_entry *entry;
struct nvme_bar __iomem *bar;
/* List of struct nvme_ns, linked through nvme_ns.list */
struct list_head namespaces;
/* Final put runs nvme_free_dev(); each added namespace holds a reference */
struct kref kref;
/* Dropped with put_device() in nvme_free_dev() */
struct device *device;
/* Runs nvme_reset_work() */
struct work_struct reset_work;
/* Runs nvme_probe_work() */
struct work_struct probe_work;
/* Runs nvme_dev_scan() */
struct work_struct scan_work;
char name[12];
char serial[20];
char model[40];
char firmware_rev[8];
bool subsystem;
u32 max_hw_sectors;
u32 stripe_size;
u32 page_size;
void __iomem *cmb;
dma_addr_t cmb_dma_addr;
u64 cmb_size;
u32 cmbsz;
u16 oncs;
u16 abort_limit;
u8 event_limit;
u8 vwc;
};
/*
* An NVM Express namespace is equivalent to a SCSI LUN
*/
struct nvme_ns {
/* Linkage on the owning nvme_dev.namespaces list */
struct list_head list;
/* Owning controller; nvme_free_ns() drops a reference on dev->kref */
struct nvme_dev *dev;
struct request_queue *queue;
/* nvme_free_ns() clears disk->private_data and calls put_disk() on it */
struct gendisk *disk;
/*
 * Initialised in nvme_alloc_ns(); nvme_open() takes a reference and
 * nvme_release()/nvme_ns_remove() drop one. The final put runs
 * nvme_free_ns().
 */
struct kref kref;
/* Namespace id passed to nvme_alloc_ns() */
unsigned ns_id;
/* nvme_alloc_ns() starts with 9; nvme_block_nr() shifts by (lba_shift - 9) */
int lba_shift;
u16 ms;
bool ext;
u8 pi_type;
u64 mode_select_num_blocks;
u32 mode_select_block_len;
};
/*
* The nvme_iod describes the data in an I/O, including the list of PRP
* entries. You can't see it in this data structure because C doesn't let
* me express that. Use nvme_alloc_iod to ensure there's enough space
* allocated to store the PRP list.
*/
struct nvme_iod {
unsigned long private; /* For the use of the submitter of the I/O */
int npages; /* In the PRP list. 0 means small pool in use */
int offset; /* Of PRP list */
int nents; /* Used in scatterlist */
int length; /* Of data, in bytes */
/* NOTE(review): presumably the DMA address of the first PRP list page - confirm */
dma_addr_t first_dma;
struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
/* Zero-length trailing array: its entries live in memory allocated past the end of the struct */
struct scatterlist sg[0];
};
/*
 * Convert @sector to a block number of namespace @ns by shifting it
 * right by (ns->lba_shift - 9) bits.
 */
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
	int shift = ns->lba_shift - 9;

	return sector >> shift;
}
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, void __user *ubuffer, unsigned bufflen,
u32 *result, unsigned timeout);
int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
struct nvme_id_ns **id);
int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
dma_addr_t dma_addr, u32 *result);
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
struct sg_io_hdr;
int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip);
#endif /* _NVME_H */
......@@ -12,7 +12,6 @@
* more details.
*/
#include <linux/nvme.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
......@@ -43,6 +42,9 @@
#include <scsi/sg.h>
#include <asm-generic/io-64-nonatomic-lo-hi.h>
#include <uapi/linux/nvme_ioctl.h>
#include "nvme.h"
#define NVME_MINORS (1U << MINORBITS)
#define NVME_Q_DEPTH 1024
#define NVME_AQ_DEPTH 256
......@@ -84,9 +86,10 @@ static wait_queue_head_t nvme_kthread_wait;
static struct class *nvme_class;
static void nvme_reset_failed_dev(struct work_struct *ws);
static int __nvme_reset(struct nvme_dev *dev);
static int nvme_reset(struct nvme_dev *dev);
static int nvme_process_cq(struct nvme_queue *nvmeq);
static void nvme_dead_ctrl(struct nvme_dev *dev);
struct async_cmd_info {
struct kthread_work work;
......@@ -1283,18 +1286,13 @@ static void nvme_abort_req(struct request *req)
struct nvme_command cmd;
if (!nvmeq->qid || cmd_rq->aborted) {
unsigned long flags;
spin_lock_irqsave(&dev_list_lock, flags);
if (work_busy(&dev->reset_work))
goto out;
list_del_init(&dev->node);
dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
out:
spin_unlock_irqrestore(&dev_list_lock, flags);
spin_lock(&dev_list_lock);
if (!__nvme_reset(dev)) {
dev_warn(dev->dev,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
}
spin_unlock(&dev_list_lock);
return;
}
......@@ -1949,6 +1947,20 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
#define nvme_compat_ioctl NULL
#endif
static void nvme_free_dev(struct kref *kref);
static void nvme_free_ns(struct kref *kref)
{
struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
spin_lock(&dev_list_lock);
ns->disk->private_data = NULL;
spin_unlock(&dev_list_lock);
kref_put(&ns->dev->kref, nvme_free_dev);
put_disk(ns->disk);
kfree(ns);
}
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
int ret = 0;
......@@ -1958,21 +1970,17 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
ns = bdev->bd_disk->private_data;
if (!ns)
ret = -ENXIO;
else if (!kref_get_unless_zero(&ns->dev->kref))
else if (!kref_get_unless_zero(&ns->kref))
ret = -ENXIO;
spin_unlock(&dev_list_lock);
return ret;
}
static void nvme_free_dev(struct kref *kref);
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_dev *dev = ns->dev;
kref_put(&dev->kref, nvme_free_dev);
kref_put(&ns->kref, nvme_free_ns);
}
static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
......@@ -2079,14 +2087,11 @@ static int nvme_kthread(void *data)
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
if (work_busy(&dev->reset_work))
continue;
list_del_init(&dev->node);
dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
readl(&dev->bar->csts));
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
if (!__nvme_reset(dev)) {
dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
readl(&dev->bar->csts));
}
continue;
}
for (i = 0; i < dev->queue_count; i++) {
......@@ -2132,6 +2137,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
if (!disk)
goto out_free_queue;
kref_init(&ns->kref);
ns->ns_id = nsid;
ns->disk = disk;
ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
......@@ -2168,6 +2174,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
if (nvme_revalidate_disk(ns->disk))
goto out_free_disk;
kref_get(&dev->kref);
add_disk(ns->disk);
if (ns->ms) {
struct block_device *bd = bdget_disk(ns->disk, 0);
......@@ -2190,6 +2197,13 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
kfree(ns);
}
/*
* Create I/O queues. Failing to create an I/O queue is not an issue,
* we can continue with less than the desired amount of queues, and
* even a controller without I/O queues can still be used to issue
* admin commands. This might be useful to upgrade a buggy firmware
* for example.
*/
static void nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i;
......@@ -2199,8 +2213,10 @@ static void nvme_create_io_queues(struct nvme_dev *dev)
break;
for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
if (nvme_create_queue(dev->queues[i], i))
if (nvme_create_queue(dev->queues[i], i)) {
nvme_free_queues(dev, i);
break;
}
}
static int set_queue_count(struct nvme_dev *dev, int count)
......@@ -2363,18 +2379,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return result;
}
static void nvme_free_namespace(struct nvme_ns *ns)
{
list_del(&ns->list);
spin_lock(&dev_list_lock);
ns->disk->private_data = NULL;
spin_unlock(&dev_list_lock);
put_disk(ns->disk);
kfree(ns);
}
static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
......@@ -2416,7 +2420,9 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (kill || !blk_queue_dying(ns->queue)) {
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
}
}
list_del_init(&ns->list);
kref_put(&ns->kref, nvme_free_ns);
}
static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
......@@ -2427,18 +2433,14 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
for (i = 1; i <= nn; i++) {
ns = nvme_find_ns(dev, i);
if (ns) {
if (revalidate_disk(ns->disk)) {
if (revalidate_disk(ns->disk))
nvme_ns_remove(ns);
nvme_free_namespace(ns);
}
} else
nvme_alloc_ns(dev, i);
}
list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
if (ns->ns_id > nn) {
if (ns->ns_id > nn)
nvme_ns_remove(ns);
nvme_free_namespace(ns);
}
}
list_sort(NULL, &dev->namespaces, ns_cmp);
}
......@@ -2828,9 +2830,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
static void nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns;
struct nvme_ns *ns, *next;
list_for_each_entry(ns, &dev->namespaces, list)
list_for_each_entry_safe(ns, next, &dev->namespaces, list)
nvme_ns_remove(ns);
}
......@@ -2886,21 +2888,12 @@ static void nvme_release_instance(struct nvme_dev *dev)
spin_unlock(&dev_list_lock);
}
static void nvme_free_namespaces(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
list_for_each_entry_safe(ns, next, &dev->namespaces, list)
nvme_free_namespace(ns);
}
static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
put_device(dev->dev);
put_device(dev->device);
nvme_free_namespaces(dev);
nvme_release_instance(dev);
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
......@@ -2974,14 +2967,15 @@ static const struct file_operations nvme_dev_fops = {
.compat_ioctl = nvme_dev_ioctl,
};
static int nvme_dev_start(struct nvme_dev *dev)
static void nvme_probe_work(struct work_struct *work)
{
int result;
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
bool start_thread = false;
int result;
result = nvme_dev_map(dev);
if (result)
return result;
goto out;
result = nvme_configure_admin_queue(dev);
if (result)
......@@ -3016,7 +3010,20 @@ static int nvme_dev_start(struct nvme_dev *dev)
goto free_tags;
dev->event_limit = 1;
return result;
/*
* Keep the controller around but remove all namespaces if we don't have
* any working I/O queue.
*/
if (dev->online_queues < 2) {
dev_warn(dev->dev, "IO queues not created\n");
nvme_dev_remove(dev);
} else {
nvme_unfreeze_queues(dev);
nvme_dev_add(dev);
}
return;
free_tags:
nvme_dev_remove_admin(dev);
......@@ -3028,7 +3035,9 @@ static int nvme_dev_start(struct nvme_dev *dev)
nvme_dev_list_remove(dev);
unmap:
nvme_dev_unmap(dev);
return result;
out:
if (!work_busy(&dev->reset_work))
nvme_dead_ctrl(dev);
}
static int nvme_remove_dead_ctrl(void *arg)
......@@ -3042,33 +3051,6 @@ static int nvme_remove_dead_ctrl(void *arg)
return 0;
}
static void nvme_remove_disks(struct work_struct *ws)
{
struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
nvme_free_queues(dev, 1);
nvme_dev_remove(dev);
}
static int nvme_dev_resume(struct nvme_dev *dev)
{
int ret;
ret = nvme_dev_start(dev);
if (ret)
return ret;
if (dev->online_queues < 2) {
spin_lock(&dev_list_lock);
dev->reset_workfn = nvme_remove_disks;
queue_work(nvme_workq, &dev->reset_work);
spin_unlock(&dev_list_lock);
} else {
nvme_unfreeze_queues(dev);
nvme_dev_add(dev);
}
return 0;
}
static void nvme_dead_ctrl(struct nvme_dev *dev)
{
dev_warn(dev->dev, "Device failed to resume\n");
......@@ -3081,8 +3063,9 @@ static void nvme_dead_ctrl(struct nvme_dev *dev)
}
}
static void nvme_dev_reset(struct nvme_dev *dev)
static void nvme_reset_work(struct work_struct *ws)
{
struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
bool in_probe = work_busy(&dev->probe_work);
nvme_dev_shutdown(dev);
......@@ -3102,31 +3085,24 @@ static void nvme_dev_reset(struct nvme_dev *dev)
schedule_work(&dev->probe_work);
}
static void nvme_reset_failed_dev(struct work_struct *ws)
{
struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
nvme_dev_reset(dev);
}
static void nvme_reset_workfn(struct work_struct *work)
static int __nvme_reset(struct nvme_dev *dev)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
dev->reset_workfn(work);
if (work_pending(&dev->reset_work))
return -EBUSY;
list_del_init(&dev->node);
queue_work(nvme_workq, &dev->reset_work);
return 0;
}
static int nvme_reset(struct nvme_dev *dev)
{
int ret = -EBUSY;
int ret;
if (!dev->admin_q || blk_queue_dying(dev->admin_q))
return -ENODEV;
spin_lock(&dev_list_lock);
if (!work_pending(&dev->reset_work)) {
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
ret = 0;
}
ret = __nvme_reset(dev);
spin_unlock(&dev_list_lock);
if (!ret) {
......@@ -3153,7 +3129,6 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
static void nvme_async_probe(struct work_struct *work);
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
......@@ -3176,8 +3151,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto free;
INIT_LIST_HEAD(&dev->namespaces);
dev->reset_workfn = nvme_reset_failed_dev;
INIT_WORK(&dev->reset_work, nvme_reset_workfn);
INIT_WORK(&dev->reset_work, nvme_reset_work);
dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev);
......@@ -3205,7 +3179,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->probe_work, nvme_async_probe);
INIT_WORK(&dev->probe_work, nvme_probe_work);
schedule_work(&dev->probe_work);
return 0;
......@@ -3225,14 +3199,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return result;
}
static void nvme_async_probe(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
nvme_dead_ctrl(dev);
}
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
......@@ -3240,7 +3206,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
if (prepare)
nvme_dev_shutdown(dev);
else
nvme_dev_resume(dev);
schedule_work(&dev->probe_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
......@@ -3294,10 +3260,7 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) {
ndev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &ndev->reset_work);
}
schedule_work(&ndev->probe_work);
return 0;
}
#endif
......
......@@ -17,7 +17,6 @@
* each command is translated.
*/
#include <linux/nvme.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
......@@ -45,6 +44,7 @@
#include <scsi/sg.h>
#include <scsi/scsi.h>
#include "nvme.h"
static int sg_version_num = 30534; /* 2 digits for each component */
......
......@@ -15,10 +15,7 @@
#ifndef _LINUX_NVME_H
#define _LINUX_NVME_H
#include <uapi/linux/nvme.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/types.h>
struct nvme_bar {
__u64 cap; /* Controller Capabilities */
......@@ -76,115 +73,528 @@ enum {
NVME_CSTS_SHST_MASK = 3 << 2,
};
/* Timeout value; only declared here, defined in another translation unit. */
extern unsigned char nvme_io_timeout;
/* nvme_io_timeout scaled by HZ; presumably seconds converted to jiffies -- confirm. */
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
/*
 * One power state descriptor; struct nvme_id_ctrl carries 32 of these in
 * psd[]. The members add up to 32 bytes.
 */
struct nvme_id_power_state {
__le16 max_power; /* centiwatts */
__u8 rsvd2;
__u8 flags;
__le32 entry_lat; /* microseconds */
__le32 exit_lat; /* microseconds */
__u8 read_tput;
__u8 read_lat;
__u8 write_tput;
__u8 write_lat;
__le16 idle_power;
__u8 idle_scale;
__u8 rsvd19;
__le16 active_power;
__u8 active_work_scale;
__u8 rsvd23[9];
};
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct nvme_dev {
struct list_head node;
struct nvme_queue **queues;
struct request_queue *admin_q;
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
int instance;
unsigned queue_count;
unsigned online_queues;
unsigned max_qid;
int q_depth;
u32 db_stride;
u32 ctrl_config;
struct msix_entry *entry;
struct nvme_bar __iomem *bar;
struct list_head namespaces;
struct kref kref;
struct device *device;
work_func_t reset_workfn;
struct work_struct reset_work;
struct work_struct probe_work;
struct work_struct scan_work;
char name[12];
char serial[20];
char model[40];
char firmware_rev[8];
bool subsystem;
u32 max_hw_sectors;
u32 stripe_size;
u32 page_size;
void __iomem *cmb;
dma_addr_t cmb_dma_addr;
u64 cmb_size;
u32 cmbsz;
u16 oncs;
u16 abort_limit;
u8 event_limit;
u8 vwc;
/* Bit masks for bits 0 and 1; by name they apply to nvme_id_power_state.flags -- confirm. */
enum {
NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
};
/*
* An NVM Express namespace is equivalent to a SCSI LUN
*/
struct nvme_ns {
struct list_head list;
/*
 * Controller identification data. Reserved members are named after their
 * byte offset (rsvd84, rsvd270, ...), and the members add up to 4096 bytes.
 * Every multi-byte integer carries a little-endian (__le*) annotation.
 */
struct nvme_id_ctrl {
__le16 vid;
__le16 ssvid;
char sn[20];
char mn[40];
char fr[8];
__u8 rab;
__u8 ieee[3];
__u8 mic;
__u8 mdts;
__le16 cntlid;
__le32 ver;
__u8 rsvd84[172];
__le16 oacs;
__u8 acl;
__u8 aerl;
__u8 frmw;
__u8 lpa;
__u8 elpe;
__u8 npss;
__u8 avscc;
__u8 apsta;
__le16 wctemp;
__le16 cctemp;
__u8 rsvd270[242];
__u8 sqes;
__u8 cqes;
__u8 rsvd514[2];
__le32 nn;
__le16 oncs;
__le16 fuses;
__u8 fna;
__u8 vwc;
__le16 awun;
__le16 awupf;
__u8 nvscc;
__u8 rsvd531;
__le16 acwu;
__u8 rsvd534[2];
__le32 sgls;
__u8 rsvd540[1508];
/* 32 descriptors of 32 bytes each */
struct nvme_id_power_state psd[32];
__u8 vs[1024];
};
struct nvme_dev *dev;
struct request_queue *queue;
struct gendisk *disk;
/* Bit masks; by name the ONCS ones apply to nvme_id_ctrl.oncs and VWC_PRESENT to nvme_id_ctrl.vwc -- confirm. */
enum {
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
NVME_CTRL_VWC_PRESENT = 1 << 0,
};
unsigned ns_id;
int lba_shift;
u16 ms;
bool ext;
u8 pi_type;
u64 mode_select_num_blocks;
u32 mode_select_block_len;
/* One 4-byte LBA format entry; struct nvme_id_ns holds 16 of them in lbaf[]. */
struct nvme_lbaf {
__le16 ms;
__u8 ds;
__u8 rp;
};
/*
* The nvme_iod describes the data in an I/O, including the list of PRP
* entries. You can't see it in this data structure because C doesn't let
* me express that. Use nvme_alloc_iod to ensure there's enough space
* allocated to store the PRP list.
*/
struct nvme_iod {
	unsigned long private; /* For the use of the submitter of the I/O */
	int npages; /* In the PRP list. 0 means small pool in use */
	int offset; /* Of PRP list */
	int nents; /* Used in scatterlist */
	int length; /* Of data, in bytes */
	dma_addr_t first_dma;
	struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
	/*
	 * Trailing scatterlist entries, sized by the allocator. Declared as a
	 * flexible array member rather than a zero-length array so the compiler
	 * knows it is the variable-length tail; sizeof(struct nvme_iod) is
	 * unchanged.
	 */
	struct scatterlist sg[];
};
/*
 * Convert @sector into a block number for @ns by shifting it right by
 * (ns->lba_shift - 9).
 */
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
	const int shift = ns->lba_shift - 9;

	return sector >> shift;
}
/*
 * Declarations only; the definitions are not part of this header.
 * __nvme_submit_sync_cmd() takes the same queue/command/buffer arguments as
 * nvme_submit_sync_cmd() plus a __user buffer, a result pointer and a timeout.
 */
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buffer, void __user *ubuffer, unsigned bufflen,
u32 *result, unsigned timeout);
/* Per-device helpers; the identify and log-page ones hand their result back through a double pointer. */
int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
struct nvme_id_ns **id);
int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
dma_addr_t dma_addr, u32 *result);
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
/* struct sg_io_hdr is only forward-declared; the sg entry points below take it by pointer. */
struct sg_io_hdr;
int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg);
int nvme_sg_get_version_num(int __user *ip);
/*
 * Namespace identification data. Reserved members are named after their
 * byte offset, and the members add up to 4096 bytes.
 */
struct nvme_id_ns {
__le64 nsze;
__le64 ncap;
__le64 nuse;
__u8 nsfeat;
__u8 nlbaf;
__u8 flbas;
__u8 mc;
__u8 dpc;
__u8 dps;
__u8 nmic;
__u8 rescap;
__u8 fpi;
__u8 rsvd33;
__le16 nawun;
__le16 nawupf;
__le16 nacwu;
__le16 nabsn;
__le16 nabo;
__le16 nabspf;
__u16 rsvd46;
__le64 nvmcap[2];
__u8 rsvd64[40];
__u8 nguid[16];
__u8 eui64[8];
/* 16 entries of 4 bytes each */
struct nvme_lbaf lbaf[16];
__u8 rsvd192[192];
__u8 vs[3712];
};
/*
 * Namespace-related constants grouped by prefix: NS_FEAT, NS_FLBAS,
 * LBAF_RP, NS_DPC and NS_DPS. By name they presumably go with the nsfeat,
 * flbas, lbaf[].rp, dpc and dps members of struct nvme_id_ns -- confirm.
 */
enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_NS_FLBAS_LBA_MASK = 0xf,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
NVME_LBAF_RP_DEGRADED = 3,
NVME_NS_DPC_PI_LAST = 1 << 4,
NVME_NS_DPC_PI_FIRST = 1 << 3,
NVME_NS_DPC_PI_TYPE3 = 1 << 2,
NVME_NS_DPC_PI_TYPE2 = 1 << 1,
NVME_NS_DPC_PI_TYPE1 = 1 << 0,
NVME_NS_DPS_PI_FIRST = 1 << 3,
NVME_NS_DPS_PI_MASK = 0x7,
NVME_NS_DPS_PI_TYPE1 = 1,
NVME_NS_DPS_PI_TYPE2 = 2,
NVME_NS_DPS_PI_TYPE3 = 3,
};
/*
 * SMART log data. The ten counters from data_units_read to
 * num_err_log_entries are raw 16-byte arrays; the members add up to
 * 512 bytes.
 */
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
__u8 avail_spare;
__u8 spare_thresh;
__u8 percent_used;
__u8 rsvd6[26];
__u8 data_units_read[16];
__u8 data_units_written[16];
__u8 host_reads[16];
__u8 host_writes[16];
__u8 ctrl_busy_time[16];
__u8 power_cycles[16];
__u8 power_on_hours[16];
__u8 unsafe_shutdowns[16];
__u8 media_errors[16];
__u8 num_err_log_entries[16];
__le32 warning_temp_time;
__le32 critical_comp_time;
__le16 temp_sensor[8];
__u8 rsvd216[296];
};
/* Bit masks for bits 0-4; presumably tested against nvme_smart_log.critical_warning -- confirm. */
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
NVME_SMART_CRIT_RELIABILITY = 1 << 2,
NVME_SMART_CRIT_MEDIA = 1 << 3,
NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
};
/* Single event code; by name an asynchronous-event notice for a namespace change -- confirm. */
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x0002,
};
/*
 * One LBA range entry; the members add up to 64 bytes.
 * NOTE(review): slba and nlb are plain __u64 here, without an __le64
 * annotation -- confirm whether that is intentional.
 */
struct nvme_lba_range_type {
__u8 type;
__u8 attributes;
__u8 rsvd2[14];
__u64 slba;
__u64 nlb;
__u8 guid[16];
__u8 rsvd48[16];
};
/* TYPE_* are plain values 1-4 and ATTRIB_* are bit masks; by name they go with nvme_lba_range_type.type and .attributes -- confirm. */
enum {
NVME_LBART_TYPE_FS = 0x01,
NVME_LBART_TYPE_RAID = 0x02,
NVME_LBART_TYPE_CACHE = 0x03,
NVME_LBART_TYPE_SWAP = 0x04,
NVME_LBART_ATTRIB_TEMP = 1 << 0,
NVME_LBART_ATTRIB_HIDE = 1 << 1,
};
/*
 * Fixed header followed by a variable number of entries in the flexible
 * array regctl_ds[]. The fixed members add up to 23 bytes.
 * NOTE(review): regctl_ds[] contains __le64 members, so it presumably
 * starts at offset 24 after one byte of compiler padding -- confirm.
 */
struct nvme_reservation_status {
__le32 gen;
__u8 rtype;
__u8 regctl[2];
__u8 resv5[2];
__u8 ptpls;
__u8 resv10[13];
/* each entry is 24 bytes */
struct {
__le16 cntlid;
__u8 rcsts;
__u8 resv3[5];
__le64 hostid;
__le64 rkey;
} regctl_ds[];
};
/* I/O commands */
/* Opcode values for I/O commands; the numbering is not contiguous. */
enum nvme_opcode {
nvme_cmd_flush = 0x00,
nvme_cmd_write = 0x01,
nvme_cmd_read = 0x02,
nvme_cmd_write_uncor = 0x04,
nvme_cmd_compare = 0x05,
nvme_cmd_write_zeroes = 0x08,
nvme_cmd_dsm = 0x09,
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
nvme_cmd_resv_release = 0x15,
};
/*
 * Generic command layout: opcode, flags, command_id and nsid, followed by
 * cdw2[2], the __le64 metadata/prp1/prp2 fields and cdw10[6]. The members
 * add up to 64 bytes, like every other command struct in this header.
 */
struct nvme_common_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[2];
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le32 cdw10[6];
};
/*
 * Read/write command layout (the rw member of struct nvme_command).
 * Shares the first four members with nvme_common_command; the members add
 * up to 64 bytes.
 */
struct nvme_rw_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le64 slba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le32 reftag;
__le16 apptag;
__le16 appmask;
};
/*
 * Constants for read/write commands. LR, FUA and PRINFO_* occupy bits
 * 10-15 (presumably of nvme_rw_command.control -- confirm). The DSM_*
 * values fit in the low 8 bits (presumably of dsmgmt -- confirm): FREQ_*
 * are values 0-8, LATENCY_* sit in bits 4-5, SEQ_REQ is bit 6 and
 * COMPRESSED is bit 7.
 */
enum {
NVME_RW_LR = 1 << 15,
NVME_RW_FUA = 1 << 14,
NVME_RW_DSM_FREQ_UNSPEC = 0,
NVME_RW_DSM_FREQ_TYPICAL = 1,
NVME_RW_DSM_FREQ_RARE = 2,
NVME_RW_DSM_FREQ_READS = 3,
NVME_RW_DSM_FREQ_WRITES = 4,
NVME_RW_DSM_FREQ_RW = 5,
NVME_RW_DSM_FREQ_ONCE = 6,
NVME_RW_DSM_FREQ_PREFETCH = 7,
NVME_RW_DSM_FREQ_TEMP = 8,
NVME_RW_DSM_LATENCY_NONE = 0 << 4,
NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
NVME_RW_DSM_LATENCY_NORM = 2 << 4,
NVME_RW_DSM_LATENCY_LOW = 3 << 4,
NVME_RW_DSM_SEQ_REQ = 1 << 6,
NVME_RW_DSM_COMPRESSED = 1 << 7,
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
};
/* Command layout for the dsm member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_dsm_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 nr;
__le32 attributes;
__u32 rsvd12[4];
};
/* Bit masks for bits 0-2; presumably for nvme_dsm_cmd.attributes -- confirm. */
enum {
NVME_DSMGMT_IDR = 1 << 0,
NVME_DSMGMT_IDW = 1 << 1,
NVME_DSMGMT_AD = 1 << 2,
};
/* One 16-byte range entry, all members little-endian. */
struct nvme_dsm_range {
__le32 cattr;
__le32 nlb;
__le64 slba;
};
/* Admin commands */
/* Opcode values for admin commands; 0x80 and above are format/security commands. */
enum nvme_admin_opcode {
nvme_admin_delete_sq = 0x00,
nvme_admin_create_sq = 0x01,
nvme_admin_get_log_page = 0x02,
nvme_admin_delete_cq = 0x04,
nvme_admin_create_cq = 0x05,
nvme_admin_identify = 0x06,
nvme_admin_abort_cmd = 0x08,
nvme_admin_set_features = 0x09,
nvme_admin_get_features = 0x0a,
nvme_admin_async_event = 0x0c,
nvme_admin_activate_fw = 0x10,
nvme_admin_download_fw = 0x11,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
nvme_admin_security_recv = 0x82,
};
/*
 * Several unrelated constant families share this anonymous enum: queue
 * creation flags (QUEUE_*, CQ_*, SQ_PRIO_*), NVME_FEAT_* ids, NVME_LOG_*
 * ids and NVME_FWACT_* values. NVME_CQ_IRQ_ENABLED and NVME_SQ_PRIO_HIGH
 * both equal (1 << 1); by name they belong to different flag fields.
 */
enum {
NVME_QUEUE_PHYS_CONTIG = (1 << 0),
NVME_CQ_IRQ_ENABLED = (1 << 1),
NVME_SQ_PRIO_URGENT = (0 << 1),
NVME_SQ_PRIO_HIGH = (1 << 1),
NVME_SQ_PRIO_MEDIUM = (2 << 1),
NVME_SQ_PRIO_LOW = (3 << 1),
NVME_FEAT_ARBITRATION = 0x01,
NVME_FEAT_POWER_MGMT = 0x02,
NVME_FEAT_LBA_RANGE = 0x03,
NVME_FEAT_TEMP_THRESH = 0x04,
NVME_FEAT_ERR_RECOVERY = 0x05,
NVME_FEAT_VOLATILE_WC = 0x06,
NVME_FEAT_NUM_QUEUES = 0x07,
NVME_FEAT_IRQ_COALESCE = 0x08,
NVME_FEAT_IRQ_CONFIG = 0x09,
NVME_FEAT_WRITE_ATOMIC = 0x0a,
NVME_FEAT_ASYNC_EVENT = 0x0b,
NVME_FEAT_AUTO_PST = 0x0c,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
NVME_FEAT_RESV_PERSIST = 0x83,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
NVME_FWACT_REPL_ACTV = (1 << 3),
NVME_FWACT_ACTV = (2 << 3),
};
/* Command layout for the identify member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_identify {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 cns;
__u32 rsvd11[5];
};
/* Command layout for the features member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_features {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 fid;
__le32 dword11;
__u32 rsvd12[4];
};
/*
 * Command layout for the create_cq member of struct nvme_command; the
 * members add up to 64 bytes. Same shape as nvme_create_sq except for the
 * last four 16-bit members.
 */
struct nvme_create_cq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 cqid;
__le16 qsize;
__le16 cq_flags;
__le16 irq_vector;
__u32 rsvd12[4];
};
/*
 * Command layout for the create_sq member of struct nvme_command; the
 * members add up to 64 bytes. Carries both its own sqid and a cqid.
 */
struct nvme_create_sq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 sqid;
__le16 qsize;
__le16 sq_flags;
__le16 cqid;
__u32 rsvd12[4];
};
/* Command layout for the delete_queue member of struct nvme_command; only qid is non-reserved after the header. 64 bytes. */
struct nvme_delete_queue {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[9];
__le16 qid;
__u16 rsvd10;
__u32 rsvd11[5];
};
/*
 * Command layout for the abort member of struct nvme_command; 64 bytes.
 * cid is a plain __u16 like command_id, not an __le16.
 */
struct nvme_abort_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[9];
__le16 sqid;
__u16 cid;
__u32 rsvd11[5];
};
/* Command layout for the dlfw member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_download_firmware {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__le64 prp2;
__le32 numd;
__le32 offset;
__u32 rsvd12[4];
};
/* Command layout for the format member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_format_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[4];
__le32 cdw10;
__u32 rsvd11[5];
};
/*
 * A command as a union of all per-command layouts declared in this header.
 * The union is anonymous, so members are reached directly (cmd.rw,
 * cmd.identify, ...). Every member layout is 64 bytes.
 */
struct nvme_command {
union {
struct nvme_common_command common;
struct nvme_rw_command rw;
struct nvme_identify identify;
struct nvme_features features;
struct nvme_create_cq create_cq;
struct nvme_create_sq create_sq;
struct nvme_delete_queue delete_queue;
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
struct nvme_abort_cmd abort;
};
};
/*
 * Status code values. They fall into numeric bands: 0x0-0x11 and
 * 0x80-0x83, then 0x100-0x110 and 0x180-0x182, then 0x280-0x286.
 * NVME_SC_DNR (0x4000) lies outside those bands and is a single bit;
 * presumably a flag OR-ed onto a code -- confirm.
 */
enum {
NVME_SC_SUCCESS = 0x0,
NVME_SC_INVALID_OPCODE = 0x1,
NVME_SC_INVALID_FIELD = 0x2,
NVME_SC_CMDID_CONFLICT = 0x3,
NVME_SC_DATA_XFER_ERROR = 0x4,
NVME_SC_POWER_LOSS = 0x5,
NVME_SC_INTERNAL = 0x6,
NVME_SC_ABORT_REQ = 0x7,
NVME_SC_ABORT_QUEUE = 0x8,
NVME_SC_FUSED_FAIL = 0x9,
NVME_SC_FUSED_MISSING = 0xa,
NVME_SC_INVALID_NS = 0xb,
NVME_SC_CMD_SEQ_ERROR = 0xc,
NVME_SC_SGL_INVALID_LAST = 0xd,
NVME_SC_SGL_INVALID_COUNT = 0xe,
NVME_SC_SGL_INVALID_DATA = 0xf,
NVME_SC_SGL_INVALID_METADATA = 0x10,
NVME_SC_SGL_INVALID_TYPE = 0x11,
NVME_SC_LBA_RANGE = 0x80,
NVME_SC_CAP_EXCEEDED = 0x81,
NVME_SC_NS_NOT_READY = 0x82,
NVME_SC_RESERVATION_CONFLICT = 0x83,
NVME_SC_CQ_INVALID = 0x100,
NVME_SC_QID_INVALID = 0x101,
NVME_SC_QUEUE_SIZE = 0x102,
NVME_SC_ABORT_LIMIT = 0x103,
NVME_SC_ABORT_MISSING = 0x104,
NVME_SC_ASYNC_LIMIT = 0x105,
NVME_SC_FIRMWARE_SLOT = 0x106,
NVME_SC_FIRMWARE_IMAGE = 0x107,
NVME_SC_INVALID_VECTOR = 0x108,
NVME_SC_INVALID_LOG_PAGE = 0x109,
NVME_SC_INVALID_FORMAT = 0x10a,
NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b,
NVME_SC_INVALID_QUEUE = 0x10c,
NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d,
NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e,
NVME_SC_FEATURE_NOT_PER_NS = 0x10f,
NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110,
NVME_SC_BAD_ATTRIBUTES = 0x180,
NVME_SC_INVALID_PI = 0x181,
NVME_SC_READ_ONLY = 0x182,
NVME_SC_WRITE_FAULT = 0x280,
NVME_SC_READ_ERROR = 0x281,
NVME_SC_GUARD_CHECK = 0x282,
NVME_SC_APPTAG_CHECK = 0x283,
NVME_SC_REFTAG_CHECK = 0x284,
NVME_SC_COMPARE_FAILED = 0x285,
NVME_SC_ACCESS_DENIED = 0x286,
NVME_SC_DNR = 0x4000,
};
/*
 * One completion entry; the members add up to 16 bytes. command_id is a
 * plain __u16, matching the command_id member of the command structs.
 */
struct nvme_completion {
__le32 result; /* Used by admin commands to return data */
__u32 rsvd;
__le16 sq_head; /* how much of this queue may be reclaimed */
__le16 sq_id; /* submission queue that generated this entry */
__u16 command_id; /* of the command which completed */
__le16 status; /* did the command fail, and if so, why? */
};
/* Pack a version number: major goes to bits 16 and up, minor to bits 8 and up. Both arguments are parenthesized. */
#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8))
#endif /* _LINUX_NVME_H */
......@@ -21,6 +21,7 @@ enum {
LO_FLAGS_READ_ONLY = 1,
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
LO_FLAGS_DIRECT_IO = 16,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
......@@ -86,6 +87,7 @@ struct loop_info64 {
#define LOOP_GET_STATUS64 0x4C05
#define LOOP_CHANGE_FD 0x4C06
#define LOOP_SET_CAPACITY 0x4C07
#define LOOP_SET_DIRECT_IO 0x4C08
/* /dev/loop-control interface */
#define LOOP_CTL_ADD 0x4C80
......
/*
* Definitions for the NVM Express interface
* Copyright (c) 2011-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI_LINUX_NVME_H
#define _UAPI_LINUX_NVME_H
#include <linux/types.h>
/*
 * One power state descriptor; struct nvme_id_ctrl carries 32 of these in
 * psd[]. The members add up to 32 bytes.
 */
struct nvme_id_power_state {
__le16 max_power; /* centiwatts */
__u8 rsvd2;
__u8 flags;
__le32 entry_lat; /* microseconds */
__le32 exit_lat; /* microseconds */
__u8 read_tput;
__u8 read_lat;
__u8 write_tput;
__u8 write_lat;
__le16 idle_power;
__u8 idle_scale;
__u8 rsvd19;
__le16 active_power;
__u8 active_work_scale;
__u8 rsvd23[9];
};
/* Bit masks for bits 0 and 1; by name they apply to nvme_id_power_state.flags -- confirm. */
enum {
NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
};
/*
 * Controller identification data. Reserved members are named after their
 * byte offset (rsvd84, rsvd270, ...), and the members add up to 4096 bytes.
 *
 * cntlid and ver are annotated __le16/__le32 like every other multi-byte
 * integer in this structure, so that endianness checkers can flag accesses
 * that skip the byte-order conversion. The annotation does not change the
 * size or offset of any member.
 */
struct nvme_id_ctrl {
	__le16 vid;
	__le16 ssvid;
	char sn[20];
	char mn[40];
	char fr[8];
	__u8 rab;
	__u8 ieee[3];
	__u8 mic;
	__u8 mdts;
	__le16 cntlid;
	__le32 ver;
	__u8 rsvd84[172];
	__le16 oacs;
	__u8 acl;
	__u8 aerl;
	__u8 frmw;
	__u8 lpa;
	__u8 elpe;
	__u8 npss;
	__u8 avscc;
	__u8 apsta;
	__le16 wctemp;
	__le16 cctemp;
	__u8 rsvd270[242];
	__u8 sqes;
	__u8 cqes;
	__u8 rsvd514[2];
	__le32 nn;
	__le16 oncs;
	__le16 fuses;
	__u8 fna;
	__u8 vwc;
	__le16 awun;
	__le16 awupf;
	__u8 nvscc;
	__u8 rsvd531;
	__le16 acwu;
	__u8 rsvd534[2];
	__le32 sgls;
	__u8 rsvd540[1508];
	/* 32 descriptors of 32 bytes each */
	struct nvme_id_power_state psd[32];
	__u8 vs[1024];
};
/* Bit masks; by name the ONCS ones apply to nvme_id_ctrl.oncs and VWC_PRESENT to nvme_id_ctrl.vwc -- confirm. */
enum {
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
NVME_CTRL_VWC_PRESENT = 1 << 0,
};
/* One 4-byte LBA format entry; struct nvme_id_ns holds 16 of them in lbaf[]. */
struct nvme_lbaf {
__le16 ms;
__u8 ds;
__u8 rp;
};
/*
 * Namespace identification data. Reserved members are named after their
 * byte offset, and the members add up to 4096 bytes.
 */
struct nvme_id_ns {
__le64 nsze;
__le64 ncap;
__le64 nuse;
__u8 nsfeat;
__u8 nlbaf;
__u8 flbas;
__u8 mc;
__u8 dpc;
__u8 dps;
__u8 nmic;
__u8 rescap;
__u8 fpi;
__u8 rsvd33;
__le16 nawun;
__le16 nawupf;
__le16 nacwu;
__le16 nabsn;
__le16 nabo;
__le16 nabspf;
__u16 rsvd46;
__le64 nvmcap[2];
__u8 rsvd64[40];
__u8 nguid[16];
__u8 eui64[8];
/* 16 entries of 4 bytes each */
struct nvme_lbaf lbaf[16];
__u8 rsvd192[192];
__u8 vs[3712];
};
/*
 * Namespace-related constants grouped by prefix: NS_FEAT, NS_FLBAS,
 * LBAF_RP, NS_DPC and NS_DPS. By name they presumably go with the nsfeat,
 * flbas, lbaf[].rp, dpc and dps members of struct nvme_id_ns -- confirm.
 */
enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_NS_FLBAS_LBA_MASK = 0xf,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
NVME_LBAF_RP_DEGRADED = 3,
NVME_NS_DPC_PI_LAST = 1 << 4,
NVME_NS_DPC_PI_FIRST = 1 << 3,
NVME_NS_DPC_PI_TYPE3 = 1 << 2,
NVME_NS_DPC_PI_TYPE2 = 1 << 1,
NVME_NS_DPC_PI_TYPE1 = 1 << 0,
NVME_NS_DPS_PI_FIRST = 1 << 3,
NVME_NS_DPS_PI_MASK = 0x7,
NVME_NS_DPS_PI_TYPE1 = 1,
NVME_NS_DPS_PI_TYPE2 = 2,
NVME_NS_DPS_PI_TYPE3 = 3,
};
/*
 * SMART log data. The ten counters from data_units_read to
 * num_err_log_entries are raw 16-byte arrays; the members add up to
 * 512 bytes.
 */
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
__u8 avail_spare;
__u8 spare_thresh;
__u8 percent_used;
__u8 rsvd6[26];
__u8 data_units_read[16];
__u8 data_units_written[16];
__u8 host_reads[16];
__u8 host_writes[16];
__u8 ctrl_busy_time[16];
__u8 power_cycles[16];
__u8 power_on_hours[16];
__u8 unsafe_shutdowns[16];
__u8 media_errors[16];
__u8 num_err_log_entries[16];
__le32 warning_temp_time;
__le32 critical_comp_time;
__le16 temp_sensor[8];
__u8 rsvd216[296];
};
/* Bit masks for bits 0-4; presumably tested against nvme_smart_log.critical_warning -- confirm. */
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
NVME_SMART_CRIT_RELIABILITY = 1 << 2,
NVME_SMART_CRIT_MEDIA = 1 << 3,
NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
};
/* Single event code; by name an asynchronous-event notice for a namespace change -- confirm. */
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x0002,
};
/*
 * One LBA range entry; the members add up to 64 bytes.
 * NOTE(review): slba and nlb are plain __u64 here, without an __le64
 * annotation -- confirm whether that is intentional.
 */
struct nvme_lba_range_type {
__u8 type;
__u8 attributes;
__u8 rsvd2[14];
__u64 slba;
__u64 nlb;
__u8 guid[16];
__u8 rsvd48[16];
};
/* TYPE_* are plain values 1-4 and ATTRIB_* are bit masks; by name they go with nvme_lba_range_type.type and .attributes -- confirm. */
enum {
NVME_LBART_TYPE_FS = 0x01,
NVME_LBART_TYPE_RAID = 0x02,
NVME_LBART_TYPE_CACHE = 0x03,
NVME_LBART_TYPE_SWAP = 0x04,
NVME_LBART_ATTRIB_TEMP = 1 << 0,
NVME_LBART_ATTRIB_HIDE = 1 << 1,
};
/*
 * Fixed header followed by a variable number of entries in the flexible
 * array regctl_ds[]. The fixed members add up to 23 bytes.
 * NOTE(review): regctl_ds[] contains __le64 members, so it presumably
 * starts at offset 24 after one byte of compiler padding -- confirm.
 */
struct nvme_reservation_status {
__le32 gen;
__u8 rtype;
__u8 regctl[2];
__u8 resv5[2];
__u8 ptpls;
__u8 resv10[13];
/* each entry is 24 bytes */
struct {
__le16 cntlid;
__u8 rcsts;
__u8 resv3[5];
__le64 hostid;
__le64 rkey;
} regctl_ds[];
};
/* I/O commands */
/* Opcode values for I/O commands; the numbering is not contiguous. */
enum nvme_opcode {
nvme_cmd_flush = 0x00,
nvme_cmd_write = 0x01,
nvme_cmd_read = 0x02,
nvme_cmd_write_uncor = 0x04,
nvme_cmd_compare = 0x05,
nvme_cmd_write_zeroes = 0x08,
nvme_cmd_dsm = 0x09,
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
nvme_cmd_resv_release = 0x15,
};
/*
 * Generic command layout: opcode, flags, command_id and nsid, followed by
 * cdw2[2], the __le64 metadata/prp1/prp2 fields and cdw10[6]. The members
 * add up to 64 bytes, like every other command struct in this header.
 */
struct nvme_common_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[2];
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le32 cdw10[6];
};
/*
 * Read/write command layout (the rw member of struct nvme_command).
 * Shares the first four members with nvme_common_command; the members add
 * up to 64 bytes.
 */
struct nvme_rw_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le64 slba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le32 reftag;
__le16 apptag;
__le16 appmask;
};
/*
 * Constants for read/write commands. LR, FUA and PRINFO_* occupy bits
 * 10-15 (presumably of nvme_rw_command.control -- confirm). The DSM_*
 * values fit in the low 8 bits (presumably of dsmgmt -- confirm): FREQ_*
 * are values 0-8, LATENCY_* sit in bits 4-5, SEQ_REQ is bit 6 and
 * COMPRESSED is bit 7.
 */
enum {
NVME_RW_LR = 1 << 15,
NVME_RW_FUA = 1 << 14,
NVME_RW_DSM_FREQ_UNSPEC = 0,
NVME_RW_DSM_FREQ_TYPICAL = 1,
NVME_RW_DSM_FREQ_RARE = 2,
NVME_RW_DSM_FREQ_READS = 3,
NVME_RW_DSM_FREQ_WRITES = 4,
NVME_RW_DSM_FREQ_RW = 5,
NVME_RW_DSM_FREQ_ONCE = 6,
NVME_RW_DSM_FREQ_PREFETCH = 7,
NVME_RW_DSM_FREQ_TEMP = 8,
NVME_RW_DSM_LATENCY_NONE = 0 << 4,
NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
NVME_RW_DSM_LATENCY_NORM = 2 << 4,
NVME_RW_DSM_LATENCY_LOW = 3 << 4,
NVME_RW_DSM_SEQ_REQ = 1 << 6,
NVME_RW_DSM_COMPRESSED = 1 << 7,
NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13,
};
/* Command layout for the dsm member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_dsm_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 nr;
__le32 attributes;
__u32 rsvd12[4];
};
/* Bit masks for bits 0-2; presumably for nvme_dsm_cmd.attributes -- confirm. */
enum {
NVME_DSMGMT_IDR = 1 << 0,
NVME_DSMGMT_IDW = 1 << 1,
NVME_DSMGMT_AD = 1 << 2,
};
/* One 16-byte range entry, all members little-endian. */
struct nvme_dsm_range {
__le32 cattr;
__le32 nlb;
__le64 slba;
};
/* Admin commands */
/* Opcode values for admin commands; 0x80 and above are format/security commands. */
enum nvme_admin_opcode {
nvme_admin_delete_sq = 0x00,
nvme_admin_create_sq = 0x01,
nvme_admin_get_log_page = 0x02,
nvme_admin_delete_cq = 0x04,
nvme_admin_create_cq = 0x05,
nvme_admin_identify = 0x06,
nvme_admin_abort_cmd = 0x08,
nvme_admin_set_features = 0x09,
nvme_admin_get_features = 0x0a,
nvme_admin_async_event = 0x0c,
nvme_admin_activate_fw = 0x10,
nvme_admin_download_fw = 0x11,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
nvme_admin_security_recv = 0x82,
};
/*
 * Several unrelated constant families share this anonymous enum: queue
 * creation flags (QUEUE_*, CQ_*, SQ_PRIO_*), NVME_FEAT_* ids, NVME_LOG_*
 * ids and NVME_FWACT_* values. NVME_CQ_IRQ_ENABLED and NVME_SQ_PRIO_HIGH
 * both equal (1 << 1); by name they belong to different flag fields.
 */
enum {
NVME_QUEUE_PHYS_CONTIG = (1 << 0),
NVME_CQ_IRQ_ENABLED = (1 << 1),
NVME_SQ_PRIO_URGENT = (0 << 1),
NVME_SQ_PRIO_HIGH = (1 << 1),
NVME_SQ_PRIO_MEDIUM = (2 << 1),
NVME_SQ_PRIO_LOW = (3 << 1),
NVME_FEAT_ARBITRATION = 0x01,
NVME_FEAT_POWER_MGMT = 0x02,
NVME_FEAT_LBA_RANGE = 0x03,
NVME_FEAT_TEMP_THRESH = 0x04,
NVME_FEAT_ERR_RECOVERY = 0x05,
NVME_FEAT_VOLATILE_WC = 0x06,
NVME_FEAT_NUM_QUEUES = 0x07,
NVME_FEAT_IRQ_COALESCE = 0x08,
NVME_FEAT_IRQ_CONFIG = 0x09,
NVME_FEAT_WRITE_ATOMIC = 0x0a,
NVME_FEAT_ASYNC_EVENT = 0x0b,
NVME_FEAT_AUTO_PST = 0x0c,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
NVME_FEAT_RESV_PERSIST = 0x83,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
NVME_FWACT_REPL_ACTV = (1 << 3),
NVME_FWACT_ACTV = (2 << 3),
};
/* Command layout for the identify member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_identify {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 cns;
__u32 rsvd11[5];
};
/* Command layout for the features member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_features {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 fid;
__le32 dword11;
__u32 rsvd12[4];
};
/*
 * Command layout for the create_cq member of struct nvme_command; the
 * members add up to 64 bytes. Same shape as nvme_create_sq except for the
 * last four 16-bit members.
 */
struct nvme_create_cq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 cqid;
__le16 qsize;
__le16 cq_flags;
__le16 irq_vector;
__u32 rsvd12[4];
};
/*
 * Command layout for the create_sq member of struct nvme_command; the
 * members add up to 64 bytes. Carries both its own sqid and a cqid.
 */
struct nvme_create_sq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 sqid;
__le16 qsize;
__le16 sq_flags;
__le16 cqid;
__u32 rsvd12[4];
};
/* Command layout for the delete_queue member of struct nvme_command; only qid is non-reserved after the header. 64 bytes. */
struct nvme_delete_queue {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[9];
__le16 qid;
__u16 rsvd10;
__u32 rsvd11[5];
};
/*
 * Command layout for the abort member of struct nvme_command; 64 bytes.
 * cid is a plain __u16 like command_id, not an __le16.
 */
struct nvme_abort_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[9];
__le16 sqid;
__u16 cid;
__u32 rsvd11[5];
};
/* Command layout for the dlfw member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_download_firmware {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__le64 prp2;
__le32 numd;
__le32 offset;
__u32 rsvd12[4];
};
/* Command layout for the format member of struct nvme_command; the members add up to 64 bytes. */
struct nvme_format_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[4];
__le32 cdw10;
__u32 rsvd11[5];
};
/*
 * A command as a union of all per-command layouts declared in this header.
 * The union is anonymous, so members are reached directly (cmd.rw,
 * cmd.identify, ...). Every member layout is 64 bytes.
 */
struct nvme_command {
union {
struct nvme_common_command common;
struct nvme_rw_command rw;
struct nvme_identify identify;
struct nvme_features features;
struct nvme_create_cq create_cq;
struct nvme_create_sq create_sq;
struct nvme_delete_queue delete_queue;
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
struct nvme_abort_cmd abort;
};
};
/*
 * Status code values. They fall into numeric bands: 0x0-0x11 and
 * 0x80-0x83, then 0x100-0x110 and 0x180-0x182, then 0x280-0x286.
 * NVME_SC_DNR (0x4000) lies outside those bands and is a single bit;
 * presumably a flag OR-ed onto a code -- confirm.
 */
enum {
NVME_SC_SUCCESS = 0x0,
NVME_SC_INVALID_OPCODE = 0x1,
NVME_SC_INVALID_FIELD = 0x2,
NVME_SC_CMDID_CONFLICT = 0x3,
NVME_SC_DATA_XFER_ERROR = 0x4,
NVME_SC_POWER_LOSS = 0x5,
NVME_SC_INTERNAL = 0x6,
NVME_SC_ABORT_REQ = 0x7,
NVME_SC_ABORT_QUEUE = 0x8,
NVME_SC_FUSED_FAIL = 0x9,
NVME_SC_FUSED_MISSING = 0xa,
NVME_SC_INVALID_NS = 0xb,
NVME_SC_CMD_SEQ_ERROR = 0xc,
NVME_SC_SGL_INVALID_LAST = 0xd,
NVME_SC_SGL_INVALID_COUNT = 0xe,
NVME_SC_SGL_INVALID_DATA = 0xf,
NVME_SC_SGL_INVALID_METADATA = 0x10,
NVME_SC_SGL_INVALID_TYPE = 0x11,
NVME_SC_LBA_RANGE = 0x80,
NVME_SC_CAP_EXCEEDED = 0x81,
NVME_SC_NS_NOT_READY = 0x82,
NVME_SC_RESERVATION_CONFLICT = 0x83,
NVME_SC_CQ_INVALID = 0x100,
NVME_SC_QID_INVALID = 0x101,
NVME_SC_QUEUE_SIZE = 0x102,
NVME_SC_ABORT_LIMIT = 0x103,
NVME_SC_ABORT_MISSING = 0x104,
NVME_SC_ASYNC_LIMIT = 0x105,
NVME_SC_FIRMWARE_SLOT = 0x106,
NVME_SC_FIRMWARE_IMAGE = 0x107,
NVME_SC_INVALID_VECTOR = 0x108,
NVME_SC_INVALID_LOG_PAGE = 0x109,
NVME_SC_INVALID_FORMAT = 0x10a,
NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b,
NVME_SC_INVALID_QUEUE = 0x10c,
NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d,
NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e,
NVME_SC_FEATURE_NOT_PER_NS = 0x10f,
NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110,
NVME_SC_BAD_ATTRIBUTES = 0x180,
NVME_SC_INVALID_PI = 0x181,
NVME_SC_READ_ONLY = 0x182,
NVME_SC_WRITE_FAULT = 0x280,
NVME_SC_READ_ERROR = 0x281,
NVME_SC_GUARD_CHECK = 0x282,
NVME_SC_APPTAG_CHECK = 0x283,
NVME_SC_REFTAG_CHECK = 0x284,
NVME_SC_COMPARE_FAILED = 0x285,
NVME_SC_ACCESS_DENIED = 0x286,
NVME_SC_DNR = 0x4000,
};
/*
 * One completion entry; the members add up to 16 bytes. command_id is a
 * plain __u16, matching the command_id member of the command structs.
 */
struct nvme_completion {
__le32 result; /* Used by admin commands to return data */
__u32 rsvd;
__le16 sq_head; /* how much of this queue may be reclaimed */
__le16 sq_id; /* submission queue that generated this entry */
__u16 command_id; /* of the command which completed */
__le16 status; /* did the command fail, and if so, why? */
};
/*
 * Argument structure of NVME_IOCTL_SUBMIT_IO. All members use plain
 * (host-endian) __u types. The members add up to 44 bytes;
 * NOTE(review): sizeof may be larger where __u64 forces 8-byte struct
 * alignment -- confirm per ABI.
 */
struct nvme_user_io {
__u8 opcode;
__u8 flags;
__u16 control;
__u16 nblocks;
__u16 rsvd;
__u64 metadata;
__u64 addr;
__u64 slba;
__u32 dsmgmt;
__u32 reftag;
__u16 apptag;
__u16 appmask;
};
/*
 * Argument structure of NVME_IOCTL_IO_CMD and, through the nvme_admin_cmd
 * alias, NVME_IOCTL_ADMIN_CMD. All members use plain (host-endian) __u
 * types; the members add up to 72 bytes.
 */
struct nvme_passthru_cmd {
__u8 opcode;
__u8 flags;
__u16 rsvd1;
__u32 nsid;
__u32 cdw2;
__u32 cdw3;
__u64 metadata;
__u64 addr;
__u32 metadata_len;
__u32 data_len;
__u32 cdw10;
__u32 cdw11;
__u32 cdw12;
__u32 cdw13;
__u32 cdw14;
__u32 cdw15;
__u32 timeout_ms;
__u32 result;
};
/* Pack a version number: major goes to bits 16 and up, minor to bits 8 and up. Both arguments are parenthesized. */
#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8))
#define nvme_admin_cmd nvme_passthru_cmd
#define NVME_IOCTL_ID _IO('N', 0x40)
#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
#define NVME_IOCTL_RESET _IO('N', 0x44)
#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
#endif /* _UAPI_LINUX_NVME_H */
/*
* Definitions for the NVM Express ioctl interface
* Copyright (c) 2011-2014, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _UAPI_LINUX_NVME_IOCTL_H
#define _UAPI_LINUX_NVME_IOCTL_H
#include <linux/types.h>
/*
 * Argument structure of NVME_IOCTL_SUBMIT_IO. All members use plain
 * (host-endian) __u types. The members add up to 44 bytes;
 * NOTE(review): sizeof may be larger where __u64 forces 8-byte struct
 * alignment -- confirm per ABI.
 */
struct nvme_user_io {
__u8 opcode;
__u8 flags;
__u16 control;
__u16 nblocks;
__u16 rsvd;
__u64 metadata;
__u64 addr;
__u64 slba;
__u32 dsmgmt;
__u32 reftag;
__u16 apptag;
__u16 appmask;
};
/*
 * Argument structure of NVME_IOCTL_IO_CMD and, through the nvme_admin_cmd
 * alias, NVME_IOCTL_ADMIN_CMD. All members use plain (host-endian) __u
 * types; the members add up to 72 bytes.
 */
struct nvme_passthru_cmd {
__u8 opcode;
__u8 flags;
__u16 rsvd1;
__u32 nsid;
__u32 cdw2;
__u32 cdw3;
__u64 metadata;
__u64 addr;
__u32 metadata_len;
__u32 data_len;
__u32 cdw10;
__u32 cdw11;
__u32 cdw12;
__u32 cdw13;
__u32 cdw14;
__u32 cdw15;
__u32 timeout_ms;
__u32 result;
};
#define nvme_admin_cmd nvme_passthru_cmd
#define NVME_IOCTL_ID _IO('N', 0x40)
#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
#define NVME_IOCTL_RESET _IO('N', 0x44)
#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
#endif /* _UAPI_LINUX_NVME_IOCTL_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册