Commit d29ec824 authored by Christoph Hellwig, committed by Jens Axboe

nvme: submit internal commands through the block layer

Use block layer queues with an internal cmd_type to submit internally
generated NVMe commands.  This both simplifies the code a lot and allows
for a better structure.  For example, the LightNVM code can now construct
commands without knowing the details of the underlying I/O descriptors,
a future NVMe over network target could inject commands, and the SCSI
translation and ioctl code could be reused for such a beast as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Parent 772ce435
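For orientation before reading the diff: a condensed sketch of the submission path this patch introduces, boiled down from __nvme_submit_sync_cmd below. The function name nvme_sync_cmd_sketch is invented for illustration, and error handling plus the user-space mapping branch are left out; the complete version is in the diff.

/* Hypothetical condensation of the new path: the NVMe command rides on a
 * block layer request of type REQ_TYPE_DRV_PRIV instead of being written
 * into the submission queue by hand. */
static int nvme_sync_cmd_sketch(struct request_queue *q,
		struct nvme_command *cmd, void *buffer, unsigned bufflen)
{
	struct request *req;
	int ret;

	req = blk_mq_alloc_request(q, cmd->common.opcode & 1, GFP_KERNEL, false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->cmd_type = REQ_TYPE_DRV_PRIV;	/* dispatched to nvme_submit_priv() */
	req->cmd = (unsigned char *)cmd;
	req->cmd_len = sizeof(struct nvme_command);
	req->timeout = ADMIN_TIMEOUT;

	if (buffer && bufflen) {
		/* data, if any, is attached as an ordinary kernel mapping */
		ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
		if (ret)
			goto out;
	}

	blk_execute_rq(req->q, NULL, req, 0);	/* blocks until req_completion() */
	ret = req->errors;			/* NVMe status from the CQE */
out:
	blk_mq_free_request(req);
	return ret;
}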
@@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 				(unsigned long) rq, gfp);
 }
 
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
 	const int last_prp = dev->page_size / 8 - 1;
 	int i;
@@ -605,7 +605,12 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
-		req->errors = nvme_error_status(status);
+		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+			req->sense_len = le32_to_cpup(&cqe->result);
+			req->errors = status;
+		} else {
+			req->errors = nvme_error_status(status);
+		}
 	} else
 		req->errors = 0;
@@ -630,8 +635,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 }
 
 /* length is in bytes. gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
-								gfp_t gfp)
+static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
+		int total_len, gfp_t gfp)
 {
 	struct dma_pool *pool;
 	int length = total_len;
@@ -709,6 +714,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	return total_len;
 }
 
+static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
+		struct nvme_iod *iod)
+{
+	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+
+	memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
+	cmnd->rw.command_id = req->tag;
+	if (req->nr_phys_segments) {
+		cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+	}
+
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
 /*
  * We reuse the small pool to allocate the 16-byte range here as it is not
  * worth having a special pool for these or additional cases to handle freeing
@@ -807,11 +829,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	return 0;
 }
 
+/*
+ * NOTE: ns is NULL when called on the admin queue.
+ */
 static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
 	struct nvme_queue *nvmeq = hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_iod *iod;
@@ -822,7 +848,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * unless this namespace is formated such that the metadata can be
 	 * stripped/generated by the controller with PRACT=1.
 	 */
-	if (ns->ms && !blk_integrity_rq(req)) {
+	if (ns && ns->ms && !blk_integrity_rq(req)) {
 		if (!(ns->pi_type && ns->ms == 8)) {
 			req->errors = -EFAULT;
 			blk_mq_complete_request(req);
@@ -830,7 +856,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
+	iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
 	if (!iod)
 		return BLK_MQ_RQ_QUEUE_BUSY;
@@ -841,8 +867,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		 * as it is not worth having a special pool for these or
 		 * additional cases to handle freeing the iod.
 		 */
-		range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
-						GFP_ATOMIC,
+		range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
 						&iod->first_dma);
 		if (!range)
 			goto retry_cmd;
@@ -860,9 +885,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto retry_cmd;
 
 		if (blk_rq_bytes(req) !=
-		    nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
-			dma_unmap_sg(nvmeq->dev->dev, iod->sg,
-					iod->nents, dma_dir);
+		    nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
+			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 			goto retry_cmd;
 		}
 		if (blk_integrity_rq(req)) {
@@ -884,7 +908,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
-	if (req->cmd_flags & REQ_DISCARD)
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+		nvme_submit_priv(nvmeq, req, iod);
+	else if (req->cmd_flags & REQ_DISCARD)
 		nvme_submit_discard(nvmeq, ns, req, iod);
 	else if (req->cmd_flags & REQ_FLUSH)
 		nvme_submit_flush(nvmeq, ns, req->tag);
@@ -896,10 +922,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_MQ_RQ_QUEUE_OK;
 
  error_cmd:
-	nvme_free_iod(nvmeq->dev, iod);
+	nvme_free_iod(dev, iod);
 	return BLK_MQ_RQ_QUEUE_ERROR;
  retry_cmd:
-	nvme_free_iod(nvmeq->dev, iod);
+	nvme_free_iod(dev, iod);
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
@@ -942,15 +968,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
 	return 1;
 }
 
-/* Admin queue isn't initialized as a request queue. If at some point this
- * happens anyway, make sure to notify the user */
-static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx,
-			       const struct blk_mq_queue_data *bd)
-{
-	WARN_ON_ONCE(1);
-	return BLK_MQ_RQ_QUEUE_ERROR;
-}
-
 static irqreturn_t nvme_irq(int irq, void *data)
 {
 	irqreturn_t result;
@@ -972,59 +989,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 	return IRQ_WAKE_THREAD;
 }
 
-struct sync_cmd_info {
-	struct task_struct *task;
-	u32 result;
-	int status;
-};
-
-static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
-						struct nvme_completion *cqe)
-{
-	struct sync_cmd_info *cmdinfo = ctx;
-
-	cmdinfo->result = le32_to_cpup(&cqe->result);
-	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
-	wake_up_process(cmdinfo->task);
-}
-
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
-static int __nvme_submit_sync_cmd(struct request_queue *q,
-		struct nvme_command *cmd, u32 *result, unsigned timeout)
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout)
 {
-	struct sync_cmd_info cmdinfo;
-	struct nvme_cmd_info *cmd_rq;
+	bool write = cmd->common.opcode & 1;
+	struct bio *bio = NULL;
 	struct request *req;
-	int res;
+	int ret;
 
-	req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false);
+	req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	cmdinfo.task = current;
-	cmdinfo.status = -EINTR;
+	req->cmd_type = REQ_TYPE_DRV_PRIV;
+	req->__data_len = 0;
+	req->__sector = (sector_t) -1;
+	req->bio = req->biotail = NULL;
 
-	cmd->common.command_id = req->tag;
+	req->timeout = ADMIN_TIMEOUT;
 
-	cmd_rq = blk_mq_rq_to_pdu(req);
-	nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
+	req->cmd = (unsigned char *)cmd;
+	req->cmd_len = sizeof(struct nvme_command);
+	req->sense = NULL;
+	req->sense_len = 0;
 
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	nvme_submit_cmd(cmd_rq->nvmeq, cmd);
-	schedule();
+	if (buffer && bufflen) {
+		ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
+		if (ret)
+			goto out;
+	} else if (ubuffer && bufflen) {
+		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT);
+		if (ret)
+			goto out;
+		bio = req->bio;
+	}
 
+	blk_execute_rq(req->q, NULL, req, 0);
+	if (bio)
+		blk_rq_unmap_user(bio);
 	if (result)
-		*result = cmdinfo.result;
-	res = cmdinfo.status;
+		*result = req->sense_len;
+	ret = req->errors;
+ out:
 	blk_mq_free_request(req);
-	return res;
+	return ret;
 }
 
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd)
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen)
 {
-	return __nvme_submit_sync_cmd(q, cmd, NULL, 0);
+	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
 }
 
 static int nvme_submit_async_admin_req(struct nvme_dev *dev)
@@ -1081,7 +1100,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1090,6 +1109,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
 
+	/*
+	 * Note: we (ab)use the fact the the prp fields survive if no data
+	 * is attached to the request.
+	 */
 	memset(&c, 0, sizeof(c));
 	c.create_cq.opcode = nvme_admin_create_cq;
 	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
@@ -1098,7 +1121,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cq_flags = cpu_to_le16(flags);
 	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1107,6 +1130,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
 
+	/*
+	 * Note: we (ab)use the fact the the prp fields survive if no data
+	 * is attached to the request.
+	 */
 	memset(&c, 0, sizeof(c));
 	c.create_sq.opcode = nvme_admin_create_sq;
 	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
@@ -1115,7 +1142,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	c.create_sq.sq_flags = cpu_to_le16(flags);
 	c.create_sq.cqid = cpu_to_le16(qid);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1128,18 +1155,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
-							dma_addr_t dma_addr)
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
 {
-	struct nvme_command c;
+	struct nvme_command c = {
+		.identify.opcode = nvme_admin_identify,
+		.identify.cns = cpu_to_le32(1),
+	};
+	int error;
 
-	memset(&c, 0, sizeof(c));
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cpu_to_le32(nsid);
-	c.identify.prp1 = cpu_to_le64(dma_addr);
-	c.identify.cns = cpu_to_le32(cns);
+	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ctrl));
+	if (error)
+		kfree(*id);
+	return error;
+}
+
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = {
+		.identify.opcode = nvme_admin_identify,
+		.identify.nsid = cpu_to_le32(nsid),
+	};
+	int error;
+
+	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ns));
+	if (error)
+		kfree(*id);
+	return error;
 }
 
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
@@ -1153,7 +1205,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
 }
 
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
@@ -1167,7 +1220,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
+}
+
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+{
+	struct nvme_command c = {
+		.common.opcode = nvme_admin_get_log_page,
+		.common.nsid = cpu_to_le32(0xFFFFFFFF),
+		.common.cdw10[0] = cpu_to_le32(
+			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+			NVME_LOG_SMART),
+	};
+	int error;
+
+	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (!*log)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+			sizeof(struct nvme_smart_log));
+	if (error)
+		kfree(*log);
+	return error;
 }
 
 /**
@@ -1523,7 +1599,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
 }
 
 static struct blk_mq_ops nvme_mq_admin_ops = {
-	.queue_rq	= nvme_admin_queue_rq,
+	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_admin_init_hctx,
 	.exit_hctx	= nvme_exit_hctx,
@@ -1644,122 +1720,41 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length)
-{
-	int i, err, count, nents, offset;
-	struct scatterlist *sg;
-	struct page **pages;
-	struct nvme_iod *iod;
-
-	if (addr & 3)
-		return ERR_PTR(-EINVAL);
-	if (!length || length > INT_MAX - PAGE_SIZE)
-		return ERR_PTR(-EINVAL);
-
-	offset = offset_in_page(addr);
-	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
-	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	err = get_user_pages_fast(addr, count, 1, pages);
-	if (err < count) {
-		count = err;
-		err = -EFAULT;
-		goto put_pages;
-	}
-
-	err = -ENOMEM;
-	iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL);
-	if (!iod)
-		goto put_pages;
-
-	sg = iod->sg;
-	sg_init_table(sg, count);
-	for (i = 0; i < count; i++) {
-		sg_set_page(&sg[i], pages[i],
-			    min_t(unsigned, length, PAGE_SIZE - offset),
-			    offset);
-		length -= (PAGE_SIZE - offset);
-		offset = 0;
-	}
-	sg_mark_end(&sg[i - 1]);
-	iod->nents = count;
-
-	nents = dma_map_sg(dev->dev, sg, count,
-				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	if (!nents)
-		goto free_iod;
-
-	kfree(pages);
-	return iod;
-
- free_iod:
-	kfree(iod);
- put_pages:
-	for (i = 0; i < count; i++)
-		put_page(pages[i]);
-	kfree(pages);
-	return ERR_PTR(err);
-}
-
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			struct nvme_iod *iod)
-{
-	int i;
-
-	dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-			write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-	for (i = 0; i < iod->nents; i++)
-		put_page(sg_page(&iod->sg[i]));
-}
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_user_io io;
 	struct nvme_command c;
-	unsigned length, meta_len, prp_len;
+	unsigned length, meta_len;
 	int status, write;
-	struct nvme_iod *iod;
 	dma_addr_t meta_dma = 0;
 	void *meta = NULL;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	length = (io.nblocks + 1) << ns->lba_shift;
-	meta_len = (io.nblocks + 1) * ns->ms;
-
-	if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
-		return -EINVAL;
-	else if (meta_len && ns->ext) {
-		length += meta_len;
-		meta_len = 0;
-	}
-
-	write = io.opcode & 1;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
 	case nvme_cmd_compare:
-		iod = nvme_map_user_pages(dev, write, io.addr, length);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (IS_ERR(iod))
-		return PTR_ERR(iod);
+	length = (io.nblocks + 1) << ns->lba_shift;
+	meta_len = (io.nblocks + 1) * ns->ms;
+	write = io.opcode & 1;
 
-	prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
-	if (length != prp_len) {
-		status = -ENOMEM;
-		goto unmap;
-	}
 	if (meta_len) {
+		if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+			return -EINVAL;
+
+		if (ns->ext) {
+			length += meta_len;
+			meta_len = 0;
+		}
+
 		meta = dma_alloc_coherent(dev->dev, meta_len,
 						&meta_dma, GFP_KERNEL);
 		if (!meta) {
@@ -1786,13 +1781,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.reftag = cpu_to_le32(io.reftag);
 	c.rw.apptag = cpu_to_le16(io.apptag);
 	c.rw.appmask = cpu_to_le16(io.appmask);
-	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-	c.rw.prp2 = cpu_to_le64(iod->first_dma);
 	c.rw.metadata = cpu_to_le64(meta_dma);
-	status = nvme_submit_sync_cmd(ns->queue, &c);
+
+	status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+			(void __user *)io.addr, length, NULL, 0);
  unmap:
-	nvme_unmap_user_pages(dev, write, iod);
-	nvme_free_iod(dev, iod);
 	if (meta) {
 		if (status == NVME_SC_SUCCESS && !write) {
 			if (copy_to_user((void __user *)io.metadata, meta,
@@ -1809,9 +1802,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 {
 	struct nvme_passthru_cmd cmd;
 	struct nvme_command c;
-	int status, length;
-	struct nvme_iod *uninitialized_var(iod);
-	unsigned timeout;
+	unsigned timeout = 0;
+	int status;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -1831,38 +1823,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
 	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
 
-	length = cmd.data_len;
-	if (cmd.data_len) {
-		iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
-								length);
-		if (IS_ERR(iod))
-			return PTR_ERR(iod);
-		length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
-		c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.common.prp2 = cpu_to_le64(iod->first_dma);
-	}
-
-	timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
-								ADMIN_TIMEOUT;
-
-	if (length != cmd.data_len) {
-		status = -ENOMEM;
-		goto out;
-	}
+	if (cmd.timeout_ms)
+		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
 	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
-			&cmd.result, timeout);
+			NULL, (void __user *)cmd.addr, cmd.data_len,
+			&cmd.result, timeout);
 
-out:
-	if (cmd.data_len) {
-		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
-		nvme_free_iod(dev, iod);
+	if (status >= 0) {
+		if (put_user(cmd.result, &ucmd->result))
+			return -EFAULT;
 	}
 
-	if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
-							sizeof(cmd.result)))
-		status = -EFAULT;
-
 	return status;
 }
@@ -1954,22 +1925,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	struct nvme_ns *ns = disk->private_data;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id;
-	dma_addr_t dma_addr;
 	u8 lbaf, pi_type;
 	u16 old_ms;
 	unsigned short bs;
 
-	id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL);
-	if (!id) {
-		dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__);
+	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+		dev_warn(dev->dev, "%s: Identify failure\n", __func__);
 		return 0;
 	}
-	if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
-		dev_warn(dev->dev,
-			"identify failed ns:%d, setting capacity to 0\n",
-								ns->ns_id);
-		memset(id, 0, sizeof(*id));
-	}
 
 	old_ms = ns->ms;
 	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
@@ -2010,7 +1973,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	if (dev->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 
-	dma_free_coherent(dev->dev, 4096, id, dma_addr);
+	kfree(id);
 	return 0;
 }
@@ -2250,22 +2213,14 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	int res;
 	unsigned nn, i;
 	struct nvme_id_ctrl *ctrl;
-	void *mem;
-	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL);
-	if (!mem)
-		return -ENOMEM;
-
-	res = nvme_identify(dev, 0, 1, dma_addr);
+	res = nvme_identify_ctrl(dev, &ctrl);
 	if (res) {
 		dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
-		dma_free_coherent(dev->dev, 4096, mem, dma_addr);
 		return -EIO;
 	}
 
-	ctrl = mem;
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
@@ -2287,7 +2242,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		} else
 			dev->max_hw_sectors = max_hw_sectors;
 	}
-	dma_free_coherent(dev->dev, 4096, mem, dma_addr);
+	kfree(ctrl);
 
 	dev->tagset.ops = &nvme_mq_ops;
 	dev->tagset.nr_hw_queues = dev->online_queues - 1;
This diff is collapsed.
@@ -146,21 +146,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-/**
- * nvme_free_iod - frees an nvme_iod
- * @dev: The device that the I/O was submitted to
- * @iod: The memory to free
- */
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
-
-int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t);
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length);
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			struct nvme_iod *iod);
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd);
-int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
-							dma_addr_t dma_addr);
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buf, unsigned bufflen);
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout);
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id);
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 		dma_addr_t dma_addr, u32 *result);
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
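As a usage note, the identify helpers declared above now return a plain kmalloc'ed buffer that the caller must kfree(), exactly as nvme_dev_add() and nvme_revalidate_disk() do in the diff. A small hypothetical caller (the function name and the printed message are made up for this sketch):

/* Hypothetical caller: nvme_identify_ctrl() allocates *ctrl and sends the
 * Identify command through the admin queue; on success the buffer is
 * owned by the caller. */
static int nvme_print_model(struct nvme_dev *dev)
{
	struct nvme_id_ctrl *ctrl;
	int error;

	error = nvme_identify_ctrl(dev, &ctrl);
	if (error)
		return error;		/* negative errno or NVMe status */

	dev_info(dev->dev, "controller model: %.40s\n", ctrl->mn);
	kfree(ctrl);			/* caller frees the identify data */
	return 0;
}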