Commit 8613dec0 authored by Jens Axboe

Merge tag 'nvme-6.2-2022-11-29' of git://git.infradead.org/nvme into for-6.2/block

Pull NVMe updates from Christoph:

"nvme updates for Linux 6.2

 - support some passthrough commands without CAP_SYS_ADMIN
   (Kanchan Joshi)
 - refactor PCIe probing and reset (Christoph Hellwig)
 - various fabrics authentication fixes and improvements (Sagi Grimberg)
 - avoid fallback to sequential scan due to transient issues
   (Uday Shankar)
 - implement support for the DEAC bit in Write Zeroes (Christoph Hellwig)
 - allow overriding the IEEE OUI and firmware revision in configfs for
   nvmet (Aleksandr Miloserdov)
 - force reconnect when number of queue changes in nvmet (Daniel Wagner)
 - minor fixes and improvements (Uros Bizjak, Joel Granados,
   Sagi Grimberg, Christoph Hellwig, Christophe JAILLET)"

* tag 'nvme-6.2-2022-11-29' of git://git.infradead.org/nvme: (45 commits)
  nvmet: expose firmware revision to configfs
  nvmet: expose IEEE OUI to configfs
  nvme: rename the queue quiescing helpers
  nvmet: fix a memory leak in nvmet_auth_set_key
  nvme: return err on nvme_init_non_mdts_limits fail
  nvme: avoid fallback to sequential scan due to transient issues
  nvme-rdma: stop auth work after tearing down queues in error recovery
  nvme-tcp: stop auth work after tearing down queues in error recovery
  nvme-auth: have dhchap_auth_work wait for queues auth to complete
  nvme-auth: remove redundant auth_work flush
  nvme-auth: convert dhchap_auth_list to an array
  nvme-auth: check chap ctrl_key once constructed
  nvme-auth: no need to reset chap contexts on re-authentication
  nvme-auth: remove redundant deallocations
  nvme-auth: clear sensitive info right after authentication completes
  nvme-auth: guarantee dhchap buffers under memory pressure
  nvme-auth: don't keep long lived 4k dhchap buffer
  nvme-auth: remove redundant if statement
  nvme-auth: don't override ctrl keys before validation
  nvme-auth: don't ignore key generation failures when initializing ctrl keys
  ...
@@ -821,7 +821,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 if (!dead && shutdown && freeze)
 nvme_wait_freeze_timeout(&anv->ctrl, NVME_IO_TIMEOUT);
-nvme_stop_queues(&anv->ctrl);
+nvme_quiesce_io_queues(&anv->ctrl);
 if (!dead) {
 if (READ_ONCE(anv->ioq.enabled)) {
@@ -837,7 +837,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 WRITE_ONCE(anv->ioq.enabled, false);
 WRITE_ONCE(anv->adminq.enabled, false);
 mb(); /* ensure that nvme_queue_rq() sees that enabled is cleared */
-nvme_stop_admin_queue(&anv->ctrl);
+nvme_quiesce_admin_queue(&anv->ctrl);
 /* last chance to complete any requests before nvme_cancel_request */
 spin_lock_irqsave(&anv->lock, flags);
@@ -854,8 +854,8 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 * deadlocking blk-mq hot-cpu notifier.
 */
 if (shutdown) {
-nvme_start_queues(&anv->ctrl);
-nvme_start_admin_queue(&anv->ctrl);
+nvme_unquiesce_io_queues(&anv->ctrl);
+nvme_unquiesce_admin_queue(&anv->ctrl);
 }
 }
@@ -1093,7 +1093,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
 dev_dbg(anv->dev, "Starting admin queue");
 apple_nvme_init_queue(&anv->adminq);
-nvme_start_admin_queue(&anv->ctrl);
+nvme_unquiesce_admin_queue(&anv->ctrl);
 if (!nvme_change_ctrl_state(&anv->ctrl, NVME_CTRL_CONNECTING)) {
 dev_warn(anv->ctrl.device,
@@ -1102,7 +1102,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
 goto out;
 }
-ret = nvme_init_ctrl_finish(&anv->ctrl);
+ret = nvme_init_ctrl_finish(&anv->ctrl, false);
 if (ret)
 goto out;
@@ -1127,7 +1127,7 @@ static void apple_nvme_reset_work(struct work_struct *work)
 anv->ctrl.queue_count = nr_io_queues + 1;
-nvme_start_queues(&anv->ctrl);
+nvme_unquiesce_io_queues(&anv->ctrl);
 nvme_wait_freeze(&anv->ctrl);
 blk_mq_update_nr_hw_queues(&anv->tagset, 1);
 nvme_unfreeze(&anv->ctrl);
...
@@ -13,6 +13,10 @@
 #include "fabrics.h"
 #include <linux/nvme-auth.h>
+#define CHAP_BUF_SIZE 4096
+static struct kmem_cache *nvme_chap_buf_cache;
+static mempool_t *nvme_chap_buf_pool;
 struct nvme_dhchap_queue_context {
 struct list_head entry;
 struct work_struct auth_work;
@@ -20,7 +24,6 @@ struct nvme_dhchap_queue_context {
 struct crypto_shash *shash_tfm;
 struct crypto_kpp *dh_tfm;
 void *buf;
-size_t buf_size;
 int qid;
 int error;
 u32 s1;
@@ -47,6 +50,12 @@ struct nvme_dhchap_queue_context {
 #define nvme_auth_queue_from_qid(ctrl, qid) \
 (qid == 0) ? (ctrl)->fabrics_q : (ctrl)->connect_q
+static inline int ctrl_max_dhchaps(struct nvme_ctrl *ctrl)
+{
+return ctrl->opts->nr_io_queues + ctrl->opts->nr_write_queues +
+ctrl->opts->nr_poll_queues + 1;
+}
 static int nvme_auth_submit(struct nvme_ctrl *ctrl, int qid,
 void *data, size_t data_len, bool auth_send)
 {
@@ -112,7 +121,7 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl,
 struct nvmf_auth_dhchap_negotiate_data *data = chap->buf;
 size_t size = sizeof(*data) + sizeof(union nvmf_auth_protocol);
-if (chap->buf_size < size) {
+if (size > CHAP_BUF_SIZE) {
 chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
 return -EINVAL;
 }
@@ -147,7 +156,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
 const char *gid_name = nvme_auth_dhgroup_name(data->dhgid);
 const char *hmac_name, *kpp_name;
-if (chap->buf_size < size) {
+if (size > CHAP_BUF_SIZE) {
 chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
 return NVME_SC_INVALID_FIELD;
 }
@@ -197,12 +206,6 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
 return NVME_SC_AUTH_REQUIRED;
 }
-/* Reset host response if the hash had been changed */
-if (chap->hash_id != data->hashid) {
-kfree(chap->host_response);
-chap->host_response = NULL;
-}
 chap->hash_id = data->hashid;
 chap->hash_len = data->hl;
 dev_dbg(ctrl->device, "qid %d: selected hash %s\n",
@@ -219,14 +222,6 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
 return NVME_SC_AUTH_REQUIRED;
 }
-/* Clear host and controller key to avoid accidental reuse */
-kfree_sensitive(chap->host_key);
-chap->host_key = NULL;
-chap->host_key_len = 0;
-kfree_sensitive(chap->ctrl_key);
-chap->ctrl_key = NULL;
-chap->ctrl_key_len = 0;
 if (chap->dhgroup_id == data->dhgid &&
 (data->dhgid == NVME_AUTH_DHGROUP_NULL || chap->dh_tfm)) {
 dev_dbg(ctrl->device,
@@ -302,7 +297,7 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl,
 if (chap->host_key_len)
 size += chap->host_key_len;
-if (chap->buf_size < size) {
+if (size > CHAP_BUF_SIZE) {
 chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
 return -EINVAL;
 }
@@ -344,10 +339,10 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl,
 struct nvmf_auth_dhchap_success1_data *data = chap->buf;
 size_t size = sizeof(*data);
-if (ctrl->ctrl_key)
+if (chap->ctrl_key)
 size += chap->hash_len;
-if (chap->buf_size < size) {
+if (size > CHAP_BUF_SIZE) {
 chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
 return NVME_SC_INVALID_FIELD;
 }
@@ -521,6 +516,7 @@ static int nvme_auth_dhchap_setup_ctrl_response(struct nvme_ctrl *ctrl,
 ret = PTR_ERR(ctrl_response);
 return ret;
 }
 ret = crypto_shash_setkey(chap->shash_tfm,
 ctrl_response, ctrl->ctrl_key->len);
 if (ret) {
@@ -621,9 +617,6 @@ static int nvme_auth_dhchap_exponential(struct nvme_ctrl *ctrl,
 if (ret) {
 dev_dbg(ctrl->device,
 "failed to generate public key, error %d\n", ret);
-kfree(chap->host_key);
-chap->host_key = NULL;
-chap->host_key_len = 0;
 chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
 return ret;
 }
@@ -643,9 +636,6 @@ static int nvme_auth_dhchap_exponential(struct nvme_ctrl *ctrl,
 if (ret) {
 dev_dbg(ctrl->device,
 "failed to generate shared secret, error %d\n", ret);
-kfree_sensitive(chap->sess_key);
-chap->sess_key = NULL;
-chap->sess_key_len = 0;
 chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
 return ret;
 }
@@ -654,7 +644,7 @@ static int nvme_auth_dhchap_exponential(struct nvme_ctrl *ctrl,
 return 0;
 }
-static void __nvme_auth_reset(struct nvme_dhchap_queue_context *chap)
+static void nvme_auth_reset_dhchap(struct nvme_dhchap_queue_context *chap)
 {
 kfree_sensitive(chap->host_response);
 chap->host_response = NULL;
@@ -674,24 +664,20 @@ static void __nvme_auth_reset(struct nvme_dhchap_queue_context *chap)
 chap->transaction = 0;
 memset(chap->c1, 0, sizeof(chap->c1));
 memset(chap->c2, 0, sizeof(chap->c2));
+mempool_free(chap->buf, nvme_chap_buf_pool);
+chap->buf = NULL;
 }
-static void __nvme_auth_free(struct nvme_dhchap_queue_context *chap)
+static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap)
 {
-__nvme_auth_reset(chap);
+nvme_auth_reset_dhchap(chap);
 if (chap->shash_tfm)
 crypto_free_shash(chap->shash_tfm);
 if (chap->dh_tfm)
 crypto_free_kpp(chap->dh_tfm);
-kfree_sensitive(chap->ctrl_key);
-kfree_sensitive(chap->host_key);
-kfree_sensitive(chap->sess_key);
-kfree_sensitive(chap->host_response);
-kfree(chap->buf);
-kfree(chap);
 }
-static void __nvme_auth_work(struct work_struct *work)
+static void nvme_queue_auth_work(struct work_struct *work)
 {
 struct nvme_dhchap_queue_context *chap =
 container_of(work, struct nvme_dhchap_queue_context, auth_work);
@@ -699,6 +685,16 @@ static void __nvme_auth_work(struct work_struct *work)
 size_t tl;
 int ret = 0;
+/*
+ * Allocate a large enough buffer for the entire negotiation:
+ * 4k is enough to ffdhe8192.
+ */
+chap->buf = mempool_alloc(nvme_chap_buf_pool, GFP_KERNEL);
+if (!chap->buf) {
+chap->error = -ENOMEM;
+return;
+}
 chap->transaction = ctrl->transaction++;
 /* DH-HMAC-CHAP Step 1: send negotiate */
@@ -720,8 +716,9 @@ static void __nvme_auth_work(struct work_struct *work)
 dev_dbg(ctrl->device, "%s: qid %d receive challenge\n",
 __func__, chap->qid);
-memset(chap->buf, 0, chap->buf_size);
-ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, chap->buf_size, false);
+memset(chap->buf, 0, CHAP_BUF_SIZE);
+ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, CHAP_BUF_SIZE,
+false);
 if (ret) {
 dev_warn(ctrl->device,
 "qid %d failed to receive challenge, %s %d\n",
@@ -757,11 +754,14 @@ static void __nvme_auth_work(struct work_struct *work)
 dev_dbg(ctrl->device, "%s: qid %d host response\n",
 __func__, chap->qid);
+mutex_lock(&ctrl->dhchap_auth_mutex);
 ret = nvme_auth_dhchap_setup_host_response(ctrl, chap);
 if (ret) {
+mutex_unlock(&ctrl->dhchap_auth_mutex);
 chap->error = ret;
 goto fail2;
 }
+mutex_unlock(&ctrl->dhchap_auth_mutex);
 /* DH-HMAC-CHAP Step 3: send reply */
 dev_dbg(ctrl->device, "%s: qid %d send reply\n",
@@ -783,8 +783,9 @@ static void __nvme_auth_work(struct work_struct *work)
 dev_dbg(ctrl->device, "%s: qid %d receive success1\n",
 __func__, chap->qid);
-memset(chap->buf, 0, chap->buf_size);
-ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, chap->buf_size, false);
+memset(chap->buf, 0, CHAP_BUF_SIZE);
+ret = nvme_auth_submit(ctrl, chap->qid, chap->buf, CHAP_BUF_SIZE,
+false);
 if (ret) {
 dev_warn(ctrl->device,
 "qid %d failed to receive success1, %s %d\n",
@@ -801,16 +802,19 @@ static void __nvme_auth_work(struct work_struct *work)
 return;
 }
+mutex_lock(&ctrl->dhchap_auth_mutex);
 if (ctrl->ctrl_key) {
 dev_dbg(ctrl->device,
 "%s: qid %d controller response\n",
 __func__, chap->qid);
 ret = nvme_auth_dhchap_setup_ctrl_response(ctrl, chap);
 if (ret) {
+mutex_unlock(&ctrl->dhchap_auth_mutex);
 chap->error = ret;
 goto fail2;
 }
 }
+mutex_unlock(&ctrl->dhchap_auth_mutex);
 ret = nvme_auth_process_dhchap_success1(ctrl, chap);
 if (ret) {
@@ -819,7 +823,7 @@ static void __nvme_auth_work(struct work_struct *work)
 goto fail2;
 }
-if (ctrl->ctrl_key) {
+if (chap->ctrl_key) {
 /* DH-HMAC-CHAP Step 5: send success2 */
 dev_dbg(ctrl->device, "%s: qid %d send success2\n",
 __func__, chap->qid);
@@ -860,42 +864,8 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
 return -ENOKEY;
 }
-mutex_lock(&ctrl->dhchap_auth_mutex);
-/* Check if the context is already queued */
-list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
-WARN_ON(!chap->buf);
-if (chap->qid == qid) {
-dev_dbg(ctrl->device, "qid %d: re-using context\n", qid);
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-flush_work(&chap->auth_work);
-__nvme_auth_reset(chap);
-queue_work(nvme_wq, &chap->auth_work);
-return 0;
-}
-}
-chap = kzalloc(sizeof(*chap), GFP_KERNEL);
-if (!chap) {
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-return -ENOMEM;
-}
-chap->qid = (qid == NVME_QID_ANY) ? 0 : qid;
-chap->ctrl = ctrl;
-/*
- * Allocate a large enough buffer for the entire negotiation:
- * 4k should be enough to ffdhe8192.
- */
-chap->buf_size = 4096;
-chap->buf = kzalloc(chap->buf_size, GFP_KERNEL);
-if (!chap->buf) {
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-kfree(chap);
-return -ENOMEM;
-}
-INIT_WORK(&chap->auth_work, __nvme_auth_work);
-list_add(&chap->entry, &ctrl->dhchap_auth_list);
-mutex_unlock(&ctrl->dhchap_auth_mutex);
+chap = &ctrl->dhchap_ctxs[qid];
+cancel_work_sync(&chap->auth_work);
 queue_work(nvme_wq, &chap->auth_work);
 return 0;
 }
@@ -906,40 +876,28 @@ int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid)
 struct nvme_dhchap_queue_context *chap;
 int ret;
-mutex_lock(&ctrl->dhchap_auth_mutex);
-list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
-if (chap->qid != qid)
-continue;
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-flush_work(&chap->auth_work);
-ret = chap->error;
-return ret;
-}
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-return -ENXIO;
+chap = &ctrl->dhchap_ctxs[qid];
+flush_work(&chap->auth_work);
+ret = chap->error;
+/* clear sensitive info */
+nvme_auth_reset_dhchap(chap);
+return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_auth_wait);
-void nvme_auth_reset(struct nvme_ctrl *ctrl)
-{
-struct nvme_dhchap_queue_context *chap;
-mutex_lock(&ctrl->dhchap_auth_mutex);
-list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-flush_work(&chap->auth_work);
-__nvme_auth_reset(chap);
-}
-mutex_unlock(&ctrl->dhchap_auth_mutex);
-}
-EXPORT_SYMBOL_GPL(nvme_auth_reset);
-static void nvme_dhchap_auth_work(struct work_struct *work)
+static void nvme_ctrl_auth_work(struct work_struct *work)
 {
 struct nvme_ctrl *ctrl =
 container_of(work, struct nvme_ctrl, dhchap_auth_work);
 int ret, q;
+/*
+ * If the ctrl is no connected, bail as reconnect will handle
+ * authentication.
+ */
+if (ctrl->state != NVME_CTRL_LIVE)
+return;
 /* Authenticate admin queue first */
 ret = nvme_auth_negotiate(ctrl, 0);
 if (ret) {
@@ -968,43 +926,75 @@ static void nvme_dhchap_auth_work(struct work_struct *work)
 * Failure is a soft-state; credentials remain valid until
 * the controller terminates the connection.
 */
+for (q = 1; q < ctrl->queue_count; q++) {
+ret = nvme_auth_wait(ctrl, q);
+if (ret)
+dev_warn(ctrl->device,
+"qid %d: authentication failed\n", q);
+}
 }
-void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
+int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
 {
-INIT_LIST_HEAD(&ctrl->dhchap_auth_list);
-INIT_WORK(&ctrl->dhchap_auth_work, nvme_dhchap_auth_work);
+struct nvme_dhchap_queue_context *chap;
+int i, ret;
 mutex_init(&ctrl->dhchap_auth_mutex);
+INIT_WORK(&ctrl->dhchap_auth_work, nvme_ctrl_auth_work);
 if (!ctrl->opts)
-return;
+return 0;
-nvme_auth_generate_key(ctrl->opts->dhchap_secret, &ctrl->host_key);
-nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret, &ctrl->ctrl_key);
+ret = nvme_auth_generate_key(ctrl->opts->dhchap_secret,
+&ctrl->host_key);
+if (ret)
+return ret;
+ret = nvme_auth_generate_key(ctrl->opts->dhchap_ctrl_secret,
+&ctrl->ctrl_key);
+if (ret)
+goto err_free_dhchap_secret;
+if (!ctrl->opts->dhchap_secret && !ctrl->opts->dhchap_ctrl_secret)
+return ret;
+ctrl->dhchap_ctxs = kvcalloc(ctrl_max_dhchaps(ctrl),
+sizeof(*chap), GFP_KERNEL);
+if (!ctrl->dhchap_ctxs) {
+ret = -ENOMEM;
+goto err_free_dhchap_ctrl_secret;
+}
+for (i = 0; i < ctrl_max_dhchaps(ctrl); i++) {
+chap = &ctrl->dhchap_ctxs[i];
+chap->qid = i;
+chap->ctrl = ctrl;
+INIT_WORK(&chap->auth_work, nvme_queue_auth_work);
+}
+return 0;
+err_free_dhchap_ctrl_secret:
+nvme_auth_free_key(ctrl->ctrl_key);
+ctrl->ctrl_key = NULL;
+err_free_dhchap_secret:
+nvme_auth_free_key(ctrl->host_key);
+ctrl->host_key = NULL;
+return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_auth_init_ctrl);
 void nvme_auth_stop(struct nvme_ctrl *ctrl)
 {
-struct nvme_dhchap_queue_context *chap = NULL, *tmp;
 cancel_work_sync(&ctrl->dhchap_auth_work);
-mutex_lock(&ctrl->dhchap_auth_mutex);
-list_for_each_entry_safe(chap, tmp, &ctrl->dhchap_auth_list, entry)
-cancel_work_sync(&chap->auth_work);
-mutex_unlock(&ctrl->dhchap_auth_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_auth_stop);
 void nvme_auth_free(struct nvme_ctrl *ctrl)
 {
-struct nvme_dhchap_queue_context *chap = NULL, *tmp;
+int i;
-mutex_lock(&ctrl->dhchap_auth_mutex);
-list_for_each_entry_safe(chap, tmp, &ctrl->dhchap_auth_list, entry) {
-list_del_init(&chap->entry);
-flush_work(&chap->auth_work);
-__nvme_auth_free(chap);
+if (ctrl->dhchap_ctxs) {
+for (i = 0; i < ctrl_max_dhchaps(ctrl); i++)
+nvme_auth_free_dhchap(&ctrl->dhchap_ctxs[i]);
+kfree(ctrl->dhchap_ctxs);
 }
-mutex_unlock(&ctrl->dhchap_auth_mutex);
 if (ctrl->host_key) {
 nvme_auth_free_key(ctrl->host_key);
 ctrl->host_key = NULL;
@@ -1015,3 +1005,27 @@ void nvme_auth_free(struct nvme_ctrl *ctrl)
 }
 }
 EXPORT_SYMBOL_GPL(nvme_auth_free);
+int __init nvme_init_auth(void)
+{
+nvme_chap_buf_cache = kmem_cache_create("nvme-chap-buf-cache",
+CHAP_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL);
+if (!nvme_chap_buf_cache)
+return -ENOMEM;
+nvme_chap_buf_pool = mempool_create(16, mempool_alloc_slab,
+mempool_free_slab, nvme_chap_buf_cache);
+if (!nvme_chap_buf_pool)
+goto err_destroy_chap_buf_cache;
+return 0;
+err_destroy_chap_buf_cache:
+kmem_cache_destroy(nvme_chap_buf_cache);
+return -ENOMEM;
+}
+void __exit nvme_exit_auth(void)
+{
+mempool_destroy(nvme_chap_buf_pool);
+kmem_cache_destroy(nvme_chap_buf_cache);
+}
@@ -850,8 +850,11 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
 cmnd->write_zeroes.length =
 cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+if (!(req->cmd_flags & REQ_NOUNMAP) && (ns->features & NVME_NS_DEAC))
+cmnd->write_zeroes.control |= cpu_to_le16(NVME_WZ_DEAC);
 if (nvme_ns_has_pi(ns)) {
-cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
 switch (ns->pi_type) {
 case NVME_NS_DPS_PI_TYPE1:
@@ -1120,8 +1123,10 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
 mutex_unlock(&ctrl->subsys->lock);
 mutex_unlock(&ctrl->scan_lock);
 }
-if (effects & NVME_CMD_EFFECTS_CCC)
-nvme_init_ctrl_finish(ctrl);
+if (effects & NVME_CMD_EFFECTS_CCC) {
+dev_info(ctrl->device,
+"controller capabilities changed, reset may be required to take effect.\n");
+}
 if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
 nvme_queue_scan(ctrl);
 flush_work(&ctrl->scan_work);
@@ -2003,6 +2008,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 }
 }
+/*
+ * Only set the DEAC bit if the device guarantees that reads from
+ * deallocated data return zeroes. While the DEAC bit does not
+ * require that, it must be a no-op if reads from deallocated data
+ * do not return zeroes.
+ */
+if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+ns->features |= NVME_NS_DEAC;
 set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
 set_bit(NVME_NS_READY, &ns->flags);
 blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2179,7 +2192,7 @@ const struct pr_ops nvme_pr_ops = {
 };
 #ifdef CONFIG_BLK_SED_OPAL
-int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
+static int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 bool send)
 {
 struct nvme_ctrl *ctrl = data;
@@ -2196,7 +2209,23 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
 NVME_QID_ANY, 1, 0);
 }
-EXPORT_SYMBOL_GPL(nvme_sec_submit);
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+if (ctrl->oacs & NVME_CTRL_OACS_SEC_SUPP) {
+if (!ctrl->opal_dev)
+ctrl->opal_dev = init_opal_dev(ctrl, &nvme_sec_submit);
+else if (was_suspended)
+opal_unlock_from_suspend(ctrl->opal_dev);
+} else {
+free_opal_dev(ctrl->opal_dev);
+ctrl->opal_dev = NULL;
+}
+}
+#else
+static void nvme_configure_opal(struct nvme_ctrl *ctrl, bool was_suspended)
+{
+}
 #endif /* CONFIG_BLK_SED_OPAL */
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -3049,7 +3078,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
 id = kzalloc(sizeof(*id), GFP_KERNEL);
 if (!id)
-return 0;
+return -ENOMEM;
 c.identify.opcode = nvme_admin_identify;
 c.identify.cns = NVME_ID_CNS_CS_CTRL;
@@ -3229,7 +3258,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 * register in our nvme_ctrl structure. This should be called as soon as
 * the admin queue is fully up and running.
 */
-int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
+int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended)
 {
 int ret;
@@ -3260,6 +3289,8 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
 if (ret < 0)
 return ret;
+nvme_configure_opal(ctrl, was_suspended);
 if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
 /*
 * Do not return errors unless we are in a controller reset,
@@ -3745,15 +3776,19 @@ static ssize_t nvme_ctrl_dhchap_secret_store(struct device *dev,
 memcpy(dhchap_secret, buf, count);
 nvme_auth_stop(ctrl);
 if (strcmp(dhchap_secret, opts->dhchap_secret)) {
+struct nvme_dhchap_key *key, *host_key;
 int ret;
-ret = nvme_auth_generate_key(dhchap_secret, &ctrl->host_key);
+ret = nvme_auth_generate_key(dhchap_secret, &key);
 if (ret)
 return ret;
 kfree(opts->dhchap_secret);
 opts->dhchap_secret = dhchap_secret;
-/* Key has changed; re-authentication with new key */
-nvme_auth_reset(ctrl);
+host_key = ctrl->host_key;
+mutex_lock(&ctrl->dhchap_auth_mutex);
+ctrl->host_key = key;
+mutex_unlock(&ctrl->dhchap_auth_mutex);
+nvme_auth_free_key(host_key);
 }
 /* Start re-authentication */
 dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3795,15 +3830,19 @@ static ssize_t nvme_ctrl_dhchap_ctrl_secret_store(struct device *dev,
 memcpy(dhchap_secret, buf, count);
 nvme_auth_stop(ctrl);
 if (strcmp(dhchap_secret, opts->dhchap_ctrl_secret)) {
+struct nvme_dhchap_key *key, *ctrl_key;
 int ret;
-ret = nvme_auth_generate_key(dhchap_secret, &ctrl->ctrl_key);
+ret = nvme_auth_generate_key(dhchap_secret, &key);
 if (ret)
 return ret;
 kfree(opts->dhchap_ctrl_secret);
 opts->dhchap_ctrl_secret = dhchap_secret;
-/* Key has changed; re-authentication with new key */
-nvme_auth_reset(ctrl);
+ctrl_key = ctrl->ctrl_key;
+mutex_lock(&ctrl->dhchap_auth_mutex);
+ctrl->ctrl_key = key;
+mutex_unlock(&ctrl->dhchap_auth_mutex);
+nvme_auth_free_key(ctrl_key);
 }
 /* Start re-authentication */
 dev_info(ctrl->device, "re-authenticating controller\n");
@@ -3875,10 +3914,11 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
 return a->mode;
 }
-static const struct attribute_group nvme_dev_attrs_group = {
+const struct attribute_group nvme_dev_attrs_group = {
 .attrs = nvme_dev_attrs,
 .is_visible = nvme_dev_attrs_are_visible,
 };
+EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
 static const struct attribute_group *nvme_dev_attr_groups[] = {
 &nvme_dev_attrs_group,
@@ -4420,9 +4460,6 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 u32 prev = 0;
 int ret = 0, i;
-if (nvme_ctrl_limited_cns(ctrl))
-return -EOPNOTSUPP;
 ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
 if (!ns_list)
 return -ENOMEM;
@@ -4530,8 +4567,18 @@ static void nvme_scan_work(struct work_struct *work)
 }
 mutex_lock(&ctrl->scan_lock);
-if (nvme_scan_ns_list(ctrl) != 0)
+if (nvme_ctrl_limited_cns(ctrl)) {
 nvme_scan_ns_sequential(ctrl);
+} else {
+/*
+ * Fall back to sequential scan if DNR is set to handle broken
+ * devices which should support Identify NS List (as per the VS
+ * they report) but don't actually support it.
+ */
+ret = nvme_scan_ns_list(ctrl);
+if (ret > 0 && ret & NVME_SC_DNR)
+nvme_scan_ns_sequential(ctrl);
+}
 mutex_unlock(&ctrl->scan_lock);
 }
@@ -4563,7 +4610,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 */
 if (ctrl->state == NVME_CTRL_DEAD) {
 nvme_mark_namespaces_dead(ctrl);
-nvme_start_queues(ctrl);
+nvme_unquiesce_io_queues(ctrl);
 }
 /* this is a no-op when called from the controller reset handler */
@@ -4690,7 +4737,7 @@ static void nvme_fw_act_work(struct work_struct *work)
 fw_act_timeout = jiffies +
 msecs_to_jiffies(admin_timeout * 1000);
-nvme_stop_queues(ctrl);
+nvme_quiesce_io_queues(ctrl);
 while (nvme_ctrl_pp_status(ctrl)) {
 if (time_after(jiffies, fw_act_timeout)) {
 dev_warn(ctrl->device,
@@ -4704,7 +4751,7 @@ static void nvme_fw_act_work(struct work_struct *work)
 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
 return;
-nvme_start_queues(ctrl);
+nvme_unquiesce_io_queues(ctrl);
 /* read FW slot information to clear the AER */
 nvme_get_fw_slot_info(ctrl);
@@ -4949,7 +4996,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
 if (ctrl->queue_count > 1) {
 nvme_queue_scan(ctrl);
-nvme_start_queues(ctrl);
+nvme_unquiesce_io_queues(ctrl);
 nvme_mpath_update(ctrl);
 }
@@ -4994,6 +5041,7 @@ static void nvme_free_ctrl(struct device *dev)
 nvme_auth_stop(ctrl);
 nvme_auth_free(ctrl);
 __free_page(ctrl->discard_page);
+free_opal_dev(ctrl->opal_dev);
 if (subsys) {
 mutex_lock(&nvme_subsystems_lock);
@@ -5059,7 +5107,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 ctrl->instance);
 ctrl->device->class = nvme_class;
 ctrl->device->parent = ctrl->dev;
-ctrl->device->groups = nvme_dev_attr_groups;
+if (ops->dev_attr_groups)
+ctrl->device->groups = ops->dev_attr_groups;
+else
+ctrl->device->groups = nvme_dev_attr_groups;
 ctrl->device->release = nvme_free_ctrl;
 dev_set_drvdata(ctrl->device, ctrl);
 ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
@@ -5083,9 +5134,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
 nvme_mpath_init_ctrl(ctrl);
-nvme_auth_init_ctrl(ctrl);
+ret = nvme_auth_init_ctrl(ctrl);
+if (ret)
+goto out_free_cdev;
 return 0;
+out_free_cdev:
+cdev_device_del(&ctrl->cdev, ctrl->device);
 out_free_name:
 nvme_put_ctrl(ctrl);
 kfree_const(ctrl->device->kobj.name);
@@ -5158,37 +5213,37 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
-void nvme_stop_queues(struct nvme_ctrl *ctrl)
+void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl)
 {
 if (!test_and_set_bit(NVME_CTRL_STOPPED, &ctrl->flags))
 blk_mq_quiesce_tagset(ctrl->tagset);
 else
 blk_mq_wait_quiesce_done(ctrl->tagset);
 }
-EXPORT_SYMBOL_GPL(nvme_stop_queues);
+EXPORT_SYMBOL_GPL(nvme_quiesce_io_queues);
-void nvme_start_queues(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl)
 {
 if (test_and_clear_bit(NVME_CTRL_STOPPED, &ctrl->flags))
 blk_mq_unquiesce_tagset(ctrl->tagset);
 }
-EXPORT_SYMBOL_GPL(nvme_start_queues);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_io_queues);
-void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl)
 {
 if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
 blk_mq_quiesce_queue(ctrl->admin_q);
 else
 blk_mq_wait_quiesce_done(ctrl->admin_q->tag_set);
 }
-EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_quiesce_admin_queue);
-void nvme_start_admin_queue(struct nvme_ctrl *ctrl)
+void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl)
 {
 if (test_and_clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
 blk_mq_unquiesce_queue(ctrl->admin_q);
 }
-EXPORT_SYMBOL_GPL(nvme_start_admin_queue);
+EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
 void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
 {
@@ -5299,8 +5354,13 @@ static int __init nvme_core_init(void)
 goto unregister_generic_ns;
 }
+result = nvme_init_auth();
+if (result)
+goto destroy_ns_chr;
 return 0;
+destroy_ns_chr:
+class_destroy(nvme_ns_chr_class);
 unregister_generic_ns:
 unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
 destroy_subsys_class:
@@ -5321,6 +5381,7 @@ static int __init nvme_core_init(void)
 static void __exit nvme_core_exit(void)
 {
+nvme_exit_auth();
 class_destroy(nvme_ns_chr_class);
 class_destroy(nvme_subsys_class);
 class_destroy(nvme_class);
...
@@ -1475,6 +1475,8 @@ nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
 fc_dma_unmap_single(lport->dev, lsop->rspdma,
 sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+kfree(lsop->rspbuf);
+kfree(lsop->rqstbuf);
 kfree(lsop);
 nvme_fc_rport_put(rport);
@@ -1751,20 +1753,17 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
 goto out_put;
 }
-lsop = kzalloc(sizeof(*lsop) +
-sizeof(union nvmefc_ls_requests) +
-sizeof(union nvmefc_ls_responses),
-GFP_KERNEL);
-if (!lsop) {
+lsop = kzalloc(sizeof(*lsop), GFP_KERNEL);
+lsop->rqstbuf = kzalloc(sizeof(*lsop->rqstbuf), GFP_KERNEL);
+lsop->rspbuf = kzalloc(sizeof(*lsop->rspbuf), GFP_KERNEL);
+if (!lsop || !lsop->rqstbuf || !lsop->rspbuf) {
 dev_info(lport->dev,
 "RCV %s LS failed: No memory\n",
 (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
 nvmefc_ls_names[w0->ls_cmd] : "");
 ret = -ENOMEM;
-goto out_put;
+goto out_free;
 }
-lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
-lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
 lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
 sizeof(*lsop->rspbuf),
@@ -1801,6 +1800,8 @@ nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
 fc_dma_unmap_single(lport->dev, lsop->rspdma,
 sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
 out_free:
+kfree(lsop->rspbuf);
+kfree(lsop->rqstbuf);
 kfree(lsop);
 out_put:
 nvme_fc_rport_put(rport);
@@ -2391,7 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref)
 list_del(&ctrl->ctrl_list);
 spin_unlock_irqrestore(&ctrl->rport->lock, flags);
-nvme_start_admin_queue(&ctrl->ctrl);
+nvme_unquiesce_admin_queue(&ctrl->ctrl);
 nvme_remove_admin_tag_set(&ctrl->ctrl);
 kfree(ctrl->queues);
@@ -2492,13 +2493,13 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 * (but with error status).
 */
 if (ctrl->ctrl.queue_count > 1) {
-nvme_stop_queues(&ctrl->ctrl);
+nvme_quiesce_io_queues(&ctrl->ctrl);
 nvme_sync_io_queues(&ctrl->ctrl);
 blk_mq_tagset_busy_iter(&ctrl->tag_set,
 nvme_fc_terminate_exchange, &ctrl->ctrl);
 blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
 if (start_queues)
-nvme_start_queues(&ctrl->ctrl);
+nvme_unquiesce_io_queues(&ctrl->ctrl);
 }
 /*
@@ -2516,13 +2517,13 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 /*
 * clean up the admin queue. Same thing as above.
 */
-nvme_stop_admin_queue(&ctrl->ctrl);
+nvme_quiesce_admin_queue(&ctrl->ctrl);
 blk_sync_queue(ctrl->ctrl.admin_q);
 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 nvme_fc_terminate_exchange, &ctrl->ctrl);
 blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
 if (start_queues)
-nvme_start_admin_queue(&ctrl->ctrl);
+nvme_unquiesce_admin_queue(&ctrl->ctrl);
 }
 static void
@@ -3104,9 +3105,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
 (ilog2(SZ_4K) - 9);
-nvme_start_admin_queue(&ctrl->ctrl);
-ret = nvme_init_ctrl_finish(&ctrl->ctrl);
+nvme_unquiesce_admin_queue(&ctrl->ctrl);
+ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
 if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
 goto out_disconnect_admin_queue;
@@ -3250,10 +3251,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 nvme_fc_free_queue(&ctrl->queues[0]);
 /* re-enable the admin_q so anything new can fast fail */
-nvme_start_admin_queue(&ctrl->ctrl);
+nvme_unquiesce_admin_queue(&ctrl->ctrl);
 /* resume the io queues so that things will fast fail */
-nvme_start_queues(&ctrl->ctrl);
+nvme_unquiesce_io_queues(&ctrl->ctrl);
 nvme_fc_ctlr_inactive_on_rport(ctrl);
 }
...
@@ -8,6 +8,48 @@
 #include <linux/io_uring.h>
 #include "nvme.h"
+static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
+fmode_t mode)
+{
+if (capable(CAP_SYS_ADMIN))
+return true;
+/*
+ * Do not allow unprivileged processes to send vendor specific or fabrics
+ * commands as we can't be sure about their effects.
+ */
+if (c->common.opcode >= nvme_cmd_vendor_start ||
+c->common.opcode == nvme_fabrics_command)
+return false;
+/*
+ * Do not allow unprivileged passthrough of admin commands except
+ * for a subset of identify commands that contain information required
+ * to form proper I/O commands in userspace and do not expose any
+ * potentially sensitive information.
+ */
+if (!ns) {
+if (c->common.opcode == nvme_admin_identify) {
+switch (c->identify.cns) {
+case NVME_ID_CNS_NS:
+case NVME_ID_CNS_CS_NS:
+case NVME_ID_CNS_NS_CS_INDEP:
+return true;
+}
+}
+return false;
+}
+/*
+ * Only allow I/O commands that transfer data to the controller if the
+ * special file is open for writing, but always allow I/O commands that
+ * transfer data from the controller.
+ */
+if (nvme_is_write(c))
+return mode & FMODE_WRITE;
+return true;
+}
 /*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
@@ -261,7 +303,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
 }
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-struct nvme_passthru_cmd __user *ucmd)
+struct nvme_passthru_cmd __user *ucmd, fmode_t mode)
 {
 struct nvme_passthru_cmd cmd;
 struct nvme_command c;
@@ -269,8 +311,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 u64 result;
 int status;
-if (!capable(CAP_SYS_ADMIN))
-return -EACCES;
 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 return -EFAULT;
 if (cmd.flags)
@@ -291,6 +331,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 c.common.cdw14 = cpu_to_le32(cmd.cdw14);
 c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+if (!nvme_cmd_allowed(ns, &c, mode))
+return -EACCES;
 if (cmd.timeout_ms)
 timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -308,15 +351,14 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 }
 static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-struct nvme_passthru_cmd64 __user *ucmd, bool vec)
+struct nvme_passthru_cmd64 __user *ucmd, bool vec,
+fmode_t mode)
 {
 struct nvme_passthru_cmd64 cmd;
 struct nvme_command c;
 unsigned timeout = 0;
 int status;
-if (!capable(CAP_SYS_ADMIN))
-return -EACCES;
 if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
 return -EFAULT;
 if (cmd.flags)
@@ -337,6 +379,9 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 c.common.cdw14 = cpu_to_le32(cmd.cdw14);
 c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+if (!nvme_cmd_allowed(ns, &c, mode))
+return -EACCES;
 if (cmd.timeout_ms)
 timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -483,9 +528,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 void *meta = NULL;
 int ret;
-if (!capable(CAP_SYS_ADMIN))
-return -EACCES;
 c.common.opcode = READ_ONCE(cmd->opcode);
 c.common.flags = READ_ONCE(cmd->flags);
 if (c.common.flags)
@@ -507,6 +549,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
 c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
+if (!nvme_cmd_allowed(ns, &c, ioucmd->file->f_mode))
+return -EACCES;
 d.metadata = READ_ONCE(cmd->metadata);
 d.addr = READ_ONCE(cmd->addr);
 d.data_len = READ_ONCE(cmd->data_len);
@@ -570,13 +615,13 @@ static bool is_ctrl_ioctl(unsigned int cmd)
 }
 static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd,
-void __user *argp)
+void __user *argp, fmode_t mode)
 {
 switch (cmd) {
 case NVME_IOCTL_ADMIN_CMD:
-return nvme_user_cmd(ctrl, NULL, argp);
+return nvme_user_cmd(ctrl, NULL, argp, mode);
 case NVME_IOCTL_ADMIN64_CMD:
-return nvme_user_cmd64(ctrl, NULL, argp, false);
+return nvme_user_cmd64(ctrl, NULL, argp, false, mode);
 default:
 return sed_ioctl(ctrl->opal_dev, cmd, argp);
 }
@@ -601,14 +646,14 @@ struct nvme_user_io32 {
 #endif /* COMPAT_FOR_U64_ALIGNMENT */
 static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
-void __user *argp)
+void __user *argp, fmode_t mode)
 {
 switch (cmd) {
 case NVME_IOCTL_ID:
 force_successful_syscall_return();
 return ns->head->ns_id;
 case NVME_IOCTL_IO_CMD:
-return nvme_user_cmd(ns->ctrl, ns, argp);
+return nvme_user_cmd(ns->ctrl, ns, argp, mode);
 /*
 * struct nvme_user_io can have different padding on some 32-bit ABIs.
 * Just accept the compat version as all fields that are used are the
@@ -620,19 +665,20 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
 case NVME_IOCTL_SUBMIT_IO:
 return nvme_submit_io(ns, argp);
 case NVME_IOCTL_IO64_CMD:
-return nvme_user_cmd64(ns->ctrl, ns, argp, false);
+return nvme_user_cmd64(ns->ctrl, ns, argp, false, mode);
 case NVME_IOCTL_IO64_CMD_VEC:
-return nvme_user_cmd64(ns->ctrl, ns, argp, true);
+return nvme_user_cmd64(ns->ctrl, ns, argp, true, mode);
 default:
 return -ENOTTY;
 }
 }
-static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg)
+static int __nvme_ioctl(struct nvme_ns *ns, unsigned int cmd, void __user *arg,
+fmode_t mode)
 {
 if (is_ctrl_ioctl(cmd))
-return nvme_ctrl_ioctl(ns->ctrl, cmd, arg);
-return nvme_ns_ioctl(ns, cmd, arg);
+return nvme_ctrl_ioctl(ns->ctrl, cmd, arg, mode);
+return nvme_ns_ioctl(ns, cmd, arg, mode);
 }
 int nvme_ioctl(struct block_device *bdev, fmode_t mode,
@@ -640,7 +686,7 @@ int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 {
 struct nvme_ns *ns = bdev->bd_disk->private_data;
-return __nvme_ioctl(ns, cmd, (void __user *)arg);
+return __nvme_ioctl(ns, cmd, (void __user *)arg, mode);
 }
 long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -648,7 +694,7 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 struct nvme_ns *ns =
 container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev);
-return __nvme_ioctl(ns, cmd, (void __user *)arg);
+return __nvme_ioctl(ns, cmd, (void __user *)arg, file->f_mode);
 }
 static int nvme_uring_cmd_checks(unsigned int issue_flags)
@@ -716,7 +762,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 }
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
-void __user *argp, struct nvme_ns_head *head, int srcu_idx)
+void __user *argp, struct nvme_ns_head *head, int srcu_idx,
+fmode_t mode)
 __releases(&head->srcu)
 {
 struct nvme_ctrl *ctrl = ns->ctrl;
@@ -724,7 +771,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
 nvme_get_ctrl(ns->ctrl);
 srcu_read_unlock(&head->srcu, srcu_idx);
-ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
+ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, mode);
 nvme_put_ctrl(ctrl);
 return ret;
@@ -749,9 +796,10 @@ int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
 * deadlock when deleting namespaces using the passthrough interface.
 */
 if (is_ctrl_ioctl(cmd))
-return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+mode);
-ret = nvme_ns_ioctl(ns, cmd, argp);
+ret = nvme_ns_ioctl(ns, cmd, argp, mode);
 out_unlock:
 srcu_read_unlock(&head->srcu, srcu_idx);
 return ret;
@@ -773,9 +821,10 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 goto out_unlock;
 if (is_ctrl_ioctl(cmd))
-return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
+file->f_mode);
-ret = nvme_ns_ioctl(ns, cmd, argp);
+ret = nvme_ns_ioctl(ns, cmd, argp, file->f_mode);
 out_unlock:
 srcu_read_unlock(&head->srcu, srcu_idx);
 return ret;
@@ -849,7 +898,8 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
 return ret;
 }
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
+fmode_t mode)
 {
 struct nvme_ns *ns;
 int ret;
@@ -873,7 +923,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
 kref_get(&ns->kref);
 up_read(&ctrl->namespaces_rwsem);
-ret = nvme_user_cmd(ctrl, ns, argp);
+ret = nvme_user_cmd(ctrl, ns, argp, mode);
 nvme_put_ns(ns);
 return ret;
@@ -890,11 +940,11 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 switch (cmd) {
 case NVME_IOCTL_ADMIN_CMD:
-return nvme_user_cmd(ctrl, NULL, argp);
+return nvme_user_cmd(ctrl, NULL, argp, file->f_mode);
 case NVME_IOCTL_ADMIN64_CMD:
-return nvme_user_cmd64(ctrl, NULL, argp, false);
+return nvme_user_cmd64(ctrl, NULL, argp, false, file->f_mode);
 case NVME_IOCTL_IO_CMD:
-return nvme_dev_user_cmd(ctrl, argp);
+return nvme_dev_user_cmd(ctrl, argp, file->f_mode);
 case NVME_IOCTL_RESET:
 if (!capable(CAP_SYS_ADMIN))
return -EACCES; return -EACCES;
......
...@@ -337,8 +337,8 @@ struct nvme_ctrl { ...@@ -337,8 +337,8 @@ struct nvme_ctrl {
#ifdef CONFIG_NVME_AUTH #ifdef CONFIG_NVME_AUTH
struct work_struct dhchap_auth_work; struct work_struct dhchap_auth_work;
struct list_head dhchap_auth_list;
struct mutex dhchap_auth_mutex; struct mutex dhchap_auth_mutex;
struct nvme_dhchap_queue_context *dhchap_ctxs;
struct nvme_dhchap_key *host_key; struct nvme_dhchap_key *host_key;
struct nvme_dhchap_key *ctrl_key; struct nvme_dhchap_key *ctrl_key;
u16 transaction; u16 transaction;
...@@ -455,6 +455,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head) ...@@ -455,6 +455,7 @@ static inline bool nvme_ns_head_multipath(struct nvme_ns_head *head)
enum nvme_ns_features { enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
NVME_NS_DEAC, /* DEAC bit in Write Zeroes supported */
}; };
struct nvme_ns { struct nvme_ns {
...@@ -507,6 +508,7 @@ struct nvme_ctrl_ops { ...@@ -507,6 +508,7 @@ struct nvme_ctrl_ops {
unsigned int flags; unsigned int flags;
#define NVME_F_FABRICS (1 << 0) #define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1) #define NVME_F_METADATA_SUPPORTED (1 << 1)
const struct attribute_group **dev_attr_groups;
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val); int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val); int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val); int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
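The new dev_attr_groups member lets a transport hand its own sysfs attribute groups to the core, which attaches them to the controller device; the PCI driver below uses it to replace the sysfs_create_group() call it used to make from the reset path. How the core consumes the hook is not part of this excerpt; one plausible sketch, assuming the core keeps its existing default groups as the fallback:

/* sketch, not the actual core change: prefer the transport's groups */
ctrl->device->groups = ctrl->ops->dev_attr_groups ?
		ctrl->ops->dev_attr_groups : nvme_dev_attr_groups;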
...@@ -735,7 +737,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ...@@ -735,7 +737,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl, bool was_suspended);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
const struct blk_mq_ops *ops, unsigned int flags, const struct blk_mq_ops *ops, unsigned int flags,
unsigned int cmd_size); unsigned int cmd_size);
...@@ -747,16 +749,13 @@ void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); ...@@ -747,16 +749,13 @@ void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
bool send);
void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
volatile union nvme_result *res); volatile union nvme_result *res);
void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_quiesce_io_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_unquiesce_io_queues(struct nvme_ctrl *ctrl);
void nvme_stop_admin_queue(struct nvme_ctrl *ctrl); void nvme_quiesce_admin_queue(struct nvme_ctrl *ctrl);
void nvme_start_admin_queue(struct nvme_ctrl *ctrl); void nvme_unquiesce_admin_queue(struct nvme_ctrl *ctrl);
void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl); void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_sync_io_queues(struct nvme_ctrl *ctrl); void nvme_sync_io_queues(struct nvme_ctrl *ctrl);
...@@ -856,6 +855,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); ...@@ -856,6 +855,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct pr_ops nvme_pr_ops; extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops; extern const struct block_device_operations nvme_ns_head_ops;
extern const struct attribute_group nvme_dev_attrs_group;
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head); struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
...@@ -1018,14 +1018,25 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl) ...@@ -1018,14 +1018,25 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
} }
#ifdef CONFIG_NVME_AUTH #ifdef CONFIG_NVME_AUTH
void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl); int __init nvme_init_auth(void);
void __exit nvme_exit_auth(void);
int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
void nvme_auth_stop(struct nvme_ctrl *ctrl); void nvme_auth_stop(struct nvme_ctrl *ctrl);
int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid); int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid); int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
void nvme_auth_reset(struct nvme_ctrl *ctrl);
void nvme_auth_free(struct nvme_ctrl *ctrl); void nvme_auth_free(struct nvme_ctrl *ctrl);
#else #else
static inline void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) {}; static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
{
return 0;
}
static inline int __init nvme_init_auth(void)
{
return 0;
}
static inline void __exit nvme_exit_auth(void)
{
}
static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {}; static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {};
static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid) static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
{ {
......
...@@ -130,7 +130,6 @@ struct nvme_dev { ...@@ -130,7 +130,6 @@ struct nvme_dev {
u32 db_stride; u32 db_stride;
void __iomem *bar; void __iomem *bar;
unsigned long bar_mapped_size; unsigned long bar_mapped_size;
struct work_struct remove_work;
struct mutex shutdown_lock; struct mutex shutdown_lock;
bool subsystem; bool subsystem;
u64 cmb_size; u64 cmb_size;
...@@ -158,8 +157,6 @@ struct nvme_dev { ...@@ -158,8 +157,6 @@ struct nvme_dev {
unsigned int nr_allocated_queues; unsigned int nr_allocated_queues;
unsigned int nr_write_queues; unsigned int nr_write_queues;
unsigned int nr_poll_queues; unsigned int nr_poll_queues;
bool attrs_added;
}; };
static int io_queue_depth_set(const char *val, const struct kernel_param *kp) static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
...@@ -241,10 +238,13 @@ static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) ...@@ -241,10 +238,13 @@ static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
return dev->nr_allocated_queues * 8 * dev->db_stride; return dev->nr_allocated_queues * 8 * dev->db_stride;
} }
static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) static void nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{ {
unsigned int mem_size = nvme_dbbuf_size(dev); unsigned int mem_size = nvme_dbbuf_size(dev);
if (!(dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP))
return;
if (dev->dbbuf_dbs) { if (dev->dbbuf_dbs) {
/* /*
* Clear the dbbuf memory so the driver doesn't observe stale * Clear the dbbuf memory so the driver doesn't observe stale
...@@ -252,25 +252,27 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) ...@@ -252,25 +252,27 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
*/ */
memset(dev->dbbuf_dbs, 0, mem_size); memset(dev->dbbuf_dbs, 0, mem_size);
memset(dev->dbbuf_eis, 0, mem_size); memset(dev->dbbuf_eis, 0, mem_size);
return 0; return;
} }
dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
&dev->dbbuf_dbs_dma_addr, &dev->dbbuf_dbs_dma_addr,
GFP_KERNEL); GFP_KERNEL);
if (!dev->dbbuf_dbs) if (!dev->dbbuf_dbs)
return -ENOMEM; goto fail;
dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
&dev->dbbuf_eis_dma_addr, &dev->dbbuf_eis_dma_addr,
GFP_KERNEL); GFP_KERNEL);
if (!dev->dbbuf_eis) { if (!dev->dbbuf_eis)
dma_free_coherent(dev->dev, mem_size, goto fail_free_dbbuf_dbs;
dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); return;
dev->dbbuf_dbs = NULL;
return -ENOMEM;
}
return 0; fail_free_dbbuf_dbs:
dma_free_coherent(dev->dev, mem_size, dev->dbbuf_dbs,
dev->dbbuf_dbs_dma_addr);
dev->dbbuf_dbs = NULL;
fail:
dev_warn(dev->dev, "unable to allocate dma for dbbuf\n");
} }
static void nvme_dbbuf_dma_free(struct nvme_dev *dev) static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
...@@ -392,14 +394,6 @@ static int nvme_pci_npages_sgl(void) ...@@ -392,14 +394,6 @@ static int nvme_pci_npages_sgl(void)
PAGE_SIZE); PAGE_SIZE);
} }
static size_t nvme_pci_iod_alloc_size(void)
{
size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
return sizeof(__le64 *) * npages +
sizeof(struct scatterlist) * NVME_MAX_SEGS;
}
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx) unsigned int hctx_idx)
{ {
...@@ -1487,7 +1481,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) ...@@ -1487,7 +1481,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
nvmeq->dev->online_queues--; nvmeq->dev->online_queues--;
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
nvme_stop_admin_queue(&nvmeq->dev->ctrl); nvme_quiesce_admin_queue(&nvmeq->dev->ctrl);
if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
return 0; return 0;
...@@ -1747,8 +1741,9 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) ...@@ -1747,8 +1741,9 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
* user requests may be waiting on a stopped queue. Start the * user requests may be waiting on a stopped queue. Start the
* queue to flush these to completion. * queue to flush these to completion.
*/ */
nvme_start_admin_queue(&dev->ctrl); nvme_unquiesce_admin_queue(&dev->ctrl);
blk_mq_destroy_queue(dev->ctrl.admin_q); blk_mq_destroy_queue(dev->ctrl.admin_q);
blk_put_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset); blk_mq_free_tag_set(&dev->admin_tagset);
} }
} }
...@@ -2106,6 +2101,9 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) ...@@ -2106,6 +2101,9 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
u32 enable_bits = NVME_HOST_MEM_ENABLE; u32 enable_bits = NVME_HOST_MEM_ENABLE;
int ret; int ret;
if (!dev->ctrl.hmpre)
return 0;
preferred = min(preferred, max); preferred = min(preferred, max);
if (min > max) { if (min > max) {
dev_warn(dev->ctrl.device, dev_warn(dev->ctrl.device,
...@@ -2234,11 +2232,17 @@ static struct attribute *nvme_pci_attrs[] = { ...@@ -2234,11 +2232,17 @@ static struct attribute *nvme_pci_attrs[] = {
NULL, NULL,
}; };
static const struct attribute_group nvme_pci_attr_group = { static const struct attribute_group nvme_pci_dev_attrs_group = {
.attrs = nvme_pci_attrs, .attrs = nvme_pci_attrs,
.is_visible = nvme_pci_attrs_are_visible, .is_visible = nvme_pci_attrs_are_visible,
}; };
static const struct attribute_group *nvme_pci_dev_attr_groups[] = {
&nvme_dev_attrs_group,
&nvme_pci_dev_attrs_group,
NULL,
};
/* /*
* nirqs is the number of interrupts available for write and read * nirqs is the number of interrupts available for write and read
* queues. The core already reserved an interrupt for the admin queue. * queues. The core already reserved an interrupt for the admin queue.
...@@ -2642,7 +2646,8 @@ static int nvme_pci_enable(struct nvme_dev *dev) ...@@ -2642,7 +2646,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
pci_enable_pcie_error_reporting(pdev); pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev); pci_save_state(pdev);
return 0;
return nvme_pci_configure_admin_queue(dev);
disable: disable:
pci_disable_device(pdev); pci_disable_device(pdev);
...@@ -2698,7 +2703,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) ...@@ -2698,7 +2703,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (!dead && shutdown && freeze) if (!dead && shutdown && freeze)
nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
nvme_stop_queues(&dev->ctrl); nvme_quiesce_io_queues(&dev->ctrl);
if (!dead && dev->ctrl.queue_count > 0) { if (!dead && dev->ctrl.queue_count > 0) {
nvme_disable_io_queues(dev); nvme_disable_io_queues(dev);
...@@ -2718,9 +2723,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) ...@@ -2718,9 +2723,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* deadlocking blk-mq hot-cpu notifier. * deadlocking blk-mq hot-cpu notifier.
*/ */
if (shutdown) { if (shutdown) {
nvme_start_queues(&dev->ctrl); nvme_unquiesce_io_queues(&dev->ctrl);
if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
nvme_start_admin_queue(&dev->ctrl); nvme_unquiesce_admin_queue(&dev->ctrl);
} }
mutex_unlock(&dev->shutdown_lock); mutex_unlock(&dev->shutdown_lock);
} }
...@@ -2757,6 +2762,22 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) ...@@ -2757,6 +2762,22 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool); dma_pool_destroy(dev->prp_small_pool);
} }
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
size_t alloc_size = sizeof(__le64 *) * npages +
sizeof(struct scatterlist) * NVME_MAX_SEGS;
WARN_ON_ONCE(alloc_size > PAGE_SIZE);
dev->iod_mempool = mempool_create_node(1,
mempool_kmalloc, mempool_kfree,
(void *)alloc_size, GFP_KERNEL,
dev_to_node(dev->dev));
if (!dev->iod_mempool)
return -ENOMEM;
return 0;
}
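nvme_pci_alloc_iod_mempool() reserves a single worst-case element so the I/O path can always make forward progress under memory pressure. Consumers follow the usual mempool pattern; a sketch of the submission/completion pair (iod->sg mirrors the driver's field name, the rest is illustrative):

/* submission: may fall back to the reserved element instead of failing */
iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
if (!iod->sg)
	return BLK_STS_RESOURCE;

/* completion: returning the element replenishes the reserve */
mempool_free(iod->sg, dev->iod_mempool);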
static void nvme_free_tagset(struct nvme_dev *dev) static void nvme_free_tagset(struct nvme_dev *dev)
{ {
if (dev->tagset.tags) if (dev->tagset.tags)
...@@ -2764,35 +2785,17 @@ static void nvme_free_tagset(struct nvme_dev *dev) ...@@ -2764,35 +2785,17 @@ static void nvme_free_tagset(struct nvme_dev *dev)
dev->ctrl.tagset = NULL; dev->ctrl.tagset = NULL;
} }
/* pairs with nvme_pci_alloc_dev */
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{ {
struct nvme_dev *dev = to_nvme_dev(ctrl); struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev);
nvme_free_tagset(dev); nvme_free_tagset(dev);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
free_opal_dev(dev->ctrl.opal_dev);
mempool_destroy(dev->iod_mempool);
put_device(dev->dev); put_device(dev->dev);
kfree(dev->queues); kfree(dev->queues);
kfree(dev); kfree(dev);
} }
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
/*
* Set state to deleting now to avoid blocking nvme_wait_reset(), which
* may be holding this pci_dev's device lock.
*/
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
nvme_mark_namespaces_dead(&dev->ctrl);
if (!queue_work(nvme_wq, &dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
static void nvme_reset_work(struct work_struct *work) static void nvme_reset_work(struct work_struct *work)
{ {
struct nvme_dev *dev = struct nvme_dev *dev =
...@@ -2819,34 +2822,7 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2819,34 +2822,7 @@ static void nvme_reset_work(struct work_struct *work)
result = nvme_pci_enable(dev); result = nvme_pci_enable(dev);
if (result) if (result)
goto out_unlock; goto out_unlock;
nvme_unquiesce_admin_queue(&dev->ctrl);
result = nvme_pci_configure_admin_queue(dev);
if (result)
goto out_unlock;
if (!dev->ctrl.admin_q) {
result = nvme_pci_alloc_admin_tag_set(dev);
if (result)
goto out_unlock;
} else {
nvme_start_admin_queue(&dev->ctrl);
}
dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
/*
* Limit the max command size to prevent iod->sg allocations going
* over a single page.
*/
dev->ctrl.max_hw_sectors = min_t(u32,
NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
dev->ctrl.max_segments = NVME_MAX_SEGS;
/*
* Don't limit the IOMMU merged segment size.
*/
dma_set_max_seg_size(dev->dev, 0xffffffff);
mutex_unlock(&dev->shutdown_lock); mutex_unlock(&dev->shutdown_lock);
/* /*
...@@ -2860,75 +2836,37 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2860,75 +2836,37 @@ static void nvme_reset_work(struct work_struct *work)
goto out; goto out;
} }
/* result = nvme_init_ctrl_finish(&dev->ctrl, was_suspend);
* We do not support an SGL for metadata (yet), so we are limited to a
* single integrity segment for the separate metadata pointer.
*/
dev->ctrl.max_integrity_segments = 1;
result = nvme_init_ctrl_finish(&dev->ctrl);
if (result) if (result)
goto out; goto out;
if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) { nvme_dbbuf_dma_alloc(dev);
if (!dev->ctrl.opal_dev)
dev->ctrl.opal_dev =
init_opal_dev(&dev->ctrl, &nvme_sec_submit);
else if (was_suspend)
opal_unlock_from_suspend(dev->ctrl.opal_dev);
} else {
free_opal_dev(dev->ctrl.opal_dev);
dev->ctrl.opal_dev = NULL;
}
if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { result = nvme_setup_host_mem(dev);
result = nvme_dbbuf_dma_alloc(dev); if (result < 0)
if (result) goto out;
dev_warn(dev->dev,
"unable to allocate dma for dbbuf\n");
}
if (dev->ctrl.hmpre) {
result = nvme_setup_host_mem(dev);
if (result < 0)
goto out;
}
result = nvme_setup_io_queues(dev); result = nvme_setup_io_queues(dev);
if (result) if (result)
goto out; goto out;
if (dev->ctrl.tagset) { /*
/* * Freeze and update the number of I/O queues as those might have
* This is a controller reset and we already have a tagset. * changed. If there are no I/O queues left after this reset, keep the
* Freeze and update the number of I/O queues as those might have * controller around but remove all namespaces.
* changed. If there are no I/O queues left after this reset, */
* keep the controller around but remove all namespaces. if (dev->online_queues > 1) {
*/ nvme_unquiesce_io_queues(&dev->ctrl);
if (dev->online_queues > 1) { nvme_wait_freeze(&dev->ctrl);
nvme_start_queues(&dev->ctrl); nvme_pci_update_nr_queues(dev);
nvme_wait_freeze(&dev->ctrl); nvme_dbbuf_set(dev);
nvme_pci_update_nr_queues(dev); nvme_unfreeze(&dev->ctrl);
nvme_dbbuf_set(dev);
nvme_unfreeze(&dev->ctrl);
} else {
dev_warn(dev->ctrl.device, "IO queues lost\n");
nvme_mark_namespaces_dead(&dev->ctrl);
nvme_start_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
nvme_free_tagset(dev);
}
} else { } else {
/* dev_warn(dev->ctrl.device, "IO queues lost\n");
* First probe. Still allow the controller to show up even if nvme_mark_namespaces_dead(&dev->ctrl);
* there are no namespaces. nvme_unquiesce_io_queues(&dev->ctrl);
*/ nvme_remove_namespaces(&dev->ctrl);
if (dev->online_queues > 1) { nvme_free_tagset(dev);
nvme_pci_alloc_tag_set(dev);
nvme_dbbuf_set(dev);
} else {
dev_warn(dev->ctrl.device, "IO queues not created\n");
}
} }
/* /*
...@@ -2942,30 +2880,22 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2942,30 +2880,22 @@ static void nvme_reset_work(struct work_struct *work)
goto out; goto out;
} }
if (!dev->attrs_added && !sysfs_create_group(&dev->ctrl.device->kobj,
&nvme_pci_attr_group))
dev->attrs_added = true;
nvme_start_ctrl(&dev->ctrl); nvme_start_ctrl(&dev->ctrl);
return; return;
out_unlock: out_unlock:
mutex_unlock(&dev->shutdown_lock); mutex_unlock(&dev->shutdown_lock);
out: out:
if (result) /*
dev_warn(dev->ctrl.device, * Set state to deleting now to avoid blocking nvme_wait_reset(), which
"Removing after probe failure status: %d\n", result); * may be holding this pci_dev's device lock.
nvme_remove_dead_ctrl(dev); */
} dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n",
result);
static void nvme_remove_dead_ctrl_work(struct work_struct *work) nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
{ nvme_dev_disable(dev, true);
struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); nvme_mark_namespaces_dead(&dev->ctrl);
struct pci_dev *pdev = to_pci_dev(dev->dev); nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
if (pci_get_drvdata(pdev))
device_release_driver(&pdev->dev);
nvme_put_ctrl(&dev->ctrl);
} }
static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
...@@ -3018,6 +2948,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { ...@@ -3018,6 +2948,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie", .name = "pcie",
.module = THIS_MODULE, .module = THIS_MODULE,
.flags = NVME_F_METADATA_SUPPORTED, .flags = NVME_F_METADATA_SUPPORTED,
.dev_attr_groups = nvme_pci_dev_attr_groups,
.reg_read32 = nvme_pci_reg_read32, .reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32, .reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64, .reg_read64 = nvme_pci_reg_read64,
...@@ -3087,29 +3018,22 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) ...@@ -3087,29 +3018,22 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0; return 0;
} }
static void nvme_async_probe(void *data, async_cookie_t cookie) static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
const struct pci_device_id *id)
{ {
struct nvme_dev *dev = data;
flush_work(&dev->ctrl.reset_work);
flush_work(&dev->ctrl.scan_work);
nvme_put_ctrl(&dev->ctrl);
}
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
struct nvme_dev *dev;
unsigned long quirks = id->driver_data; unsigned long quirks = id->driver_data;
size_t alloc_size; int node = dev_to_node(&pdev->dev);
struct nvme_dev *dev;
int ret = -ENOMEM;
node = dev_to_node(&pdev->dev);
if (node == NUMA_NO_NODE) if (node == NUMA_NO_NODE)
set_dev_node(&pdev->dev, first_memory_node); set_dev_node(&pdev->dev, first_memory_node);
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
if (!dev) if (!dev)
return -ENOMEM; return NULL;
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
mutex_init(&dev->shutdown_lock);
dev->nr_write_queues = write_queues; dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues; dev->nr_poll_queues = poll_queues;
...@@ -3117,25 +3041,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -3117,25 +3041,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev->queues = kcalloc_node(dev->nr_allocated_queues, dev->queues = kcalloc_node(dev->nr_allocated_queues,
sizeof(struct nvme_queue), GFP_KERNEL, node); sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues) if (!dev->queues)
goto free; goto out_free_dev;
dev->dev = get_device(&pdev->dev); dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
result = nvme_dev_map(dev);
if (result)
goto put_pci;
INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
mutex_init(&dev->shutdown_lock);
result = nvme_setup_prp_pools(dev);
if (result)
goto unmap;
quirks |= check_vendor_combination_bug(pdev); quirks |= check_vendor_combination_bug(pdev);
if (!noacpi && acpi_storage_d3(&pdev->dev)) { if (!noacpi && acpi_storage_d3(&pdev->dev)) {
/* /*
* Some systems use a BIOS workaround to ask for D3 on * Some systems use a BIOS workaround to ask for D3 on
...@@ -3145,46 +3055,128 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -3145,46 +3055,128 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
"platform quirk: setting simple suspend\n"); "platform quirk: setting simple suspend\n");
quirks |= NVME_QUIRK_SIMPLE_SUSPEND; quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
} }
ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
quirks);
if (ret)
goto out_put_device;
dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1);
dma_set_max_seg_size(&pdev->dev, 0xffffffff);
/* /*
* Double check that our mempool alloc size will cover the biggest * Limit the max command size to prevent iod->sg allocations going
* command we support. * over a single page.
*/ */
alloc_size = nvme_pci_iod_alloc_size(); dev->ctrl.max_hw_sectors = min_t(u32,
WARN_ON_ONCE(alloc_size > PAGE_SIZE); NVME_MAX_KB_SZ << 1, dma_max_mapping_size(&pdev->dev) >> 9);
dev->ctrl.max_segments = NVME_MAX_SEGS;
dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, /*
mempool_kfree, * There is no support for SGLs for metadata (yet), so we are limited to
(void *) alloc_size, * a single integrity segment for the separate metadata pointer.
GFP_KERNEL, node); */
if (!dev->iod_mempool) { dev->ctrl.max_integrity_segments = 1;
result = -ENOMEM; return dev;
goto release_pools;
} out_put_device:
put_device(dev->dev);
kfree(dev->queues);
out_free_dev:
kfree(dev);
return ERR_PTR(ret);
}
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct nvme_dev *dev;
int result = -ENOMEM;
result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, dev = nvme_pci_alloc_dev(pdev, id);
quirks); if (!dev)
return -ENOMEM;
result = nvme_dev_map(dev);
if (result) if (result)
goto release_mempool; goto out_uninit_ctrl;
result = nvme_setup_prp_pools(dev);
if (result)
goto out_dev_unmap;
result = nvme_pci_alloc_iod_mempool(dev);
if (result)
goto out_release_prp_pools;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
nvme_reset_ctrl(&dev->ctrl); result = nvme_pci_enable(dev);
async_schedule(nvme_async_probe, dev); if (result)
goto out_release_iod_mempool;
result = nvme_pci_alloc_admin_tag_set(dev);
if (result)
goto out_disable;
/*
* Mark the controller as connecting before sending admin commands to
* allow the timeout handler to do the right thing.
*/
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(dev->ctrl.device,
"failed to mark controller CONNECTING\n");
result = -EBUSY;
goto out_disable;
}
result = nvme_init_ctrl_finish(&dev->ctrl, false);
if (result)
goto out_disable;
nvme_dbbuf_dma_alloc(dev);
result = nvme_setup_host_mem(dev);
if (result < 0)
goto out_disable;
result = nvme_setup_io_queues(dev);
if (result)
goto out_disable;
if (dev->online_queues > 1) {
nvme_pci_alloc_tag_set(dev);
nvme_dbbuf_set(dev);
} else {
dev_warn(dev->ctrl.device, "IO queues not created\n");
}
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
dev_warn(dev->ctrl.device,
"failed to mark controller live state\n");
result = -ENODEV;
goto out_disable;
}
pci_set_drvdata(pdev, dev);
nvme_start_ctrl(&dev->ctrl);
nvme_put_ctrl(&dev->ctrl);
return 0; return 0;
release_mempool: out_disable:
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_dev_disable(dev, true);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_dbbuf_dma_free(dev);
nvme_free_queues(dev, 0);
out_release_iod_mempool:
mempool_destroy(dev->iod_mempool); mempool_destroy(dev->iod_mempool);
release_pools: out_release_prp_pools:
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
unmap: out_dev_unmap:
nvme_dev_unmap(dev); nvme_dev_unmap(dev);
put_pci: out_uninit_ctrl:
put_device(dev->dev); nvme_uninit_ctrl(&dev->ctrl);
free:
kfree(dev->queues);
kfree(dev);
return result; return result;
} }
...@@ -3216,13 +3208,6 @@ static void nvme_shutdown(struct pci_dev *pdev) ...@@ -3216,13 +3208,6 @@ static void nvme_shutdown(struct pci_dev *pdev)
nvme_disable_prepare_reset(dev, true); nvme_disable_prepare_reset(dev, true);
} }
static void nvme_remove_attrs(struct nvme_dev *dev)
{
if (dev->attrs_added)
sysfs_remove_group(&dev->ctrl.device->kobj,
&nvme_pci_attr_group);
}
/* /*
* The driver's remove may be called on a device in a partially initialized * The driver's remove may be called on a device in a partially initialized
* state. This function must not have any dependencies on the device state in * state. This function must not have any dependencies on the device state in
...@@ -3244,10 +3229,11 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -3244,10 +3229,11 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_stop_ctrl(&dev->ctrl); nvme_stop_ctrl(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl);
nvme_dev_disable(dev, true); nvme_dev_disable(dev, true);
nvme_remove_attrs(dev);
nvme_free_host_mem(dev); nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev); nvme_dev_remove_admin(dev);
nvme_dbbuf_dma_free(dev);
nvme_free_queues(dev, 0); nvme_free_queues(dev, 0);
mempool_destroy(dev->iod_mempool);
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
nvme_dev_unmap(dev); nvme_dev_unmap(dev);
nvme_uninit_ctrl(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl);
...@@ -3580,11 +3566,12 @@ static struct pci_driver nvme_driver = { ...@@ -3580,11 +3566,12 @@ static struct pci_driver nvme_driver = {
.probe = nvme_probe, .probe = nvme_probe,
.remove = nvme_remove, .remove = nvme_remove,
.shutdown = nvme_shutdown, .shutdown = nvme_shutdown,
#ifdef CONFIG_PM_SLEEP
.driver = { .driver = {
.pm = &nvme_dev_pm_ops, .probe_type = PROBE_PREFER_ASYNCHRONOUS,
}, #ifdef CONFIG_PM_SLEEP
.pm = &nvme_dev_pm_ops,
#endif #endif
},
.sriov_configure = pci_sriov_configure_simple, .sriov_configure = pci_sriov_configure_simple,
.err_handler = &nvme_err_handler, .err_handler = &nvme_err_handler,
}; };
......
...@@ -869,16 +869,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ...@@ -869,16 +869,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
else else
ctrl->ctrl.max_integrity_segments = 0; ctrl->ctrl.max_integrity_segments = 0;
nvme_start_admin_queue(&ctrl->ctrl); nvme_unquiesce_admin_queue(&ctrl->ctrl);
error = nvme_init_ctrl_finish(&ctrl->ctrl); error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
if (error) if (error)
goto out_quiesce_queue; goto out_quiesce_queue;
return 0; return 0;
out_quiesce_queue: out_quiesce_queue:
nvme_stop_admin_queue(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q); blk_sync_queue(ctrl->ctrl.admin_q);
out_stop_queue: out_stop_queue:
nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_stop_queue(&ctrl->queues[0]);
...@@ -922,7 +922,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -922,7 +922,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
goto out_cleanup_tagset; goto out_cleanup_tagset;
if (!new) { if (!new) {
nvme_start_queues(&ctrl->ctrl); nvme_unquiesce_io_queues(&ctrl->ctrl);
if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
/* /*
* If we timed out waiting for freeze we are likely to * If we timed out waiting for freeze we are likely to
...@@ -949,7 +949,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -949,7 +949,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
return 0; return 0;
out_wait_freeze_timed_out: out_wait_freeze_timed_out:
nvme_stop_queues(&ctrl->ctrl); nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl); nvme_rdma_stop_io_queues(ctrl);
out_cleanup_tagset: out_cleanup_tagset:
...@@ -964,12 +964,12 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -964,12 +964,12 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove) bool remove)
{ {
nvme_stop_admin_queue(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q); blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_cancel_admin_tagset(&ctrl->ctrl);
if (remove) { if (remove) {
nvme_start_admin_queue(&ctrl->ctrl); nvme_unquiesce_admin_queue(&ctrl->ctrl);
nvme_remove_admin_tag_set(&ctrl->ctrl); nvme_remove_admin_tag_set(&ctrl->ctrl);
} }
nvme_rdma_destroy_admin_queue(ctrl); nvme_rdma_destroy_admin_queue(ctrl);
...@@ -980,12 +980,12 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, ...@@ -980,12 +980,12 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
{ {
if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.queue_count > 1) {
nvme_start_freeze(&ctrl->ctrl); nvme_start_freeze(&ctrl->ctrl);
nvme_stop_queues(&ctrl->ctrl); nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl); nvme_rdma_stop_io_queues(ctrl);
nvme_cancel_tagset(&ctrl->ctrl); nvme_cancel_tagset(&ctrl->ctrl);
if (remove) { if (remove) {
nvme_start_queues(&ctrl->ctrl); nvme_unquiesce_io_queues(&ctrl->ctrl);
nvme_remove_io_tag_set(&ctrl->ctrl); nvme_remove_io_tag_set(&ctrl->ctrl);
} }
nvme_rdma_free_io_queues(ctrl); nvme_rdma_free_io_queues(ctrl);
...@@ -1106,7 +1106,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -1106,7 +1106,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
destroy_io: destroy_io:
if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl); nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl); nvme_rdma_stop_io_queues(ctrl);
nvme_cancel_tagset(&ctrl->ctrl); nvme_cancel_tagset(&ctrl->ctrl);
...@@ -1115,7 +1115,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -1115,7 +1115,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
nvme_rdma_free_io_queues(ctrl); nvme_rdma_free_io_queues(ctrl);
} }
destroy_admin: destroy_admin:
nvme_stop_admin_queue(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl);
blk_sync_queue(ctrl->ctrl.admin_q); blk_sync_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_stop_queue(&ctrl->queues[0]);
nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_cancel_admin_tagset(&ctrl->ctrl);
...@@ -1153,13 +1153,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) ...@@ -1153,13 +1153,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work); struct nvme_rdma_ctrl, err_work);
nvme_auth_stop(&ctrl->ctrl);
nvme_stop_keep_alive(&ctrl->ctrl); nvme_stop_keep_alive(&ctrl->ctrl);
flush_work(&ctrl->ctrl.async_event_work); flush_work(&ctrl->ctrl.async_event_work);
nvme_rdma_teardown_io_queues(ctrl, false); nvme_rdma_teardown_io_queues(ctrl, false);
nvme_start_queues(&ctrl->ctrl); nvme_unquiesce_io_queues(&ctrl->ctrl);
nvme_rdma_teardown_admin_queue(ctrl, false); nvme_rdma_teardown_admin_queue(ctrl, false);
nvme_start_admin_queue(&ctrl->ctrl); nvme_unquiesce_admin_queue(&ctrl->ctrl);
nvme_auth_stop(&ctrl->ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */ /* state change failure is ok if we started ctrl delete */
...@@ -2207,7 +2207,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { ...@@ -2207,7 +2207,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{ {
nvme_rdma_teardown_io_queues(ctrl, shutdown); nvme_rdma_teardown_io_queues(ctrl, shutdown);
nvme_stop_admin_queue(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl);
if (shutdown) if (shutdown)
nvme_shutdown_ctrl(&ctrl->ctrl); nvme_shutdown_ctrl(&ctrl->ctrl);
else else
......
...@@ -1875,7 +1875,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) ...@@ -1875,7 +1875,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
goto out_cleanup_connect_q; goto out_cleanup_connect_q;
if (!new) { if (!new) {
nvme_start_queues(ctrl); nvme_unquiesce_io_queues(ctrl);
if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
/* /*
* If we timed out waiting for freeze we are likely to * If we timed out waiting for freeze we are likely to
...@@ -1902,7 +1902,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) ...@@ -1902,7 +1902,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
return 0; return 0;
out_wait_freeze_timed_out: out_wait_freeze_timed_out:
nvme_stop_queues(ctrl); nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl); nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl);
out_cleanup_connect_q: out_cleanup_connect_q:
...@@ -1947,16 +1947,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) ...@@ -1947,16 +1947,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
if (error) if (error)
goto out_stop_queue; goto out_stop_queue;
nvme_start_admin_queue(ctrl); nvme_unquiesce_admin_queue(ctrl);
error = nvme_init_ctrl_finish(ctrl); error = nvme_init_ctrl_finish(ctrl, false);
if (error) if (error)
goto out_quiesce_queue; goto out_quiesce_queue;
return 0; return 0;
out_quiesce_queue: out_quiesce_queue:
nvme_stop_admin_queue(ctrl); nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q); blk_sync_queue(ctrl->admin_q);
out_stop_queue: out_stop_queue:
nvme_tcp_stop_queue(ctrl, 0); nvme_tcp_stop_queue(ctrl, 0);
...@@ -1972,12 +1972,12 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) ...@@ -1972,12 +1972,12 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
bool remove) bool remove)
{ {
nvme_stop_admin_queue(ctrl); nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q); blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0); nvme_tcp_stop_queue(ctrl, 0);
nvme_cancel_admin_tagset(ctrl); nvme_cancel_admin_tagset(ctrl);
if (remove) if (remove)
nvme_start_admin_queue(ctrl); nvme_unquiesce_admin_queue(ctrl);
nvme_tcp_destroy_admin_queue(ctrl, remove); nvme_tcp_destroy_admin_queue(ctrl, remove);
} }
...@@ -1986,14 +1986,14 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, ...@@ -1986,14 +1986,14 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
{ {
if (ctrl->queue_count <= 1) if (ctrl->queue_count <= 1)
return; return;
nvme_stop_admin_queue(ctrl); nvme_quiesce_admin_queue(ctrl);
nvme_start_freeze(ctrl); nvme_start_freeze(ctrl);
nvme_stop_queues(ctrl); nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl); nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl);
nvme_cancel_tagset(ctrl); nvme_cancel_tagset(ctrl);
if (remove) if (remove)
nvme_start_queues(ctrl); nvme_unquiesce_io_queues(ctrl);
nvme_tcp_destroy_io_queues(ctrl, remove); nvme_tcp_destroy_io_queues(ctrl, remove);
} }
...@@ -2074,14 +2074,14 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) ...@@ -2074,14 +2074,14 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
destroy_io: destroy_io:
if (ctrl->queue_count > 1) { if (ctrl->queue_count > 1) {
nvme_stop_queues(ctrl); nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl); nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl);
nvme_cancel_tagset(ctrl); nvme_cancel_tagset(ctrl);
nvme_tcp_destroy_io_queues(ctrl, new); nvme_tcp_destroy_io_queues(ctrl, new);
} }
destroy_admin: destroy_admin:
nvme_stop_admin_queue(ctrl); nvme_quiesce_admin_queue(ctrl);
blk_sync_queue(ctrl->admin_q); blk_sync_queue(ctrl->admin_q);
nvme_tcp_stop_queue(ctrl, 0); nvme_tcp_stop_queue(ctrl, 0);
nvme_cancel_admin_tagset(ctrl); nvme_cancel_admin_tagset(ctrl);
...@@ -2119,14 +2119,14 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) ...@@ -2119,14 +2119,14 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
struct nvme_tcp_ctrl, err_work); struct nvme_tcp_ctrl, err_work);
struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
nvme_auth_stop(ctrl);
nvme_stop_keep_alive(ctrl); nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work); flush_work(&ctrl->async_event_work);
nvme_tcp_teardown_io_queues(ctrl, false); nvme_tcp_teardown_io_queues(ctrl, false);
/* unquiesce to fail fast pending requests */ /* unquiesce to fail fast pending requests */
nvme_start_queues(ctrl); nvme_unquiesce_io_queues(ctrl);
nvme_tcp_teardown_admin_queue(ctrl, false); nvme_tcp_teardown_admin_queue(ctrl, false);
nvme_start_admin_queue(ctrl); nvme_unquiesce_admin_queue(ctrl);
nvme_auth_stop(ctrl);
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
/* state change failure is ok if we started ctrl delete */ /* state change failure is ok if we started ctrl delete */
...@@ -2141,7 +2141,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) ...@@ -2141,7 +2141,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{ {
nvme_tcp_teardown_io_queues(ctrl, shutdown); nvme_tcp_teardown_io_queues(ctrl, shutdown);
nvme_stop_admin_queue(ctrl); nvme_quiesce_admin_queue(ctrl);
if (shutdown) if (shutdown)
nvme_shutdown_ctrl(ctrl); nvme_shutdown_ctrl(ctrl);
else else
......
...@@ -370,7 +370,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -370,7 +370,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number, memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
strlen(subsys->model_number), ' '); strlen(subsys->model_number), ' ');
memcpy_and_pad(id->fr, sizeof(id->fr), memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' '); subsys->firmware_rev, strlen(subsys->firmware_rev), ' ');
put_unaligned_le24(subsys->ieee_oui, id->ieee);
id->rab = 6; id->rab = 6;
...@@ -379,11 +381,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -379,11 +381,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
else else
id->cntrltype = NVME_CTRL_IO; id->cntrltype = NVME_CTRL_IO;
/*
* XXX: figure out how we can assign a IEEE OUI, but until then
* the safest is to leave it as zeroes.
*/
/* we support multiple ports, multiple hosts and ANA: */ /* we support multiple ports, multiple hosts and ANA: */
id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL | id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL |
NVME_CTRL_CMIC_ANA; NVME_CTRL_CMIC_ANA;
......
...@@ -45,9 +45,11 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, ...@@ -45,9 +45,11 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
if (!dhchap_secret) if (!dhchap_secret)
return -ENOMEM; return -ENOMEM;
if (set_ctrl) { if (set_ctrl) {
kfree(host->dhchap_ctrl_secret);
host->dhchap_ctrl_secret = strim(dhchap_secret); host->dhchap_ctrl_secret = strim(dhchap_secret);
host->dhchap_ctrl_key_hash = key_hash; host->dhchap_ctrl_key_hash = key_hash;
} else { } else {
kfree(host->dhchap_secret);
host->dhchap_secret = strim(dhchap_secret); host->dhchap_secret = strim(dhchap_secret);
host->dhchap_key_hash = key_hash; host->dhchap_key_hash = key_hash;
} }
......
...@@ -1259,6 +1259,116 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, ...@@ -1259,6 +1259,116 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
} }
CONFIGFS_ATTR(nvmet_subsys_, attr_model); CONFIGFS_ATTR(nvmet_subsys_, attr_model);
static ssize_t nvmet_subsys_attr_ieee_oui_show(struct config_item *item,
char *page)
{
struct nvmet_subsys *subsys = to_subsys(item);
return sysfs_emit(page, "0x%06x\n", subsys->ieee_oui);
}
static ssize_t nvmet_subsys_attr_ieee_oui_store_locked(struct nvmet_subsys *subsys,
const char *page, size_t count)
{
uint32_t val = 0;
int ret;
if (subsys->subsys_discovered) {
pr_err("Can't set IEEE OUI. 0x%06x is already assigned\n",
subsys->ieee_oui);
return -EINVAL;
}
ret = kstrtou32(page, 0, &val);
if (ret < 0)
return ret;
if (val >= 0x1000000)
return -EINVAL;
subsys->ieee_oui = val;
return count;
}
static ssize_t nvmet_subsys_attr_ieee_oui_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_subsys *subsys = to_subsys(item);
ssize_t ret;
down_write(&nvmet_config_sem);
mutex_lock(&subsys->lock);
ret = nvmet_subsys_attr_ieee_oui_store_locked(subsys, page, count);
mutex_unlock(&subsys->lock);
up_write(&nvmet_config_sem);
return ret;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_ieee_oui);
static ssize_t nvmet_subsys_attr_firmware_show(struct config_item *item,
char *page)
{
struct nvmet_subsys *subsys = to_subsys(item);
return sysfs_emit(page, "%s\n", subsys->firmware_rev);
}
static ssize_t nvmet_subsys_attr_firmware_store_locked(struct nvmet_subsys *subsys,
const char *page, size_t count)
{
int pos = 0, len;
char *val;
if (subsys->subsys_discovered) {
pr_err("Can't set firmware revision. %s is already assigned\n",
subsys->firmware_rev);
return -EINVAL;
}
len = strcspn(page, "\n");
if (!len)
return -EINVAL;
if (len > NVMET_FR_MAX_SIZE) {
pr_err("Firmware revision size can not exceed %d Bytes\n",
NVMET_FR_MAX_SIZE);
return -EINVAL;
}
for (pos = 0; pos < len; pos++) {
if (!nvmet_is_ascii(page[pos]))
return -EINVAL;
}
val = kmemdup_nul(page, len, GFP_KERNEL);
if (!val)
return -ENOMEM;
kfree(subsys->firmware_rev);
subsys->firmware_rev = val;
return count;
}
static ssize_t nvmet_subsys_attr_firmware_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_subsys *subsys = to_subsys(item);
ssize_t ret;
down_write(&nvmet_config_sem);
mutex_lock(&subsys->lock);
ret = nvmet_subsys_attr_firmware_store_locked(subsys, page, count);
mutex_unlock(&subsys->lock);
up_write(&nvmet_config_sem);
return ret;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_firmware);
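Both new attributes show up as files in the subsystem's configfs directory and, as the _store_locked helpers above enforce, can only be changed before a host has discovered the subsystem. A userspace usage sketch; the configfs mount point and the subsystem NQN below are assumptions about the local setup:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* write one nvmet subsystem attribute; returns 0 on success */
static int set_subsys_attr(const char *nqn, const char *attr, const char *val)
{
	char path[256];
	int fd, ok;

	snprintf(path, sizeof(path),
		 "/sys/kernel/config/nvmet/subsystems/%s/%s", nqn, attr);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	ok = write(fd, val, strlen(val)) == (ssize_t)strlen(val) ? 0 : -1;
	close(fd);
	return ok;
}

/* e.g.: set_subsys_attr("testnqn", "attr_ieee_oui", "0x123456");
 *       set_subsys_attr("testnqn", "attr_firmware", "fw-1.0");
 */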
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
char *page) char *page)
...@@ -1290,6 +1400,8 @@ static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item, ...@@ -1290,6 +1400,8 @@ static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item,
const char *page, size_t cnt) const char *page, size_t cnt)
{ {
struct nvmet_subsys *subsys = to_subsys(item);
struct nvmet_ctrl *ctrl;
u16 qid_max; u16 qid_max;
if (sscanf(page, "%hu\n", &qid_max) != 1) if (sscanf(page, "%hu\n", &qid_max) != 1)
...@@ -1299,8 +1411,13 @@ static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, ...@@ -1299,8 +1411,13 @@ static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item,
return -EINVAL; return -EINVAL;
down_write(&nvmet_config_sem); down_write(&nvmet_config_sem);
to_subsys(item)->max_qid = qid_max; subsys->max_qid = qid_max;
/* Force reconnect */
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
ctrl->ops->delete_ctrl(ctrl);
up_write(&nvmet_config_sem); up_write(&nvmet_config_sem);
return cnt; return cnt;
} }
CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max); CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max);
...@@ -1313,6 +1430,8 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { ...@@ -1313,6 +1430,8 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model, &nvmet_subsys_attr_attr_model,
&nvmet_subsys_attr_attr_qid_max, &nvmet_subsys_attr_attr_qid_max,
&nvmet_subsys_attr_attr_ieee_oui,
&nvmet_subsys_attr_attr_firmware,
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_subsys_attr_attr_pi_enable, &nvmet_subsys_attr_attr_pi_enable,
#endif #endif
......
...@@ -10,11 +10,14 @@ ...@@ -10,11 +10,14 @@
#include <linux/pci-p2pdma.h> #include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include <generated/utsrelease.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "trace.h" #include "trace.h"
#include "nvmet.h" #include "nvmet.h"
struct kmem_cache *nvmet_bvec_cache;
struct workqueue_struct *buffered_io_wq; struct workqueue_struct *buffered_io_wq;
struct workqueue_struct *zbd_wq; struct workqueue_struct *zbd_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
...@@ -695,11 +698,10 @@ static void nvmet_update_sq_head(struct nvmet_req *req) ...@@ -695,11 +698,10 @@ static void nvmet_update_sq_head(struct nvmet_req *req)
if (req->sq->size) { if (req->sq->size) {
u32 old_sqhd, new_sqhd; u32 old_sqhd, new_sqhd;
old_sqhd = READ_ONCE(req->sq->sqhd);
do { do {
old_sqhd = req->sq->sqhd;
new_sqhd = (old_sqhd + 1) % req->sq->size; new_sqhd = (old_sqhd + 1) % req->sq->size;
} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != } while (!try_cmpxchg(&req->sq->sqhd, &old_sqhd, new_sqhd));
old_sqhd);
} }
req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
} }
...@@ -1561,6 +1563,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, ...@@ -1561,6 +1563,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
goto free_subsys; goto free_subsys;
} }
subsys->ieee_oui = 0;
subsys->firmware_rev = kstrndup(UTS_RELEASE, NVMET_FR_MAX_SIZE, GFP_KERNEL);
if (!subsys->firmware_rev) {
ret = -ENOMEM;
goto free_mn;
}
switch (type) { switch (type) {
case NVME_NQN_NVME: case NVME_NQN_NVME:
subsys->max_qid = NVMET_NR_QUEUES; subsys->max_qid = NVMET_NR_QUEUES;
...@@ -1572,14 +1582,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, ...@@ -1572,14 +1582,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
default: default:
pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
ret = -EINVAL; ret = -EINVAL;
goto free_mn; goto free_fr;
} }
subsys->type = type; subsys->type = type;
subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
GFP_KERNEL); GFP_KERNEL);
if (!subsys->subsysnqn) { if (!subsys->subsysnqn) {
ret = -ENOMEM; ret = -ENOMEM;
goto free_mn; goto free_fr;
} }
subsys->cntlid_min = NVME_CNTLID_MIN; subsys->cntlid_min = NVME_CNTLID_MIN;
subsys->cntlid_max = NVME_CNTLID_MAX; subsys->cntlid_max = NVME_CNTLID_MAX;
...@@ -1592,6 +1602,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, ...@@ -1592,6 +1602,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
return subsys; return subsys;
free_fr:
kfree(subsys->firmware_rev);
free_mn: free_mn:
kfree(subsys->model_number); kfree(subsys->model_number);
free_subsys: free_subsys:
...@@ -1611,6 +1623,7 @@ static void nvmet_subsys_free(struct kref *ref) ...@@ -1611,6 +1623,7 @@ static void nvmet_subsys_free(struct kref *ref)
kfree(subsys->subsysnqn); kfree(subsys->subsysnqn);
kfree(subsys->model_number); kfree(subsys->model_number);
kfree(subsys->firmware_rev);
kfree(subsys); kfree(subsys);
} }
...@@ -1631,26 +1644,28 @@ void nvmet_subsys_put(struct nvmet_subsys *subsys) ...@@ -1631,26 +1644,28 @@ void nvmet_subsys_put(struct nvmet_subsys *subsys)
static int __init nvmet_init(void) static int __init nvmet_init(void)
{ {
int error; int error = -ENOMEM;
nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
nvmet_bvec_cache = kmem_cache_create("nvmet-bvec",
NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!nvmet_bvec_cache)
return -ENOMEM;
zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0); zbd_wq = alloc_workqueue("nvmet-zbd-wq", WQ_MEM_RECLAIM, 0);
if (!zbd_wq) if (!zbd_wq)
return -ENOMEM; goto out_destroy_bvec_cache;
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
WQ_MEM_RECLAIM, 0); WQ_MEM_RECLAIM, 0);
if (!buffered_io_wq) { if (!buffered_io_wq)
error = -ENOMEM;
goto out_free_zbd_work_queue; goto out_free_zbd_work_queue;
}
nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0);
if (!nvmet_wq) { if (!nvmet_wq)
error = -ENOMEM;
goto out_free_buffered_work_queue; goto out_free_buffered_work_queue;
}
error = nvmet_init_discovery(); error = nvmet_init_discovery();
if (error) if (error)
...@@ -1669,6 +1684,8 @@ static int __init nvmet_init(void) ...@@ -1669,6 +1684,8 @@ static int __init nvmet_init(void)
destroy_workqueue(buffered_io_wq); destroy_workqueue(buffered_io_wq);
out_free_zbd_work_queue: out_free_zbd_work_queue:
destroy_workqueue(zbd_wq); destroy_workqueue(zbd_wq);
out_destroy_bvec_cache:
kmem_cache_destroy(nvmet_bvec_cache);
return error; return error;
} }
...@@ -1680,6 +1697,7 @@ static void __exit nvmet_exit(void) ...@@ -1680,6 +1697,7 @@ static void __exit nvmet_exit(void)
destroy_workqueue(nvmet_wq); destroy_workqueue(nvmet_wq);
destroy_workqueue(buffered_io_wq); destroy_workqueue(buffered_io_wq);
destroy_workqueue(zbd_wq); destroy_workqueue(zbd_wq);
kmem_cache_destroy(nvmet_bvec_cache);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
......
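Side note on the nvmet_update_sq_head() hunk above: unlike cmpxchg(), try_cmpxchg() writes the current value back into its "old" argument when the exchange fails, so the loop only reads sqhd once before entering and drops the explicit re-read. A minimal userspace sketch of the same retry pattern using C11 atomics (sq_head, sq_size and advance_sq_head are illustrative names, not taken from the kernel source); atomic_compare_exchange_weak() has the same update-on-failure semantics as try_cmpxchg():

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins; not the kernel's struct nvmet_sq. */
static _Atomic uint32_t sq_head;
static uint32_t sq_size = 128;

/* Advance the submission queue head with a compare-exchange loop.
 * On failure, atomic_compare_exchange_weak() reloads 'old' with the
 * current value, so no separate re-read is needed inside the loop. */
static uint32_t advance_sq_head(void)
{
	uint32_t old = atomic_load(&sq_head);
	uint32_t new;

	do {
		new = (old + 1) % sq_size;
	} while (!atomic_compare_exchange_weak(&sq_head, &old, new));

	return new;
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		printf("sq_head = %u\n", advance_sq_head());
	return 0;
}
```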
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
#include <linux/fs.h> #include <linux/fs.h>
#include "nvmet.h" #include "nvmet.h"
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16 #define NVMET_MIN_MPOOL_OBJ 16
void nvmet_file_ns_revalidate(struct nvmet_ns *ns) void nvmet_file_ns_revalidate(struct nvmet_ns *ns)
...@@ -26,8 +25,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) ...@@ -26,8 +25,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
flush_workqueue(buffered_io_wq); flush_workqueue(buffered_io_wq);
mempool_destroy(ns->bvec_pool); mempool_destroy(ns->bvec_pool);
ns->bvec_pool = NULL; ns->bvec_pool = NULL;
kmem_cache_destroy(ns->bvec_cache);
ns->bvec_cache = NULL;
fput(ns->file); fput(ns->file);
ns->file = NULL; ns->file = NULL;
} }
...@@ -59,16 +56,8 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ...@@ -59,16 +56,8 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
ns->blksize_shift = min_t(u8, ns->blksize_shift = min_t(u8,
file_inode(ns->file)->i_blkbits, 12); file_inode(ns->file)->i_blkbits, 12);
ns->bvec_cache = kmem_cache_create("nvmet-bvec",
NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!ns->bvec_cache) {
ret = -ENOMEM;
goto err;
}
ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab, ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
mempool_free_slab, ns->bvec_cache); mempool_free_slab, nvmet_bvec_cache);
if (!ns->bvec_pool) { if (!ns->bvec_pool) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -77,9 +66,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ...@@ -77,9 +66,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return ret; return ret;
err: err:
fput(ns->file);
ns->file = NULL;
ns->size = 0; ns->size = 0;
ns->blksize_shift = 0; ns->blksize_shift = 0;
nvmet_file_ns_disable(ns);
return ret; return ret;
} }
......
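The io-cmd-file.c hunk above replaces the per-namespace bvec kmem_cache with the module-wide nvmet_bvec_cache created in nvmet_init(); each namespace now only builds its mempool on top of that shared cache, and the enable error path simply reuses nvmet_file_ns_disable(). A rough, self-contained sketch of the shared-cache-plus-mempool layering in a stub module (example_* names and sizes are placeholders, not the nvmet symbols):

```c
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/bio.h>

#define EXAMPLE_MPOOL_BVEC	16	/* analogous to NVMET_MAX_MPOOL_BVEC */
#define EXAMPLE_MIN_MPOOL_OBJ	16

static struct kmem_cache *example_bvec_cache;
static mempool_t *example_bvec_pool;

static int __init example_init(void)
{
	/* One module-wide slab cache for fixed-size bvec arrays. */
	example_bvec_cache = kmem_cache_create("example-bvec",
			EXAMPLE_MPOOL_BVEC * sizeof(struct bio_vec), 0,
			SLAB_HWCACHE_ALIGN, NULL);
	if (!example_bvec_cache)
		return -ENOMEM;

	/* Per-user mempools are layered on the shared cache. */
	example_bvec_pool = mempool_create(EXAMPLE_MIN_MPOOL_OBJ,
			mempool_alloc_slab, mempool_free_slab,
			example_bvec_cache);
	if (!example_bvec_pool) {
		kmem_cache_destroy(example_bvec_cache);
		return -ENOMEM;
	}
	return 0;
}

static void __exit example_exit(void)
{
	mempool_destroy(example_bvec_pool);
	kmem_cache_destroy(example_bvec_cache);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
```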
...@@ -375,9 +375,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ...@@ -375,9 +375,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_hw_sectors =
(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
nvme_start_admin_queue(&ctrl->ctrl); nvme_unquiesce_admin_queue(&ctrl->ctrl);
error = nvme_init_ctrl_finish(&ctrl->ctrl); error = nvme_init_ctrl_finish(&ctrl->ctrl, false);
if (error) if (error)
goto out_cleanup_tagset; goto out_cleanup_tagset;
...@@ -394,12 +394,12 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ...@@ -394,12 +394,12 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
{ {
if (ctrl->ctrl.queue_count > 1) { if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl); nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_cancel_tagset(&ctrl->ctrl); nvme_cancel_tagset(&ctrl->ctrl);
nvme_loop_destroy_io_queues(ctrl); nvme_loop_destroy_io_queues(ctrl);
} }
nvme_stop_admin_queue(&ctrl->ctrl); nvme_quiesce_admin_queue(&ctrl->ctrl);
if (ctrl->ctrl.state == NVME_CTRL_LIVE) if (ctrl->ctrl.state == NVME_CTRL_LIVE)
nvme_shutdown_ctrl(&ctrl->ctrl); nvme_shutdown_ctrl(&ctrl->ctrl);
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define NVMET_DEFAULT_CTRL_MODEL "Linux" #define NVMET_DEFAULT_CTRL_MODEL "Linux"
#define NVMET_MN_MAX_SIZE 40 #define NVMET_MN_MAX_SIZE 40
#define NVMET_SN_MAX_SIZE 20 #define NVMET_SN_MAX_SIZE 20
#define NVMET_FR_MAX_SIZE 8
/* /*
* Supported optional AENs: * Supported optional AENs:
...@@ -77,7 +78,6 @@ struct nvmet_ns { ...@@ -77,7 +78,6 @@ struct nvmet_ns {
struct completion disable_done; struct completion disable_done;
mempool_t *bvec_pool; mempool_t *bvec_pool;
struct kmem_cache *bvec_cache;
int use_p2pmem; int use_p2pmem;
struct pci_dev *p2p_dev; struct pci_dev *p2p_dev;
...@@ -264,6 +264,8 @@ struct nvmet_subsys { ...@@ -264,6 +264,8 @@ struct nvmet_subsys {
struct config_group allowed_hosts_group; struct config_group allowed_hosts_group;
char *model_number; char *model_number;
u32 ieee_oui;
char *firmware_rev;
#ifdef CONFIG_NVME_TARGET_PASSTHRU #ifdef CONFIG_NVME_TARGET_PASSTHRU
struct nvme_ctrl *passthru_ctrl; struct nvme_ctrl *passthru_ctrl;
...@@ -393,6 +395,8 @@ struct nvmet_req { ...@@ -393,6 +395,8 @@ struct nvmet_req {
u64 error_slba; u64 error_slba;
}; };
#define NVMET_MAX_MPOOL_BVEC 16
extern struct kmem_cache *nvmet_bvec_cache;
extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *buffered_io_wq;
extern struct workqueue_struct *zbd_wq; extern struct workqueue_struct *zbd_wq;
extern struct workqueue_struct *nvmet_wq; extern struct workqueue_struct *nvmet_wq;
......
...@@ -797,6 +797,7 @@ enum nvme_opcode { ...@@ -797,6 +797,7 @@ enum nvme_opcode {
nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d, nvme_cmd_zone_append = 0x7d,
nvme_cmd_vendor_start = 0x80,
}; };
#define nvme_opcode_name(opcode) { opcode, #opcode } #define nvme_opcode_name(opcode) { opcode, #opcode }
...@@ -963,6 +964,7 @@ enum { ...@@ -963,6 +964,7 @@ enum {
NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_PRINFO_PRACT = 1 << 13,
NVME_RW_DTYPE_STREAMS = 1 << 4, NVME_RW_DTYPE_STREAMS = 1 << 4,
NVME_WZ_DEAC = 1 << 9,
}; };
struct nvme_dsm_cmd { struct nvme_dsm_cmd {
......
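The nvme.h hunk above adds NVME_WZ_DEAC, the Deallocate bit in the Write Zeroes command's control word, which lets the host ask the device to deallocate the zeroed range as well. A minimal, hedged sketch of filling in such a command (the helper name and parameters are illustrative; the actual setup lives in the driver's write-zeroes path):

```c
#include <linux/nvme.h>
#include <linux/string.h>

/* Illustrative only: build a Write Zeroes command with DEAC set,
 * assuming the caller wants the zeroed LBAs deallocated too. */
static void example_setup_write_zeroes(struct nvme_command *cmnd,
				       u32 nsid, u64 slba, u16 nlb)
{
	memset(cmnd, 0, sizeof(*cmnd));
	cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
	cmnd->write_zeroes.nsid = cpu_to_le32(nsid);
	cmnd->write_zeroes.slba = cpu_to_le64(slba);
	cmnd->write_zeroes.length = cpu_to_le16(nlb - 1); /* 0's based */
	/* DEAC: request deallocation of the zeroed range. */
	cmnd->write_zeroes.control = cpu_to_le16(NVME_WZ_DEAC);
}
```

The same hunk also reserves nvme_cmd_vendor_start = 0x80, marking where vendor-specific I/O opcodes begin in the opcode space.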