// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe admin command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/rculist.h>
#include <linux/part_stat.h>

#include <generated/utsrelease.h>
#include <asm/unaligned.h>
#include "nvmet.h"

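/*
 * The Get Log Page length is carried as NUMD, a zero-based dword count
 * split across NUMDU (upper 16 bits) and NUMDL (lower 16 bits).  For
 * example NUMDU = 0, NUMDL = 0x3ff describes 0x400 dwords, i.e. a 4096
 * byte transfer.
 */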
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
	u32 len = le16_to_cpu(cmd->get_log_page.numdu);

	len <<= 16;
	len += le16_to_cpu(cmd->get_log_page.numdl);
	/* NUMD is a 0's based value */
	len += 1;
	len *= sizeof(u32);

	return len;
}

static u32 nvmet_feat_data_len(struct nvmet_req *req, u32 cdw10)
{
	switch (cdw10 & 0xff) {
	case NVME_FEAT_HOST_ID:
		return sizeof(req->sq->ctrl->hostid);
	default:
		return 0;
	}
}

u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
{
	return le64_to_cpu(cmd->get_log_page.lpo);
}

static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
{
	nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->transfer_len));
}

static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	unsigned long flags;
	off_t offset = 0;
	u64 slot;
	u64 i;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS;

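	/*
	 * Return the whole ring of error log entries, newest first: start
	 * at the slot indexed by err_counter and walk backwards, wrapping
	 * around to the last slot after slot 0.
	 */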
	for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) {
		if (nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot],
				sizeof(struct nvme_error_slot)))
			break;

		if (slot == 0)
			slot = NVMET_ERROR_LOG_SLOTS - 1;
		else
			slot--;
		offset += sizeof(struct nvme_error_slot);
	}
	spin_unlock_irqrestore(&ctrl->error_lock, flags);
	nvmet_req_complete(req, 0);
}

static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
		struct nvme_smart_log *slog)
{
	u64 host_reads, host_writes, data_units_read, data_units_written;
	u16 status;

	status = nvmet_req_find_ns(req);
	if (status)
		return status;

	/* we don't have the right data for file backed ns */
	if (!req->ns->bdev)
		return NVME_SC_SUCCESS;

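	/*
	 * The SMART log reports data units in thousands of 512-byte units;
	 * part_stat sector counts are already in 512-byte sectors, so
	 * divide by 1000 (rounding up) to convert.
	 */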
	host_reads = part_stat_read(req->ns->bdev, ios[READ]);
	data_units_read =
		DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[READ]), 1000);
	host_writes = part_stat_read(req->ns->bdev, ios[WRITE]);
	data_units_written =
		DIV_ROUND_UP(part_stat_read(req->ns->bdev, sectors[WRITE]), 1000);

	put_unaligned_le64(host_reads, &slog->host_reads[0]);
	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
	put_unaligned_le64(host_writes, &slog->host_writes[0]);
	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);

	return NVME_SC_SUCCESS;
}

static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
		struct nvme_smart_log *slog)
{
	u64 host_reads = 0, host_writes = 0;
	u64 data_units_read = 0, data_units_written = 0;
	struct nvmet_ns *ns;
	struct nvmet_ctrl *ctrl;
	unsigned long idx;

	ctrl = req->sq->ctrl;
	xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
		/* we don't have the right data for file backed ns */
		if (!ns->bdev)
			continue;
		host_reads += part_stat_read(ns->bdev, ios[READ]);
		data_units_read += DIV_ROUND_UP(
			part_stat_read(ns->bdev, sectors[READ]), 1000);
		host_writes += part_stat_read(ns->bdev, ios[WRITE]);
		data_units_written += DIV_ROUND_UP(
			part_stat_read(ns->bdev, sectors[WRITE]), 1000);
	}

	put_unaligned_le64(host_reads, &slog->host_reads[0]);
	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
	put_unaligned_le64(host_writes, &slog->host_writes[0]);
	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);

	return NVME_SC_SUCCESS;
}

static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
{
	struct nvme_smart_log *log;
	u16 status = NVME_SC_INTERNAL;
	unsigned long flags;

	if (req->transfer_len != sizeof(*log))
		goto out;

	log = kzalloc(sizeof(*log), GFP_KERNEL);
	if (!log)
		goto out;

	if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
		status = nvmet_get_smart_log_all(req, log);
	else
		status = nvmet_get_smart_log_nsid(req, log);
	if (status)
		goto out_free_log;

	spin_lock_irqsave(&req->sq->ctrl->error_lock, flags);
	put_unaligned_le64(req->sq->ctrl->err_counter,
			&log->num_err_log_entries);
	spin_unlock_irqrestore(&req->sq->ctrl->error_lock, flags);

	status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
out_free_log:
	kfree(log);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
{
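	/* bit 0 (CSUPP) of each effects entry: the command is supported */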
	log->acs[nvme_admin_get_log_page]	= cpu_to_le32(1 << 0);
	log->acs[nvme_admin_identify]		= cpu_to_le32(1 << 0);
	log->acs[nvme_admin_abort_cmd]		= cpu_to_le32(1 << 0);
	log->acs[nvme_admin_set_features]	= cpu_to_le32(1 << 0);
	log->acs[nvme_admin_get_features]	= cpu_to_le32(1 << 0);
	log->acs[nvme_admin_async_event]	= cpu_to_le32(1 << 0);
	log->acs[nvme_admin_keep_alive]		= cpu_to_le32(1 << 0);

	log->iocs[nvme_cmd_read]		= cpu_to_le32(1 << 0);
	log->iocs[nvme_cmd_write]		= cpu_to_le32(1 << 0);
	log->iocs[nvme_cmd_flush]		= cpu_to_le32(1 << 0);
	log->iocs[nvme_cmd_dsm]			= cpu_to_le32(1 << 0);
	log->iocs[nvme_cmd_write_zeroes]	= cpu_to_le32(1 << 0);
}

static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log)
{
	log->iocs[nvme_cmd_zone_append]		= cpu_to_le32(1 << 0);
	log->iocs[nvme_cmd_zone_mgmt_send]	= cpu_to_le32(1 << 0);
	log->iocs[nvme_cmd_zone_mgmt_recv]	= cpu_to_le32(1 << 0);
}

static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
{
	struct nvme_effects_log *log;
	u16 status = NVME_SC_SUCCESS;

	log = kzalloc(sizeof(*log), GFP_KERNEL);
	if (!log) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	switch (req->cmd->get_log_page.csi) {
	case NVME_CSI_NVM:
		nvmet_get_cmd_effects_nvm(log);
		break;
	case NVME_CSI_ZNS:
		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
			status = NVME_SC_INVALID_IO_CMD_SET;
			goto free;
		}
		nvmet_get_cmd_effects_nvm(log);
		nvmet_get_cmd_effects_zns(log);
		break;
	default:
		status = NVME_SC_INVALID_LOG_PAGE;
		goto free;
	}

	status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
free:
	kfree(log);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	u16 status = NVME_SC_INTERNAL;
	size_t len;

	if (req->transfer_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
		goto out;

	mutex_lock(&ctrl->lock);
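	/*
	 * nr_changed_ns is set to U32_MAX once the changed namespace list
	 * has overflowed; in that case only a single entry (0xffffffff) is
	 * returned, telling the host to rescan all of its namespaces.
	 */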
	if (ctrl->nr_changed_ns == U32_MAX)
		len = sizeof(__le32);
	else
		len = ctrl->nr_changed_ns * sizeof(__le32);
	status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
	if (!status)
		status = nvmet_zero_sgl(req, len, req->transfer_len - len);
	ctrl->nr_changed_ns = 0;
	nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
	mutex_unlock(&ctrl->lock);
out:
	nvmet_req_complete(req, status);
}

static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
		struct nvme_ana_group_desc *desc)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_ns *ns;
	unsigned long idx;
	u32 count = 0;

	if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
		xa_for_each(&ctrl->subsys->namespaces, idx, ns)
			if (ns->anagrpid == grpid)
				desc->nsids[count++] = cpu_to_le32(ns->nsid);
	}

	desc->grpid = cpu_to_le32(grpid);
	desc->nnsids = cpu_to_le32(count);
	desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
	desc->state = req->port->ana_state[grpid];
	memset(desc->rsvd17, 0, sizeof(desc->rsvd17));
	return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32);
}

static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
{
	struct nvme_ana_rsp_hdr hdr = { 0, };
	struct nvme_ana_group_desc *desc;
	size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */
	size_t len;
	u32 grpid;
	u16 ngrps = 0;
	u16 status;

	status = NVME_SC_INTERNAL;
	desc = kmalloc(sizeof(struct nvme_ana_group_desc) +
			NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL);
	if (!desc)
		goto out;

	down_read(&nvmet_ana_sem);
	for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
		if (!nvmet_ana_group_enabled[grpid])
			continue;
		len = nvmet_format_ana_group(req, grpid, desc);
		status = nvmet_copy_to_sgl(req, offset, desc, len);
		if (status)
			break;
		offset += len;
		ngrps++;
	}
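	/*
	 * If the descriptor copy above stopped early, keep counting the
	 * remaining enabled groups so that ngrps in the header still
	 * reflects every enabled ANA group.
	 */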
	for ( ; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
		if (nvmet_ana_group_enabled[grpid])
			ngrps++;
	}

	hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
	hdr.ngrps = cpu_to_le16(ngrps);
	nvmet_clear_aen_bit(req, NVME_AEN_BIT_ANA_CHANGE);
	up_read(&nvmet_ana_sem);

	kfree(desc);

	/* copy the header last once we know the number of groups */
	status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr));
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
	if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
		return;

	switch (req->cmd->get_log_page.lid) {
	case NVME_LOG_ERROR:
		return nvmet_execute_get_log_page_error(req);
	case NVME_LOG_SMART:
		return nvmet_execute_get_log_page_smart(req);
	case NVME_LOG_FW_SLOT:
		/*
		 * We only support a single firmware slot which always is
		 * active, so we can zero out the whole firmware slot log and
		 * still claim to fully implement this mandatory log page.
		 */
		return nvmet_execute_get_log_page_noop(req);
	case NVME_LOG_CHANGED_NS:
		return nvmet_execute_get_log_changed_ns(req);
	case NVME_LOG_CMD_EFFECTS:
		return nvmet_execute_get_log_cmd_effects_ns(req);
	case NVME_LOG_ANA:
		return nvmet_execute_get_log_page_ana(req);
	}
	pr_debug("unhandled lid %d on qid %d\n",
	       req->cmd->get_log_page.lid, req->sq->qid);
	req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
}

static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_subsys *subsys = ctrl->subsys;
	struct nvme_id_ctrl *id;
	u32 cmd_capsule_size;
	u16 status = 0;

	if (!subsys->subsys_discovered) {
		mutex_lock(&subsys->lock);
		subsys->subsys_discovered = true;
		mutex_unlock(&subsys->lock);
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	/* XXX: figure out how to assign real vendor IDs. */
	id->vid = 0;
	id->ssvid = 0;

	memcpy(id->sn, ctrl->subsys->serial, NVMET_SN_MAX_SIZE);
	memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
		       strlen(subsys->model_number), ' ');
	memcpy_and_pad(id->fr, sizeof(id->fr),
		       UTS_RELEASE, strlen(UTS_RELEASE), ' ');

	id->rab = 6;

	/*
	 * XXX: figure out how we can assign an IEEE OUI, but until then
	 * the safest is to leave it as zeroes.
	 */

	/* we support multiple ports, multiple hosts and ANA: */
	id->cmic = (1 << 0) | (1 << 1) | (1 << 3);

	/* Limit MDTS according to transport capability */
	if (ctrl->ops->get_mdts)
		id->mdts = ctrl->ops->get_mdts(ctrl);
	else
		id->mdts = 0;

	id->cntlid = cpu_to_le16(ctrl->cntlid);
	id->ver = cpu_to_le32(ctrl->subsys->ver);

	/* XXX: figure out what to do about RTD3R/RTD3 */
	id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
	id->ctratt = cpu_to_le32(NVME_CTRL_ATTR_HID_128_BIT |
		NVME_CTRL_ATTR_TBKAS);

	id->oacs = 0;

	/*
	 * We don't really have a practical limit on the number of abort
	 * commands.  But we don't do anything useful for abort either, so
	 * no point in allowing more abort commands than the spec requires.
	 */
	id->acl = 3;

	id->aerl = NVMET_ASYNC_EVENTS - 1;

	/* first slot is read-only, only one slot supported */
	id->frmw = (1 << 0) | (1 << 1);
	id->lpa = (1 << 0) | (1 << 1) | (1 << 2);
	id->elpe = NVMET_ERROR_LOG_SLOTS - 1;
	id->npss = 0;

	/* We support keep-alive timeout in granularity of seconds */
	id->kas = cpu_to_le16(NVMET_KAS);

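	/*
	 * SQES/CQES encode the maximum (upper nibble) and required (lower
	 * nibble) entry sizes as powers of two: 0x66 allows only 64-byte
	 * SQEs, 0x44 only 16-byte CQEs.
	 */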
	id->sqes = (0x6 << 4) | 0x6;
	id->cqes = (0x4 << 4) | 0x4;

	/* no enforcement soft-limit for maxcmd - pick arbitrary high value */
	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);

	id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
			NVME_CTRL_ONCS_WRITE_ZEROES);

	/* XXX: don't report vwc if the underlying device is write through */
	id->vwc = NVME_CTRL_VWC_PRESENT;

	/*
	 * We can't support atomic writes bigger than an LBA without support
	 * from the backend device.
	 */
	id->awun = 0;
	id->awupf = 0;

	id->sgls = cpu_to_le32(1 << 0);	/* we always support SGLs */
	if (ctrl->ops->flags & NVMF_KEYED_SGLS)
		id->sgls |= cpu_to_le32(1 << 2);
	if (req->port->inline_data_size)
		id->sgls |= cpu_to_le32(1 << 20);

	strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));

	/*
	 * Max command capsule size is sqe + in-capsule data size.
	 * Disable in-capsule data for Metadata capable controllers.
	 */
	cmd_capsule_size = sizeof(struct nvme_command);
	if (!ctrl->pi_support)
		cmd_capsule_size += req->port->inline_data_size;
	id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);

	/* Max response capsule size is cqe */
	id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);

	id->msdbd = ctrl->ops->msdbd;

	id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
	id->anatt = 10; /* random value */
	id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
	id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS);

	/*
	 * Meh, we don't really support any power state.  Fake up the same
	 * values that qemu does.
	 */
	id->psd[0].max_power = cpu_to_le16(0x9c4);
	id->psd[0].entry_lat = cpu_to_le32(0x10);
	id->psd[0].exit_lat = cpu_to_le32(0x4);

	id->nwpc = 1 << 0; /* write protect and no write protect */

	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

	kfree(id);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
	struct nvme_id_ns *id;
	u16 status;

	if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) {
		req->error_loc = offsetof(struct nvme_identify, nsid);
		status = NVME_SC_INVALID_NS | NVME_SC_DNR;
		goto out;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	/* return an all zeroed buffer if we can't find an active namespace */
	status = nvmet_req_find_ns(req);
	if (status) {
		status = 0;
		goto done;
	}

	nvmet_ns_revalidate(req->ns);

	/*
	 * nuse = ncap = nsze isn't always true, but we have no way to find
	 * that out from the underlying device.
	 */
	id->ncap = id->nsze =
		cpu_to_le64(req->ns->size >> req->ns->blksize_shift);
	switch (req->port->ana_state[req->ns->anagrpid]) {
	case NVME_ANA_INACCESSIBLE:
	case NVME_ANA_PERSISTENT_LOSS:
		break;
	default:
		id->nuse = id->nsze;
		break;
	}

	if (req->ns->bdev)
		nvmet_bdev_set_limits(req->ns->bdev, id);

	/*
	 * We just provide a single LBA format that matches what the
	 * underlying device reports.
	 */
	id->nlbaf = 0;
	id->flbas = 0;

	/*
	 * Our namespace might always be shared.  Not just with other
	 * controllers, but also with any other user of the block device.
	 */
	id->nmic = (1 << 0);
	id->anagrpid = cpu_to_le32(req->ns->anagrpid);

	memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));

	id->lbaf[0].ds = req->ns->blksize_shift;

	if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) {
		id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
			  NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
			  NVME_NS_DPC_PI_TYPE3;
		id->mc = NVME_MC_EXTENDED_LBA;
		id->dps = req->ns->pi_type;
		id->flbas = NVME_NS_FLBAS_META_EXT;
		id->lbaf[0].ms = cpu_to_le16(req->ns->metadata_size);
	}

	if (req->ns->readonly)
		id->nsattr |= (1 << 0);
done:
	if (!status)
		status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));

	kfree(id);
out:
	nvmet_req_complete(req, status);
}

static void nvmet_execute_identify_nslist(struct nvmet_req *req)
{
	static const int buf_size = NVME_IDENTIFY_DATA_SIZE;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_ns *ns;
	unsigned long idx;
	u32 min_nsid = le32_to_cpu(req->cmd->identify.nsid);
	__le32 *list;
	u16 status = 0;
	int i = 0;

	list = kzalloc(buf_size, GFP_KERNEL);
	if (!list) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

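	/*
	 * Build the active namespace ID list: up to 1024 NSIDs greater than
	 * the NSID given in the command, in ascending order (the xarray is
	 * iterated in index, i.e. NSID, order).
	 */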
	xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
		if (ns->nsid <= min_nsid)
			continue;
		list[i++] = cpu_to_le32(ns->nsid);
		if (i == buf_size / sizeof(__le32))
			break;
	}

	status = nvmet_copy_to_sgl(req, 0, list, buf_size);

	kfree(list);
out:
	nvmet_req_complete(req, status);
}

static u16 nvmet_copy_ns_identifier(struct nvmet_req *req, u8 type, u8 len,
				    void *id, off_t *off)
{
	struct nvme_ns_id_desc desc = {
		.nidt = type,
		.nidl = len,
	};
	u16 status;

	status = nvmet_copy_to_sgl(req, *off, &desc, sizeof(desc));
	if (status)
		return status;
	*off += sizeof(desc);

	status = nvmet_copy_to_sgl(req, *off, id, len);
	if (status)
		return status;
	*off += len;

	return 0;
}

static void nvmet_execute_identify_desclist(struct nvmet_req *req)
{
	off_t off = 0;
	u16 status;

	status = nvmet_req_find_ns(req);
	if (status)
		goto out;

	if (memchr_inv(&req->ns->uuid, 0, sizeof(req->ns->uuid))) {
		status = nvmet_copy_ns_identifier(req, NVME_NIDT_UUID,
						  NVME_NIDT_UUID_LEN,
						  &req->ns->uuid, &off);
		if (status)
			goto out;
	}
	if (memchr_inv(req->ns->nguid, 0, sizeof(req->ns->nguid))) {
		status = nvmet_copy_ns_identifier(req, NVME_NIDT_NGUID,
						  NVME_NIDT_NGUID_LEN,
						  &req->ns->nguid, &off);
		if (status)
			goto out;
	}

	status = nvmet_copy_ns_identifier(req, NVME_NIDT_CSI,
					  NVME_NIDT_CSI_LEN,
					  &req->ns->csi, &off);
	if (status)
		goto out;

	if (sg_zero_buffer(req->sg, req->sg_cnt, NVME_IDENTIFY_DATA_SIZE - off,
			off) != NVME_IDENTIFY_DATA_SIZE - off)
		status = NVME_SC_INTERNAL | NVME_SC_DNR;

out:
	nvmet_req_complete(req, status);
}

static bool nvmet_handle_identify_desclist(struct nvmet_req *req)
{
	switch (req->cmd->identify.csi) {
	case NVME_CSI_NVM:
		nvmet_execute_identify_desclist(req);
		return true;
	case NVME_CSI_ZNS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
			nvmet_execute_identify_desclist(req);
			return true;
		}
		return false;
	default:
		return false;
	}
}

static void nvmet_execute_identify(struct nvmet_req *req)
{
	if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
		return;

	switch (req->cmd->identify.cns) {
	case NVME_ID_CNS_NS:
		switch (req->cmd->identify.csi) {
		case NVME_CSI_NVM:
			return nvmet_execute_identify_ns(req);
		default:
			break;
		}
		break;
	case NVME_ID_CNS_CS_NS:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
			switch (req->cmd->identify.csi) {
			case NVME_CSI_ZNS:
				return nvmet_execute_identify_cns_cs_ns(req);
			default:
				break;
			}
		}
		break;
	case NVME_ID_CNS_CTRL:
		switch (req->cmd->identify.csi) {
		case NVME_CSI_NVM:
			return nvmet_execute_identify_ctrl(req);
		}
		break;
	case NVME_ID_CNS_CS_CTRL:
		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
			switch (req->cmd->identify.csi) {
			case NVME_CSI_ZNS:
				return nvmet_execute_identify_cns_cs_ctrl(req);
			default:
				break;
			}
		}
		break;
	case NVME_ID_CNS_NS_ACTIVE_LIST:
		switch (req->cmd->identify.csi) {
		case NVME_CSI_NVM:
			return nvmet_execute_identify_nslist(req);
		default:
			break;
		}
		break;
	case NVME_ID_CNS_NS_DESC_LIST:
		if (nvmet_handle_identify_desclist(req))
			return;
		break;
	}

	nvmet_req_cns_error_complete(req);
}

/*
 * A "minimum viable" abort implementation: the command is mandatory in the
 * spec, but we are not required to do any useful work.  We couldn't really
 * do a useful abort anyway, so don't even bother waiting for the command
 * to be executed; just return immediately, indicating that the command to
 * abort was not found.
 */
static void nvmet_execute_abort(struct nvmet_req *req)
{
	if (!nvmet_check_transfer_len(req, 0))
		return;
	nvmet_set_result(req, 1);
	nvmet_req_complete(req, 0);
}

static u16 nvmet_write_protect_flush_sync(struct nvmet_req *req)
{
	u16 status;

	if (req->ns->file)
		status = nvmet_file_flush(req);
	else
		status = nvmet_bdev_flush(req);

	if (status)
		pr_err("write protect flush failed nsid: %u\n", req->ns->nsid);
	return status;
}

static u16 nvmet_set_feat_write_protect(struct nvmet_req *req)
{
	u32 write_protect = le32_to_cpu(req->cmd->common.cdw11);
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
	u16 status;

	status = nvmet_req_find_ns(req);
	if (status)
		return status;

	mutex_lock(&subsys->lock);
	switch (write_protect) {
	case NVME_NS_WRITE_PROTECT:
		req->ns->readonly = true;
		status = nvmet_write_protect_flush_sync(req);
		if (status)
			req->ns->readonly = false;
		break;
	case NVME_NS_NO_WRITE_PROTECT:
		req->ns->readonly = false;
		status = 0;
		break;
	default:
		break;
	}

	if (!status)
		nvmet_ns_changed(subsys, req->ns->nsid);
	mutex_unlock(&subsys->lock);
	return status;
}

u16 nvmet_set_feat_kato(struct nvmet_req *req)
{
	u32 val32 = le32_to_cpu(req->cmd->common.cdw11);

	nvmet_stop_keep_alive_timer(req->sq->ctrl);
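	/* cdw11 carries KATO in milliseconds; ctrl->kato is kept in seconds */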
	req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
	nvmet_start_keep_alive_timer(req->sq->ctrl);

	nvmet_set_result(req, req->sq->ctrl->kato);

	return 0;
}

u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask)
{
	u32 val32 = le32_to_cpu(req->cmd->common.cdw11);

	if (val32 & ~mask) {
		req->error_loc = offsetof(struct nvme_common_command, cdw11);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
	nvmet_set_result(req, val32);

	return 0;
}

void nvmet_execute_set_features(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
	u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
	u16 status = 0;
	u16 nsqr;
	u16 ncqr;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	switch (cdw10 & 0xff) {
	case NVME_FEAT_NUM_QUEUES:
		ncqr = (cdw11 >> 16) & 0xffff;
		nsqr = cdw11 & 0xffff;
		if (ncqr == 0xffff || nsqr == 0xffff) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			break;
		}
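		/*
		 * Completion dword 0 returns the number of queues actually
		 * allocated, zero-based: submission queues in bits 15:0 and
		 * completion queues in bits 31:16.
		 */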
		nvmet_set_result(req,
			(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
		break;
	case NVME_FEAT_KATO:
		status = nvmet_set_feat_kato(req);
		break;
	case NVME_FEAT_ASYNC_EVENT:
		status = nvmet_set_feat_async_event(req, NVMET_AEN_CFG_ALL);
		break;
	case NVME_FEAT_HOST_ID:
		status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
		break;
	case NVME_FEAT_WRITE_PROTECT:
		status = nvmet_set_feat_write_protect(req);
		break;
	default:
		req->error_loc = offsetof(struct nvme_common_command, cdw10);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		break;
	}

	nvmet_req_complete(req, status);
}

static u16 nvmet_get_feat_write_protect(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
	u32 result;

	result = nvmet_req_find_ns(req);
	if (result)
		return result;

	mutex_lock(&subsys->lock);
	if (req->ns->readonly)
		result = NVME_NS_WRITE_PROTECT;
	else
		result = NVME_NS_NO_WRITE_PROTECT;
	nvmet_set_result(req, result);
	mutex_unlock(&subsys->lock);

	return 0;
}

void nvmet_get_feat_kato(struct nvmet_req *req)
{
	nvmet_set_result(req, req->sq->ctrl->kato * 1000);
}

void nvmet_get_feat_async_event(struct nvmet_req *req)
{
	nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
}

void nvmet_execute_get_features(struct nvmet_req *req)
{
	struct nvmet_subsys *subsys = nvmet_req_subsys(req);
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
	u16 status = 0;

	if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
		return;

	switch (cdw10 & 0xff) {
	/*
	 * These features are mandatory in the spec, but we don't
	 * have a useful way to implement them.  We'll eventually
	 * need to come up with some fake values for these.
	 */
#if 0
	case NVME_FEAT_ARBITRATION:
		break;
	case NVME_FEAT_POWER_MGMT:
		break;
	case NVME_FEAT_TEMP_THRESH:
		break;
	case NVME_FEAT_ERR_RECOVERY:
		break;
	case NVME_FEAT_IRQ_COALESCE:
		break;
	case NVME_FEAT_IRQ_CONFIG:
		break;
	case NVME_FEAT_WRITE_ATOMIC:
		break;
#endif
	case NVME_FEAT_ASYNC_EVENT:
		nvmet_get_feat_async_event(req);
		break;
	case NVME_FEAT_VOLATILE_WC:
		nvmet_set_result(req, 1);
		break;
	case NVME_FEAT_NUM_QUEUES:
		nvmet_set_result(req,
			(subsys->max_qid-1) | ((subsys->max_qid-1) << 16));
		break;
	case NVME_FEAT_KATO:
		nvmet_get_feat_kato(req);
		break;
	case NVME_FEAT_HOST_ID:
		/* need 128-bit host identifier flag */
		if (!(req->cmd->common.cdw11 & cpu_to_le32(1 << 0))) {
			req->error_loc =
				offsetof(struct nvme_common_command, cdw11);
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			break;
		}

		status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid,
				sizeof(req->sq->ctrl->hostid));
		break;
	case NVME_FEAT_WRITE_PROTECT:
		status = nvmet_get_feat_write_protect(req);
		break;
	default:
		req->error_loc =
			offsetof(struct nvme_common_command, cdw10);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		break;
	}

	nvmet_req_complete(req, status);
}

void nvmet_execute_async_event(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_ASYNC_LIMIT | NVME_SC_DNR);
		return;
	}
	ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req;
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

void nvmet_execute_keep_alive(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	u16 status = 0;

	if (!nvmet_check_transfer_len(req, 0))
		return;

	if (!ctrl->kato) {
		status = NVME_SC_KA_TIMEOUT_INVALID;
		goto out;
	}

	pr_debug("ctrl %d update keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
out:
	nvmet_req_complete(req, status);
}

u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	if (nvme_is_fabrics(cmd))
		return nvmet_parse_fabrics_cmd(req);
	if (nvmet_req_subsys(req)->type == NVME_NQN_DISC)
		return nvmet_parse_discovery_cmd(req);

	ret = nvmet_check_ctrl_status(req);
	if (unlikely(ret))
		return ret;

	if (nvmet_req_passthru_ctrl(req))
		return nvmet_parse_passthru_admin_cmd(req);

1021 1022
	switch (cmd->common.opcode) {
	case nvme_admin_get_log_page:
		req->execute = nvmet_execute_get_log_page;
		return 0;
	case nvme_admin_identify:
		req->execute = nvmet_execute_identify;
		return 0;
	case nvme_admin_abort_cmd:
		req->execute = nvmet_execute_abort;
		return 0;
	case nvme_admin_set_features:
		req->execute = nvmet_execute_set_features;
		return 0;
	case nvme_admin_get_features:
		req->execute = nvmet_execute_get_features;
		return 0;
	case nvme_admin_async_event:
		req->execute = nvmet_execute_async_event;
		return 0;
	case nvme_admin_keep_alive:
		req->execute = nvmet_execute_keep_alive;
		return 0;
	default:
		return nvmet_report_invalid_opcode(req);
	}
}