/*
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#ifndef _NVME_H
#define _NVME_H

#include <linux/nvme.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>

extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)

extern unsigned int admin_timeout;
#define ADMIN_TIMEOUT	(admin_timeout * HZ)

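/* Fabrics Keep Alive: default timeout and extra grace period, in seconds */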
#define NVME_DEFAULT_KATO	5
#define NVME_KATO_GRACE		10

extern struct workqueue_struct *nvme_wq;

enum {
	NVME_NS_LBA		= 0,
	NVME_NS_LIGHTNVM	= 1,
};

/*
 * List of workarounds for devices that require behavior not specified in
 * the standard.
 */
enum nvme_quirks {
	/*
	 * Prefers I/O aligned to a stripe size specified in a vendor
	 * specific Identify field.
	 */
	NVME_QUIRK_STRIPE_SIZE			= (1 << 0),

	/*
	 * The controller doesn't handle Identify values other than 0 or 1
	 * correctly.
	 */
	NVME_QUIRK_IDENTIFY_CNS			= (1 << 1),

	/*
	 * The controller deterministically returns zeroes on reads to
	 * logical blocks that deallocate was called on.
	 */
	NVME_QUIRK_DEALLOCATE_ZEROES		= (1 << 2),

	/*
	 * The controller needs a delay before it starts checking the device
	 * readiness, which is done by reading the NVME_CSTS_RDY bit.
	 */
	NVME_QUIRK_DELAY_BEFORE_CHK_RDY		= (1 << 3),

	/*
	 * APST should not be used.
	 */
	NVME_QUIRK_NO_APST			= (1 << 4),

	/*
	 * The deepest sleep state should not be used.
	 */
	NVME_QUIRK_NO_DEEPEST_PS		= (1 << 5),

	/*
	 * Supports the LightNVM command set if indicated in vs[1].
	 */
	NVME_QUIRK_LIGHTNVM			= (1 << 6),
};

/*
 * Common request structure for NVMe passthrough.  All drivers must have
 * this structure as the first member of their request-private data.
 */
struct nvme_request {
	struct nvme_command	*cmd;
	union nvme_result	result;
	u8			retries;
	u8			flags;
	u16			status;
};
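
/*
 * For example, a transport driver's per-request private data embeds
 * struct nvme_request as its first member so that nvme_req() below can
 * recover it from the blk-mq PDU (a hypothetical sketch; everything
 * after "req" is illustrative):
 *
 *	struct nvme_foo_request {
 *		struct nvme_request	req;
 *		struct sg_table		sg_table;
 *	};
 */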

/*
 * Mark a bio as coming in through the mpath node.
 */
#define REQ_NVME_MPATH		REQ_DRV

enum {
	NVME_REQ_CANCELLED		= (1 << 0),
};

static inline struct nvme_request *nvme_req(struct request *req)
{
	return blk_mq_rq_to_pdu(req);
}

/* The below value is the specific amount of delay needed before checking
 * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
 * found empirically.
 */
#define NVME_QUIRK_DELAY_AMOUNT		2300

enum nvme_ctrl_state {
	NVME_CTRL_NEW,
	NVME_CTRL_LIVE,
	NVME_CTRL_ADMIN_ONLY,    /* Only admin queue live */
	NVME_CTRL_RESETTING,
	NVME_CTRL_RECONNECTING,
	NVME_CTRL_DELETING,
	NVME_CTRL_DEAD,
};

struct nvme_ctrl {
	enum nvme_ctrl_state state;
	bool identified;
	spinlock_t lock;
	const struct nvme_ctrl_ops *ops;
	struct request_queue *admin_q;
	struct request_queue *connect_q;
	struct device *dev;
	int instance;
	struct blk_mq_tag_set *tagset;
	struct blk_mq_tag_set *admin_tagset;
	struct list_head namespaces;
	struct mutex namespaces_mutex;
	struct device ctrl_device;
	struct device *device;	/* char device */
	struct cdev cdev;
	struct work_struct reset_work;
	struct work_struct delete_work;

	struct nvme_subsystem *subsys;
	struct list_head subsys_entry;

	struct opal_dev *opal_dev;

	char name[12];
	u16 cntlid;

	u32 ctrl_config;
	u16 mtfa;
	u32 queue_count;

	u64 cap;
	u32 page_size;
	u32 max_hw_sectors;
	u16 oncs;
	u16 oacs;
	u16 nssa;
	u16 nr_streams;
	atomic_t abort_limit;
	u8 vwc;
	u32 vs;
	u32 sgls;
	u16 kas;
	u8 npss;
	u8 apsta;
	u32 aen_result;
	unsigned int shutdown_timeout;
	unsigned int kato;
	bool subsystem;
	unsigned long quirks;
	struct nvme_id_power_state psd[32];
	struct nvme_effects_log *effects;
	struct work_struct scan_work;
	struct work_struct async_event_work;
	struct delayed_work ka_work;
	struct work_struct fw_act_work;

	/* Power saving configuration */
	u64 ps_max_latency_us;
	bool apst_enabled;

	/* PCIe only: */
	u32 hmpre;
	u32 hmmin;
	u32 hmminds;
	u16 hmmaxd;

	/* Fabrics only */
	u16 sqsize;
	u32 ioccsz;
	u32 iorcsz;
	u16 icdoff;
	u16 maxcmd;
	int nr_reconnects;
	struct nvmf_ctrl_options *opts;
};

struct nvme_subsystem {
	int			instance;
	struct device		dev;
	/*
	 * Because we unregister the device on the last put we need
	 * a separate refcount.
	 */
	struct kref		ref;
	struct list_head	entry;
	struct mutex		lock;
	struct list_head	ctrls;
	struct list_head	nsheads;
	char			subnqn[NVMF_NQN_SIZE];
	char			serial[20];
	char			model[40];
	char			firmware_rev[8];
	u8			cmic;
	u16			vendor_id;
	struct ida		ns_ida;
};

/*
 * Container structure for unique namespace identifiers.
 */
struct nvme_ns_ids {
	u8	eui64[8];
	u8	nguid[16];
	uuid_t	uuid;
};

/*
 * Anchor structure for namespaces.  There is one for each namespace in an
 * NVMe subsystem that any of our controllers can see, and the namespace
 * structure for each controller is chained off of it.  For private
 * namespaces there is a 1:1 relation to our namespace structures, that is
 * ->list only ever has a single entry for private namespaces.
 */
struct nvme_ns_head {
#ifdef CONFIG_NVME_MULTIPATH
	struct gendisk		*disk;
	struct nvme_ns __rcu	*current_path;
	struct bio_list		requeue_list;
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
#endif
	struct list_head	list;
	struct srcu_struct	srcu;
	struct nvme_subsystem	*subsys;
	unsigned		ns_id;
	struct nvme_ns_ids	ids;
	struct list_head	entry;
	struct kref		ref;
	int			instance;
};

struct nvme_ns {
	struct list_head list;

	struct nvme_ctrl *ctrl;
	struct request_queue *queue;
	struct gendisk *disk;
	struct list_head siblings;
	struct nvm_dev *ndev;
	struct kref kref;
	struct nvme_ns_head *head;

	int lba_shift;
	u16 ms;
	u16 sgs;
	u32 sws;
	bool ext;
	u8 pi_type;
	unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD     1
	u16 noiob;
};

struct nvme_ctrl_ops {
	const char *name;
	struct module *module;
	unsigned int flags;
#define NVME_F_FABRICS			(1 << 0)
#define NVME_F_METADATA_SUPPORTED	(1 << 1)
	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
	void (*free_ctrl)(struct nvme_ctrl *ctrl);
	void (*submit_async_event)(struct nvme_ctrl *ctrl);
	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
	int (*reinit_request)(void *data, struct request *rq);
};
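
/*
 * Each transport wires these up once per driver, e.g. (a hypothetical
 * sketch; the "foo" names are illustrative, not a real driver):
 *
 *	static const struct nvme_ctrl_ops foo_ctrl_ops = {
 *		.name			= "foo",
 *		.module			= THIS_MODULE,
 *		.flags			= NVME_F_FABRICS,
 *		.reg_read32		= foo_reg_read32,
 *		.reg_write32		= foo_reg_write32,
 *		.reg_read64		= foo_reg_read64,
 *		.free_ctrl		= foo_free_ctrl,
 *		.submit_async_event	= foo_submit_async_event,
 *		.delete_ctrl		= foo_delete_ctrl,
 *		.get_address		= foo_get_address,
 *	};
 */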

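/* Read CSTS and report whether the controller has become ready (CSTS.RDY) */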
static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
{
	u32 val = 0;

	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
		return false;
	return val & NVME_CSTS_RDY;
}

static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
	if (!ctrl->subsystem)
		return -ENOTTY;
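	/* Writing the ASCII string "NVMe" (0x4E564D65) initiates the reset */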
	return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}

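/*
 * Convert a 512-byte block layer sector number into this namespace's
 * native LBA units (e.g. with a 4K-formatted namespace, lba_shift == 12
 * and sector 8 maps to LBA 1).
 */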
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
	return (sector >> (ns->lba_shift - 9));
}

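/*
 * Free the payload page that nvme_setup_cmd() may have attached to the
 * request (e.g. the range buffer built for a discard/DSM command).
 */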
static inline void nvme_cleanup_cmd(struct request *req)
{
	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
		kfree(page_address(req->special_vec.bv_page) +
		      req->special_vec.bv_offset);
	}
}

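/*
 * Complete a request from a transport's completion path.  Bit 0 of the
 * NVMe completion status is the phase tag, which is why the raw status is
 * shifted right by one before being stored.
 */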
static inline void nvme_end_request(struct request *req, __le16 status,
		union nvme_result result)
{
	struct nvme_request *rq = nvme_req(req);

	rq->status = le16_to_cpu(status) >> 1;
	rq->result = result;
	blk_mq_complete_request(req);
}

static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
{
	get_device(ctrl->device);
}

static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
	put_device(ctrl->device);
}

void nvme_complete_rq(struct request *req);
void nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
		enum nvme_ctrl_state new_state);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
		const struct nvme_ctrl_ops *ops, unsigned long quirks);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);

void nvme_queue_scan(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
		bool send);

void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
		union nvme_result *res);

void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);

#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
		struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		union nvme_result *result, void *buffer, unsigned bufflen,
		unsigned timeout, int qid, int at_head,
		blk_mq_req_flags_t flags);
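
/*
 * Example (a sketch mirroring how the core issues Identify Controller
 * internally; "id" is a buffer of sizeof(struct nvme_id_ctrl) bytes):
 *
 *	struct nvme_command c = { };
 *
 *	c.identify.opcode = nvme_admin_identify;
 *	c.identify.cns = NVME_ID_CNS_CTRL;
 *	error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id,
 *			sizeof(struct nvme_id_ctrl));
 */
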
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);

extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;

#ifdef CONFIG_NVME_MULTIPATH
void nvme_failover_req(struct request *req);
bool nvme_req_needs_failover(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
void nvme_mpath_add_disk_links(struct nvme_ns *ns);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
void nvme_mpath_remove_disk_links(struct nvme_ns *ns);

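/*
 * Clear the cached current path if it points at the namespace going away,
 * so the next I/O submitted through the head picks a fresh path.
 */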
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	if (head && ns == srcu_dereference(head->current_path, &head->srcu))
		rcu_assign_pointer(head->current_path, NULL);
}
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
#else
static inline void nvme_failover_req(struct request *req)
{
}
static inline bool nvme_req_needs_failover(struct request *req)
{
	return false;
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
}
static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
		struct nvme_ns_head *head)
{
	return 0;
}
static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
}
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
				    int node)
{
	return 0;
}

static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}
static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
	return 0;
}
static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
							unsigned long arg)
{
	return -ENOTTY;
}
#endif /* CONFIG_NVM */

static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
	return dev_to_disk(dev)->private_data;
}

int __init nvme_core_init(void);
void nvme_core_exit(void);

#endif /* _NVME_H */