/*
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#ifndef _NVME_H
#define _NVME_H

#include <linux/nvme.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>

extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)

extern unsigned int admin_timeout;
#define ADMIN_TIMEOUT	(admin_timeout * HZ)

#define NVME_DEFAULT_KATO	5
#define NVME_KATO_GRACE		10

extern struct workqueue_struct *nvme_wq;

enum {
	NVME_NS_LBA		= 0,
	NVME_NS_LIGHTNVM	= 1,
};

/*
 * List of workarounds for devices that require behavior not specified in
 * the standard.
 */
enum nvme_quirks {
	/*
	 * Prefers I/O aligned to a stripe size specified in a vendor
	 * specific Identify field.
	 */
	NVME_QUIRK_STRIPE_SIZE			= (1 << 0),

	/*
	 * The controller doesn't handle Identify CNS values other than 0 or 1
	 * correctly.
	 */
	NVME_QUIRK_IDENTIFY_CNS			= (1 << 1),

	/*
	 * The controller deterministically returns 0's on reads to
	 * logical blocks that deallocate was called on.
	 */
	NVME_QUIRK_DEALLOCATE_ZEROES		= (1 << 2),

	/*
	 * The controller needs a delay before it starts checking the device
	 * readiness, which is done by reading the NVME_CSTS_RDY bit.
	 */
	NVME_QUIRK_DELAY_BEFORE_CHK_RDY		= (1 << 3),

	/*
	 * APST should not be used.
	 */
	NVME_QUIRK_NO_APST			= (1 << 4),

	/*
	 * The deepest sleep state should not be used.
	 */
	NVME_QUIRK_NO_DEEPEST_PS		= (1 << 5),

	/*
	 * Supports the LightNVM command set if indicated in vs[1].
	 */
	NVME_QUIRK_LIGHTNVM			= (1 << 6),
};

/*
 * Common request structure for NVMe passthrough.  All drivers must have
 * this structure as the first member of their request-private data.
 */
struct nvme_request {
	struct nvme_command	*cmd;
	union nvme_result	result;
	u8			retries;
	u8			flags;
	u16			status;
};

/*
 * Mark a bio as coming in through the mpath node.
 */
#define REQ_NVME_MPATH		REQ_DRV

enum {
	NVME_REQ_CANCELLED		= (1 << 0),
};

static inline struct nvme_request *nvme_req(struct request *req)
{
	return blk_mq_rq_to_pdu(req);
}
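
/*
 * Layout sketch (hypothetical transport, not part of this header): because
 * struct nvme_request must be the first member of the request-private data,
 * the blk_mq_rq_to_pdu() result above can be treated as a struct nvme_request
 * regardless of which transport allocated the request:
 *
 *	struct nvme_foo_request {
 *		struct nvme_request	req;	// must remain the first member
 *		struct sg_table		sg_table;
 *		// ... further transport-specific state ...
 *	};
 */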

/*
 * The below value is the specific amount of delay needed before checking
 * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
 * found empirically.
 */
#define NVME_QUIRK_DELAY_AMOUNT		2000

enum nvme_ctrl_state {
	NVME_CTRL_NEW,
	NVME_CTRL_LIVE,
	NVME_CTRL_RESETTING,
	NVME_CTRL_RECONNECTING,
	NVME_CTRL_DELETING,
	NVME_CTRL_DEAD,
};
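/*
 * Rough controller lifecycle, as enforced by nvme_change_ctrl_state() in
 * core.c (a sketch of the usual transitions, not an exhaustive table):
 * NEW -> LIVE or RESETTING; LIVE <-> RESETTING/RECONNECTING;
 * LIVE, RESETTING or RECONNECTING -> DELETING; DELETING -> DEAD.
 */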

struct nvme_ctrl {
	enum nvme_ctrl_state state;
	bool identified;
	spinlock_t lock;
	const struct nvme_ctrl_ops *ops;
	struct request_queue *admin_q;
	struct request_queue *connect_q;
	struct device *dev;
	int instance;
	struct blk_mq_tag_set *tagset;
	struct blk_mq_tag_set *admin_tagset;
	struct list_head namespaces;
	struct mutex namespaces_mutex;
	struct device ctrl_device;
	struct device *device;	/* char device */
	struct cdev cdev;
	struct work_struct reset_work;
	struct work_struct delete_work;

	struct nvme_subsystem *subsys;
	struct list_head subsys_entry;

	struct opal_dev *opal_dev;

	char name[12];
	u16 cntlid;

	u32 ctrl_config;
	u16 mtfa;
	u32 queue_count;

	u64 cap;
	u32 page_size;
	u32 max_hw_sectors;
	u16 oncs;
	u16 oacs;
	u16 nssa;
	u16 nr_streams;
	atomic_t abort_limit;
	u8 vwc;
	u32 vs;
	u32 sgls;
	u16 kas;
	u8 npss;
	u8 apsta;
	u32 aen_result;
	unsigned int shutdown_timeout;
	unsigned int kato;
	bool subsystem;
	unsigned long quirks;
	struct nvme_id_power_state psd[32];
	struct nvme_effects_log *effects;
	struct work_struct scan_work;
	struct work_struct async_event_work;
	struct delayed_work ka_work;
	struct work_struct fw_act_work;

	/* Power saving configuration */
	u64 ps_max_latency_us;
	bool apst_enabled;

	/* PCIe only: */
	u32 hmpre;
	u32 hmmin;
	u32 hmminds;
	u16 hmmaxd;

	/* Fabrics only */
	u16 sqsize;
	u32 ioccsz;
	u32 iorcsz;
	u16 icdoff;
	u16 maxcmd;
	int nr_reconnects;
	struct nvmf_ctrl_options *opts;
};

struct nvme_subsystem {
	int			instance;
	struct device		dev;
	/*
	 * Because we unregister the device on the last put we need
	 * a separate refcount.
	 */
	struct kref		ref;
	struct list_head	entry;
	struct mutex		lock;
	struct list_head	ctrls;
	struct list_head	nsheads;
	char			subnqn[NVMF_NQN_SIZE];
	char			serial[20];
	char			model[40];
	char			firmware_rev[8];
	u8			cmic;
	u16			vendor_id;
	struct ida		ns_ida;
};

/*
 * Container structure for unique namespace identifiers.
 */
struct nvme_ns_ids {
	u8	eui64[8];
	u8	nguid[16];
	uuid_t	uuid;
};
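
/*
 * Note (an assumption about the matching rule, cf. nvme_ns_ids_equal() in
 * core.c): two namespaces are treated as the same only when all three
 * identifiers above (EUI-64, NGUID, UUID) compare equal.
 */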

/*
 * Anchor structure for namespaces.  There is one for each namespace in an
 * NVMe subsystem that any of our controllers can see, and the namespace
 * structure for each controller is chained off of it.  For private namespaces
 * there is a 1:1 relation to our namespace structures, that is ->list
 * only ever has a single entry for private namespaces.
 */
struct nvme_ns_head {
#ifdef CONFIG_NVME_MULTIPATH
	struct gendisk		*disk;
	struct nvme_ns __rcu	*current_path;
	struct bio_list		requeue_list;
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
#endif
	struct list_head	list;
	struct srcu_struct      srcu;
	struct nvme_subsystem	*subsys;
	unsigned		ns_id;
	struct nvme_ns_ids	ids;
	struct list_head	entry;
	struct kref		ref;
	int			instance;
};

struct nvme_ns {
	struct list_head list;

	struct nvme_ctrl *ctrl;
	struct request_queue *queue;
	struct gendisk *disk;
	struct list_head siblings;
	struct nvm_dev *ndev;
	struct kref kref;
	struct nvme_ns_head *head;

	int lba_shift;
	u16 ms;
	u16 sgs;
	u32 sws;
	bool ext;
	u8 pi_type;
	unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD     1
	u16 noiob;
};

struct nvme_ctrl_ops {
	const char *name;
	struct module *module;
	unsigned int flags;
#define NVME_F_FABRICS			(1 << 0)
#define NVME_F_METADATA_SUPPORTED	(1 << 1)
	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
	void (*free_ctrl)(struct nvme_ctrl *ctrl);
	void (*submit_async_event)(struct nvme_ctrl *ctrl);
	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
	int (*reinit_request)(void *data, struct request *rq);
};

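/* CSTS.RDY is set by the controller once it is ready to process commands. */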
static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
{
	u32 val = 0;

	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
		return false;
	return val & NVME_CSTS_RDY;
}

static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
	if (!ctrl->subsystem)
		return -ENOTTY;
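	/* NSSR must be written with the ASCII string "NVMe" (0x4E564D65) */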
	return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}

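/*
 * Convert a 512-byte based sector number to the namespace's logical block
 * number.  Worked example (values assumed for illustration): with a 4096-byte
 * LBA format, ns->lba_shift == 12, so sector 80 becomes 80 >> (12 - 9) == 10.
 */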
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
	return (sector >> (ns->lba_shift - 9));
}

static inline void nvme_cleanup_cmd(struct request *req)
{
	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
		/* free the discard range list attached by nvme_setup_cmd() */
		kfree(page_address(req->special_vec.bv_page) +
		      req->special_vec.bv_offset);
	}
}

static inline void nvme_end_request(struct request *req, __le16 status,
		union nvme_result result)
{
	struct nvme_request *rq = nvme_req(req);

	/* bit 0 of the CQE status field is the phase tag, shift it away */
	rq->status = le16_to_cpu(status) >> 1;
	rq->result = result;
	blk_mq_complete_request(req);
}

static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
{
	get_device(ctrl->device);
}

static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
	put_device(ctrl->device);
}

void nvme_complete_rq(struct request *req);
void nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
		enum nvme_ctrl_state new_state);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
		const struct nvme_ctrl_ops *ops, unsigned long quirks);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);

void nvme_queue_scan(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
		bool send);

void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
		union nvme_result *res);

void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);

#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
		struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		union nvme_result *result, void *buffer, unsigned bufflen,
		unsigned timeout, int qid, int at_head,
		blk_mq_req_flags_t flags);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);

extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;

#ifdef CONFIG_NVME_MULTIPATH
void nvme_failover_req(struct request *req);
bool nvme_req_needs_failover(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
void nvme_mpath_add_disk_links(struct nvme_ns *ns);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
void nvme_mpath_remove_disk_links(struct nvme_ns *ns);

static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	if (head && ns == srcu_dereference(head->current_path, &head->srcu))
		rcu_assign_pointer(head->current_path, NULL);
}
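
/*
 * Note (a sketch of the intended interplay, based on the multipath code):
 * readers dereference ->current_path under srcu_read_lock(&head->srcu), so
 * clearing it here simply makes nvme_find_path() pick a new path on the
 * next submission.
 */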
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
#else
static inline void nvme_failover_req(struct request *req)
{
}
static inline bool nvme_req_needs_failover(struct request *req)
{
	return false;
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
}
static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
		struct nvme_ns_head *head)
{
	return 0;
}
static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
{
}
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
}
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
int nvme_nvm_register_sysfs(struct nvme_ns *ns);
void nvme_nvm_unregister_sysfs(struct nvme_ns *ns);
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
				    int node)
{
	return 0;
}

static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}
static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns)
{
	return 0;
}
static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
							unsigned long arg)
{
	return -ENOTTY;
}
#endif /* CONFIG_NVM */

static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
	return dev_to_disk(dev)->private_data;
}

int __init nvme_core_init(void);
void nvme_core_exit(void);

#endif /* _NVME_H */