/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * This file is released under GPLv2 or later.
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
27
#include <linux/mutex.h>
28 29 30
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
31
#include <linux/slab.h>
L
Linus Torvalds 已提交
32
#include <net/sock.h>
33
#include <linux/net.h>
34
#include <linux/kthread.h>
M
Markus Pargmann 已提交
35
#include <linux/types.h>
M
Markus Pargmann 已提交
36
#include <linux/debugfs.h>
J
Josef Bacik 已提交
37
#include <linux/blk-mq.h>
L
Linus Torvalds 已提交
38

39
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
40 41 42 43
#include <asm/types.h>

#include <linux/nbd.h>

J
Josef Bacik 已提交
44 45 46 47 48
/* One connection to the server, as registered by nbd_add_socket(). */
struct nbd_sock {
	struct socket *sock;
	/* held while a request is sent on, or shutdown is issued for, sock */
	struct mutex tx_lock;
};

J
Josef Bacik 已提交
49 50
/* Bit numbers used with set_bit()/test_bit() on nbd_device.runtime_flags. */
#define NBD_TIMEDOUT			0	/* set by nbd_xmit_timeout() */
#define NBD_DISCONNECT_REQUESTED	1	/* set by the NBD_DISCONNECT ioctl */
#define NBD_DISCONNECTED		2	/* set by sock_shutdown() */
#define NBD_RUNNING			3	/* set by the NBD_DO_IT ioctl */
J
Josef Bacik 已提交
53

54
struct nbd_device {
M
Markus Pargmann 已提交
55
	u32 flags;
J
Josef Bacik 已提交
56
	unsigned long runtime_flags;
J
Josef Bacik 已提交
57
	struct nbd_sock **socks;
58 59
	int magic;

J
Josef Bacik 已提交
60
	struct blk_mq_tag_set tag_set;
61

J
Josef Bacik 已提交
62
	struct mutex config_lock;
63
	struct gendisk *disk;
J
Josef Bacik 已提交
64 65 66
	int num_connections;
	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
67
	loff_t blksize;
M
Markus Pargmann 已提交
68
	loff_t bytesize;
M
Markus Pargmann 已提交
69 70

	struct task_struct *task_recv;
J
Josef Bacik 已提交
71
	struct task_struct *task_setup;
M
Markus Pargmann 已提交
72 73 74 75

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
76 77
};

J
Josef Bacik 已提交
78 79
struct nbd_cmd {
	struct nbd_device *nbd;
J
Josef Bacik 已提交
80
	struct completion send_complete;
J
Josef Bacik 已提交
81 82
};

M
Markus Pargmann 已提交
83 84 85 86 87 88
#if IS_ENABLED(CONFIG_DEBUG_FS)
/* Top-level "nbd" debugfs directory, created by nbd_dbg_init(). */
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

/* Sanity value compared against nbd_device.magic in BUG_ON() checks. */
#define NBD_MAGIC 0x68797548

/* Number of entries allocated for nbd_dev in nbd_init(). */
static unsigned int nbds_max = 16;
static struct nbd_device *nbd_dev;
static int max_part;
L
Linus Torvalds 已提交
94

95
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
L
Linus Torvalds 已提交
96
{
97
	return disk_to_dev(nbd->disk);
L
Linus Torvalds 已提交
98 99
}

100 101 102 103 104
static bool nbd_is_connected(struct nbd_device *nbd)
{
	return !!nbd->task_recv;
}

L
Linus Torvalds 已提交
105 106 107 108 109 110
static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case  NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case  NBD_CMD_DISC: return "disconnect";
A
Alex Bligh 已提交
111
	case NBD_CMD_FLUSH: return "flush";
P
Paul Clements 已提交
112
	case  NBD_CMD_TRIM: return "trim/discard";
L
Linus Torvalds 已提交
113 114 115 116
	}
	return "invalid";
}

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
	bdev->bd_inode->i_size = 0;
	set_capacity(nbd->disk, 0);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);

	return 0;
}

/*
 * Publish nbd->bytesize as the size of the block device.  Nothing happens
 * unless the device is connected.
 */
static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
	if (nbd_is_connected(nbd)) {
		bdev->bd_inode->i_size = nbd->bytesize;
		/* capacity is expressed in 512-byte sectors */
		set_capacity(nbd->disk, nbd->bytesize >> 9);
		kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
	}
}

/*
 * Set the block size and the device size (blocksize * nr_blocks bytes).
 *
 * Returns 0 on success or the error from set_blocksize(), in which case
 * the stored sizes are left untouched.
 */
static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
			loff_t blocksize, loff_t nr_blocks)
{
	int ret;

	ret = set_blocksize(bdev, blocksize);
	if (ret)
		return ret;

	nbd->blksize = blocksize;
	nbd->bytesize = blocksize * nr_blocks;

	/* only takes effect while the device is connected */
	nbd_size_update(nbd, bdev);

	return 0;
}

J
Josef Bacik 已提交
153
static void nbd_end_request(struct nbd_cmd *cmd)
L
Linus Torvalds 已提交
154
{
J
Josef Bacik 已提交
155 156
	struct nbd_device *nbd = cmd->nbd;
	struct request *req = blk_mq_rq_from_pdu(cmd);
157
	int error = req->errors ? -EIO : 0;
L
Linus Torvalds 已提交
158

J
Josef Bacik 已提交
159
	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
160
		error ? "failed" : "done");
L
Linus Torvalds 已提交
161

J
Josef Bacik 已提交
162
	blk_mq_complete_request(req, error);
L
Linus Torvalds 已提交
163 164
}

165 166 167
/*
 * Forcibly shutdown the socket causing all listeners to error
 */
168
static void sock_shutdown(struct nbd_device *nbd)
169
{
J
Josef Bacik 已提交
170
	int i;
M
Markus Pargmann 已提交
171

J
Josef Bacik 已提交
172 173 174
	if (nbd->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
M
Markus Pargmann 已提交
175
		return;
M
Markus Pargmann 已提交
176

J
Josef Bacik 已提交
177 178 179 180 181 182 183
	for (i = 0; i < nbd->num_connections; i++) {
		struct nbd_sock *nsock = nbd->socks[i];
		mutex_lock(&nsock->tx_lock);
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
184 185
}

186 187
/*
 * blk-mq timeout handler: flag the device as timed out, mark the request
 * failed and shut down every socket of the device.
 */
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;

	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
	/* NBD_DO_IT turns this bit into an -ETIMEDOUT return value */
	set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
	req->errors++;

	mutex_lock(&nbd->config_lock);
	sock_shutdown(nbd);
	mutex_unlock(&nbd->config_lock);
	return BLK_EH_HANDLED;
}

L
Linus Torvalds 已提交
202 203 204
/*
 *  Send or receive packet.
 */
J
Josef Bacik 已提交
205 206
static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf,
		     int size, int msg_flags)
L
Linus Torvalds 已提交
207
{
J
Josef Bacik 已提交
208
	struct socket *sock = nbd->socks[index]->sock;
L
Linus Torvalds 已提交
209 210 211
	int result;
	struct msghdr msg;
	struct kvec iov;
212
	unsigned long pflags = current->flags;
L
Linus Torvalds 已提交
213

214
	if (unlikely(!sock)) {
215
		dev_err_ratelimited(disk_to_dev(nbd->disk),
216 217
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
218 219 220
		return -EINVAL;
	}

221
	current->flags |= PF_MEMALLOC;
L
Linus Torvalds 已提交
222
	do {
223
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
L
Linus Torvalds 已提交
224 225 226 227 228 229 230 231
		iov.iov_base = buf;
		iov.iov_len = size;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

M
Markus Pargmann 已提交
232
		if (send)
L
Linus Torvalds 已提交
233
			result = kernel_sendmsg(sock, &msg, &iov, 1, size);
M
Markus Pargmann 已提交
234
		else
235 236
			result = kernel_recvmsg(sock, &msg, &iov, 1, size,
						msg.msg_flags);
L
Linus Torvalds 已提交
237 238 239 240 241 242 243 244 245 246

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		size -= result;
		buf += result;
	} while (size > 0);

247
	tsk_restore_flags(current, pflags, PF_MEMALLOC);
L
Linus Torvalds 已提交
248 249 250 251

	return result;
}

J
Josef Bacik 已提交
252 253
static inline int sock_send_bvec(struct nbd_device *nbd, int index,
				 struct bio_vec *bvec, int flags)
L
Linus Torvalds 已提交
254 255 256
{
	int result;
	void *kaddr = kmap(bvec->bv_page);
J
Josef Bacik 已提交
257
	result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset,
258
			   bvec->bv_len, flags);
L
Linus Torvalds 已提交
259 260 261 262
	kunmap(bvec->bv_page);
	return result;
}

263
/* always call with the tx_lock held */
J
Josef Bacik 已提交
264
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
L
Linus Torvalds 已提交
265
{
J
Josef Bacik 已提交
266
	struct request *req = blk_mq_rq_from_pdu(cmd);
267
	int result;
L
Linus Torvalds 已提交
268
	struct nbd_request request;
269
	unsigned long size = blk_rq_bytes(req);
270
	struct bio *bio;
C
Christoph Hellwig 已提交
271
	u32 type;
J
Josef Bacik 已提交
272
	u32 tag = blk_mq_unique_tag(req);
C
Christoph Hellwig 已提交
273

274 275 276
	if (req->cmd_type != REQ_TYPE_FS)
		return -EIO;

J
Josef Bacik 已提交
277
	if (req_op(req) == REQ_OP_DISCARD)
C
Christoph Hellwig 已提交
278
		type = NBD_CMD_TRIM;
279
	else if (req_op(req) == REQ_OP_FLUSH)
C
Christoph Hellwig 已提交
280 281 282 283 284
		type = NBD_CMD_FLUSH;
	else if (rq_data_dir(req) == WRITE)
		type = NBD_CMD_WRITE;
	else
		type = NBD_CMD_READ;
L
Linus Torvalds 已提交
285

286 287 288 289 290 291 292
	if (rq_data_dir(req) == WRITE &&
	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

293
	memset(&request, 0, sizeof(request));
L
Linus Torvalds 已提交
294
	request.magic = htonl(NBD_REQUEST_MAGIC);
C
Christoph Hellwig 已提交
295
	request.type = htonl(type);
J
Josef Bacik 已提交
296
	if (type != NBD_CMD_FLUSH) {
A
Alex Bligh 已提交
297 298 299
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
J
Josef Bacik 已提交
300
	memcpy(request.handle, &tag, sizeof(tag));
L
Linus Torvalds 已提交
301

302
	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
J
Josef Bacik 已提交
303
		cmd, nbdcmd_to_ascii(type),
304
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
J
Josef Bacik 已提交
305
	result = sock_xmit(nbd, index, 1, &request, sizeof(request),
C
Christoph Hellwig 已提交
306
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
L
Linus Torvalds 已提交
307
	if (result <= 0) {
308
		dev_err_ratelimited(disk_to_dev(nbd->disk),
309
			"Send control failed (result %d)\n", result);
310
		return -EIO;
L
Linus Torvalds 已提交
311 312
	}

313 314 315 316 317 318 319
	if (type != NBD_CMD_WRITE)
		return 0;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
320
		struct bio_vec bvec;
321 322 323

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
324
			int flags = is_last ? 0 : MSG_MORE;
325

326
			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
J
Josef Bacik 已提交
327
				cmd, bvec.bv_len);
J
Josef Bacik 已提交
328
			result = sock_send_bvec(nbd, index, &bvec, flags);
329
			if (result <= 0) {
330
				dev_err(disk_to_dev(nbd->disk),
331 332
					"Send data failed (result %d)\n",
					result);
333
				return -EIO;
334
			}
335 336 337 338 339 340 341 342
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
L
Linus Torvalds 已提交
343
		}
344
		bio = next;
L
Linus Torvalds 已提交
345 346 347 348
	}
	return 0;
}

J
Josef Bacik 已提交
349 350
static inline int sock_recv_bvec(struct nbd_device *nbd, int index,
				 struct bio_vec *bvec)
L
Linus Torvalds 已提交
351 352 353
{
	int result;
	void *kaddr = kmap(bvec->bv_page);
J
Josef Bacik 已提交
354 355
	result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset,
			   bvec->bv_len, MSG_WAITALL);
L
Linus Torvalds 已提交
356 357 358 359 360
	kunmap(bvec->bv_page);
	return result;
}

/* NULL returned = something went wrong, inform userspace */
J
Josef Bacik 已提交
361
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
L
Linus Torvalds 已提交
362 363 364
{
	int result;
	struct nbd_reply reply;
J
Josef Bacik 已提交
365 366 367
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u16 hwq;
J
Josef Bacik 已提交
368
	u32 tag;
L
Linus Torvalds 已提交
369 370

	reply.magic = 0;
J
Josef Bacik 已提交
371
	result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL);
L
Linus Torvalds 已提交
372
	if (result <= 0) {
J
Josef Bacik 已提交
373 374 375 376
		if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
		    !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
377
		return ERR_PTR(result);
L
Linus Torvalds 已提交
378
	}
379 380

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
381
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
382
				(unsigned long)ntohl(reply.magic));
383
		return ERR_PTR(-EPROTO);
384 385
	}

J
Josef Bacik 已提交
386
	memcpy(&tag, reply.handle, sizeof(u32));
387

J
Josef Bacik 已提交
388 389 390 391 392 393 394 395
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
L
Linus Torvalds 已提交
396
	}
J
Josef Bacik 已提交
397
	cmd = blk_mq_rq_to_pdu(req);
L
Linus Torvalds 已提交
398
	if (ntohl(reply.error)) {
399
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
400
			ntohl(reply.error));
L
Linus Torvalds 已提交
401
		req->errors++;
J
Josef Bacik 已提交
402
		return cmd;
L
Linus Torvalds 已提交
403 404
	}

J
Josef Bacik 已提交
405
	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
C
Christoph Hellwig 已提交
406
	if (rq_data_dir(req) != WRITE) {
407
		struct req_iterator iter;
408
		struct bio_vec bvec;
409 410

		rq_for_each_segment(bvec, req, iter) {
J
Josef Bacik 已提交
411
			result = sock_recv_bvec(nbd, index, &bvec);
412
			if (result <= 0) {
413
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
414
					result);
415
				req->errors++;
J
Josef Bacik 已提交
416
				return cmd;
417
			}
418
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
J
Josef Bacik 已提交
419
				cmd, bvec.bv_len);
L
Linus Torvalds 已提交
420
		}
J
Josef Bacik 已提交
421 422 423
	} else {
		/* See the comment in nbd_queue_rq. */
		wait_for_completion(&cmd->send_complete);
L
Linus Torvalds 已提交
424
	}
J
Josef Bacik 已提交
425
	return cmd;
L
Linus Torvalds 已提交
426 427
}

428 429
static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
430
{
431
	struct gendisk *disk = dev_to_disk(dev);
M
Markus Pargmann 已提交
432
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
433

M
Markus Pargmann 已提交
434
	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
435 436
}

437
static struct device_attribute pid_attr = {
438
	.attr = { .name = "pid", .mode = S_IRUGO},
439 440 441
	.show = pid_show,
};

J
Josef Bacik 已提交
442 443 444 445 446 447 448
/* Arguments for one recv_work() item; one per connection. */
struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;	/* index into nbd->socks served by this work item */
};

static void recv_work(struct work_struct *work)
L
Linus Torvalds 已提交
449
{
J
Josef Bacik 已提交
450 451 452 453
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
J
Josef Bacik 已提交
454
	struct nbd_cmd *cmd;
J
Josef Bacik 已提交
455
	int ret = 0;
L
Linus Torvalds 已提交
456

457
	BUG_ON(nbd->magic != NBD_MAGIC);
458
	while (1) {
J
Josef Bacik 已提交
459
		cmd = nbd_read_stat(nbd, args->index);
J
Josef Bacik 已提交
460 461
		if (IS_ERR(cmd)) {
			ret = PTR_ERR(cmd);
462 463 464
			break;
		}

J
Josef Bacik 已提交
465
		nbd_end_request(cmd);
466
	}
467

J
Josef Bacik 已提交
468 469 470 471 472 473 474 475
	/*
	 * We got an error, shut everybody down if this wasn't the result of a
	 * disconnect request.
	 */
	if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		sock_shutdown(nbd);
	atomic_dec(&nbd->recv_threads);
	wake_up(&nbd->recv_wq);
L
Linus Torvalds 已提交
476 477
}

J
Josef Bacik 已提交
478
/*
 * blk_mq_tagset_busy_iter() callback: fail a request that has been started.
 * Requests that were never started are left alone.
 */
static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd;

	if (!blk_mq_request_started(req))
		return;
	cmd = blk_mq_rq_to_pdu(req);
	req->errors++;
	nbd_end_request(cmd);
}

static void nbd_clear_que(struct nbd_device *nbd)
{
491
	BUG_ON(nbd->magic != NBD_MAGIC);
L
Linus Torvalds 已提交
492

J
Josef Bacik 已提交
493
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
494
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
L
Linus Torvalds 已提交
495 496
}

497

J
Josef Bacik 已提交
498
static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
499
{
J
Josef Bacik 已提交
500 501
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
J
Josef Bacik 已提交
502
	struct nbd_sock *nsock;
J
Josef Bacik 已提交
503

J
Josef Bacik 已提交
504
	if (index >= nbd->num_connections) {
505 506
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
507
		goto error_out;
J
Josef Bacik 已提交
508
	}
509

J
Josef Bacik 已提交
510
	if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
511 512
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on closed socket\n");
J
Josef Bacik 已提交
513 514 515
		goto error_out;
	}

516 517
	req->errors = 0;

J
Josef Bacik 已提交
518 519 520 521
	nsock = nbd->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (unlikely(!nsock->sock)) {
		mutex_unlock(&nsock->tx_lock);
522 523
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on closed socket\n");
P
Pavel Machek 已提交
524
		goto error_out;
525 526
	}

J
Josef Bacik 已提交
527
	if (nbd_send_cmd(nbd, cmd, index) != 0) {
528 529
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed\n");
530
		req->errors++;
J
Josef Bacik 已提交
531
		nbd_end_request(cmd);
532 533
	}

J
Josef Bacik 已提交
534
	mutex_unlock(&nsock->tx_lock);
535 536 537 538 539

	return;

error_out:
	req->errors++;
J
Josef Bacik 已提交
540
	nbd_end_request(cmd);
541 542
}

J
Josef Bacik 已提交
543 544
static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
L
Linus Torvalds 已提交
545
{
J
Josef Bacik 已提交
546
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
L
Linus Torvalds 已提交
547

J
Josef Bacik 已提交
548 549 550 551 552 553 554 555 556 557
	/*
	 * Since we look at the bio's to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send.  This keeps us from dereferencing
	 * freed data if we have particularly fast completions (ie we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	init_completion(&cmd->send_complete);
J
Josef Bacik 已提交
558
	blk_mq_start_request(bd->rq);
J
Josef Bacik 已提交
559 560 561
	nbd_handle_cmd(cmd, hctx->queue_num);
	complete(&cmd->send_complete);

J
Josef Bacik 已提交
562
	return BLK_MQ_RQ_QUEUE_OK;
L
Linus Torvalds 已提交
563 564
}

J
Josef Bacik 已提交
565
static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
M
Markus Pargmann 已提交
566
{
J
Josef Bacik 已提交
567 568
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
M
Markus Pargmann 已提交
569

J
Josef Bacik 已提交
570 571 572 573 574 575
	if (!nbd->task_setup)
		nbd->task_setup = current;
	if (nbd->task_setup != current) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		return -EINVAL;
M
Markus Pargmann 已提交
576 577
	}

J
Josef Bacik 已提交
578 579 580 581 582 583 584 585 586
	socks = krealloc(nbd->socks, (nbd->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks)
		return -ENOMEM;
	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
	if (!nsock)
		return -ENOMEM;

	nbd->socks = socks;
M
Markus Pargmann 已提交
587

J
Josef Bacik 已提交
588 589 590
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	socks[nbd->num_connections++] = nsock;
M
Markus Pargmann 已提交
591

J
Josef Bacik 已提交
592
	return 0;
M
Markus Pargmann 已提交
593 594
}

595 596 597
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
J
Josef Bacik 已提交
598 599 600 601 602 603
	int i;

	for (i = 0; i < nbd->num_connections; i++)
		kfree(nbd->socks[i]);
	kfree(nbd->socks);
	nbd->socks = NULL;
J
Josef Bacik 已提交
604
	nbd->runtime_flags = 0;
605 606 607 608
	nbd->blksize = 1024;
	nbd->bytesize = 0;
	set_capacity(nbd->disk, 0);
	nbd->flags = 0;
609
	nbd->tag_set.timeout = 0;
J
Josef Bacik 已提交
610 611
	nbd->num_connections = 0;
	nbd->task_setup = NULL;
612 613 614 615 616 617 618 619 620 621 622 623 624
	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
}

/*
 * Return the block device to its unconfigured state: writable, size zero
 * and, when partitions are enabled, with the partition table re-read.
 */
static void nbd_bdev_reset(struct block_device *bdev)
{
	set_device_ro(bdev, false);
	bdev->bd_inode->i_size = 0;

	if (max_part <= 0)
		return;

	blkdev_reread_part(bdev);
	bdev->bd_invalidated = 1;
}

625 626 627 628 629 630 631
static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
{
	if (nbd->flags & NBD_FLAG_READ_ONLY)
		set_device_ro(bdev, true);
	if (nbd->flags & NBD_FLAG_SEND_TRIM)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	if (nbd->flags & NBD_FLAG_SEND_FLUSH)
632
		blk_queue_write_cache(nbd->disk->queue, true, false);
633
	else
634
		blk_queue_write_cache(nbd->disk->queue, false, false);
635 636
}

J
Josef Bacik 已提交
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
static void send_disconnects(struct nbd_device *nbd)
{
	struct nbd_request request = {};
	int i, ret;

	request.magic = htonl(NBD_REQUEST_MAGIC);
	request.type = htonl(NBD_CMD_DISC);

	for (i = 0; i < nbd->num_connections; i++) {
		ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0);
		if (ret <= 0)
			dev_err(disk_to_dev(nbd->disk),
				"Send disconnect failed %d\n", ret);
	}
}

M
Markus Pargmann 已提交
653 654 655
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);

J
Josef Bacik 已提交
656
/* Must be called with config_lock held */
657
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
P
Pavel Machek 已提交
658 659
		       unsigned int cmd, unsigned long arg)
{
L
Linus Torvalds 已提交
660
	switch (cmd) {
P
Pavel Machek 已提交
661
	case NBD_DISCONNECT: {
662
		dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
J
Josef Bacik 已提交
663
		if (!nbd->socks)
664
			return -EINVAL;
P
Pavel Machek 已提交
665

J
Josef Bacik 已提交
666
		mutex_unlock(&nbd->config_lock);
667
		fsync_bdev(bdev);
J
Josef Bacik 已提交
668
		mutex_lock(&nbd->config_lock);
669 670

		/* Check again after getting mutex back.  */
J
Josef Bacik 已提交
671
		if (!nbd->socks)
L
Linus Torvalds 已提交
672
			return -EINVAL;
P
Paul Clements 已提交
673

J
Josef Bacik 已提交
674 675 676
		if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
				      &nbd->runtime_flags))
			send_disconnects(nbd);
P
Paul Clements 已提交
677
		return 0;
P
Pavel Machek 已提交
678
	}
J
Josef Bacik 已提交
679

M
Markus Pargmann 已提交
680 681
	case NBD_CLEAR_SOCK:
		sock_shutdown(nbd);
682
		nbd_clear_que(nbd);
683
		kill_bdev(bdev);
J
Josef Bacik 已提交
684 685 686 687 688 689 690 691 692 693 694 695 696
		nbd_bdev_reset(bdev);
		/*
		 * We want to give the run thread a chance to wait for everybody
		 * to clean up and then do it's own cleanup.
		 */
		if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) {
			int i;

			for (i = 0; i < nbd->num_connections; i++)
				kfree(nbd->socks[i]);
			kfree(nbd->socks);
			nbd->socks = NULL;
			nbd->num_connections = 0;
697
			nbd->task_setup = NULL;
J
Josef Bacik 已提交
698
		}
P
Pavel Machek 已提交
699 700 701
		return 0;

	case NBD_SET_SOCK: {
A
Al Viro 已提交
702
		int err;
M
Markus Pargmann 已提交
703 704 705 706 707
		struct socket *sock = sockfd_lookup(arg, &err);

		if (!sock)
			return err;

J
Josef Bacik 已提交
708
		err = nbd_add_socket(nbd, sock);
M
Markus Pargmann 已提交
709 710 711 712
		if (!err && max_part)
			bdev->bd_invalidated = 1;

		return err;
P
Pavel Machek 已提交
713 714
	}

715
	case NBD_SET_BLKSIZE: {
A
Arnd Bergmann 已提交
716
		loff_t bsize = div_s64(nbd->bytesize, arg);
717 718 719

		return nbd_size_set(nbd, bdev, arg, bsize);
	}
P
Pavel Machek 已提交
720

L
Linus Torvalds 已提交
721
	case NBD_SET_SIZE:
722
		return nbd_size_set(nbd, bdev, nbd->blksize,
J
Jens Axboe 已提交
723
					div_s64(arg, nbd->blksize));
724 725 726

	case NBD_SET_SIZE_BLOCKS:
		return nbd_size_set(nbd, bdev, nbd->blksize, arg);
P
Pavel Machek 已提交
727

728
	case NBD_SET_TIMEOUT:
729
		nbd->tag_set.timeout = arg * HZ;
730
		return 0;
P
Pavel Machek 已提交
731

P
Paul Clements 已提交
732 733 734 735
	case NBD_SET_FLAGS:
		nbd->flags = arg;
		return 0;

P
Pavel Machek 已提交
736
	case NBD_DO_IT: {
J
Josef Bacik 已提交
737 738
		struct recv_thread_args *args;
		int num_connections = nbd->num_connections;
739
		int error = 0, i;
P
Pavel Machek 已提交
740

M
Markus Pargmann 已提交
741
		if (nbd->task_recv)
742
			return -EBUSY;
J
Josef Bacik 已提交
743
		if (!nbd->socks)
L
Linus Torvalds 已提交
744
			return -EINVAL;
J
Josef Bacik 已提交
745 746 747
		if (num_connections > 1 &&
		    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
			dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
748
			error = -EINVAL;
J
Josef Bacik 已提交
749 750
			goto out_err;
		}
P
Pavel Machek 已提交
751

J
Josef Bacik 已提交
752 753 754
		set_bit(NBD_RUNNING, &nbd->runtime_flags);
		blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
		args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
755 756
		if (!args) {
			error = -ENOMEM;
J
Josef Bacik 已提交
757
			goto out_err;
758
		}
V
Vegard Nossum 已提交
759
		nbd->task_recv = current;
J
Josef Bacik 已提交
760
		mutex_unlock(&nbd->config_lock);
P
Pavel Machek 已提交
761

762
		nbd_parse_flags(nbd, bdev);
P
Paul Clements 已提交
763

J
Josef Bacik 已提交
764 765 766 767 768 769 770 771
		error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
		if (error) {
			dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
			goto out_recv;
		}

		nbd_size_update(nbd, bdev);

M
Markus Pargmann 已提交
772
		nbd_dev_dbg_init(nbd);
J
Josef Bacik 已提交
773 774 775 776 777 778 779 780 781 782 783 784
		for (i = 0; i < num_connections; i++) {
			sk_set_memalloc(nbd->socks[i]->sock->sk);
			atomic_inc(&nbd->recv_threads);
			INIT_WORK(&args[i].work, recv_work);
			args[i].nbd = nbd;
			args[i].index = i;
			queue_work(system_long_wq, &args[i].work);
		}
		wait_event_interruptible(nbd->recv_wq,
					 atomic_read(&nbd->recv_threads) == 0);
		for (i = 0; i < num_connections; i++)
			flush_work(&args[i].work);
M
Markus Pargmann 已提交
785
		nbd_dev_dbg_close(nbd);
J
Josef Bacik 已提交
786 787 788 789
		nbd_size_clear(nbd, bdev);
		device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
out_recv:
		mutex_lock(&nbd->config_lock);
V
Vegard Nossum 已提交
790
		nbd->task_recv = NULL;
J
Josef Bacik 已提交
791
out_err:
792
		sock_shutdown(nbd);
793
		nbd_clear_que(nbd);
794
		kill_bdev(bdev);
795 796
		nbd_bdev_reset(bdev);

J
Josef Bacik 已提交
797 798
		/* user requested, ignore socket errors */
		if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
799
			error = 0;
J
Josef Bacik 已提交
800
		if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
801 802
			error = -ETIMEDOUT;

803
		nbd_reset(nbd);
804
		return error;
P
Pavel Machek 已提交
805 806
	}

L
Linus Torvalds 已提交
807
	case NBD_CLEAR_QUE:
808 809 810 811
		/*
		 * This is for compatibility only.  The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
L
Linus Torvalds 已提交
812
		return 0;
P
Pavel Machek 已提交
813

L
Linus Torvalds 已提交
814
	case NBD_PRINT_DEBUG:
J
Josef Bacik 已提交
815 816 817 818
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
L
Linus Torvalds 已提交
819 820
		return 0;
	}
P
Pavel Machek 已提交
821 822 823 824 825 826
	return -ENOTTY;
}

/*
 * ioctl entry point: requires CAP_SYS_ADMIN (-EPERM otherwise) and runs the
 * actual handler under config_lock.
 */
static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd = bdev->bd_disk->private_data;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	BUG_ON(nbd->magic != NBD_MAGIC);

	mutex_lock(&nbd->config_lock);
	error = __nbd_ioctl(bdev, nbd, cmd, arg);
	mutex_unlock(&nbd->config_lock);

	return error;
}

842
static const struct block_device_operations nbd_fops =
L
Linus Torvalds 已提交
843 844
{
	.owner =	THIS_MODULE,
845
	.ioctl =	nbd_ioctl,
A
Al Viro 已提交
846
	.compat_ioctl =	nbd_ioctl,
L
Linus Torvalds 已提交
847 848
};

M
Markus Pargmann 已提交
849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908
#if IS_ENABLED(CONFIG_DEBUG_FS)

/* debugfs "tasks": print the pid of the receiving task, if there is one. */
static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;

	if (!nbd->task_recv)
		return 0;

	seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));
	return 0;
}

/* Open handler for debugfs "tasks"; i_private is handed to the show routine. */
static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	void *priv = inode->i_private;

	return single_open(file, nbd_dbg_tasks_show, priv);
}

/* seq_file based file operations for the debugfs "tasks" file. */
static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/*
 * debugfs "flags": print nbd->flags in hex, followed by the name of every
 * known flag that is set.
 */
static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	static const struct {
		u32 bit;
		const char *line;
	} known[] = {
		{ NBD_FLAG_HAS_FLAGS,	"NBD_FLAG_HAS_FLAGS\n" },
		{ NBD_FLAG_READ_ONLY,	"NBD_FLAG_READ_ONLY\n" },
		{ NBD_FLAG_SEND_FLUSH,	"NBD_FLAG_SEND_FLUSH\n" },
		{ NBD_FLAG_SEND_TRIM,	"NBD_FLAG_SEND_TRIM\n" },
	};
	struct nbd_device *nbd = s->private;
	u32 flags = nbd->flags;
	unsigned int i;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);

	seq_puts(s, "Known flags:\n");

	for (i = 0; i < ARRAY_SIZE(known); i++) {
		if (flags & known[i].bit)
			seq_puts(s, known[i].line);
	}

	return 0;
}

/* Open handler for debugfs "flags"; i_private is handed to the show routine. */
static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	void *priv = inode->i_private;

	return single_open(file, nbd_dbg_flags_show, priv);
}

/* seq_file based file operations for the debugfs "flags" file. */
static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;
909 910 911

	if (!nbd_dbg_dir)
		return -EIO;
M
Markus Pargmann 已提交
912 913

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
914 915 916 917
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
M
Markus Pargmann 已提交
918 919 920
	}
	nbd->dbg_dir = dir;

921 922
	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
923
	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
924
	debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize);
925
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
M
Markus Pargmann 已提交
926 927 928 929 930 931 932 933 934 935 936 937 938 939

	return 0;
}

/* Remove the per-device debugfs directory together with its files. */
static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	struct dentry *dir = nbd->dbg_dir;

	debugfs_remove_recursive(dir);
}

static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
940 941
	if (!dbg_dir)
		return -EIO;
M
Markus Pargmann 已提交
942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974

	nbd_dbg_dir = dbg_dir;

	return 0;
}

/* Remove the top-level "nbd" debugfs directory and everything below it. */
static void nbd_dbg_close(void)
{
	struct dentry *top = nbd_dbg_dir;

	debugfs_remove_recursive(top);
}

#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */

/* No-op variants used when CONFIG_DEBUG_FS is disabled. */
static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
	return 0;
}

static void nbd_dbg_close(void)
{
}

#endif

J
Josef Bacik 已提交
975 976 977 978 979 980 981 982 983 984 985 986
/*
 * blk-mq .init_request callback: store the driver data pointer handed in
 * by the block layer into the per-request nbd_cmd.  The hardware-context,
 * request-index and NUMA-node arguments are unused.  Always returns 0.
 */
static int nbd_init_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx,
			    unsigned int numa_node)
{
	struct nbd_cmd *pdu = blk_mq_rq_to_pdu(rq);

	pdu->nbd = data;

	return 0;
}

static struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.init_request	= nbd_init_request,
987
	.timeout	= nbd_xmit_timeout,
J
Josef Bacik 已提交
988 989
};

/*
 * Module and kernel interface code follows.
 *  (A bare smiley here confuses emacs :-)
 */

/*
 * Module entry point.
 *
 * Validates the max_part and nbds_max module parameters, allocates the
 * nbd_dev array and, for every device, a gendisk, a blk-mq tag set and a
 * request queue.  It then registers the NBD block major and adds each disk.
 *
 * Returns 0 on success or a negative errno.  On failure every device that
 * had been fully set up is torn down again and nbd_dev is freed.
 */
static int __init nbd_init(void)
{
	int err;
	int i;
	int part_shift;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space so that user can know the max number of
		 * partition kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
	if (!nbd_dev)
		return -ENOMEM;

	for (i = 0; i < nbds_max; i++) {
		struct request_queue *q;
		struct gendisk *disk = alloc_disk(1 << part_shift);

		if (!disk) {
			/*
			 * Set the error explicitly: err holds 0 from the
			 * previous iteration's successful tag set allocation.
			 */
			err = -ENOMEM;
			goto out;
		}
		nbd_dev[i].disk = disk;

		nbd_dev[i].tag_set.ops = &nbd_mq_ops;
		nbd_dev[i].tag_set.nr_hw_queues = 1;
		nbd_dev[i].tag_set.queue_depth = 128;
		nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
		nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
		nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
			BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
		nbd_dev[i].tag_set.driver_data = &nbd_dev[i];

		err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
		if (err) {
			put_disk(disk);
			goto out;
		}

		/*
		 * The new linux 2.5 block layer implementation requires
		 * every gendisk to have its very own request_queue struct.
		 * These structs are big so we dynamically allocate them.
		 */
		q = blk_mq_init_queue(&nbd_dev[i].tag_set);
		if (IS_ERR(q)) {
			/* err is 0 here; propagate the real failure code. */
			err = PTR_ERR(q);
			blk_mq_free_tag_set(&nbd_dev[i].tag_set);
			put_disk(disk);
			goto out;
		}
		disk->queue = q;

		/*
		 * Tell the block layer that we are not a rotational device
		 */
		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
		queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
		disk->queue->limits.discard_granularity = 512;
		blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
		disk->queue->limits.discard_zeroes_data = 0;
		blk_queue_max_hw_sectors(disk->queue, 65536);
		disk->queue->limits.max_sectors = 256;
	}

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		err = -EIO;
		goto out;
	}

	printk(KERN_INFO "nbd: registered device at major %d\n", NBD_MAJOR);

	/* The return value is ignored: a debugfs failure does not fail init. */
	nbd_dbg_init();

	for (i = 0; i < nbds_max; i++) {
		struct gendisk *disk = nbd_dev[i].disk;

		nbd_dev[i].magic = NBD_MAGIC;
		mutex_init(&nbd_dev[i].config_lock);
		disk->major = NBD_MAJOR;
		disk->first_minor = i << part_shift;
		disk->fops = &nbd_fops;
		disk->private_data = &nbd_dev[i];
		sprintf(disk->disk_name, "nbd%d", i);
		init_waitqueue_head(&nbd_dev[i].recv_wq);
		nbd_reset(&nbd_dev[i]);
		add_disk(disk);
	}

	return 0;
out:
	/*
	 * Devices [0, i) are fully set up; a slot that failed part-way has
	 * already released its own resources above.  Clean up each queue
	 * before freeing the tag set it uses, matching nbd_cleanup().
	 */
	while (i--) {
		blk_cleanup_queue(nbd_dev[i].disk->queue);
		blk_mq_free_tag_set(&nbd_dev[i].tag_set);
		put_disk(nbd_dev[i].disk);
	}
	kfree(nbd_dev);
	return err;
}

/*
 * Module exit: remove the debugfs tree, tear down every device's disk,
 * request queue and tag set, unregister the NBD block major and free the
 * nbd_dev array.
 */
static void __exit nbd_cleanup(void)
{
	int idx;

	nbd_dbg_close();

	for (idx = 0; idx < nbds_max; idx++) {
		struct nbd_device *nbd = &nbd_dev[idx];
		struct gendisk *gd = nbd->disk;

		nbd->magic = 0;
		if (!gd)
			continue;

		del_gendisk(gd);
		blk_cleanup_queue(gd->queue);
		blk_mq_free_tag_set(&nbd->tag_set);
		put_disk(gd);
	}

	unregister_blkdev(NBD_MAJOR, "nbd");
	kfree(nbd_dev);
	printk(KERN_INFO "nbd: unregistered device at major %d\n", NBD_MAJOR);
}

/* Module entry and exit hooks. */
module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

/* Load-time parameters; both are exposed read-only (0444). */
module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");