nbd.c 26.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6
/*
 * Network block device - make block devices work over TCP
 *
 * Note that you cannot swap over this thing yet. It seems to work but
 * deadlocks sometimes - you cannot swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * This file is released under GPLv2 or later.
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
27
#include <linux/mutex.h>
28 29 30
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
31
#include <linux/slab.h>
L
Linus Torvalds 已提交
32
#include <net/sock.h>
33
#include <linux/net.h>
34
#include <linux/kthread.h>
M
Markus Pargmann 已提交
35
#include <linux/types.h>
M
Markus Pargmann 已提交
36
#include <linux/debugfs.h>
J
Josef Bacik 已提交
37
#include <linux/blk-mq.h>
L
Linus Torvalds 已提交
38

39
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
40 41 42 43
#include <asm/types.h>

#include <linux/nbd.h>

44 45 46
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);

J
Josef Bacik 已提交
47 48 49 50 51
/* One connection of an NBD device: the socket and the lock guarding sends. */
struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;	/* held while sending on, or shutting down, sock */
};

J
Josef Bacik 已提交
52 53
/* Bit numbers used with the bit operations on nbd_device.runtime_flags. */
#define NBD_TIMEDOUT			0
#define NBD_DISCONNECT_REQUESTED	1
J
Josef Bacik 已提交
54 55
/* More bit numbers for nbd_device.runtime_flags. */
#define NBD_DISCONNECTED		2
#define NBD_RUNNING			3
J
Josef Bacik 已提交
56

57
struct nbd_device {
M
Markus Pargmann 已提交
58
	u32 flags;
J
Josef Bacik 已提交
59
	unsigned long runtime_flags;
J
Josef Bacik 已提交
60
	struct nbd_sock **socks;
61 62
	int magic;

J
Josef Bacik 已提交
63
	struct blk_mq_tag_set tag_set;
64

J
Josef Bacik 已提交
65
	struct mutex config_lock;
66
	struct gendisk *disk;
J
Josef Bacik 已提交
67 68 69
	int num_connections;
	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
70
	loff_t blksize;
M
Markus Pargmann 已提交
71
	loff_t bytesize;
M
Markus Pargmann 已提交
72 73

	struct task_struct *task_recv;
J
Josef Bacik 已提交
74
	struct task_struct *task_setup;
M
Markus Pargmann 已提交
75 76 77 78

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
79 80
};

J
Josef Bacik 已提交
81 82
struct nbd_cmd {
	struct nbd_device *nbd;
J
Josef Bacik 已提交
83
	struct completion send_complete;
J
Josef Bacik 已提交
84 85
};

M
Markus Pargmann 已提交
86 87 88 89 90 91
#if IS_ENABLED(CONFIG_DEBUG_FS)
/* Top-level "nbd" debugfs directory, created by nbd_dbg_init(). */
static struct dentry *nbd_dbg_dir;
#endif

/* Name of the gendisk that belongs to @nbd. */
#define nbd_name(nbd) ((nbd)->disk->disk_name)

/* Value of nbd_device.magic for a valid device; checked with BUG_ON(). */
#define NBD_MAGIC 0x68797548
L
Linus Torvalds 已提交
93

94
/* Module-wide settings and shared state. */
static unsigned int nbds_max = 16;
static int max_part;
/* Workqueue on which the per-connection recv_work() items are queued. */
static struct workqueue_struct *recv_workqueue;
/* Shift applied to the device index to form the first minor number. */
static int part_shift;
L
Linus Torvalds 已提交
98

J
Josef Bacik 已提交
99 100 101 102
/* Defined further down, in both the debugfs and the non-debugfs variant. */
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);


103
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
L
Linus Torvalds 已提交
104
{
105
	return disk_to_dev(nbd->disk);
L
Linus Torvalds 已提交
106 107
}

108 109 110 111 112
/* True while a task is recorded in nbd->task_recv (set by nbd_start_device()). */
static bool nbd_is_connected(struct nbd_device *nbd)
{
	return nbd->task_recv != NULL;
}

L
Linus Torvalds 已提交
113 114 115 116 117 118
static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case  NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case  NBD_CMD_DISC: return "disconnect";
A
Alex Bligh 已提交
119
	case NBD_CMD_FLUSH: return "flush";
P
Paul Clements 已提交
120
	case  NBD_CMD_TRIM: return "trim/discard";
L
Linus Torvalds 已提交
121 122 123 124
	}
	return "invalid";
}

125 126
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
127
	bd_set_size(bdev, 0);
128 129 130 131 132 133 134 135
	set_capacity(nbd->disk, 0);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);

	return 0;
}

static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
136 137 138
	blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
	bd_set_size(bdev, nbd->bytesize);
139 140 141 142
	set_capacity(nbd->disk, nbd->bytesize >> 9);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

143
/*
 * Record a new block size and block count; the byte size is their
 * product.  The new values are only applied to the disk right away when
 * the device is connected.
 */
static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
			loff_t blocksize, loff_t nr_blocks)
{
	nbd->blksize = blocksize;
	nbd->bytesize = blocksize * nr_blocks;

	if (!nbd_is_connected(nbd))
		return;

	nbd_size_update(nbd, bdev);
}

J
Josef Bacik 已提交
152
static void nbd_end_request(struct nbd_cmd *cmd)
L
Linus Torvalds 已提交
153
{
J
Josef Bacik 已提交
154 155
	struct nbd_device *nbd = cmd->nbd;
	struct request *req = blk_mq_rq_from_pdu(cmd);
156
	int error = req->errors ? -EIO : 0;
L
Linus Torvalds 已提交
157

J
Josef Bacik 已提交
158
	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
159
		error ? "failed" : "done");
L
Linus Torvalds 已提交
160

J
Josef Bacik 已提交
161
	blk_mq_complete_request(req, error);
L
Linus Torvalds 已提交
162 163
}

164 165 166
/*
 * Forcibly shutdown the socket causing all listeners to error
 */
167
static void sock_shutdown(struct nbd_device *nbd)
168
{
J
Josef Bacik 已提交
169
	int i;
M
Markus Pargmann 已提交
170

J
Josef Bacik 已提交
171 172 173
	if (nbd->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
M
Markus Pargmann 已提交
174
		return;
M
Markus Pargmann 已提交
175

J
Josef Bacik 已提交
176 177 178 179 180 181 182
	for (i = 0; i < nbd->num_connections; i++) {
		struct nbd_sock *nsock = nbd->socks[i];
		mutex_lock(&nsock->tx_lock);
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
183 184
}

185 186
/*
 * blk-mq timeout handler (installed via nbd_mq_ops.timeout).  Marks the
 * device as timed out, flags the request as failed and shuts down all
 * sockets under config_lock.  Always returns BLK_EH_HANDLED.
 */
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;

	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");

	set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
	req->errors++;

	mutex_lock(&nbd->config_lock);
	sock_shutdown(nbd);
	mutex_unlock(&nbd->config_lock);

	return BLK_EH_HANDLED;
}

L
Linus Torvalds 已提交
201 202 203
/*
 *  Send or receive packet.
 */
A
Al Viro 已提交
204 205
static int sock_xmit(struct nbd_device *nbd, int index, int send,
		     struct iov_iter *iter, int msg_flags)
L
Linus Torvalds 已提交
206
{
J
Josef Bacik 已提交
207
	struct socket *sock = nbd->socks[index]->sock;
L
Linus Torvalds 已提交
208 209
	int result;
	struct msghdr msg;
210
	unsigned long pflags = current->flags;
L
Linus Torvalds 已提交
211

212
	if (unlikely(!sock)) {
213
		dev_err_ratelimited(disk_to_dev(nbd->disk),
214 215
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
216 217 218
		return -EINVAL;
	}

A
Al Viro 已提交
219
	msg.msg_iter = *iter;
220

221
	current->flags |= PF_MEMALLOC;
L
Linus Torvalds 已提交
222
	do {
223
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
L
Linus Torvalds 已提交
224 225 226 227 228 229
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

M
Markus Pargmann 已提交
230
		if (send)
231
			result = sock_sendmsg(sock, &msg);
M
Markus Pargmann 已提交
232
		else
233
			result = sock_recvmsg(sock, &msg, msg.msg_flags);
L
Linus Torvalds 已提交
234 235 236 237 238 239

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
240
	} while (msg_data_left(&msg));
L
Linus Torvalds 已提交
241

242
	tsk_restore_flags(current, pflags, PF_MEMALLOC);
L
Linus Torvalds 已提交
243 244 245 246

	return result;
}

247
/* always call with the tx_lock held */
J
Josef Bacik 已提交
248
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
L
Linus Torvalds 已提交
249
{
J
Josef Bacik 已提交
250
	struct request *req = blk_mq_rq_from_pdu(cmd);
251
	int result;
A
Al Viro 已提交
252 253 254
	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
255
	unsigned long size = blk_rq_bytes(req);
256
	struct bio *bio;
C
Christoph Hellwig 已提交
257
	u32 type;
J
Josef Bacik 已提交
258
	u32 tag = blk_mq_unique_tag(req);
C
Christoph Hellwig 已提交
259

A
Al Viro 已提交
260 261
	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));

262 263
	switch (req_op(req)) {
	case REQ_OP_DISCARD:
C
Christoph Hellwig 已提交
264
		type = NBD_CMD_TRIM;
265 266
		break;
	case REQ_OP_FLUSH:
C
Christoph Hellwig 已提交
267
		type = NBD_CMD_FLUSH;
268 269
		break;
	case REQ_OP_WRITE:
C
Christoph Hellwig 已提交
270
		type = NBD_CMD_WRITE;
271 272
		break;
	case REQ_OP_READ:
C
Christoph Hellwig 已提交
273
		type = NBD_CMD_READ;
274 275 276 277
		break;
	default:
		return -EIO;
	}
L
Linus Torvalds 已提交
278

279 280 281 282 283 284 285
	if (rq_data_dir(req) == WRITE &&
	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

C
Christoph Hellwig 已提交
286
	request.type = htonl(type);
J
Josef Bacik 已提交
287
	if (type != NBD_CMD_FLUSH) {
A
Alex Bligh 已提交
288 289 290
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
J
Josef Bacik 已提交
291
	memcpy(request.handle, &tag, sizeof(tag));
L
Linus Torvalds 已提交
292

293
	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
J
Josef Bacik 已提交
294
		cmd, nbdcmd_to_ascii(type),
295
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
A
Al Viro 已提交
296
	result = sock_xmit(nbd, index, 1, &from,
C
Christoph Hellwig 已提交
297
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
L
Linus Torvalds 已提交
298
	if (result <= 0) {
299
		dev_err_ratelimited(disk_to_dev(nbd->disk),
300
			"Send control failed (result %d)\n", result);
301
		return -EIO;
L
Linus Torvalds 已提交
302 303
	}

304 305 306 307 308 309 310
	if (type != NBD_CMD_WRITE)
		return 0;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
311
		struct bio_vec bvec;
312 313 314

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
315
			int flags = is_last ? 0 : MSG_MORE;
316

317
			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
J
Josef Bacik 已提交
318
				cmd, bvec.bv_len);
A
Al Viro 已提交
319 320 321
			iov_iter_bvec(&from, ITER_BVEC | WRITE,
				      &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 1, &from, flags);
322
			if (result <= 0) {
323
				dev_err(disk_to_dev(nbd->disk),
324 325
					"Send data failed (result %d)\n",
					result);
326
				return -EIO;
327
			}
328 329 330 331 332 333 334 335
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
L
Linus Torvalds 已提交
336
		}
337
		bio = next;
L
Linus Torvalds 已提交
338 339 340 341 342
	}
	return 0;
}

/* NULL returned = something went wrong, inform userspace */
J
Josef Bacik 已提交
343
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
L
Linus Torvalds 已提交
344 345 346
{
	int result;
	struct nbd_reply reply;
J
Josef Bacik 已提交
347 348 349
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u16 hwq;
J
Josef Bacik 已提交
350
	u32 tag;
A
Al Viro 已提交
351 352
	struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
	struct iov_iter to;
L
Linus Torvalds 已提交
353 354

	reply.magic = 0;
A
Al Viro 已提交
355 356
	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
L
Linus Torvalds 已提交
357
	if (result <= 0) {
J
Josef Bacik 已提交
358 359 360 361
		if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
		    !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
362
		return ERR_PTR(result);
L
Linus Torvalds 已提交
363
	}
364 365

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
366
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
367
				(unsigned long)ntohl(reply.magic));
368
		return ERR_PTR(-EPROTO);
369 370
	}

J
Josef Bacik 已提交
371
	memcpy(&tag, reply.handle, sizeof(u32));
372

J
Josef Bacik 已提交
373 374 375 376 377 378 379 380
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
L
Linus Torvalds 已提交
381
	}
J
Josef Bacik 已提交
382
	cmd = blk_mq_rq_to_pdu(req);
L
Linus Torvalds 已提交
383
	if (ntohl(reply.error)) {
384
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
385
			ntohl(reply.error));
L
Linus Torvalds 已提交
386
		req->errors++;
J
Josef Bacik 已提交
387
		return cmd;
L
Linus Torvalds 已提交
388 389
	}

J
Josef Bacik 已提交
390
	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
C
Christoph Hellwig 已提交
391
	if (rq_data_dir(req) != WRITE) {
392
		struct req_iterator iter;
393
		struct bio_vec bvec;
394 395

		rq_for_each_segment(bvec, req, iter) {
A
Al Viro 已提交
396 397 398
			iov_iter_bvec(&to, ITER_BVEC | READ,
				      &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL);
399
			if (result <= 0) {
400
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
401
					result);
402
				req->errors++;
J
Josef Bacik 已提交
403
				return cmd;
404
			}
405
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
J
Josef Bacik 已提交
406
				cmd, bvec.bv_len);
L
Linus Torvalds 已提交
407
		}
J
Josef Bacik 已提交
408 409 410
	} else {
		/* See the comment in nbd_queue_rq. */
		wait_for_completion(&cmd->send_complete);
L
Linus Torvalds 已提交
411
	}
J
Josef Bacik 已提交
412
	return cmd;
L
Linus Torvalds 已提交
413 414
}

415 416
static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
417
{
418
	struct gendisk *disk = dev_to_disk(dev);
M
Markus Pargmann 已提交
419
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
420

M
Markus Pargmann 已提交
421
	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
422 423
}

424
static struct device_attribute pid_attr = {
425
	.attr = { .name = "pid", .mode = S_IRUGO},
426 427 428
	.show = pid_show,
};

J
Josef Bacik 已提交
429 430 431 432 433 434 435
/* Arguments for one recv_work() item; one is set up per connection. */
struct recv_thread_args {
	struct work_struct work;	/* embedded so recv_work() can container_of() it */
	struct nbd_device *nbd;
	int index;			/* index into nbd->socks[] to receive from */
};

static void recv_work(struct work_struct *work)
L
Linus Torvalds 已提交
436
{
J
Josef Bacik 已提交
437 438 439 440
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
J
Josef Bacik 已提交
441
	struct nbd_cmd *cmd;
J
Josef Bacik 已提交
442
	int ret = 0;
L
Linus Torvalds 已提交
443

444
	BUG_ON(nbd->magic != NBD_MAGIC);
445
	while (1) {
J
Josef Bacik 已提交
446
		cmd = nbd_read_stat(nbd, args->index);
J
Josef Bacik 已提交
447 448
		if (IS_ERR(cmd)) {
			ret = PTR_ERR(cmd);
449 450 451
			break;
		}

J
Josef Bacik 已提交
452
		nbd_end_request(cmd);
453
	}
454

J
Josef Bacik 已提交
455 456 457 458 459 460 461 462
	/*
	 * We got an error, shut everybody down if this wasn't the result of a
	 * disconnect request.
	 */
	if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		sock_shutdown(nbd);
	atomic_dec(&nbd->recv_threads);
	wake_up(&nbd->recv_wq);
L
Linus Torvalds 已提交
463 464
}

J
Josef Bacik 已提交
465
/*
 * Busy-tag iterator callback used by nbd_clear_que(): fail every request
 * that has already been started; requests not yet started are left alone.
 */
static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	if (blk_mq_request_started(req)) {
		struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);

		req->errors++;
		nbd_end_request(cmd);
	}
}

static void nbd_clear_que(struct nbd_device *nbd)
{
478
	BUG_ON(nbd->magic != NBD_MAGIC);
L
Linus Torvalds 已提交
479

J
Josef Bacik 已提交
480
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
481
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
L
Linus Torvalds 已提交
482 483
}

484

J
Josef Bacik 已提交
485
static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
486
{
J
Josef Bacik 已提交
487 488
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
J
Josef Bacik 已提交
489
	struct nbd_sock *nsock;
J
Josef Bacik 已提交
490

J
Josef Bacik 已提交
491
	if (index >= nbd->num_connections) {
492 493
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
494
		goto error_out;
J
Josef Bacik 已提交
495
	}
496

J
Josef Bacik 已提交
497
	if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
498 499
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on closed socket\n");
J
Josef Bacik 已提交
500 501 502
		goto error_out;
	}

503 504
	req->errors = 0;

J
Josef Bacik 已提交
505 506 507 508
	nsock = nbd->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (unlikely(!nsock->sock)) {
		mutex_unlock(&nsock->tx_lock);
509 510
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on closed socket\n");
P
Pavel Machek 已提交
511
		goto error_out;
512 513
	}

J
Josef Bacik 已提交
514
	if (nbd_send_cmd(nbd, cmd, index) != 0) {
515 516
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed\n");
517
		req->errors++;
J
Josef Bacik 已提交
518
		nbd_end_request(cmd);
519 520
	}

J
Josef Bacik 已提交
521
	mutex_unlock(&nsock->tx_lock);
522 523 524 525 526

	return;

error_out:
	req->errors++;
J
Josef Bacik 已提交
527
	nbd_end_request(cmd);
528 529
}

J
Josef Bacik 已提交
530 531
static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
L
Linus Torvalds 已提交
532
{
J
Josef Bacik 已提交
533
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
L
Linus Torvalds 已提交
534

J
Josef Bacik 已提交
535 536 537 538 539 540 541 542 543 544
	/*
	 * Since we look at the bio's to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send.  This keeps us from dereferencing
	 * freed data if we have particularly fast completions (ie we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	init_completion(&cmd->send_complete);
J
Josef Bacik 已提交
545
	blk_mq_start_request(bd->rq);
J
Josef Bacik 已提交
546 547 548
	nbd_handle_cmd(cmd, hctx->queue_num);
	complete(&cmd->send_complete);

J
Josef Bacik 已提交
549
	return BLK_MQ_RQ_QUEUE_OK;
L
Linus Torvalds 已提交
550 551
}

J
Josef Bacik 已提交
552 553
static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
			  unsigned long arg)
M
Markus Pargmann 已提交
554
{
J
Josef Bacik 已提交
555
	struct socket *sock;
J
Josef Bacik 已提交
556 557
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
J
Josef Bacik 已提交
558 559 560 561 562
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;
M
Markus Pargmann 已提交
563

J
Josef Bacik 已提交
564 565 566 567 568 569
	if (!nbd->task_setup)
		nbd->task_setup = current;
	if (nbd->task_setup != current) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		return -EINVAL;
M
Markus Pargmann 已提交
570 571
	}

J
Josef Bacik 已提交
572 573 574 575 576 577 578 579 580
	socks = krealloc(nbd->socks, (nbd->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks)
		return -ENOMEM;
	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
	if (!nsock)
		return -ENOMEM;

	nbd->socks = socks;
M
Markus Pargmann 已提交
581

J
Josef Bacik 已提交
582 583 584
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	socks[nbd->num_connections++] = nsock;
M
Markus Pargmann 已提交
585

J
Josef Bacik 已提交
586 587
	if (max_part)
		bdev->bd_invalidated = 1;
J
Josef Bacik 已提交
588
	return 0;
M
Markus Pargmann 已提交
589 590
}

591 592 593
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
J
Josef Bacik 已提交
594
	nbd->runtime_flags = 0;
595 596 597 598
	nbd->blksize = 1024;
	nbd->bytesize = 0;
	set_capacity(nbd->disk, 0);
	nbd->flags = 0;
599
	nbd->tag_set.timeout = 0;
600 601 602 603 604 605 606 607 608 609 610 611 612
	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
}

/*
 * Make the block device writable again and zero its inode size.  When
 * partitions are enabled (max_part > 0) the partition table is re-read
 * and the device is marked invalidated.
 */
static void nbd_bdev_reset(struct block_device *bdev)
{
	set_device_ro(bdev, false);
	bdev->bd_inode->i_size = 0;

	if (max_part <= 0)
		return;

	blkdev_reread_part(bdev);
	bdev->bd_invalidated = 1;
}

613 614 615 616 617 618 619
static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
{
	if (nbd->flags & NBD_FLAG_READ_ONLY)
		set_device_ro(bdev, true);
	if (nbd->flags & NBD_FLAG_SEND_TRIM)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	if (nbd->flags & NBD_FLAG_SEND_FLUSH)
620
		blk_queue_write_cache(nbd->disk->queue, true, false);
621
	else
622
		blk_queue_write_cache(nbd->disk->queue, false, false);
623 624
}

J
Josef Bacik 已提交
625 626
static void send_disconnects(struct nbd_device *nbd)
{
A
Al Viro 已提交
627 628 629 630 631 632
	struct nbd_request request = {
		.magic = htonl(NBD_REQUEST_MAGIC),
		.type = htonl(NBD_CMD_DISC),
	};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
J
Josef Bacik 已提交
633 634 635
	int i, ret;

	for (i = 0; i < nbd->num_connections; i++) {
A
Al Viro 已提交
636 637
		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
		ret = sock_xmit(nbd, i, 1, &from, 0);
J
Josef Bacik 已提交
638 639 640 641 642 643
		if (ret <= 0)
			dev_err(disk_to_dev(nbd->disk),
				"Send disconnect failed %d\n", ret);
	}
}

J
Josef Bacik 已提交
644 645 646 647 648
static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
{
	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
	if (!nbd->socks)
		return -EINVAL;
M
Markus Pargmann 已提交
649

J
Josef Bacik 已提交
650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
	mutex_unlock(&nbd->config_lock);
	fsync_bdev(bdev);
	mutex_lock(&nbd->config_lock);

	/* Check again after getting mutex back.  */
	if (!nbd->socks)
		return -EINVAL;

	if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
			      &nbd->runtime_flags))
		send_disconnects(nbd);
	return 0;
}

static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
P
Pavel Machek 已提交
665
{
J
Josef Bacik 已提交
666 667 668 669 670 671 672 673 674 675 676 677
	sock_shutdown(nbd);
	nbd_clear_que(nbd);
	kill_bdev(bdev);
	nbd_bdev_reset(bdev);
	/*
	 * We want to give the run thread a chance to wait for everybody
	 * to clean up and then do it's own cleanup.
	 */
	if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
	    nbd->num_connections) {
		int i;

J
Josef Bacik 已提交
678 679
		for (i = 0; i < nbd->num_connections; i++) {
			sockfd_put(nbd->socks[i]->sock);
J
Josef Bacik 已提交
680
			kfree(nbd->socks[i]);
J
Josef Bacik 已提交
681
		}
J
Josef Bacik 已提交
682 683 684
		kfree(nbd->socks);
		nbd->socks = NULL;
		nbd->num_connections = 0;
P
Pavel Machek 已提交
685
	}
J
Josef Bacik 已提交
686
	nbd->task_setup = NULL;
J
Josef Bacik 已提交
687

J
Josef Bacik 已提交
688 689 690 691 692 693 694 695
	return 0;
}

static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
{
	struct recv_thread_args *args;
	int num_connections = nbd->num_connections;
	int error = 0, i;
P
Pavel Machek 已提交
696

J
Josef Bacik 已提交
697 698 699 700 701 702 703 704 705 706
	if (nbd->task_recv)
		return -EBUSY;
	if (!nbd->socks)
		return -EINVAL;
	if (num_connections > 1 &&
	    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
		error = -EINVAL;
		goto out_err;
	}
M
Markus Pargmann 已提交
707

J
Josef Bacik 已提交
708 709 710 711 712 713 714 715 716
	set_bit(NBD_RUNNING, &nbd->runtime_flags);
	blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
	args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
	if (!args) {
		error = -ENOMEM;
		goto out_err;
	}
	nbd->task_recv = current;
	mutex_unlock(&nbd->config_lock);
M
Markus Pargmann 已提交
717

J
Josef Bacik 已提交
718
	nbd_parse_flags(nbd, bdev);
M
Markus Pargmann 已提交
719

J
Josef Bacik 已提交
720 721 722 723
	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
	if (error) {
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		goto out_recv;
P
Pavel Machek 已提交
724 725
	}

J
Josef Bacik 已提交
726
	nbd_size_update(nbd, bdev);
727

J
Josef Bacik 已提交
728 729 730 731 732 733 734 735
	nbd_dev_dbg_init(nbd);
	for (i = 0; i < num_connections; i++) {
		sk_set_memalloc(nbd->socks[i]->sock->sk);
		atomic_inc(&nbd->recv_threads);
		INIT_WORK(&args[i].work, recv_work);
		args[i].nbd = nbd;
		args[i].index = i;
		queue_work(recv_workqueue, &args[i].work);
736
	}
J
Josef Bacik 已提交
737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
	wait_event_interruptible(nbd->recv_wq,
				 atomic_read(&nbd->recv_threads) == 0);
	for (i = 0; i < num_connections; i++)
		flush_work(&args[i].work);
	nbd_dev_dbg_close(nbd);
	nbd_size_clear(nbd, bdev);
	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
out_recv:
	mutex_lock(&nbd->config_lock);
	nbd->task_recv = NULL;
out_err:
	clear_bit(NBD_RUNNING, &nbd->runtime_flags);
	nbd_clear_sock(nbd, bdev);

	/* user requested, ignore socket errors */
	if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		error = 0;
	if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
		error = -ETIMEDOUT;
P
Pavel Machek 已提交
756

J
Josef Bacik 已提交
757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772
	nbd_reset(nbd);
	return error;
}

/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
		       unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NBD_DISCONNECT:
		return nbd_disconnect(nbd, bdev);
	case NBD_CLEAR_SOCK:
		return nbd_clear_sock(nbd, bdev);
	case NBD_SET_SOCK:
		return nbd_add_socket(nbd, bdev, arg);
	case NBD_SET_BLKSIZE:
773 774 775
		nbd_size_set(nbd, bdev, arg,
			     div_s64(nbd->bytesize, arg));
		return 0;
L
Linus Torvalds 已提交
776
	case NBD_SET_SIZE:
777 778 779
		nbd_size_set(nbd, bdev, nbd->blksize,
			     div_s64(arg, nbd->blksize));
		return 0;
780
	case NBD_SET_SIZE_BLOCKS:
781 782
		nbd_size_set(nbd, bdev, nbd->blksize, arg);
		return 0;
783
	case NBD_SET_TIMEOUT:
784
		nbd->tag_set.timeout = arg * HZ;
785
		return 0;
P
Pavel Machek 已提交
786

P
Paul Clements 已提交
787 788 789
	case NBD_SET_FLAGS:
		nbd->flags = arg;
		return 0;
J
Josef Bacik 已提交
790 791
	case NBD_DO_IT:
		return nbd_start_device(nbd, bdev);
L
Linus Torvalds 已提交
792
	case NBD_CLEAR_QUE:
793 794 795 796
		/*
		 * This is for compatibility only.  The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
L
Linus Torvalds 已提交
797 798
		return 0;
	case NBD_PRINT_DEBUG:
J
Josef Bacik 已提交
799 800 801 802
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
L
Linus Torvalds 已提交
803 804
		return 0;
	}
P
Pavel Machek 已提交
805 806 807 808 809 810
	return -ENOTTY;
}

/*
 * ioctl entry point: requires CAP_SYS_ADMIN and runs the command under
 * the device's config_lock.
 */
static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	nbd = bdev->bd_disk->private_data;
	BUG_ON(nbd->magic != NBD_MAGIC);

	mutex_lock(&nbd->config_lock);
	ret = __nbd_ioctl(bdev, nbd, cmd, arg);
	mutex_unlock(&nbd->config_lock);

	return ret;
}

826
static const struct block_device_operations nbd_fops =
L
Linus Torvalds 已提交
827 828
{
	.owner =	THIS_MODULE,
829
	.ioctl =	nbd_ioctl,
A
Al Viro 已提交
830
	.compat_ioctl =	nbd_ioctl,
L
Linus Torvalds 已提交
831 832
};

M
Markus Pargmann 已提交
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
#if IS_ENABLED(CONFIG_DEBUG_FS)

/* debugfs "tasks": print the pid of the receiving task, if there is one. */
static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	struct task_struct *recv = nbd->task_recv;

	if (!recv)
		return 0;

	seq_printf(s, "recv: %d\n", task_pid_nr(recv));
	return 0;
}

/* Open handler for "tasks": passes inode->i_private to nbd_dbg_tasks_show(). */
static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_tasks_show, inode->i_private);
}

/* File operations of the debugfs "tasks" file (a single_open seq_file). */
static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/*
 * debugfs "flags": print nbd->flags in hex, followed by the name of
 * every known flag that is set.
 */
static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	static const struct {
		u32 bit;
		const char *line;
	} known[] = {
		{ NBD_FLAG_HAS_FLAGS,	"NBD_FLAG_HAS_FLAGS\n" },
		{ NBD_FLAG_READ_ONLY,	"NBD_FLAG_READ_ONLY\n" },
		{ NBD_FLAG_SEND_FLUSH,	"NBD_FLAG_SEND_FLUSH\n" },
		{ NBD_FLAG_SEND_TRIM,	"NBD_FLAG_SEND_TRIM\n" },
	};
	struct nbd_device *nbd = s->private;
	u32 flags = nbd->flags;
	size_t i;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);

	seq_puts(s, "Known flags:\n");

	for (i = 0; i < ARRAY_SIZE(known); i++) {
		if (flags & known[i].bit)
			seq_puts(s, known[i].line);
	}

	return 0;
}

/* Open handler for "flags": passes inode->i_private to nbd_dbg_flags_show(). */
static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_flags_show, inode->i_private);
}

/* File operations of the debugfs "flags" file (a single_open seq_file). */
static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;
893 894 895

	if (!nbd_dbg_dir)
		return -EIO;
M
Markus Pargmann 已提交
896 897

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
898 899 900 901
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
M
Markus Pargmann 已提交
902 903 904
	}
	nbd->dbg_dir = dir;

905 906
	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
907
	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
908
	debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize);
909
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
M
Markus Pargmann 已提交
910 911 912 913 914 915 916 917 918 919 920 921 922 923

	return 0;
}

/* Remove the per-device debugfs directory and everything below it. */
static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	debugfs_remove_recursive(nbd->dbg_dir);
}

static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
924 925
	if (!dbg_dir)
		return -EIO;
M
Markus Pargmann 已提交
926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958

	nbd_dbg_dir = dbg_dir;

	return 0;
}

/* Remove the top-level "nbd" debugfs directory and everything below it. */
static void nbd_dbg_close(void)
{
	debugfs_remove_recursive(nbd_dbg_dir);
}

#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */

/* Variant built without debugfs: nothing to create, report success. */
static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

/* Variant built without debugfs: nothing to remove. */
static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

/* Variant built without debugfs: nothing to create, report success. */
static int nbd_dbg_init(void)
{
	return 0;
}

/* Variant built without debugfs: nothing to remove. */
static void nbd_dbg_close(void)
{
}

#endif

J
Josef Bacik 已提交
959 960 961 962 963 964 965 966 967 968 969 970
/*
 * blk-mq init_request handler: store @data in the command so each
 * request can find its device.  Always returns 0.
 */
static int nbd_init_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx,
			    unsigned int numa_node)
{
	struct nbd_cmd *pdu = blk_mq_rq_to_pdu(rq);

	pdu->nbd = data;

	return 0;
}

static struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.init_request	= nbd_init_request,
971
	.timeout	= nbd_xmit_timeout,
J
Josef Bacik 已提交
972 973
};

974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070
/*
 * Destroy one nbd device.
 *
 * Clears the magic value, then - if a gendisk is attached - deletes the
 * disk, cleans up its request queue, frees the blk-mq tag set and drops the
 * disk reference. The nbd_device itself is freed in every case.
 */
static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *gd = nbd->disk;

	nbd->magic = 0;

	if (!gd)
		goto free_nbd;

	del_gendisk(gd);
	blk_cleanup_queue(gd->queue);
	blk_mq_free_tag_set(&nbd->tag_set);
	put_disk(gd);

free_nbd:
	kfree(nbd);
}

static int nbd_dev_add(int index)
{
	struct nbd_device *nbd;
	struct gendisk *disk;
	struct request_queue *q;
	int err = -ENOMEM;

	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
	if (!nbd)
		goto out;

	disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_nbd;

	if (index >= 0) {
		err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
				GFP_KERNEL);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
		if (err >= 0)
			index = err;
	}
	if (err < 0)
		goto out_free_disk;

	nbd->disk = disk;
	nbd->tag_set.ops = &nbd_mq_ops;
	nbd->tag_set.nr_hw_queues = 1;
	nbd->tag_set.queue_depth = 128;
	nbd->tag_set.numa_node = NUMA_NO_NODE;
	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
	nbd->tag_set.driver_data = nbd;

	err = blk_mq_alloc_tag_set(&nbd->tag_set);
	if (err)
		goto out_free_idr;

	q = blk_mq_init_queue(&nbd->tag_set);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_free_tags;
	}
	disk->queue = q;

	/*
	 * Tell the block layer that we are not a rotational device
	 */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	disk->queue->limits.discard_granularity = 512;
	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
	disk->queue->limits.discard_zeroes_data = 0;
	blk_queue_max_hw_sectors(disk->queue, 65536);
	disk->queue->limits.max_sectors = 256;

	nbd->magic = NBD_MAGIC;
	mutex_init(&nbd->config_lock);
	disk->major = NBD_MAJOR;
	disk->first_minor = index << part_shift;
	disk->fops = &nbd_fops;
	disk->private_data = nbd;
	sprintf(disk->disk_name, "nbd%d", index);
	init_waitqueue_head(&nbd->recv_wq);
	nbd_reset(nbd);
	add_disk(disk);
	return index;

out_free_tags:
	blk_mq_free_tag_set(&nbd->tag_set);
out_free_idr:
	idr_remove(&nbd_index_idr, index);
out_free_disk:
	put_disk(disk);
out_free_nbd:
	kfree(nbd);
out:
	return err;
}

L
Linus Torvalds 已提交
1071 1072 1073 1074 1075 1076 1077 1078 1079
/*
 * Module initialization, cleanup and parameter definitions follow.
 */

/*
 * Module entry point.
 *
 * Validates the max_part and nbds_max parameters, derives part_shift,
 * allocates the receive workqueue, registers the NBD block major, sets up
 * debugfs and finally creates nbds_max devices. The results of
 * nbd_dbg_init() and nbd_dev_add() are not checked.
 *
 * Returns 0 on success, -EINVAL for out-of-range parameters, -ENOMEM when
 * the workqueue cannot be allocated, or -EIO when the major cannot be
 * registered.
 */
static int __init nbd_init(void)
{
	int idx;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = (max_part > 0) ? fls(max_part) : 0;

	/*
	 * Adjust max_part according to part_shift as it is exported
	 * to user space so that user can know the max number of
	 * partition kernel should be able to manage.
	 *
	 * Note that -1 is required because partition 0 is reserved
	 * for the whole disk.
	 */
	if (part_shift > 0)
		max_part = (1UL << part_shift) - 1;

	/* The partition check must come first: it bounds the shift below. */
	if ((1UL << part_shift) > DISK_MAX_PARTS ||
	    nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd"))
		goto err_destroy_wq;

	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (idx = 0; idx < nbds_max; idx++)
		nbd_dev_add(idx);
	mutex_unlock(&nbd_index_mutex);

	return 0;

err_destroy_wq:
	destroy_workqueue(recv_workqueue);
	return -EIO;
}
L
Linus Torvalds 已提交
1125

1126 1127 1128 1129
/*
 * idr_for_each() callback used at module unload: @ptr is the nbd_device
 * stored in the idr and is handed to nbd_dev_remove(). @id and @data are
 * unused. Always returns 0.
 */
static int nbd_exit_cb(int id, void *ptr, void *data)
{
	nbd_dev_remove(ptr);

	return 0;
}

/*
 * Module exit point: remove debugfs entries, remove every device registered
 * in nbd_index_idr, destroy the idr and the receive workqueue, and finally
 * unregister the NBD block major.
 */
static void __exit nbd_cleanup(void)
{
	nbd_dbg_close();

	/* Remove each registered device, then release the idr itself. */
	idr_for_each(&nbd_index_idr, nbd_exit_cb, NULL);
	idr_destroy(&nbd_index_idr);

	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

/* Hook nbd_init() and nbd_cleanup() up as the module's load/unload entry points. */
module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

1149
/* nbds_max: int parameter (permissions 0444); nbd_init() adds this many devices at load. */
module_param(nbds_max, int, 0444);
L
Laurent Vivier 已提交
1150 1151 1152
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
/*
 * max_part: int parameter (permissions 0444). nbd_init() rejects negative
 * values and rounds positive ones up to (2^fls(max_part)) - 1.
 */
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");