nbd.c 27.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6
/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 * 
P
Pavel Machek 已提交
7
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
L
Linus Torvalds 已提交
8 9
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
10
 * This file is released under GPLv2 or later.
L
Linus Torvalds 已提交
11
 *
12
 * (part of code stolen from loop.c)
L
Linus Torvalds 已提交
13 14 15 16 17 18 19 20 21 22 23 24 25 26
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
27
#include <linux/mutex.h>
28 29 30
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
31
#include <linux/slab.h>
L
Linus Torvalds 已提交
32
#include <net/sock.h>
33
#include <linux/net.h>
34
#include <linux/kthread.h>
M
Markus Pargmann 已提交
35
#include <linux/types.h>
M
Markus Pargmann 已提交
36
#include <linux/debugfs.h>
J
Josef Bacik 已提交
37
#include <linux/blk-mq.h>
L
Linus Torvalds 已提交
38

39
#include <linux/uaccess.h>
L
Linus Torvalds 已提交
40 41 42 43
#include <asm/types.h>

#include <linux/nbd.h>

44 45 46
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);

J
Josef Bacik 已提交
47 48 49 50 51
/* One connection of an nbd device: a socket plus the lock guarding sends. */
struct nbd_sock {
	struct socket *sock;	/* socket looked up from the fd given to NBD_SET_SOCK */
	struct mutex tx_lock;	/* held while sending on, or shutting down, @sock */
};

J
Josef Bacik 已提交
52 53
/* Bit numbers for nbd_device.runtime_flags (used with set_bit/test_bit). */
#define NBD_TIMEDOUT			0	/* set by the request timeout handler */
#define NBD_DISCONNECT_REQUESTED	1	/* set when NBD_DISCONNECT is issued */
#define NBD_DISCONNECTED		2	/* set once the sockets were shut down */
#define NBD_RUNNING			3	/* set while NBD_DO_IT is in progress */
J
Josef Bacik 已提交
56

57
struct nbd_device {
M
Markus Pargmann 已提交
58
	u32 flags;
J
Josef Bacik 已提交
59
	unsigned long runtime_flags;
J
Josef Bacik 已提交
60
	struct nbd_sock **socks;
61 62
	int magic;

J
Josef Bacik 已提交
63
	struct blk_mq_tag_set tag_set;
64

J
Josef Bacik 已提交
65
	struct mutex config_lock;
66
	struct gendisk *disk;
J
Josef Bacik 已提交
67 68 69
	int num_connections;
	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
70
	loff_t blksize;
M
Markus Pargmann 已提交
71
	loff_t bytesize;
M
Markus Pargmann 已提交
72 73

	struct task_struct *task_recv;
J
Josef Bacik 已提交
74
	struct task_struct *task_setup;
M
Markus Pargmann 已提交
75 76 77 78

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
79 80
};

J
Josef Bacik 已提交
81 82
struct nbd_cmd {
	struct nbd_device *nbd;
J
Josef Bacik 已提交
83
	struct completion send_complete;
J
Josef Bacik 已提交
84 85
};

M
Markus Pargmann 已提交
86 87 88 89 90 91
#if IS_ENABLED(CONFIG_DEBUG_FS)
/* Top-level "nbd" debugfs directory; per-device directories live below it. */
static struct dentry *nbd_dbg_dir;
#endif

/* Disk name of @nbd, as stored in its gendisk. */
#define nbd_name(nbd) ((nbd)->disk->disk_name)

92
/* Value stored in nbd_device.magic; entry points BUG_ON() a mismatch. */
#define NBD_MAGIC 0x68797548

/* NOTE(review): not referenced in the visible part of the file. */
static unsigned int nbds_max = 16;
/* Non-zero enables the partition rescan handling in the socket/bdev helpers. */
static int max_part;
/* Workqueue on which the per-connection recv_work items are queued. */
static struct workqueue_struct *recv_workqueue;
/* log2 of the number of minors per disk, see alloc_disk(1 << part_shift). */
static int part_shift;
L
Linus Torvalds 已提交
98

J
Josef Bacik 已提交
99 100 101 102
/* Defined further down (debugfs or stub variants); used by nbd_start_device. */
static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);


103
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
L
Linus Torvalds 已提交
104
{
105
	return disk_to_dev(nbd->disk);
L
Linus Torvalds 已提交
106 107
}

108 109 110 111 112
static bool nbd_is_connected(struct nbd_device *nbd)
{
	return !!nbd->task_recv;
}

L
Linus Torvalds 已提交
113 114 115 116 117 118
static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case  NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case  NBD_CMD_DISC: return "disconnect";
A
Alex Bligh 已提交
119
	case NBD_CMD_FLUSH: return "flush";
P
Paul Clements 已提交
120
	case  NBD_CMD_TRIM: return "trim/discard";
L
Linus Torvalds 已提交
121 122 123 124
	}
	return "invalid";
}

125 126
static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
127
	bd_set_size(bdev, 0);
128 129 130 131 132 133 134 135
	set_capacity(nbd->disk, 0);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);

	return 0;
}

static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
136 137 138
	blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
	bd_set_size(bdev, nbd->bytesize);
139 140 141 142
	set_capacity(nbd->disk, nbd->bytesize >> 9);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

143
/*
 * Record a new block size and total size (@blocksize * @nr_blocks).  The
 * values are only applied to the disk right away if the device is connected.
 */
static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
			loff_t blocksize, loff_t nr_blocks)
{
	nbd->blksize = blocksize;
	nbd->bytesize = blocksize * nr_blocks;

	if (!nbd_is_connected(nbd))
		return;

	nbd_size_update(nbd, bdev);
}

J
Josef Bacik 已提交
152
static void nbd_end_request(struct nbd_cmd *cmd)
L
Linus Torvalds 已提交
153
{
J
Josef Bacik 已提交
154 155
	struct nbd_device *nbd = cmd->nbd;
	struct request *req = blk_mq_rq_from_pdu(cmd);
156
	int error = req->errors ? -EIO : 0;
L
Linus Torvalds 已提交
157

J
Josef Bacik 已提交
158
	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
159
		error ? "failed" : "done");
L
Linus Torvalds 已提交
160

J
Josef Bacik 已提交
161
	blk_mq_complete_request(req, error);
L
Linus Torvalds 已提交
162 163
}

164 165 166
/*
 * Forcibly shutdown the socket causing all listeners to error
 */
167
static void sock_shutdown(struct nbd_device *nbd)
168
{
J
Josef Bacik 已提交
169
	int i;
M
Markus Pargmann 已提交
170

J
Josef Bacik 已提交
171 172 173
	if (nbd->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
M
Markus Pargmann 已提交
174
		return;
M
Markus Pargmann 已提交
175

J
Josef Bacik 已提交
176 177 178 179 180 181 182
	for (i = 0; i < nbd->num_connections; i++) {
		struct nbd_sock *nsock = nbd->socks[i];
		mutex_lock(&nsock->tx_lock);
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
183 184
}

185 186
/*
 * blk-mq timeout handler: flag the device as timed out, mark the request as
 * failed and shut down all sockets under config_lock.
 */
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;

	dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");

	set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
	req->errors++;

	mutex_lock(&nbd->config_lock);
	sock_shutdown(nbd);
	mutex_unlock(&nbd->config_lock);

	return BLK_EH_HANDLED;
}

L
Linus Torvalds 已提交
201 202 203
/*
 *  Send or receive packet.
 */
J
Josef Bacik 已提交
204 205
static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf,
		     int size, int msg_flags)
L
Linus Torvalds 已提交
206
{
J
Josef Bacik 已提交
207
	struct socket *sock = nbd->socks[index]->sock;
L
Linus Torvalds 已提交
208 209 210
	int result;
	struct msghdr msg;
	struct kvec iov;
211
	unsigned long pflags = current->flags;
L
Linus Torvalds 已提交
212

213
	if (unlikely(!sock)) {
214
		dev_err_ratelimited(disk_to_dev(nbd->disk),
215 216
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
217 218 219
		return -EINVAL;
	}

220
	current->flags |= PF_MEMALLOC;
L
Linus Torvalds 已提交
221
	do {
222
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
L
Linus Torvalds 已提交
223 224 225 226 227 228 229 230
		iov.iov_base = buf;
		iov.iov_len = size;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

M
Markus Pargmann 已提交
231
		if (send)
L
Linus Torvalds 已提交
232
			result = kernel_sendmsg(sock, &msg, &iov, 1, size);
M
Markus Pargmann 已提交
233
		else
234 235
			result = kernel_recvmsg(sock, &msg, &iov, 1, size,
						msg.msg_flags);
L
Linus Torvalds 已提交
236 237 238 239 240 241 242 243 244 245

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		size -= result;
		buf += result;
	} while (size > 0);

246
	tsk_restore_flags(current, pflags, PF_MEMALLOC);
L
Linus Torvalds 已提交
247 248 249 250

	return result;
}

J
Josef Bacik 已提交
251 252
static inline int sock_send_bvec(struct nbd_device *nbd, int index,
				 struct bio_vec *bvec, int flags)
L
Linus Torvalds 已提交
253 254 255
{
	int result;
	void *kaddr = kmap(bvec->bv_page);
J
Josef Bacik 已提交
256
	result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset,
257
			   bvec->bv_len, flags);
L
Linus Torvalds 已提交
258 259 260 261
	kunmap(bvec->bv_page);
	return result;
}

262
/* always call with the tx_lock held */
J
Josef Bacik 已提交
263
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
L
Linus Torvalds 已提交
264
{
J
Josef Bacik 已提交
265
	struct request *req = blk_mq_rq_from_pdu(cmd);
266
	int result;
L
Linus Torvalds 已提交
267
	struct nbd_request request;
268
	unsigned long size = blk_rq_bytes(req);
269
	struct bio *bio;
C
Christoph Hellwig 已提交
270
	u32 type;
J
Josef Bacik 已提交
271
	u32 tag = blk_mq_unique_tag(req);
C
Christoph Hellwig 已提交
272

273 274
	switch (req_op(req)) {
	case REQ_OP_DISCARD:
C
Christoph Hellwig 已提交
275
		type = NBD_CMD_TRIM;
276 277
		break;
	case REQ_OP_FLUSH:
C
Christoph Hellwig 已提交
278
		type = NBD_CMD_FLUSH;
279 280
		break;
	case REQ_OP_WRITE:
C
Christoph Hellwig 已提交
281
		type = NBD_CMD_WRITE;
282 283
		break;
	case REQ_OP_READ:
C
Christoph Hellwig 已提交
284
		type = NBD_CMD_READ;
285 286 287 288
		break;
	default:
		return -EIO;
	}
L
Linus Torvalds 已提交
289

290 291 292 293 294 295 296
	if (rq_data_dir(req) == WRITE &&
	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

297
	memset(&request, 0, sizeof(request));
L
Linus Torvalds 已提交
298
	request.magic = htonl(NBD_REQUEST_MAGIC);
C
Christoph Hellwig 已提交
299
	request.type = htonl(type);
J
Josef Bacik 已提交
300
	if (type != NBD_CMD_FLUSH) {
A
Alex Bligh 已提交
301 302 303
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
J
Josef Bacik 已提交
304
	memcpy(request.handle, &tag, sizeof(tag));
L
Linus Torvalds 已提交
305

306
	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
J
Josef Bacik 已提交
307
		cmd, nbdcmd_to_ascii(type),
308
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
J
Josef Bacik 已提交
309
	result = sock_xmit(nbd, index, 1, &request, sizeof(request),
C
Christoph Hellwig 已提交
310
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
L
Linus Torvalds 已提交
311
	if (result <= 0) {
312
		dev_err_ratelimited(disk_to_dev(nbd->disk),
313
			"Send control failed (result %d)\n", result);
314
		return -EIO;
L
Linus Torvalds 已提交
315 316
	}

317 318 319 320 321 322 323
	if (type != NBD_CMD_WRITE)
		return 0;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
324
		struct bio_vec bvec;
325 326 327

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
328
			int flags = is_last ? 0 : MSG_MORE;
329

330
			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
J
Josef Bacik 已提交
331
				cmd, bvec.bv_len);
J
Josef Bacik 已提交
332
			result = sock_send_bvec(nbd, index, &bvec, flags);
333
			if (result <= 0) {
334
				dev_err(disk_to_dev(nbd->disk),
335 336
					"Send data failed (result %d)\n",
					result);
337
				return -EIO;
338
			}
339 340 341 342 343 344 345 346
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
L
Linus Torvalds 已提交
347
		}
348
		bio = next;
L
Linus Torvalds 已提交
349 350 351 352
	}
	return 0;
}

J
Josef Bacik 已提交
353 354
static inline int sock_recv_bvec(struct nbd_device *nbd, int index,
				 struct bio_vec *bvec)
L
Linus Torvalds 已提交
355 356 357
{
	int result;
	void *kaddr = kmap(bvec->bv_page);
J
Josef Bacik 已提交
358 359
	result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset,
			   bvec->bv_len, MSG_WAITALL);
L
Linus Torvalds 已提交
360 361 362 363 364
	kunmap(bvec->bv_page);
	return result;
}

/* NULL returned = something went wrong, inform userspace */
J
Josef Bacik 已提交
365
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
L
Linus Torvalds 已提交
366 367 368
{
	int result;
	struct nbd_reply reply;
J
Josef Bacik 已提交
369 370 371
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u16 hwq;
J
Josef Bacik 已提交
372
	u32 tag;
L
Linus Torvalds 已提交
373 374

	reply.magic = 0;
J
Josef Bacik 已提交
375
	result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL);
L
Linus Torvalds 已提交
376
	if (result <= 0) {
J
Josef Bacik 已提交
377 378 379 380
		if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) &&
		    !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
381
		return ERR_PTR(result);
L
Linus Torvalds 已提交
382
	}
383 384

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
385
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
386
				(unsigned long)ntohl(reply.magic));
387
		return ERR_PTR(-EPROTO);
388 389
	}

J
Josef Bacik 已提交
390
	memcpy(&tag, reply.handle, sizeof(u32));
391

J
Josef Bacik 已提交
392 393 394 395 396 397 398 399
	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
L
Linus Torvalds 已提交
400
	}
J
Josef Bacik 已提交
401
	cmd = blk_mq_rq_to_pdu(req);
L
Linus Torvalds 已提交
402
	if (ntohl(reply.error)) {
403
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
404
			ntohl(reply.error));
L
Linus Torvalds 已提交
405
		req->errors++;
J
Josef Bacik 已提交
406
		return cmd;
L
Linus Torvalds 已提交
407 408
	}

J
Josef Bacik 已提交
409
	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
C
Christoph Hellwig 已提交
410
	if (rq_data_dir(req) != WRITE) {
411
		struct req_iterator iter;
412
		struct bio_vec bvec;
413 414

		rq_for_each_segment(bvec, req, iter) {
J
Josef Bacik 已提交
415
			result = sock_recv_bvec(nbd, index, &bvec);
416
			if (result <= 0) {
417
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
418
					result);
419
				req->errors++;
J
Josef Bacik 已提交
420
				return cmd;
421
			}
422
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
J
Josef Bacik 已提交
423
				cmd, bvec.bv_len);
L
Linus Torvalds 已提交
424
		}
J
Josef Bacik 已提交
425 426 427
	} else {
		/* See the comment in nbd_queue_rq. */
		wait_for_completion(&cmd->send_complete);
L
Linus Torvalds 已提交
428
	}
J
Josef Bacik 已提交
429
	return cmd;
L
Linus Torvalds 已提交
430 431
}

432 433
static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
434
{
435
	struct gendisk *disk = dev_to_disk(dev);
M
Markus Pargmann 已提交
436
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;
437

M
Markus Pargmann 已提交
438
	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
439 440
}

441
static struct device_attribute pid_attr = {
442
	.attr = { .name = "pid", .mode = S_IRUGO},
443 444 445
	.show = pid_show,
};

J
Josef Bacik 已提交
446 447 448 449 450 451 452
/* Per-connection work item handed to recv_work(). */
struct recv_thread_args {
	struct work_struct work;	/* queued on recv_workqueue */
	struct nbd_device *nbd;		/* device the connection belongs to */
	int index;			/* index of the connection in nbd->socks */
};

static void recv_work(struct work_struct *work)
L
Linus Torvalds 已提交
453
{
J
Josef Bacik 已提交
454 455 456 457
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
J
Josef Bacik 已提交
458
	struct nbd_cmd *cmd;
J
Josef Bacik 已提交
459
	int ret = 0;
L
Linus Torvalds 已提交
460

461
	BUG_ON(nbd->magic != NBD_MAGIC);
462
	while (1) {
J
Josef Bacik 已提交
463
		cmd = nbd_read_stat(nbd, args->index);
J
Josef Bacik 已提交
464 465
		if (IS_ERR(cmd)) {
			ret = PTR_ERR(cmd);
466 467 468
			break;
		}

J
Josef Bacik 已提交
469
		nbd_end_request(cmd);
470
	}
471

J
Josef Bacik 已提交
472 473 474 475 476 477 478 479
	/*
	 * We got an error, shut everybody down if this wasn't the result of a
	 * disconnect request.
	 */
	if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		sock_shutdown(nbd);
	atomic_dec(&nbd->recv_threads);
	wake_up(&nbd->recv_wq);
L
Linus Torvalds 已提交
480 481
}

J
Josef Bacik 已提交
482
/* Busy-tag iterator callback: fail every request that was already started. */
static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	if (!blk_mq_request_started(req))
		return;

	req->errors++;
	nbd_end_request(blk_mq_rq_to_pdu(req));
}

static void nbd_clear_que(struct nbd_device *nbd)
{
495
	BUG_ON(nbd->magic != NBD_MAGIC);
L
Linus Torvalds 已提交
496

J
Josef Bacik 已提交
497
	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
498
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
L
Linus Torvalds 已提交
499 500
}

501

J
Josef Bacik 已提交
502
static void nbd_handle_cmd(struct nbd_cmd *cmd, int index)
503
{
J
Josef Bacik 已提交
504 505
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
J
Josef Bacik 已提交
506
	struct nbd_sock *nsock;
J
Josef Bacik 已提交
507

J
Josef Bacik 已提交
508
	if (index >= nbd->num_connections) {
509 510
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
511
		goto error_out;
J
Josef Bacik 已提交
512
	}
513

J
Josef Bacik 已提交
514
	if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) {
515 516
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on closed socket\n");
J
Josef Bacik 已提交
517 518 519
		goto error_out;
	}

520 521
	req->errors = 0;

J
Josef Bacik 已提交
522 523 524 525
	nsock = nbd->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (unlikely(!nsock->sock)) {
		mutex_unlock(&nsock->tx_lock);
526 527
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on closed socket\n");
P
Pavel Machek 已提交
528
		goto error_out;
529 530
	}

J
Josef Bacik 已提交
531
	if (nbd_send_cmd(nbd, cmd, index) != 0) {
532 533
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed\n");
534
		req->errors++;
J
Josef Bacik 已提交
535
		nbd_end_request(cmd);
536 537
	}

J
Josef Bacik 已提交
538
	mutex_unlock(&nsock->tx_lock);
539 540 541 542 543

	return;

error_out:
	req->errors++;
J
Josef Bacik 已提交
544
	nbd_end_request(cmd);
545 546
}

J
Josef Bacik 已提交
547 548
static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
L
Linus Torvalds 已提交
549
{
J
Josef Bacik 已提交
550
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
L
Linus Torvalds 已提交
551

J
Josef Bacik 已提交
552 553 554 555 556 557 558 559 560 561
	/*
	 * Since we look at the bio's to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send.  This keeps us from dereferencing
	 * freed data if we have particularly fast completions (ie we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	init_completion(&cmd->send_complete);
J
Josef Bacik 已提交
562
	blk_mq_start_request(bd->rq);
J
Josef Bacik 已提交
563 564 565
	nbd_handle_cmd(cmd, hctx->queue_num);
	complete(&cmd->send_complete);

J
Josef Bacik 已提交
566
	return BLK_MQ_RQ_QUEUE_OK;
L
Linus Torvalds 已提交
567 568
}

J
Josef Bacik 已提交
569 570
static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
			  unsigned long arg)
M
Markus Pargmann 已提交
571
{
J
Josef Bacik 已提交
572
	struct socket *sock;
J
Josef Bacik 已提交
573 574
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
J
Josef Bacik 已提交
575 576 577 578 579
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;
M
Markus Pargmann 已提交
580

J
Josef Bacik 已提交
581 582 583 584 585 586
	if (!nbd->task_setup)
		nbd->task_setup = current;
	if (nbd->task_setup != current) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		return -EINVAL;
M
Markus Pargmann 已提交
587 588
	}

J
Josef Bacik 已提交
589 590 591 592 593 594 595 596 597
	socks = krealloc(nbd->socks, (nbd->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks)
		return -ENOMEM;
	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
	if (!nsock)
		return -ENOMEM;

	nbd->socks = socks;
M
Markus Pargmann 已提交
598

J
Josef Bacik 已提交
599 600 601
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	socks[nbd->num_connections++] = nsock;
M
Markus Pargmann 已提交
602

J
Josef Bacik 已提交
603 604
	if (max_part)
		bdev->bd_invalidated = 1;
J
Josef Bacik 已提交
605
	return 0;
M
Markus Pargmann 已提交
606 607
}

608 609 610
/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
J
Josef Bacik 已提交
611
	nbd->runtime_flags = 0;
612 613 614 615
	nbd->blksize = 1024;
	nbd->bytesize = 0;
	set_capacity(nbd->disk, 0);
	nbd->flags = 0;
616
	nbd->tag_set.timeout = 0;
617 618 619 620 621 622 623 624 625 626 627 628 629
	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
}

/*
 * Make @bdev writable again and zero its inode size.  With partitions
 * enabled (max_part > 0) the partition table is re-read and the device is
 * marked invalidated.
 */
static void nbd_bdev_reset(struct block_device *bdev)
{
	set_device_ro(bdev, false);
	bdev->bd_inode->i_size = 0;

	if (max_part <= 0)
		return;

	blkdev_reread_part(bdev);
	bdev->bd_invalidated = 1;
}

630 631 632 633 634 635 636
static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
{
	if (nbd->flags & NBD_FLAG_READ_ONLY)
		set_device_ro(bdev, true);
	if (nbd->flags & NBD_FLAG_SEND_TRIM)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	if (nbd->flags & NBD_FLAG_SEND_FLUSH)
637
		blk_queue_write_cache(nbd->disk->queue, true, false);
638
	else
639
		blk_queue_write_cache(nbd->disk->queue, false, false);
640 641
}

J
Josef Bacik 已提交
642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657
static void send_disconnects(struct nbd_device *nbd)
{
	struct nbd_request request = {};
	int i, ret;

	request.magic = htonl(NBD_REQUEST_MAGIC);
	request.type = htonl(NBD_CMD_DISC);

	for (i = 0; i < nbd->num_connections; i++) {
		ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0);
		if (ret <= 0)
			dev_err(disk_to_dev(nbd->disk),
				"Send disconnect failed %d\n", ret);
	}
}

J
Josef Bacik 已提交
658 659 660 661 662
/*
 * NBD_DISCONNECT handler: flush @bdev and send a disconnect request on all
 * connections (only once per session).  config_lock is dropped around the
 * flush.  Returns -EINVAL if the device has no sockets.
 */
static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
{
	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
	if (!nbd->socks)
		return -EINVAL;

	mutex_unlock(&nbd->config_lock);
	fsync_bdev(bdev);
	mutex_lock(&nbd->config_lock);

	/* Check again after getting mutex back.  */
	if (!nbd->socks)
		return -EINVAL;

	/* Whoever sets the bit first is the one sending the requests. */
	if (test_and_set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		return 0;

	send_disconnects(nbd);
	return 0;
}

static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
P
Pavel Machek 已提交
679
{
J
Josef Bacik 已提交
680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
	sock_shutdown(nbd);
	nbd_clear_que(nbd);
	kill_bdev(bdev);
	nbd_bdev_reset(bdev);
	/*
	 * We want to give the run thread a chance to wait for everybody
	 * to clean up and then do it's own cleanup.
	 */
	if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
	    nbd->num_connections) {
		int i;

		for (i = 0; i < nbd->num_connections; i++)
			kfree(nbd->socks[i]);
		kfree(nbd->socks);
		nbd->socks = NULL;
		nbd->num_connections = 0;
P
Pavel Machek 已提交
697
	}
J
Josef Bacik 已提交
698
	nbd->task_setup = NULL;
J
Josef Bacik 已提交
699

J
Josef Bacik 已提交
700 701 702 703 704 705 706 707
	return 0;
}

static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
{
	struct recv_thread_args *args;
	int num_connections = nbd->num_connections;
	int error = 0, i;
P
Pavel Machek 已提交
708

J
Josef Bacik 已提交
709 710 711 712 713 714 715 716 717 718
	if (nbd->task_recv)
		return -EBUSY;
	if (!nbd->socks)
		return -EINVAL;
	if (num_connections > 1 &&
	    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
		error = -EINVAL;
		goto out_err;
	}
M
Markus Pargmann 已提交
719

J
Josef Bacik 已提交
720 721 722 723 724 725 726 727 728
	set_bit(NBD_RUNNING, &nbd->runtime_flags);
	blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
	args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
	if (!args) {
		error = -ENOMEM;
		goto out_err;
	}
	nbd->task_recv = current;
	mutex_unlock(&nbd->config_lock);
M
Markus Pargmann 已提交
729

J
Josef Bacik 已提交
730
	nbd_parse_flags(nbd, bdev);
M
Markus Pargmann 已提交
731

J
Josef Bacik 已提交
732 733 734 735
	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
	if (error) {
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		goto out_recv;
P
Pavel Machek 已提交
736 737
	}

J
Josef Bacik 已提交
738
	nbd_size_update(nbd, bdev);
739

J
Josef Bacik 已提交
740 741 742 743 744 745 746 747
	nbd_dev_dbg_init(nbd);
	for (i = 0; i < num_connections; i++) {
		sk_set_memalloc(nbd->socks[i]->sock->sk);
		atomic_inc(&nbd->recv_threads);
		INIT_WORK(&args[i].work, recv_work);
		args[i].nbd = nbd;
		args[i].index = i;
		queue_work(recv_workqueue, &args[i].work);
748
	}
J
Josef Bacik 已提交
749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
	wait_event_interruptible(nbd->recv_wq,
				 atomic_read(&nbd->recv_threads) == 0);
	for (i = 0; i < num_connections; i++)
		flush_work(&args[i].work);
	nbd_dev_dbg_close(nbd);
	nbd_size_clear(nbd, bdev);
	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
out_recv:
	mutex_lock(&nbd->config_lock);
	nbd->task_recv = NULL;
out_err:
	clear_bit(NBD_RUNNING, &nbd->runtime_flags);
	nbd_clear_sock(nbd, bdev);

	/* user requested, ignore socket errors */
	if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		error = 0;
	if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
		error = -ETIMEDOUT;
P
Pavel Machek 已提交
768

J
Josef Bacik 已提交
769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784
	nbd_reset(nbd);
	return error;
}

/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
		       unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NBD_DISCONNECT:
		return nbd_disconnect(nbd, bdev);
	case NBD_CLEAR_SOCK:
		return nbd_clear_sock(nbd, bdev);
	case NBD_SET_SOCK:
		return nbd_add_socket(nbd, bdev, arg);
	case NBD_SET_BLKSIZE:
785 786 787
		nbd_size_set(nbd, bdev, arg,
			     div_s64(nbd->bytesize, arg));
		return 0;
L
Linus Torvalds 已提交
788
	case NBD_SET_SIZE:
789 790 791
		nbd_size_set(nbd, bdev, nbd->blksize,
			     div_s64(arg, nbd->blksize));
		return 0;
792
	case NBD_SET_SIZE_BLOCKS:
793 794
		nbd_size_set(nbd, bdev, nbd->blksize, arg);
		return 0;
795
	case NBD_SET_TIMEOUT:
796
		nbd->tag_set.timeout = arg * HZ;
797
		return 0;
P
Pavel Machek 已提交
798

P
Paul Clements 已提交
799 800 801
	case NBD_SET_FLAGS:
		nbd->flags = arg;
		return 0;
J
Josef Bacik 已提交
802 803
	case NBD_DO_IT:
		return nbd_start_device(nbd, bdev);
L
Linus Torvalds 已提交
804
	case NBD_CLEAR_QUE:
805 806 807 808
		/*
		 * This is for compatibility only.  The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
L
Linus Torvalds 已提交
809 810
		return 0;
	case NBD_PRINT_DEBUG:
J
Josef Bacik 已提交
811 812 813 814
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
L
Linus Torvalds 已提交
815 816
		return 0;
	}
P
Pavel Machek 已提交
817 818 819 820 821 822
	return -ENOTTY;
}

/*
 * Block device ioctl entry point.  Requires CAP_SYS_ADMIN (-EPERM otherwise)
 * and runs the actual handler under config_lock.
 */
static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd = bdev->bd_disk->private_data;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	BUG_ON(nbd->magic != NBD_MAGIC);

	mutex_lock(&nbd->config_lock);
	ret = __nbd_ioctl(bdev, nbd, cmd, arg);
	mutex_unlock(&nbd->config_lock);

	return ret;
}

838
static const struct block_device_operations nbd_fops =
L
Linus Torvalds 已提交
839 840
{
	.owner =	THIS_MODULE,
841
	.ioctl =	nbd_ioctl,
A
Al Viro 已提交
842
	.compat_ioctl =	nbd_ioctl,
L
Linus Torvalds 已提交
843 844
};

M
Markus Pargmann 已提交
845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904
#if IS_ENABLED(CONFIG_DEBUG_FS)

/* debugfs "tasks": print the pid of the receive task, if there is one. */
static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	struct task_struct *task = nbd->task_recv;

	if (!task)
		return 0;

	seq_printf(s, "recv: %d\n", task_pid_nr(task));
	return 0;
}

/* Open handler of the debugfs "tasks" file; passes i_private to the show callback. */
static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	void *priv = inode->i_private;

	return single_open(file, nbd_dbg_tasks_show, priv);
}

/* File operations of the per-device debugfs "tasks" file. */
static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* debugfs "flags": print nbd->flags in hex followed by the known flag names. */
static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	const u32 flags = nbd->flags;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);
	seq_puts(s, "Known flags:\n");

	if (flags & NBD_FLAG_HAS_FLAGS)
		seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");

	if (flags & NBD_FLAG_READ_ONLY)
		seq_puts(s, "NBD_FLAG_READ_ONLY\n");

	if (flags & NBD_FLAG_SEND_FLUSH)
		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");

	if (flags & NBD_FLAG_SEND_TRIM)
		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");

	return 0;
}

/* Open handler of the debugfs "flags" file; passes i_private to the show callback. */
static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	void *priv = inode->i_private;

	return single_open(file, nbd_dbg_flags_show, priv);
}

/* File operations of the per-device debugfs "flags" file. */
static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;
905 906 907

	if (!nbd_dbg_dir)
		return -EIO;
M
Markus Pargmann 已提交
908 909

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
910 911 912 913
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
M
Markus Pargmann 已提交
914 915 916
	}
	nbd->dbg_dir = dir;

917 918
	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
919
	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
920
	debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize);
921
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
M
Markus Pargmann 已提交
922 923 924 925 926 927 928 929 930 931 932 933 934 935

	return 0;
}

/* Remove the per-device debugfs directory together with everything below it. */
static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	struct dentry *dir = nbd->dbg_dir;

	debugfs_remove_recursive(dir);
}

static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
936 937
	if (!dbg_dir)
		return -EIO;
M
Markus Pargmann 已提交
938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970

	nbd_dbg_dir = dbg_dir;

	return 0;
}

/* Remove the top-level "nbd" debugfs directory and all of its children. */
static void nbd_dbg_close(void)
{
	struct dentry *dir = nbd_dbg_dir;

	debugfs_remove_recursive(dir);
}

#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */

/* No-op variants used when CONFIG_DEBUG_FS is disabled. */

/* Nothing to create; always succeeds. */
static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

/* Nothing to remove. */
static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

/* Nothing to create; always succeeds. */
static int nbd_dbg_init(void)
{
	return 0;
}

/* Nothing to remove. */
static void nbd_dbg_close(void)
{
}

#endif

J
Josef Bacik 已提交
971 972 973 974 975 976 977 978 979 980 981 982
/*
 * blk-mq init_request hook: remember the owning device (passed in as @data)
 * in the per-request nbd_cmd.  Always returns 0.
 */
static int nbd_init_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx,
			    unsigned int numa_node)
{
	struct nbd_device *nbd = data;
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->nbd = nbd;

	return 0;
}

static struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.init_request	= nbd_init_request,
983
	.timeout	= nbd_xmit_timeout,
J
Josef Bacik 已提交
984 985
};

986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
/*
 * Tear down one nbd device: clear its magic, and if a gendisk is attached
 * unregister it, clean up its queue, free the tag set and drop the disk
 * reference.  The nbd_device itself is freed in every case.
 */
static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *gd = nbd->disk;

	nbd->magic = 0;
	if (!gd)
		goto free_dev;

	del_gendisk(gd);
	blk_cleanup_queue(gd->queue);
	blk_mq_free_tag_set(&nbd->tag_set);
	put_disk(gd);

free_dev:
	kfree(nbd);
}

static int nbd_dev_add(int index)
{
	struct nbd_device *nbd;
	struct gendisk *disk;
	struct request_queue *q;
	int err = -ENOMEM;

	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
	if (!nbd)
		goto out;

	disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_nbd;

	if (index >= 0) {
		err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
				GFP_KERNEL);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
		if (err >= 0)
			index = err;
	}
	if (err < 0)
		goto out_free_disk;

	nbd->disk = disk;
	nbd->tag_set.ops = &nbd_mq_ops;
	nbd->tag_set.nr_hw_queues = 1;
	nbd->tag_set.queue_depth = 128;
	nbd->tag_set.numa_node = NUMA_NO_NODE;
	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
	nbd->tag_set.driver_data = nbd;

	err = blk_mq_alloc_tag_set(&nbd->tag_set);
	if (err)
		goto out_free_idr;

	q = blk_mq_init_queue(&nbd->tag_set);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_free_tags;
	}
	disk->queue = q;

	/*
	 * Tell the block layer that we are not a rotational device
	 */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	disk->queue->limits.discard_granularity = 512;
	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
	disk->queue->limits.discard_zeroes_data = 0;
	blk_queue_max_hw_sectors(disk->queue, 65536);
	disk->queue->limits.max_sectors = 256;

	nbd->magic = NBD_MAGIC;
	mutex_init(&nbd->config_lock);
	disk->major = NBD_MAJOR;
	disk->first_minor = index << part_shift;
	disk->fops = &nbd_fops;
	disk->private_data = nbd;
	sprintf(disk->disk_name, "nbd%d", index);
	init_waitqueue_head(&nbd->recv_wq);
	nbd_reset(nbd);
	add_disk(disk);
	return index;

out_free_tags:
	blk_mq_free_tag_set(&nbd->tag_set);
out_free_idr:
	idr_remove(&nbd_index_idr, index);
out_free_disk:
	put_disk(disk);
out_free_nbd:
	kfree(nbd);
out:
	return err;
}

/*
 * And here should be modules and kernel interface
 *  (Just smiley confuses emacs :-)
 */

/*
 * Module entry point: validate max_part and nbds_max, derive part_shift,
 * create the receive workqueue, register the block major, set up debugfs
 * and add nbds_max devices.
 *
 * Returns 0 on success, -EINVAL for an out-of-range parameter, -ENOMEM if
 * the workqueue cannot be allocated, or -EIO if the major cannot be
 * registered.  The results of nbd_dbg_init() and of the individual
 * nbd_dev_add() calls are not checked.
 */
static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space so that the user can know the max number of
		 * partitions the kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	/* Every device consumes 1 << part_shift minors. */
	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;

	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		destroy_workqueue(recv_workqueue);
		return -EIO;
	}

	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}
L
Linus Torvalds 已提交
1137

1138 1139 1140 1141
/*
 * idr_for_each() callback used by nbd_cleanup(): @ptr is the nbd_device
 * stored in the idr; remove it.  @id and @data are unused.  Always
 * returns 0.
 */
static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;

	nbd_dev_remove(nbd);
	return 0;
}

/*
 * Module exit: remove the debugfs tree, remove every device still in
 * nbd_index_idr, then release the idr, the receive workqueue and the
 * block major.
 */
static void __exit nbd_cleanup(void)
{
	nbd_dbg_close();

	idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL);
	idr_destroy(&nbd_index_idr);
	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

/* Both parameters are registered with mode 0444. */
module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");