/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

#define pr_fmt(fmt) "xen-blkback: " fmt

#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
#define RINGREF_NAME_LEN (20)

struct backend_info {
	struct xenbus_device	*dev;
	struct xen_blkif	*blkif;
	struct xenbus_watch	backend_watch;
	unsigned		major;
	unsigned		minor;
	char			*mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	return be->dev;
}

/*
 * The last request could free the device from softirq context and
 * xen_blkif_free() can sleep.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	struct xen_blkif *blkif;

	blkif = container_of(work, struct xen_blkif, free_work);
	xen_blkif_free(blkif);
}

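/*
 * Build the name used for the per-device I/O kthread, of the form
 * "blkback.<domid>.<devname>", from the backend's "dev" xenstore node.
 */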
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
		devname += strlen("/dev/");
	else
		devname  = devpath;

	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
	kfree(devpath);

	return 0;
}

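/*
 * Re-evaluate the connection state: once both the ring/irq and the backing
 * device are in place, switch the backend to Connected, flush the backing
 * device's page cache and start the xen_blkif_schedule() kthread.
 */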
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[BLKBACK_NAME_LEN];

	/* Not ready to connect? */
	if (!blkif->irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
	if (IS_ERR(blkif->xenblkd)) {
		err = PTR_ERR(blkif->xenblkd);
		blkif->xenblkd = NULL;
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
		return;
	}
}

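/*
 * Allocate and initialise a blkif for the given frontend domain.  The
 * structure starts with a single reference; the shared ring, event channel
 * and backing device are attached later by connect_ring() and
 * backend_changed().
 */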
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;

	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	blkif->persistent_gnts.rb_node = NULL;
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
	INIT_LIST_HEAD(&blkif->persistent_purge_list);
	blkif->free_pages_num = 0;
	atomic_set(&blkif->persistent_gnt_in_use, 0);
	atomic_set(&blkif->inflight, 0);
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

	INIT_LIST_HEAD(&blkif->pending_free);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
	spin_lock_init(&blkif->pending_free_lock);
	init_waitqueue_head(&blkif->pending_free_wq);
	init_waitqueue_head(&blkif->shutdown_wq);

	return blkif;
}

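/*
 * Map the frontend's shared ring (one or more grant references) into the
 * backend address space and bind the interdomain event channel to
 * xen_blkif_be_int().
 */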
static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
			 unsigned int nr_grefs, unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
				     &blkif->blk_ring);
	if (err < 0)
		return err;

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}

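/*
 * Undo the connection set up by connect_ring()/xen_blkif_map(): stop the
 * I/O kthread, unbind the irq, unmap the shared ring and release the
 * pending request pool.  Returns -EBUSY, without tearing anything down,
 * while requests are still in flight.
 */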
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	int i = 0, j;

	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		wake_up(&blkif->shutdown_wq);
		blkif->xenblkd = NULL;
	}

	/* The above kthread_stop() guarantees that at this point we
	 * don't have any discard_io or other_io requests. So, checking
	 * for inflight IO is enough.
	 */
	if (atomic_read(&blkif->inflight) > 0)
		return -EBUSY;

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
	}

	/* Remove all persistent grants and the cache of ballooned pages. */
	xen_blkbk_free_caches(blkif);

	/* Check that there is no request in use */
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);

		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
			kfree(req->segments[j]);

		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
			kfree(req->indirect_pages[j]);

		kfree(req);
		i++;
	}

	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
	blkif->nr_ring_pages = 0;

	return 0;
}

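/*
 * Final teardown, run from the deferred free work once the last reference
 * is dropped: disconnect from the frontend, release the backing device and
 * return the blkif to the slab cache.
 */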
static void xen_blkif_free(struct xen_blkif *blkif)
{

	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);

	/* Make sure everything is drained before shutting down */
	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
	BUG_ON(blkif->free_pages_num != 0);
	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	BUG_ON(!list_empty(&blkif->free_pages));
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

	kmem_cache_free(xen_blkif_cachep, blkif);
}

int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}

/*
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

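/* Publish the physical_device, mode and statistics nodes in sysfs. */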
static int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
	if (error)
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
	if (error)
		goto fail3;

	return 0;

fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

static void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}


static void xen_vbd_free(struct xen_vbd *vbd)
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

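/*
 * Open the backing block device and record its properties (size,
 * cdrom/removable flags, flush and secure-discard support) in the vbd.
 */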
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		pr_warn("xen_vbd_create: device %08x could not be opened\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
		handle, blkif->domid);
	return 0;
}
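
/*
 * Called on device removal: remove the sysfs nodes and the backend watch,
 * then disconnect and drop the reference to the blkif.
 */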
static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	dev_set_drvdata(&dev->dev, NULL);

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		xen_blkif_put(be->blkif);
	}

	kfree(be->mode);
	kfree(be);
	return 0;
}

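/* Advertise "feature-flush-cache" (cache flush support) to the frontend. */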
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);

	return err;
}

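/*
 * Advertise discard support to the frontend, honouring the optional
 * "discard-enable" xenstore override and the capabilities of the backing
 * request queue.
 */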
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
	int state = 0, discard_enable;
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
			   &discard_enable);
	if (err == 1 && !discard_enable)
		return;

	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
			"discard-granularity", "%u",
			q->limits.discard_granularity);
		if (err) {
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
		}
		err = xenbus_printf(xbt, dev->nodename,
			"discard-alignment", "%u",
			q->limits.discard_alignment);
		if (err) {
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
		}
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
			return;
		}
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}
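
/* Advertise "feature-barrier" to the frontend. */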
int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);

	return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);

	/* match the pr_debug in xen_blkbk_remove */
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
			    xen_blkif_max_ring_order);
	if (err)
		pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	pr_warn("%s failed\n", __func__);
	xen_blkbk_remove(dev);
	return err;
}


/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* Front end dir is a number, which is used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err)
		return;

	be->major = major;
	be->minor = minor;

	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}


/*
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		if (dev->state == XenbusStateClosed) {
			pr_info("%s: prepare for reconnect\n", dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err)
			break;
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */


/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	pr_debug("%s %s\n", __func__, dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* If we can't advertise it is OK. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}


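/*
 * Read the ring references, event channel, protocol and persistent-grant
 * setting from the frontend, allocate the pending request pool and map the
 * shared ring.
 */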
static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
	unsigned int evtchn, nr_grefs, ring_page_order;
	unsigned int pers_grants;
	char protocol[64] = "";
	struct pending_req *req, *n;
	int err, i, j;

	pr_debug("%s %s\n", __func__, dev->otherend);

	err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
			  &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
				 dev->otherend);
		return err;
	}
	pr_info("event-channel %u\n", evtchn);

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
			  &ring_page_order);
	if (err != 1) {
		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
				  "%u", &ring_ref[0]);
		if (err != 1) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
					 dev->otherend);
			return err;
		}
		nr_grefs = 1;
		pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
			ring_ref[0]);
	} else {
		unsigned int i;

		if (ring_page_order > xen_blkif_max_ring_order) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
					 dev->otherend, ring_page_order,
					 xen_blkif_max_ring_order);
			return err;
		}

		nr_grefs = 1 << ring_page_order;
		for (i = 0; i < nr_grefs; i++) {
			char ring_ref_name[RINGREF_NAME_LEN];

			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
			err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
					   "%u", &ring_ref[i]);
			if (err != 1) {
				err = -EINVAL;
				xenbus_dev_fatal(dev, err, "reading %s/%s",
						 dev->otherend, ring_ref_name);
				return err;
			}
			pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
		}
	}

	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
		strcpy(protocol, "unspecified, assuming default");
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -1;
	}
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u",
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;
	be->blkif->nr_ring_pages = nr_grefs;

	pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
		nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
		pers_grants ? "persistent grants" : "");

	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
		list_add_tail(&req->free_list, &be->blkif->pending_free);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
							 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
		return err;
	}

	return 0;

fail:
	list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}
	return -ENOMEM;
}

static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }
};

static struct xenbus_driver xen_blkbk_driver = {
	.ids  = xen_blkbk_ids,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
};

int xen_blkif_xenbus_init(void)
{
	return xenbus_register_backend(&xen_blkbk_driver);
}