/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

#define pr_fmt(fmt) "xen-blkback: " fmt

#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
#define RINGREF_NAME_LEN (20)

struct backend_info {
	struct xenbus_device	*dev;
	struct xen_blkif	*blkif;
	struct xenbus_watch	backend_watch;
	unsigned		major;
	unsigned		minor;
	char			*mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	return be->dev;
}

/*
 * The last request could free the device from softirq context and
 * xen_blkif_free() can sleep.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	struct xen_blkif *blkif;

	blkif = container_of(work, struct xen_blkif, free_work);
	xen_blkif_free(blkif);
}

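/*
 * Build the name for the per-device xenblkd kernel thread,
 * "blkback.<domid>.<devname>", from the "dev" node written by the
 * hotplug scripts.
 */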
static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
		devname += strlen("/dev/");
	else
		devname  = devpath;

	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
	kfree(devpath);

	return 0;
}

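/*
 * Try to bring the backend up once both the ring (irq) and the backing
 * device (vbd.bdev) are in place: write the device details to xenstore,
 * flush the backing device's page cache and start the xenblkd thread
 * that services ring requests.
 */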
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[BLKBACK_NAME_LEN];

	/* Not ready to connect? */
	if (!blkif->irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
	if (IS_ERR(blkif->xenblkd)) {
		err = PTR_ERR(blkif->xenblkd);
		blkif->xenblkd = NULL;
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
		return;
	}
}

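/*
 * Allocate and initialise a xen_blkif for the given frontend domain. The
 * shared ring is mapped and the event channel bound later, in
 * xen_blkif_map().
 */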
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;

	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	blkif->persistent_gnts.rb_node = NULL;
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
	INIT_LIST_HEAD(&blkif->persistent_purge_list);
	blkif->free_pages_num = 0;
	atomic_set(&blkif->persistent_gnt_in_use, 0);
	atomic_set(&blkif->inflight, 0);
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

	INIT_LIST_HEAD(&blkif->pending_free);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
	spin_lock_init(&blkif->pending_free_lock);
	init_waitqueue_head(&blkif->pending_free_wq);
	init_waitqueue_head(&blkif->shutdown_wq);

	return blkif;
}

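/*
 * Map the frontend's shared ring pages (one grant reference per page) and
 * bind the interdomain event channel. The ring layout depends on the ABI
 * the frontend advertised (native, x86_32 or x86_64).
 */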
static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
			 unsigned int nr_grefs, unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
				     &blkif->blk_ring);
	if (err < 0)
		return err;

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.native, sring,
			       XEN_PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
			       XEN_PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
			       XEN_PAGE_SIZE * nr_grefs);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}

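/*
 * Tear down the connection to the frontend: stop the xenblkd thread,
 * unbind the event channel, unmap the ring and release the persistent
 * grant and ballooned page caches. Returns -EBUSY if requests are still
 * in flight.
 */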
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	int i = 0, j;

	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		wake_up(&blkif->shutdown_wq);
		blkif->xenblkd = NULL;
	}

	/* The above kthread_stop() guarantees that at this point we
	 * don't have any discard_io or other_io requests. So, checking
	 * for inflight IO is enough.
	 */
	if (atomic_read(&blkif->inflight) > 0)
		return -EBUSY;

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
	}

	/* Remove all persistent grants and the cache of ballooned pages. */
	xen_blkbk_free_caches(blkif);

	/* Check that there is no request in use */
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);

		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
			kfree(req->segments[j]);

		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
			kfree(req->indirect_pages[j]);

		kfree(req);
		i++;
	}

	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
	blkif->nr_ring_pages = 0;

	return 0;
}

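/*
 * Final teardown once the last reference has been dropped: disconnect
 * from the frontend, release the backing device and return the structure
 * to the slab cache. The BUG_ONs verify that all grants and ballooned
 * pages have been released by this point.
 */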
static void xen_blkif_free(struct xen_blkif *blkif)
{

	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);

	/* Make sure everything is drained before shutting down */
	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
	BUG_ON(blkif->free_pages_num != 0);
	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	BUG_ON(!list_empty(&blkif->free_pages));
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

	kmem_cache_free(xen_blkif_cachep, blkif);
}

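/* Create the slab cache used for xen_blkif allocations. */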
int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}

/*
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

static int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
	if (error)
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
	if (error)
		goto fail3;

	return 0;

fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

static void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}


static void xen_vbd_free(struct xen_vbd *vbd)
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

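/*
 * Open the underlying block device for this virtual block device and
 * record its properties (size, CD-ROM/removable flags, flush and secure
 * discard support) so they can be advertised to the frontend.
 */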
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		pr_warn("xen_vbd_create: device %08x could not be opened\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
		handle, blkif->domid);
	return 0;
}
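
/*
 * Called when the backend device is removed: delete the sysfs entries,
 * stop watching the physical-device node and drop the reference on the
 * block interface.
 */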
static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	dev_set_drvdata(&dev->dev, NULL);

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		xen_blkif_put(be->blkif);
	}

	kfree(be->mode);
	kfree(be);
	return 0;
}

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);

	return err;
}

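/*
 * Advertise discard support to the frontend unless the toolstack disabled
 * it via the "discard-enable" node. Granularity and alignment are taken
 * from the underlying request queue.
 */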
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
	int state = 0, discard_enable;
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
			   &discard_enable);
	if (err == 1 && !discard_enable)
		return;

	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
			"discard-granularity", "%u",
			q->limits.discard_granularity);
		if (err) {
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
		}
		err = xenbus_printf(xbt, dev->nodename,
			"discard-alignment", "%u",
			q->limits.discard_alignment);
		if (err) {
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
		}
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
			return;
		}
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}
int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);

	return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);

	/* match the pr_debug in xen_blkbk_remove */
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
			    xen_blkif_max_ring_order);
	if (err)
		pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	pr_warn("%s failed\n", __func__);
	xen_blkbk_remove(dev);
	return err;
}


/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* Front end dir is a number, which is used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err)
		return;

	be->major = major;
	be->minor = minor;

	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}


/*
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		if (dev->state == XenbusStateClosed) {
			pr_info("%s: prepare for reconnect\n", dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err)
			break;
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */


/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	pr_debug("%s %s\n", __func__, dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* If we can't advertise it is OK. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}


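/*
 * Read the frontend's ring references, event channel, protocol and
 * persistent grant setting from xenstore, allocate the pool of pending
 * requests and map the ring.
 */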
static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
	unsigned int evtchn, nr_grefs, ring_page_order;
	unsigned int pers_grants;
	char protocol[64] = "";
	struct pending_req *req, *n;
	int err, i, j;

	pr_debug("%s %s\n", __func__, dev->otherend);

	err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
			  &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
				 dev->otherend);
		return err;
	}
	pr_info("event-channel %u\n", evtchn);

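	/*
	 * Multi-page ring negotiation: if the frontend wrote "ring-page-order"
	 * read 1 << order grant references from the "ring-ref%u" nodes,
	 * otherwise fall back to the single "ring-ref" node.
	 */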
	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
			  &ring_page_order);
	if (err != 1) {
		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
				  "%u", &ring_ref[0]);
		if (err != 1) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
					 dev->otherend);
			return err;
		}
		nr_grefs = 1;
		pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
			ring_ref[0]);
	} else {
		unsigned int i;

		if (ring_page_order > xen_blkif_max_ring_order) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
					 dev->otherend, ring_page_order,
					 xen_blkif_max_ring_order);
			return err;
		}

		nr_grefs = 1 << ring_page_order;
		for (i = 0; i < nr_grefs; i++) {
			char ring_ref_name[RINGREF_NAME_LEN];

			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
			err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
					   "%u", &ring_ref[i]);
			if (err != 1) {
				err = -EINVAL;
				xenbus_dev_fatal(dev, err, "reading %s/%s",
						 dev->otherend, ring_ref_name);
				return err;
			}
			pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
		}
	}

	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
		strcpy(protocol, "unspecified, assuming default");
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -1;
	}
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u",
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;
	be->blkif->nr_ring_pages = nr_grefs;

	pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
		nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
		pers_grants ? "persistent grants" : "");

	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
		list_add_tail(&req->free_list, &be->blkif->pending_free);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
							 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
		return err;
	}

	return 0;

fail:
	list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}
	return -ENOMEM;
}

static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }
};

static struct xenbus_driver xen_blkbk_driver = {
	.ids  = xen_blkbk_ids,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
};

int xen_blkif_xenbus_init(void)
{
	return xenbus_register_backend(&xen_blkbk_driver);
}