xenbus.c 24.7 KB
Newer Older
K
Konrad Rzeszutek Wilk 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

17 18
#define pr_fmt(fmt) "xen-blkback: " fmt

K
Konrad Rzeszutek Wilk 已提交
19 20 21
#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
22 23
#include <xen/events.h>
#include <xen/grant_table.h>
K
Konrad Rzeszutek Wilk 已提交
24 25
#include "common.h"

26 27
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)

/* Buffer size for "ring-ref%u" xenstore key names (see connect_ring()). */
#define RINGREF_NAME_LEN (20)
29

30
/*
 * Per-device backend state; stored as the xenbus device's driver data
 * (see xen_blkbk_probe()) and freed in xen_blkbk_remove().
 */
struct backend_info {
	struct xenbus_device	*dev;		/* the xenbus device we back */
	struct xen_blkif	*blkif;		/* block interface state */
	struct xenbus_watch	backend_watch;	/* watch on .../physical-device */
	unsigned		major;		/* physical device major, 0 until set */
	unsigned		minor;		/* physical device minor, 0 until set */
	char			*mode;		/* xenstore "mode" node; 'w' => writable */
};

39
/* Slab cache for struct xen_blkif; created in xen_blkif_interface_init(). */
static struct kmem_cache *xen_blkif_cachep;

/* Forward declarations for routines defined later in this file. */
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);
K
Konrad Rzeszutek Wilk 已提交
46

47
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
48 49 50 51
{
	return be->dev;
}

52 53 54 55 56 57 58 59 60 61 62 63
/*
 * Deferred teardown: the last request could free the device from softirq
 * context, but xen_blkif_free() can sleep, so freeing is punted to a work
 * item and performed here in process context.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	xen_blkif_free(container_of(work, struct xen_blkif, free_work));
}

64
static int blkback_name(struct xen_blkif *blkif, char *buf)
K
Konrad Rzeszutek Wilk 已提交
65 66 67 68 69 70 71 72
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

73 74
	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
K
Konrad Rzeszutek Wilk 已提交
75 76 77 78
		devname += strlen("/dev/");
	else
		devname  = devpath;

79
	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
K
Konrad Rzeszutek Wilk 已提交
80 81 82 83 84
	kfree(devpath);

	return 0;
}

85
/*
 * Try to bring @blkif fully online: publish device details via connect(),
 * flush/invalidate the backing device's page cache, and start the
 * per-device xen_blkif_schedule() kthread.  Silently returns if the
 * interface is not yet ready or is already connected; reports other
 * failures through xenbus_dev_error().
 */
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[BLKBACK_NAME_LEN];

	/* Not ready to connect? */
	if (!blkif->irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	/*
	 * Write back and drop any cached pages for the backing device so
	 * the guest sees current data and dom0 caches don't go stale.
	 */
	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	/* Start the I/O servicing thread, named after the backing device. */
	blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
	if (IS_ERR(blkif->xenblkd)) {
		err = PTR_ERR(blkif->xenblkd);
		blkif->xenblkd = NULL;
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
		return;
	}
}

125
/*
 * Allocate and initialise a zeroed struct xen_blkif for frontend domain
 * @domid.  Sets up all locks, lists, waitqueues and work items; the ring
 * itself is mapped later by xen_blkif_map().  Returns ERR_PTR(-ENOMEM)
 * on allocation failure.  Initial refcount is 1 (dropped by
 * xen_blkif_put() from xen_blkbk_remove()).
 */
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;

	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	spin_lock_init(&blkif->blk_ring_lock);
	atomic_set(&blkif->refcnt, 1);
	init_waitqueue_head(&blkif->wq);
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	/* Persistent-grant tracking: rbtree plus ballooned-page free list. */
	blkif->persistent_gnts.rb_node = NULL;
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
	INIT_LIST_HEAD(&blkif->persistent_purge_list);
	blkif->free_pages_num = 0;
	atomic_set(&blkif->persistent_gnt_in_use, 0);
	atomic_set(&blkif->inflight, 0);
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

	/* Pool of pending_req structures, filled in connect_ring(). */
	INIT_LIST_HEAD(&blkif->pending_free);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
	spin_lock_init(&blkif->pending_free_lock);
	init_waitqueue_head(&blkif->pending_free_wq);
	init_waitqueue_head(&blkif->shutdown_wq);

	return blkif;
}

B
Bob Liu 已提交
160 161
/*
 * Map the frontend's shared ring pages (@gref, @nr_grefs of them) into
 * our address space, initialise the back ring for the negotiated ABI,
 * and bind the interdomain event channel @evtchn to xen_blkif_be_int().
 * A no-op (returns 0) if already connected.  On event-channel failure
 * the ring mapping is torn down again.
 */
static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
			 unsigned int nr_grefs, unsigned int evtchn)
{
	int err;

	/* Already connected through? */
	if (blkif->irq)
		return 0;

	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
				     &blkif->blk_ring);
	if (err < 0)
		return err;

	/* Initialise the shared ring for whichever ABI the frontend uses. */
	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
		BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
		break;
	}
	default:
		/* connect_ring() validated the protocol; anything else is a bug. */
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", blkif);
	if (err < 0) {
		/* Undo the ring mapping on failure. */
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
		return err;
	}
	blkif->irq = err;

	return 0;
}

213
/*
 * Tear down the connection to the frontend: stop the I/O kthread,
 * unbind the event channel, unmap the shared ring and drop all
 * persistent grants / ballooned pages.  Idempotent.
 *
 * Returns -EBUSY (leaving irq/ring intact) if I/O is still in flight;
 * 0 otherwise.
 */
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
		wake_up(&blkif->shutdown_wq);
		blkif->xenblkd = NULL;
	}

	/* The above kthread_stop() guarantees that at this point we
	 * don't have any discard_io or other_io requests. So, checking
	 * for inflight IO is enough.
	 */
	if (atomic_read(&blkif->inflight) > 0)
		return -EBUSY;

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
		blkif->irq = 0;
	}

	if (blkif->blk_rings.common.sring) {
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
	}

	/* Remove all persistent grants and the cache of ballooned pages. */
	xen_blkbk_free_caches(blkif);

	return 0;
}

244
/*
 * Final destruction of @blkif: disconnect from the frontend, release
 * the backing vbd, free the pending_req pool (including indirect
 * segment/page buffers) and return the structure to the slab cache.
 * Runs from xen_blkif_deferred_free() in process context, so it may
 * sleep.
 */
static void xen_blkif_free(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	int i = 0, j;

	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);

	/* Make sure everything is drained before shutting down */
	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
	BUG_ON(blkif->free_pages_num != 0);
	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	BUG_ON(!list_empty(&blkif->free_pages));
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

	/* Check that there is no request in use */
	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
		list_del(&req->free_list);

		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
			kfree(req->segments[j]);

		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
			kfree(req->indirect_pages[j]);

		kfree(req);
		i++;
	}

	/* Every request allocated in connect_ring() must be back on the list. */
	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));

	kmem_cache_free(xen_blkif_cachep, blkif);
}

279
/*
 * Create the slab cache used for struct xen_blkif allocations.
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);

	return xen_blkif_cachep ? 0 : -ENOMEM;
}
K
Konrad Rzeszutek Wilk 已提交
289

290
/*
 *  sysfs interface for VBD I/O requests
 */

/*
 * Generate a sysfs "show" callback plus a read-only DEVICE_ATTR for one
 * per-VBD value.  The callback resolves the backend_info from the xenbus
 * device's driver data, so the expansion of @args may reference "be".
 */
#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

/* Per-VBD request and sector counters. */
VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

/* The counters above live in a "statistics" subdirectory of the device. */
static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

/* Top-level attributes: physical major:minor and the access mode string. */
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

333
static int xenvbd_sysfs_addif(struct xenbus_device *dev)
K
Konrad Rzeszutek Wilk 已提交
334 335 336 337
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
338
	if (error)
K
Konrad Rzeszutek Wilk 已提交
339 340 341 342 343 344
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

345
	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
K
Konrad Rzeszutek Wilk 已提交
346 347 348 349 350
	if (error)
		goto fail3;

	return 0;

351
fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
K
Konrad Rzeszutek Wilk 已提交
352 353 354 355 356
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

357
static void xenvbd_sysfs_delif(struct xenbus_device *dev)
K
Konrad Rzeszutek Wilk 已提交
358
{
359
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
K
Konrad Rzeszutek Wilk 已提交
360 361 362 363
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}

364

365
static void xen_vbd_free(struct xen_vbd *vbd)
366 367 368 369 370 371
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

372 373 374
/*
 * Open the physical device MKDEV(@major, @minor) and fill in
 * blkif->vbd: size, type flags (cdrom/removable), and whether the
 * underlying queue supports flush and secure discard.  @readonly
 * selects FMODE_READ vs FMODE_WRITE for the open.  Returns 0 or
 * -ENOENT if the device cannot be opened or has no disk.
 */
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		pr_warn("xen_vbd_create: device %08x could not be opened\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
			vbd->pdevice);
		/* Drops the reference taken by blkdev_get_by_dev() above. */
		xen_vbd_free(vbd);
		return -ENOENT;
	}
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	/* Record queue capabilities advertised to the frontend in connect(). */
	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
		handle, blkif->domid);
	return 0;
}
421
/*
 * xenbus ->remove() callback: undo everything xen_blkbk_probe() and
 * backend_changed() set up — sysfs entries, the physical-device watch,
 * and the blkif (disconnected here, freed when the last reference is
 * dropped via xen_blkif_put()).
 */
static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	/* major/minor are only non-zero once xenvbd_sysfs_addif() succeeded. */
	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	dev_set_drvdata(&dev->dev, NULL);

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		/* Drop the initial reference from xen_blkif_alloc(). */
		xen_blkif_put(be->blkif);
	}

	kfree(be->mode);
	kfree(be);
	return 0;
}

448 449
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
K
Konrad Rzeszutek Wilk 已提交
450 451 452 453
{
	struct xenbus_device *dev = be->dev;
	int err;

454
	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
K
Konrad Rzeszutek Wilk 已提交
455 456
			    "%d", state);
	if (err)
457
		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
K
Konrad Rzeszutek Wilk 已提交
458 459 460 461

	return err;
}

462
/*
 * Advertise discard support in xenstore.  If the toolstack set
 * "discard-enable" to 0, nothing is written.  Otherwise, when the
 * backing queue supports discard, granularity/alignment (and optionally
 * "discard-secure") are published and "feature-discard" is set to 1;
 * when it doesn't, "feature-discard" is set to 0.  Failures are logged
 * via dev_warn() and abort the remaining writes.
 */
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
	int state = 0, discard_enable;
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	/* Toolstack override: "discard-enable" = 0 suppresses the feature. */
	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
			   &discard_enable);
	if (err == 1 && !discard_enable)
		return;

	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
			"discard-granularity", "%u",
			q->limits.discard_granularity);
		if (err) {
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
		}
		err = xenbus_printf(xbt, dev->nodename,
			"discard-alignment", "%u",
			q->limits.discard_alignment);
		if (err) {
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
		}
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
			return;
		}
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}
506 507 508 509 510 511 512 513 514
/*
 * Advertise "feature-barrier" = @state in the backend's xenstore
 * directory.  A write failure is logged but treated as non-fatal; the
 * xenbus error code is returned to the caller.
 */
int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *xbdev = be->dev;
	int ret;

	ret = xenbus_printf(xbt, xbdev->nodename, "feature-barrier",
			    "%d", state);
	if (ret)
		dev_warn(&xbdev->dev, "writing feature-barrier (%d)", ret);

	return ret;
}
519

520
/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);

	/* match the pr_debug in xen_blkbk_remove */
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

	/* backend_changed() fires when the hotplug scripts write the node. */
	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	/* Tell the frontend how many ring pages we can handle (best effort). */
	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
			    xen_blkif_max_ring_order);
	if (err)
		pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	pr_warn("%s failed\n", __func__);
	/* xen_blkbk_remove() frees everything allocated so far. */
	xen_blkbk_remove(dev);
	return err;
}


577
/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	/* A vbd was already created for this device; hotplug re-fired. */
	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	/* "device-type" is optional; only "cdrom" is significant. */
	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* Front end dir is a number, which is used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err)
		return;

	be->major = major;
	be->minor = minor;

	/* Absence of 'w' in the mode string means read-only. */
	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		/* Roll back so a later watch event can retry from scratch. */
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}


666
/*
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		/* A closed frontend coming back: rewind to InitWait. */
		if (dev->state == XenbusStateClosed) {
			pr_info("%s: prepare for reconnect\n", dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err)
			break;
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */


737
/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 *
 * All writes happen inside one xenbus transaction, restarted from
 * "again:" whenever xenbus_transaction_end() returns -EAGAIN.  Mandatory
 * keys (sectors, info, sector-size, feature-persistent) abort the
 * transaction on failure; optional features only log a warning.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	pr_debug("%s %s\n", __func__, dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* If we can't advertise it is OK. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	/* Optional: failure is reported but does not abort the transaction. */
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}


/*
 * Read the frontend's ring parameters from xenstore and establish the
 * shared ring:
 *  - event channel (mandatory);
 *  - either a single "ring-ref" or, if "ring-page-order" is present,
 *    2^order "ring-ref%u" grants (bounded by xen_blkif_max_ring_order);
 *  - the ring ABI ("protocol") and persistent-grant support;
 * then allocates the pending_req pool sized for the ring and calls
 * xen_blkif_map().  Returns 0 on success, a negative errno (or -1 for
 * an unknown protocol) on failure; on allocation failure the partially
 * built pool is freed.
 */
static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
	unsigned int evtchn, nr_grefs, ring_page_order;
	unsigned int pers_grants;
	char protocol[64] = "";
	struct pending_req *req, *n;
	int err, i, j;

	pr_debug("%s %s\n", __func__, dev->otherend);

	err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
			  &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
				 dev->otherend);
		return err;
	}
	pr_info("event-channel %u\n", evtchn);

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
			  &ring_page_order);
	if (err != 1) {
		/* No order node: legacy single-page ring. */
		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
				  "%u", &ring_ref[0]);
		if (err != 1) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
					 dev->otherend);
			return err;
		}
		nr_grefs = 1;
		pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
			ring_ref[0]);
	} else {
		unsigned int i;

		if (ring_page_order > xen_blkif_max_ring_order) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
					 dev->otherend, ring_page_order,
					 xen_blkif_max_ring_order);
			return err;
		}

		/* Multi-page ring: read one grant reference per page. */
		nr_grefs = 1 << ring_page_order;
		for (i = 0; i < nr_grefs; i++) {
			char ring_ref_name[RINGREF_NAME_LEN];

			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
			err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
					   "%u", &ring_ref[i]);
			if (err != 1) {
				err = -EINVAL;
				xenbus_dev_fatal(dev, err, "reading %s/%s",
						 dev->otherend, ring_ref_name);
				return err;
			}
			pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
		}
	}

	/* Negotiate the ring ABI; missing "protocol" node means default. */
	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
		strcpy(protocol, "unspecified, assuming default");
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -1;
	}
	/* "feature-persistent" is optional; absence means disabled. */
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u",
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;
	be->blkif->nr_ring_pages = nr_grefs;

	pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
		nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
		pers_grants ? "persistent grants" : "");

	/* Pre-allocate the request pool, scaled to the ring size. */
	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
		list_add_tail(&req->free_list, &be->blkif->pending_free);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
							 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
		return err;
	}

	return 0;

fail:
	/*
	 * Free everything added to pending_free so far; inner loops stop
	 * at the first NULL slot since allocation fills arrays in order.
	 */
	list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}
	return -ENOMEM;
}

962
/* xenstore device types this backend driver binds to. */
static const struct xenbus_device_id xen_blkbk_ids[] = {
	{ "vbd" },
	{ "" }		/* terminator */
};

967 968
/* xenbus driver glue: probe/remove plus frontend state-change handling. */
static struct xenbus_driver xen_blkbk_driver = {
	.ids  = xen_blkbk_ids,
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
	.otherend_changed = frontend_changed
};
K
Konrad Rzeszutek Wilk 已提交
973

974
int xen_blkif_xenbus_init(void)
K
Konrad Rzeszutek Wilk 已提交
975
{
976
	return xenbus_register_backend(&xen_blkbk_driver);
K
Konrad Rzeszutek Wilk 已提交
977
}