xenbus.c 24.8 KB
Newer Older
K
Konrad Rzeszutek Wilk 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

17 18
#define pr_fmt(fmt) "xen-blkback: " fmt

K
Konrad Rzeszutek Wilk 已提交
19 20 21
#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
22 23
#include <xen/events.h>
#include <xen/grant_table.h>
K
Konrad Rzeszutek Wilk 已提交
24 25
#include "common.h"

26 27
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
B
Bob Liu 已提交
28
#define RINGREF_NAME_LEN (20)
29

30
struct backend_info {
31
	struct xenbus_device	*dev;
32
	struct xen_blkif	*blkif;
33 34 35 36
	struct xenbus_watch	backend_watch;
	unsigned		major;
	unsigned		minor;
	char			*mode;
K
Konrad Rzeszutek Wilk 已提交
37 38
};

39
static struct kmem_cache *xen_blkif_cachep;
K
Konrad Rzeszutek Wilk 已提交
40 41 42 43
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
44 45
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);
K
Konrad Rzeszutek Wilk 已提交
46

47
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
48 49 50 51
{
	return be->dev;
}

52 53 54 55 56 57 58 59 60 61 62 63
/*
 * The last request could free the device from softirq context and
 * xen_blkif_free() can sleep.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	struct xen_blkif *blkif;

	blkif = container_of(work, struct xen_blkif, free_work);
	xen_blkif_free(blkif);
}

64
static int blkback_name(struct xen_blkif *blkif, char *buf)
K
Konrad Rzeszutek Wilk 已提交
65 66 67 68 69 70 71 72
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

73 74
	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
K
Konrad Rzeszutek Wilk 已提交
75 76 77 78
		devname += strlen("/dev/");
	else
		devname  = devpath;

79
	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
K
Konrad Rzeszutek Wilk 已提交
80 81 82 83 84
	kfree(devpath);

	return 0;
}

85
static void xen_update_blkif_status(struct xen_blkif *blkif)
K
Konrad Rzeszutek Wilk 已提交
86 87
{
	int err;
88
	char name[BLKBACK_NAME_LEN];
K
Konrad Rzeszutek Wilk 已提交
89 90

	/* Not ready to connect? */
91
	if (!blkif->ring.irq || !blkif->vbd.bdev)
K
Konrad Rzeszutek Wilk 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail to. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

109 110 111 112 113 114 115
	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

116 117 118 119
	blkif->ring.xenblkd = kthread_run(xen_blkif_schedule, &blkif->ring, "%s", name);
	if (IS_ERR(blkif->ring.xenblkd)) {
		err = PTR_ERR(blkif->ring.xenblkd);
		blkif->ring.xenblkd = NULL;
K
Konrad Rzeszutek Wilk 已提交
120
		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
121
		return;
K
Konrad Rzeszutek Wilk 已提交
122 123 124
	}
}

125
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
126
{
127
	struct xen_blkif *blkif;
128
	struct xen_blkif_ring *ring;
129

130
	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
131

132
	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
133 134 135 136 137
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	atomic_set(&blkif->refcnt, 1);
138
	init_completion(&blkif->drain_complete);
139
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
140 141
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
R
Roger Pau Monne 已提交
142
	INIT_LIST_HEAD(&blkif->persistent_purge_list);
143
	blkif->st_print = jiffies;
144
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
145

146 147 148 149 150 151 152 153 154
	ring = &blkif->ring;
	ring->blkif = blkif;
	spin_lock_init(&ring->blk_ring_lock);
	init_waitqueue_head(&ring->wq);

	INIT_LIST_HEAD(&ring->pending_free);
	spin_lock_init(&ring->pending_free_lock);
	init_waitqueue_head(&ring->pending_free_wq);
	init_waitqueue_head(&ring->shutdown_wq);
155 156 157 158

	return blkif;
}

159
static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
B
Bob Liu 已提交
160
			 unsigned int nr_grefs, unsigned int evtchn)
161 162
{
	int err;
163
	struct xen_blkif *blkif = ring->blkif;
164 165

	/* Already connected through? */
166
	if (ring->irq)
167 168
		return 0;

B
Bob Liu 已提交
169
	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
170
				     &ring->blk_ring);
171
	if (err < 0)
172 173 174 175 176 177
		return err;

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
178 179
		sring = (struct blkif_sring *)ring->blk_ring;
		BACK_RING_INIT(&ring->blk_rings.native, sring,
180
			       XEN_PAGE_SIZE * nr_grefs);
181 182 183 184 185
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
186 187
		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
188
			       XEN_PAGE_SIZE * nr_grefs);
189 190 191 192 193
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
194 195
		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
196
			       XEN_PAGE_SIZE * nr_grefs);
197 198 199 200 201 202
		break;
	}
	default:
		BUG();
	}

203 204
	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
205
						    "blkif-backend", ring);
206
	if (err < 0) {
207 208
		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
		ring->blk_rings.common.sring = NULL;
209 210
		return err;
	}
211
	ring->irq = err;
212 213 214 215

	return 0;
}

216
static int xen_blkif_disconnect(struct xen_blkif *blkif)
217
{
218 219
	struct pending_req *req, *n;
	int i = 0, j;
220
	struct xen_blkif_ring *ring = &blkif->ring;
221

222 223 224 225
	if (ring->xenblkd) {
		kthread_stop(ring->xenblkd);
		wake_up(&ring->shutdown_wq);
		ring->xenblkd = NULL;
226 227
	}

228 229 230 231
	/* The above kthread_stop() guarantees that at this point we
	 * don't have any discard_io or other_io requests. So, checking
	 * for inflight IO is enough.
	 */
232
	if (atomic_read(&ring->inflight) > 0)
233
		return -EBUSY;
234

235 236 237
	if (ring->irq) {
		unbind_from_irqhandler(ring->irq, ring);
		ring->irq = 0;
238 239
	}

240 241 242
	if (ring->blk_rings.common.sring) {
		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
		ring->blk_rings.common.sring = NULL;
243
	}
244

245
	/* Remove all persistent grants and the cache of ballooned pages. */
246
	xen_blkbk_free_caches(ring);
247

248
	/* Check that there is no request in use */
249
	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
		list_del(&req->free_list);

		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
			kfree(req->segments[j]);

		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
			kfree(req->indirect_pages[j]);

		kfree(req);
		i++;
	}

	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
	blkif->nr_ring_pages = 0;

265
	return 0;
266 267
}

268
static void xen_blkif_free(struct xen_blkif *blkif)
269
{
270

271 272
	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);
273

R
Roger Pau Monne 已提交
274 275 276 277 278 279 280 281
	/* Make sure everything is drained before shutting down */
	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
	BUG_ON(blkif->free_pages_num != 0);
	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	BUG_ON(!list_empty(&blkif->free_pages));
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

282
	kmem_cache_free(xen_blkif_cachep, blkif);
283 284
}

285
int __init xen_blkif_interface_init(void)
286
{
287
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
288
					     sizeof(struct xen_blkif),
289 290
					     0, 0, NULL);
	if (!xen_blkif_cachep)
291 292 293 294
		return -ENOMEM;

	return 0;
}
K
Konrad Rzeszutek Wilk 已提交
295

296
/*
K
Konrad Rzeszutek Wilk 已提交
297 298 299 300 301 302 303 304 305
 *  sysfs interface for VBD I/O requests
 */

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
306
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
K
Konrad Rzeszutek Wilk 已提交
307 308 309 310 311
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

312 313 314 315 316 317 318
VBD_SHOW(oo_req,  "%llu\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req,  "%llu\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req,  "%llu\n", be->blkif->st_wr_req);
VBD_SHOW(f_req,  "%llu\n", be->blkif->st_f_req);
VBD_SHOW(ds_req,  "%llu\n", be->blkif->st_ds_req);
VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
K
Konrad Rzeszutek Wilk 已提交
319

320
static struct attribute *xen_vbdstat_attrs[] = {
K
Konrad Rzeszutek Wilk 已提交
321 322 323
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
324
	&dev_attr_f_req.attr,
325
	&dev_attr_ds_req.attr,
K
Konrad Rzeszutek Wilk 已提交
326 327 328 329 330
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

331
static struct attribute_group xen_vbdstat_group = {
K
Konrad Rzeszutek Wilk 已提交
332
	.name = "statistics",
333
	.attrs = xen_vbdstat_attrs,
K
Konrad Rzeszutek Wilk 已提交
334 335 336 337 338
};

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

339
static int xenvbd_sysfs_addif(struct xenbus_device *dev)
K
Konrad Rzeszutek Wilk 已提交
340 341 342 343
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
344
	if (error)
K
Konrad Rzeszutek Wilk 已提交
345 346 347 348 349 350
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

351
	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
K
Konrad Rzeszutek Wilk 已提交
352 353 354 355 356
	if (error)
		goto fail3;

	return 0;

357
fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
K
Konrad Rzeszutek Wilk 已提交
358 359 360 361 362
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

363
static void xenvbd_sysfs_delif(struct xenbus_device *dev)
K
Konrad Rzeszutek Wilk 已提交
364
{
365
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
K
Konrad Rzeszutek Wilk 已提交
366 367 368 369
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}

370

371
static void xen_vbd_free(struct xen_vbd *vbd)
372 373 374 375 376 377
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

378 379 380
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
381
{
382
	struct xen_vbd *vbd;
383
	struct block_device *bdev;
384
	struct request_queue *q;
385 386 387 388 389 390 391 392 393 394 395 396

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
397
		pr_warn("xen_vbd_create: device %08x could not be opened\n",
398 399 400 401 402 403
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
404
		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
405
			vbd->pdevice);
406
		xen_vbd_free(vbd);
407 408
		return -ENOENT;
	}
409
	vbd->size = vbd_sz(vbd);
410 411 412 413 414 415

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

416 417 418 419
	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

420 421 422
	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

423
	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
424 425 426
		handle, blkif->domid);
	return 0;
}
427
static int xen_blkbk_remove(struct xenbus_device *dev)
K
Konrad Rzeszutek Wilk 已提交
428
{
429
	struct backend_info *be = dev_get_drvdata(&dev->dev);
K
Konrad Rzeszutek Wilk 已提交
430

431
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
K
Konrad Rzeszutek Wilk 已提交
432 433 434 435 436 437 438 439 440 441

	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

442 443
	dev_set_drvdata(&dev->dev, NULL);

K
Konrad Rzeszutek Wilk 已提交
444
	if (be->blkif) {
445
		xen_blkif_disconnect(be->blkif);
446
		xen_blkif_put(be->blkif);
K
Konrad Rzeszutek Wilk 已提交
447 448
	}

449
	kfree(be->mode);
K
Konrad Rzeszutek Wilk 已提交
450 451 452 453
	kfree(be);
	return 0;
}

454 455
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
K
Konrad Rzeszutek Wilk 已提交
456 457 458 459
{
	struct xenbus_device *dev = be->dev;
	int err;

460
	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
K
Konrad Rzeszutek Wilk 已提交
461 462
			    "%d", state);
	if (err)
463
		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
K
Konrad Rzeszutek Wilk 已提交
464 465 466 467

	return err;
}

468
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
469 470 471 472
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
473
	int state = 0, discard_enable;
474 475 476
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

477 478 479 480 481
	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
			   &discard_enable);
	if (err == 1 && !discard_enable)
		return;

482 483 484 485 486
	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
			"discard-granularity", "%u",
			q->limits.discard_granularity);
		if (err) {
487 488
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
489 490 491 492 493
		}
		err = xenbus_printf(xbt, dev->nodename,
			"discard-alignment", "%u",
			q->limits.discard_alignment);
		if (err) {
494 495
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
496
		}
497 498 499 500 501 502
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
503
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
504
			return;
505 506 507 508 509
		}
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
510
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
511
}
512 513 514 515 516 517 518 519 520
int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
			    "%d", state);
	if (err)
521
		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
522 523 524

	return err;
}
525

526
/*
K
Konrad Rzeszutek Wilk 已提交
527 528 529 530
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
531 532
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
K
Konrad Rzeszutek Wilk 已提交
533 534 535 536
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);
537 538 539 540

	/* match the pr_debug in xen_blkbk_remove */
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

K
Konrad Rzeszutek Wilk 已提交
541 542 543 544 545 546
	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
547
	dev_set_drvdata(&dev->dev, be);
K
Konrad Rzeszutek Wilk 已提交
548

549
	be->blkif = xen_blkif_alloc(dev->otherend_id);
K
Konrad Rzeszutek Wilk 已提交
550 551 552 553 554 555 556 557 558 559
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* setup back pointer */
	be->blkif->be = be;

J
Jeremy Fitzhardinge 已提交
560 561
	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
K
Konrad Rzeszutek Wilk 已提交
562 563 564
	if (err)
		goto fail;

B
Bob Liu 已提交
565 566 567 568 569
	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
			    xen_blkif_max_ring_order);
	if (err)
		pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);

K
Konrad Rzeszutek Wilk 已提交
570 571 572 573 574 575 576
	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
577
	pr_warn("%s failed\n", __func__);
578
	xen_blkbk_remove(dev);
K
Konrad Rzeszutek Wilk 已提交
579 580 581 582
	return err;
}


583
/*
K
Konrad Rzeszutek Wilk 已提交
584 585 586 587 588 589 590 591 592 593 594 595 596 597
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
598
	unsigned long handle;
K
Konrad Rzeszutek Wilk 已提交
599 600
	char *device_type;

601
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
K
Konrad Rzeszutek Wilk 已提交
602 603 604 605

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
606 607 608 609 610
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
K
Konrad Rzeszutek Wilk 已提交
611 612 613 614 615 616 617
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

618 619
	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
620
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
621
				be->major, be->minor, major, minor);
K
Konrad Rzeszutek Wilk 已提交
622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

639
	/* Front end dir is a number, which is used as the handle. */
640
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
641 642
	if (err)
		return;
K
Konrad Rzeszutek Wilk 已提交
643

644 645
	be->major = major;
	be->minor = minor;
K
Konrad Rzeszutek Wilk 已提交
646

647 648
	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);
K
Konrad Rzeszutek Wilk 已提交
649

650 651 652
	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
K
Konrad Rzeszutek Wilk 已提交
653 654
		err = xenvbd_sysfs_addif(dev);
		if (err) {
655
			xen_vbd_free(&be->blkif->vbd);
K
Konrad Rzeszutek Wilk 已提交
656 657
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
658
	}
K
Konrad Rzeszutek Wilk 已提交
659

660 661 662 663 664 665
	if (err) {
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
K
Konrad Rzeszutek Wilk 已提交
666
		/* We're potentially connected now */
667
		xen_update_blkif_status(be->blkif);
K
Konrad Rzeszutek Wilk 已提交
668 669 670 671
	}
}


672
/*
K
Konrad Rzeszutek Wilk 已提交
673 674 675 676 677
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
678
	struct backend_info *be = dev_get_drvdata(&dev->dev);
K
Konrad Rzeszutek Wilk 已提交
679 680
	int err;

681
	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));
K
Konrad Rzeszutek Wilk 已提交
682 683 684 685

	switch (frontend_state) {
	case XenbusStateInitialising:
		if (dev->state == XenbusStateClosed) {
686
			pr_info("%s: prepare for reconnect\n", dev->nodename);
K
Konrad Rzeszutek Wilk 已提交
687 688 689 690 691 692
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
693 694
		/*
		 * Ensure we connect even when two watches fire in
695
		 * close succession and we miss the intermediate value
696 697
		 * of frontend_state.
		 */
K
Konrad Rzeszutek Wilk 已提交
698 699 700
		if (dev->state == XenbusStateConnected)
			break;

701 702
		/*
		 * Enforce precondition before potential leak point.
703
		 * xen_blkif_disconnect() is idempotent.
K
Keir Fraser 已提交
704
		 */
705 706 707 708 709
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}
K
Keir Fraser 已提交
710

K
Konrad Rzeszutek Wilk 已提交
711 712 713
		err = connect_ring(be);
		if (err)
			break;
714
		xen_update_blkif_status(be->blkif);
K
Konrad Rzeszutek Wilk 已提交
715 716 717 718 719 720 721
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
722
		xen_blkif_disconnect(be->blkif);
K
Konrad Rzeszutek Wilk 已提交
723 724 725 726 727
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
728
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
K
Konrad Rzeszutek Wilk 已提交
729 730 731 732 733 734 735 736 737 738 739 740 741 742
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */


743
/*
K
Konrad Rzeszutek Wilk 已提交
744 745 746 747 748 749 750 751 752
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

753
	pr_debug("%s %s\n", __func__, dev->otherend);
K
Konrad Rzeszutek Wilk 已提交
754 755 756 757 758 759 760 761 762

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

763 764
	/* If we can't advertise it is OK. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
K
Konrad Rzeszutek Wilk 已提交
765

766
	xen_blkbk_discard(xbt, be);
767

768
	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
769

770 771 772 773 774 775
	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
776 777 778 779 780
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);
781

K
Konrad Rzeszutek Wilk 已提交
782
	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
783
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
K
Konrad Rzeszutek Wilk 已提交
784 785 786 787 788 789 790 791
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
792 793
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
K
Konrad Rzeszutek Wilk 已提交
794 795 796 797 798 799
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
800 801
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
K
Konrad Rzeszutek Wilk 已提交
802 803 804 805 806
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
807 808 809 810 811
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);
K
Konrad Rzeszutek Wilk 已提交
812 813 814 815 816 817 818 819 820

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
821
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
K
Konrad Rzeszutek Wilk 已提交
822 823 824 825 826 827 828 829 830 831 832
				 dev->nodename);

	return;
 abort:
	xenbus_transaction_end(xbt, 1);
}


static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
833
	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
B
Bob Liu 已提交
834
	unsigned int evtchn, nr_grefs, ring_page_order;
835
	unsigned int pers_grants;
K
Konrad Rzeszutek Wilk 已提交
836
	char protocol[64] = "";
837 838
	struct pending_req *req, *n;
	int err, i, j;
839
	struct xen_blkif_ring *ring = &be->blkif->ring;
K
Konrad Rzeszutek Wilk 已提交
840

841
	pr_debug("%s %s\n", __func__, dev->otherend);
K
Konrad Rzeszutek Wilk 已提交
842

B
Bob Liu 已提交
843 844 845 846 847
	err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
			  &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel",
K
Konrad Rzeszutek Wilk 已提交
848 849 850
				 dev->otherend);
		return err;
	}
B
Bob Liu 已提交
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
	pr_info("event-channel %u\n", evtchn);

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
			  &ring_page_order);
	if (err != 1) {
		err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
				  "%u", &ring_ref[0]);
		if (err != 1) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
					 dev->otherend);
			return err;
		}
		nr_grefs = 1;
		pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
			ring_ref[0]);
	} else {
		unsigned int i;

		if (ring_page_order > xen_blkif_max_ring_order) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
					 dev->otherend, ring_page_order,
					 xen_blkif_max_ring_order);
			return err;
		}

		nr_grefs = 1 << ring_page_order;
		for (i = 0; i < nr_grefs; i++) {
			char ring_ref_name[RINGREF_NAME_LEN];

			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
			err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
					   "%u", &ring_ref[i]);
			if (err != 1) {
				err = -EINVAL;
				xenbus_dev_fatal(dev, err, "reading %s/%s",
						 dev->otherend, ring_ref_name);
				return err;
			}
			pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
		}
	}
K
Konrad Rzeszutek Wilk 已提交
894

895
	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
K
Konrad Rzeszutek Wilk 已提交
896 897 898
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
899
		strcpy(protocol, "unspecified, assuming default");
K
Konrad Rzeszutek Wilk 已提交
900 901 902 903 904 905 906 907 908 909
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
		return -1;
	}
910
	err = xenbus_gather(XBT_NIL, dev->otherend,
911
			    "feature-persistent", "%u",
912 913 914 915 916 917
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;
B
Bob Liu 已提交
918
	be->blkif->nr_ring_pages = nr_grefs;
919

B
Bob Liu 已提交
920 921
	pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
		nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
922
		pers_grants ? "persistent grants" : "");
K
Konrad Rzeszutek Wilk 已提交
923

B
Bob Liu 已提交
924
	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
925 926 927
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
928
		list_add_tail(&req->free_list, &ring->pending_free);
929 930 931 932 933 934 935 936 937 938 939 940 941
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
							 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}

K
Konrad Rzeszutek Wilk 已提交
942
	/* Map the shared frame, irq etc. */
943
	err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
K
Konrad Rzeszutek Wilk 已提交
944
	if (err) {
B
Bob Liu 已提交
945
		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
K
Konrad Rzeszutek Wilk 已提交
946 947 948 949
		return err;
	}

	return 0;
950 951

fail:
952
	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
953 954 955 956 957 958 959 960 961 962 963 964 965 966
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}
	return -ENOMEM;
K
Konrad Rzeszutek Wilk 已提交
967 968
}

969
static const struct xenbus_device_id xen_blkbk_ids[] = {
K
Konrad Rzeszutek Wilk 已提交
970 971 972 973
	{ "vbd" },
	{ "" }
};

974 975
static struct xenbus_driver xen_blkbk_driver = {
	.ids  = xen_blkbk_ids,
976 977
	.probe = xen_blkbk_probe,
	.remove = xen_blkbk_remove,
K
Konrad Rzeszutek Wilk 已提交
978
	.otherend_changed = frontend_changed
979
};
K
Konrad Rzeszutek Wilk 已提交
980

981
int xen_blkif_xenbus_init(void)
K
Konrad Rzeszutek Wilk 已提交
982
{
983
	return xenbus_register_backend(&xen_blkbk_driver);
K
Konrad Rzeszutek Wilk 已提交
984
}