virtio_balloon.c 15.7 KB
Newer Older
S
Sasha Levin 已提交
1 2
/*
 * Virtio balloon implementation, inspired by Dor Laor and Marcelo
 * Tosatti's implementations.
 *
 *  Copyright 2008 Rusty Russell IBM Corporation
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
S
Sasha Levin 已提交
21

R
Rusty Russell 已提交
22 23 24 25 26
#include <linux/virtio.h>
#include <linux/virtio_balloon.h>
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
27
#include <linux/delay.h>
28
#include <linux/slab.h>
29
#include <linux/module.h>
30
#include <linux/balloon_compaction.h>
R
Rusty Russell 已提交
31

32 33 34 35 36
/*
 * Balloon device works in 4K page units.  So each page is pointed to by
 * multiple balloon pages.  All memory counters in this driver are in balloon
 * page units.
 */
37 38
/*
 * Number of balloon-sized pages covered by one kernel page.  The expansion
 * is fully parenthesized so the macro acts as a single operand wherever it
 * is used (including as the operand of sizeof or a postfix operator).
 */
#define VIRTIO_BALLOON_PAGES_PER_PAGE \
	((unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT))
/* Number of entries in the pfn array that is handed to the host per request. */
#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
39

R
Rusty Russell 已提交
40 41 42
struct virtio_balloon
{
	struct virtio_device *vdev;
43
	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
R
Rusty Russell 已提交
44 45 46 47 48 49 50 51

	/* Where the ballooning thread waits for config to change. */
	wait_queue_head_t config_change;

	/* The thread servicing the balloon. */
	struct task_struct *thread;

	/* Waiting for host to ack the pages we released. */
52
	wait_queue_head_t acked;
R
Rusty Russell 已提交
53

54
	/* Number of balloon pages we've told the Host we're not using. */
R
Rusty Russell 已提交
55
	unsigned int num_pages;
56
	/*
57 58
	 * The pages we've told the Host we're not using are enqueued
	 * at vb_dev_info->pages list.
59 60 61
	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
	 * to num_pages above.
	 */
62 63 64 65
	struct balloon_dev_info *vb_dev_info;

	/* Synchronize access/update to this struct virtio_balloon elements */
	struct mutex balloon_lock;
R
Rusty Russell 已提交
66 67 68

	/* The array of pfns we tell the Host about. */
	unsigned int num_pfns;
69
	u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
70 71

	/* Memory statistics */
72
	int need_stats_update;
73
	struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
R
Rusty Russell 已提交
74 75 76 77 78 79 80
};

/*
 * Device ID table for this driver: installed as .id_table of
 * virtio_balloon_driver and exported through MODULE_DEVICE_TABLE().
 * The list is terminated by a zeroed entry.
 */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

81 82 83 84 85 86
static u32 page_to_balloon_pfn(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);

	BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
	/* Convert pfn from Linux page size to balloon page size. */
87 88 89 90 91 92 93
	return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
}

/*
 * Inverse of page_to_balloon_pfn(): map a balloon pfn back to its
 * struct page.  @pfn must be a multiple of VIRTIO_BALLOON_PAGES_PER_PAGE;
 * anything else triggers BUG_ON().
 */
static struct page *balloon_pfn_to_page(u32 pfn)
{
	BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE != 0);

	return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE);
}

R
Rusty Russell 已提交
96 97
static void balloon_ack(struct virtqueue *vq)
{
98
	struct virtio_balloon *vb = vq->vdev->priv;
R
Rusty Russell 已提交
99

100
	wake_up(&vb->acked);
R
Rusty Russell 已提交
101 102 103 104 105
}

static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
{
	struct scatterlist sg;
106
	unsigned int len;
R
Rusty Russell 已提交
107 108 109 110

	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);

	/* We should always be able to add one buffer to an empty queue. */
111
	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
R
Rusty Russell 已提交
112
		BUG();
113
	virtqueue_kick(vq);
R
Rusty Russell 已提交
114 115

	/* When host has read buffer, this completes via balloon_ack */
116
	wait_event(vb->acked, virtqueue_get_buf(vq, &len));
R
Rusty Russell 已提交
117 118
}

119 120 121 122 123 124 125 126 127 128
/*
 * Fill pfns[0 .. VIRTIO_BALLOON_PAGES_PER_PAGE - 1] with the consecutive
 * balloon pfns that cover @page; pfns[0] refers to the start of the page.
 */
static void set_page_pfns(u32 pfns[], struct page *page)
{
	const u32 first = page_to_balloon_pfn(page);
	unsigned int off;

	for (off = 0; off < VIRTIO_BALLOON_PAGES_PER_PAGE; off++)
		pfns[off] = first + off;
}

R
Rusty Russell 已提交
129 130
/*
 * Inflate the balloon by up to @num balloon pages (capped at one pfn array
 * per call) and report the newly enqueued pages on the inflate queue.
 * Runs with vb->balloon_lock held for the whole operation.
 */
static void fill_balloon(struct virtio_balloon *vb, size_t num)
{
	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
	struct page *page;

	/* One request can carry at most one array worth of pfns. */
	num = min(num, ARRAY_SIZE(vb->pfns));

	mutex_lock(&vb->balloon_lock);

	vb->num_pfns = 0;
	while (vb->num_pfns < num) {
		page = balloon_page_enqueue(vb_dev_info);
		if (!page) {
			dev_info_ratelimited(&vb->vdev->dev,
					     "Out of puff! Can't get %u pages\n",
					     VIRTIO_BALLOON_PAGES_PER_PAGE);
			/* Back off for at least 1/5 of a second before retry. */
			msleep(200);
			break;
		}

		set_page_pfns(vb->pfns + vb->num_pfns, page);
		vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
		totalram_pages--;

		vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE;
	}

	/* Only bother the host if at least one page was obtained. */
	if (vb->num_pfns)
		tell_host(vb, vb->inflate_vq);

	mutex_unlock(&vb->balloon_lock);
}

static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
{
	unsigned int i;

164 165
	/* Find pfns pointing at start of each page, get pages and free them. */
	for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
166
		balloon_page_free(balloon_pfn_to_page(pfns[i]));
R
Rusty Russell 已提交
167 168 169 170 171 172 173
		totalram_pages++;
	}
}

/*
 * Deflate the balloon by up to @num balloon pages (capped at one pfn array
 * per call): dequeue pages from the balloon, report them on the deflate
 * queue, then free them back to the kernel.
 *
 * The whole sequence runs under vb->balloon_lock.  vb->pfns and
 * vb->num_pfns are also rewritten by virtballoon_migratepage() under that
 * lock, so the pages must be released before the lock is dropped; otherwise
 * the array could change between telling the host and freeing the pages.
 */
static void leak_balloon(struct virtio_balloon *vb, size_t num)
{
	struct page *page;
	struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;

	/* We can only do one array worth at a time. */
	num = min(num, ARRAY_SIZE(vb->pfns));

	mutex_lock(&vb->balloon_lock);
	for (vb->num_pfns = 0; vb->num_pfns < num;
	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
		page = balloon_page_dequeue(vb_dev_info);
		if (!page)
			break;
		set_page_pfns(vb->pfns + vb->num_pfns, page);
		vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
	}

	/*
	 * Nothing dequeued means nothing to report: do not post a
	 * zero-length buffer to the host.
	 */
	if (vb->num_pfns != 0) {
		/*
		 * Note that if
		 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
		 * is true, we *have* to do it in this order
		 */
		tell_host(vb, vb->deflate_vq);
		release_pages_by_pfn(vb->pfns, vb->num_pfns);
	}
	mutex_unlock(&vb->balloon_lock);
}

199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
/*
 * Store the (@tag, @val) pair in slot @idx of vb->stats.
 * An @idx at or beyond VIRTIO_BALLOON_S_NR triggers BUG_ON().
 */
static inline void update_stat(struct virtio_balloon *vb, int idx,
			       u16 tag, u64 val)
{
	struct virtio_balloon_stat *slot;

	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);

	slot = &vb->stats[idx];
	slot->tag = tag;
	slot->val = val;
}

#define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)

static void update_balloon_stats(struct virtio_balloon *vb)
{
	unsigned long events[NR_VM_EVENT_ITEMS];
	struct sysinfo i;
	int idx = 0;

	all_vm_events(events);
	si_meminfo(&i);

	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
				pages_to_bytes(events[PSWPIN]));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
				pages_to_bytes(events[PSWPOUT]));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
				pages_to_bytes(i.freeram));
	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
				pages_to_bytes(i.totalram));
}

/*
 * While most virtqueues communicate guest-initiated requests to the hypervisor,
 * the stats queue operates in reverse.  The driver initializes the virtqueue
 * with a single buffer.  From that point forward, all conversations consist of
 * a hypervisor request (a call to this function) which directs us to refill
235 236
 * the virtqueue with a fresh stats buffer.  Since stats collection can sleep,
 * we notify our kthread which does the actual work via stats_handle_request().
237
 */
238
static void stats_request(struct virtqueue *vq)
239
{
240
	struct virtio_balloon *vb = vq->vdev->priv;
241

242 243 244 245 246 247 248 249
	vb->need_stats_update = 1;
	wake_up(&vb->config_change);
}

static void stats_handle_request(struct virtio_balloon *vb)
{
	struct virtqueue *vq;
	struct scatterlist sg;
250
	unsigned int len;
251

252
	vb->need_stats_update = 0;
253 254
	update_balloon_stats(vb);

255
	vq = vb->stats_vq;
256 257
	if (!virtqueue_get_buf(vq, &len))
		return;
258
	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
259
	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
260
		BUG();
261
	virtqueue_kick(vq);
262 263
}

R
Rusty Russell 已提交
264 265 266 267 268 269 270
/*
 * .config_changed handler of the driver: wake the balloon thread so it
 * re-evaluates its wait condition.
 */
static void virtballoon_changed(struct virtio_device *vdev)
{
	struct virtio_balloon *balloon_dev = vdev->priv;

	wake_up(&balloon_dev->config_change);
}

271
static inline s64 towards_target(struct virtio_balloon *vb)
R
Rusty Russell 已提交
272
{
D
David Gibson 已提交
273 274 275
	__le32 v;
	s64 target;

276 277 278
	vb->vdev->config->get(vb->vdev,
			      offsetof(struct virtio_balloon_config, num_pages),
			      &v, sizeof(v));
D
David Gibson 已提交
279 280
	target = le32_to_cpu(v);
	return target - vb->num_pages;
R
Rusty Russell 已提交
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
}

static void update_balloon_size(struct virtio_balloon *vb)
{
	__le32 actual = cpu_to_le32(vb->num_pages);

	vb->vdev->config->set(vb->vdev,
			      offsetof(struct virtio_balloon_config, actual),
			      &actual, sizeof(actual));
}

static int balloon(void *_vballoon)
{
	struct virtio_balloon *vb = _vballoon;

	set_freezable();
	while (!kthread_should_stop()) {
298
		s64 diff;
R
Rusty Russell 已提交
299 300 301 302

		try_to_freeze();
		wait_event_interruptible(vb->config_change,
					 (diff = towards_target(vb)) != 0
303
					 || vb->need_stats_update
304 305
					 || kthread_should_stop()
					 || freezing(current));
306 307
		if (vb->need_stats_update)
			stats_handle_request(vb);
R
Rusty Russell 已提交
308 309 310 311 312 313 314 315 316
		if (diff > 0)
			fill_balloon(vb, diff);
		else if (diff < 0)
			leak_balloon(vb, -diff);
		update_balloon_size(vb);
	}
	return 0;
}

317
static int init_vqs(struct virtio_balloon *vb)
R
Rusty Russell 已提交
318
{
319
	struct virtqueue *vqs[3];
320
	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
321 322
	const char *names[] = { "inflate", "deflate", "stats" };
	int err, nvqs;
R
Rusty Russell 已提交
323

324 325 326 327
	/*
	 * We expect two virtqueues: inflate and deflate, and
	 * optionally stat.
	 */
328
	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
329
	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
330
	if (err)
331
		return err;
R
Rusty Russell 已提交
332

333 334
	vb->inflate_vq = vqs[0];
	vb->deflate_vq = vqs[1];
335 336 337 338 339 340 341 342 343
	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
		struct scatterlist sg;
		vb->stats_vq = vqs[2];

		/*
		 * Prime this virtqueue with one buffer so the hypervisor can
		 * use it to signal us later.
		 */
		sg_init_one(&sg, vb->stats, sizeof vb->stats);
344 345
		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL)
		    < 0)
346
			BUG();
347
		virtqueue_kick(vb->stats_vq);
348
	}
349 350 351
	return 0;
}

352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
/*
 * Forward declaration.  The initialized definition exists only under
 * CONFIG_BALLOON_COMPACTION, while virtballoon_probe() takes the address of
 * this object outside of that #ifdef.
 */
static const struct address_space_operations virtio_balloon_aops;
#ifdef CONFIG_BALLOON_COMPACTION
/*
 * virtballoon_migratepage - perform the balloon page migration on behalf of
 *			     a compaction thread.     (called under page lock)
 * @mapping: the page->mapping which will be assigned to the new migrated page.
 * @newpage: page that will replace the isolated page after migration finishes.
 * @page   : the isolated (old) page that is about to be migrated to newpage.
 * @mode   : compaction mode -- not used for balloon page migration.
 *
 * After a ballooned page gets isolated by compaction procedures, this is the
 * function that performs the page migration on behalf of a compaction thread.
 * The page migration for virtio balloon is done in a simple swap fashion which
 * follows these two macro steps:
 *  1) insert newpage into the vb_dev_info->pages list and update the host
 *     about it;
 *  2) update the host about the old page removed from that list;
 *
 * This function performs the balloon page migration task.
 * Called through balloon_mapping->a_ops->migratepage
 *
 * Returns MIGRATEPAGE_BALLOON_SUCCESS when both steps were carried out, or
 * -EAGAIN when balloon_lock could not be taken without blocking.
 */
int virtballoon_migratepage(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	struct balloon_dev_info *vb_dev_info = balloon_page_device(page);
	struct virtio_balloon *vb;
	unsigned long flags;

	BUG_ON(!vb_dev_info);

	vb = vb_dev_info->balloon_device;

	/*
	 * In order to avoid lock contention while migrating pages concurrently
	 * to leak_balloon() or fill_balloon() we just give up the balloon_lock
	 * this turn, as it is easier to retry the page migration later.
	 * This also prevents fill_balloon() getting stuck into a mutex
	 * recursion in the case it ends up triggering memory compaction
	 * while it is attempting to inflate the balloon.
	 */
	if (!mutex_trylock(&vb->balloon_lock))
		return -EAGAIN;

	/* balloon's page migration 1st step  -- inflate "newpage" */
	spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
	balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
	vb_dev_info->isolated_pages--;
	spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
	/* Reuse the shared pfn array (protected by balloon_lock) for one page. */
	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
	set_page_pfns(vb->pfns, newpage);
	tell_host(vb, vb->inflate_vq);

	/*
	 * balloon's page migration 2nd step -- deflate "page"
	 *
	 * It's safe to delete page->lru here because this page is at
	 * an isolated migration list, and this step is expected to happen here
	 */
	balloon_page_delete(page);
	vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
	set_page_pfns(vb->pfns, page);
	tell_host(vb, vb->deflate_vq);

	mutex_unlock(&vb->balloon_lock);

	return MIGRATEPAGE_BALLOON_SUCCESS;
}

/*
 * Address space operations for the balloon mapping: provides the
 * ->migratepage callback that allows balloon pages to be migrated.
 */
static const struct address_space_operations virtio_balloon_aops = {
			.migratepage = virtballoon_migratepage,
};
#endif /* CONFIG_BALLOON_COMPACTION */

425 426 427
static int virtballoon_probe(struct virtio_device *vdev)
{
	struct virtio_balloon *vb;
428 429
	struct address_space *vb_mapping;
	struct balloon_dev_info *vb_devinfo;
430 431 432 433 434 435 436 437 438
	int err;

	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
	if (!vb) {
		err = -ENOMEM;
		goto out;
	}

	vb->num_pages = 0;
439
	mutex_init(&vb->balloon_lock);
440
	init_waitqueue_head(&vb->config_change);
441
	init_waitqueue_head(&vb->acked);
442 443 444
	vb->vdev = vdev;
	vb->need_stats_update = 0;

445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
	vb_devinfo = balloon_devinfo_alloc(vb);
	if (IS_ERR(vb_devinfo)) {
		err = PTR_ERR(vb_devinfo);
		goto out_free_vb;
	}

	vb_mapping = balloon_mapping_alloc(vb_devinfo,
					   (balloon_compaction_check()) ?
					   &virtio_balloon_aops : NULL);
	if (IS_ERR(vb_mapping)) {
		/*
		 * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP
		 * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off.
		 */
		err = PTR_ERR(vb_mapping);
		if (err != -EOPNOTSUPP)
			goto out_free_vb_devinfo;
	}

	vb->vb_dev_info = vb_devinfo;

466 467
	err = init_vqs(vb);
	if (err)
468
		goto out_free_vb_mapping;
R
Rusty Russell 已提交
469 470 471 472

	vb->thread = kthread_run(balloon, vb, "vballoon");
	if (IS_ERR(vb->thread)) {
		err = PTR_ERR(vb->thread);
473
		goto out_del_vqs;
R
Rusty Russell 已提交
474 475 476 477
	}

	return 0;

478 479
out_del_vqs:
	vdev->config->del_vqs(vdev);
480 481 482 483
out_free_vb_mapping:
	balloon_mapping_free(vb_mapping);
out_free_vb_devinfo:
	balloon_devinfo_free(vb_devinfo);
R
Rusty Russell 已提交
484 485 486 487 488 489
out_free_vb:
	kfree(vb);
out:
	return err;
}

490
static void remove_common(struct virtio_balloon *vb)
R
Rusty Russell 已提交
491 492 493 494
{
	/* There might be pages left in the balloon: free them. */
	while (vb->num_pages)
		leak_balloon(vb, vb->num_pages);
495
	update_balloon_size(vb);
R
Rusty Russell 已提交
496 497

	/* Now we reset the device so we can clean up the queues. */
498
	vb->vdev->config->reset(vb->vdev);
R
Rusty Russell 已提交
499

500 501 502 503 504 505 506 507 508
	vb->vdev->config->del_vqs(vb->vdev);
}

/*
 * Remove callback: stop the balloon thread, run the common teardown and
 * free the balloon mapping, the devinfo and the per-device state.
 */
static void __devexit virtballoon_remove(struct virtio_device *vdev)
{
	struct virtio_balloon *vb = vdev->priv;
	struct balloon_dev_info *devinfo;

	kthread_stop(vb->thread);
	remove_common(vb);

	devinfo = vb->vb_dev_info;
	balloon_mapping_free(devinfo->mapping);
	balloon_devinfo_free(devinfo);
	kfree(vb);
}

514 515 516
#ifdef CONFIG_PM
static int virtballoon_freeze(struct virtio_device *vdev)
{
517 518
	struct virtio_balloon *vb = vdev->priv;

519 520 521 522 523
	/*
	 * The kthread is already frozen by the PM core before this
	 * function is called.
	 */

524
	remove_common(vb);
525 526 527
	return 0;
}

528
static int virtballoon_restore(struct virtio_device *vdev)
529 530 531 532 533 534 535 536 537 538 539 540
{
	struct virtio_balloon *vb = vdev->priv;
	int ret;

	ret = init_vqs(vdev->priv);
	if (ret)
		return ret;

	fill_balloon(vb, towards_target(vb));
	update_balloon_size(vb);
	return 0;
}
541 542
#endif

543 544 545 546
/* Feature bits installed as the driver's feature_table. */
static unsigned int features[] = {
	VIRTIO_BALLOON_F_MUST_TELL_HOST,
	VIRTIO_BALLOON_F_STATS_VQ,
};
547

548
static struct virtio_driver virtio_balloon_driver = {
549 550
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
R
Rusty Russell 已提交
551 552 553 554 555 556
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtballoon_probe,
	.remove =	__devexit_p(virtballoon_remove),
	.config_changed = virtballoon_changed,
557 558 559 560
#ifdef CONFIG_PM
	.freeze	=	virtballoon_freeze,
	.restore =	virtballoon_restore,
#endif
R
Rusty Russell 已提交
561 562 563 564
};

/*
 * Module entry point: register the balloon driver with the virtio core and
 * pass its result straight back.
 */
static int __init init(void)
{
	int rc = register_virtio_driver(&virtio_balloon_driver);

	return rc;
}

/* Module exit point: unregister the balloon driver from the virtio core. */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_balloon_driver);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio balloon driver");
MODULE_LICENSE("GPL");