/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
 *			Philip Moltmann <moltmann@vmware.com>
 */

/*
 * This is VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests.  The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
/* Autoload on VMware platforms (DMI system vendor match). */
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Various constants controlling rate of inflating/deflating balloon,
 * measured in pages.
 */

/*
 * Rates of memory allocation when guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
#define VMW_BALLOON_RATE_ALLOC_MIN	512U
#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
#define VMW_BALLOON_RATE_ALLOC_INC	16U

/*
 * When guest is under memory pressure, use a reduced page allocation
 * rate for next several cycles.
 */
#define VMW_BALLOON_SLOW_CYCLES		4

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
 * __GFP_NOWARN, to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep.  This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

/* Capability bits negotiated with the hypervisor via the START command. */
enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

/* A 2M page covers 1 << 9 == 512 4K pages. */
#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands.
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10


/* error codes */
#define VMW_BALLOON_SUCCESS		        0
#define VMW_BALLOON_FAILURE		        -1
#define VMW_BALLOON_ERROR_CMD_INVALID	        1
#define VMW_BALLOON_ERROR_PPN_INVALID	        2
#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
#define VMW_BALLOON_ERROR_PPN_PINNED	        5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
#define VMW_BALLOON_ERROR_RESET		        7
#define VMW_BALLOON_ERROR_BUSY		        8

/* START returns this (plus capability bits) when capabilities are reported. */
#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT          6         0
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

/* One guest page holding an array of batch entries in the layout above. */
struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

/* Return the page-aligned physical address stored in batch entry @idx. */
static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

/* Return the hypervisor status code (low bits) of batch entry @idx. */
static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

/* Store physical address @pa into batch entry @idx. */
static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	batch->pages[idx] = pa;
}


/*
 * Issue a balloon backdoor command to the hypervisor with an "inl" on the
 * magic port: %eax = magic, %ecx = command, %edx = port, %ebx/%esi = args.
 * The hypervisor's status comes back in %eax (the macro's value) and the
 * command result in %ebx -- except for START, whose result is returned
 * in %ecx, hence the fix-up below.  "memory" clobber: the hypervisor may
 * inspect/modify guest memory (e.g. the batch page).
 */
#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S" (__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4" (arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	result &= -1UL;						\
	__status & -1UL;					\
})

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;
224
	unsigned int doorbell;
D
Dmitry Torokhov 已提交
225

226
	/* allocation statistics */
227 228
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
D
Dmitry Torokhov 已提交
229 230
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
231 232 233
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];
D
Dmitry Torokhov 已提交
234 235

	/* monitor operations */
236 237 238 239
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
D
Dmitry Torokhov 已提交
240 241 242 243 244 245
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
246 247
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
D
Dmitry Torokhov 已提交
248 249 250 251 252 253 254
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

255 256 257 258
struct vmballoon;

/*
 * Page-tracking and hypervisor-notification operations; one implementation
 * for the basic one-page-at-a-time protocol and one for the batched
 * protocol (see vmballoon_basic_ops/vmballoon_batched_ops below).
 */
struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
};

/* Per-page-size bookkeeping (one instance for 4K pages, one for 2M). */
struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	/* adjustment rates (pages per second) */
	unsigned int rate_alloc;

	/* slowdown page allocations for next few cycles */
	unsigned int slow_allocation_cycles;

	/* capability bits negotiated with the hypervisor at start/reset */
	unsigned long capabilities;

	/* batch protocol state: mapped batch page and its capacity */
	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

/* Single global balloon instance for this guest. */
static struct vmballoon balloon;

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities, dummy = 0;
	bool success;

	STATS_INC(b->stats.start);

	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		/* old hosts do not report capabilities; assume basic only */
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	/*
	 * NOTE(review): on failure b->capabilities is left as-is (possibly
	 * stale from a previous start) yet is still consulted here --
	 * confirm this is intentional.
	 */
	if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

/*
 * Translate a hypervisor status code into a boolean success value.
 * A VMW_BALLOON_ERROR_RESET status additionally flags the balloon
 * for a full reset; like every other non-success code it yields false.
 */
static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
	if (status == VMW_BALLOON_SUCCESS)
		return true;

	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return false;
}

/*
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status, dummy = 0;

	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
				dummy);

	STATS_INC(b->stats.guest_type);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

394 395 396 397 398 399 400 401
/* Number of 4K pages covered by one balloon page of the given size. */
static u16 vmballoon_page_size(bool is_2m_page)
{
	return is_2m_page ? 1 << VMW_BALLOON_2M_SHIFT : 1;
}

D
Dmitry Torokhov 已提交
402 403 404 405 406 407 408 409
/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
	unsigned long status;
	unsigned long target;
	unsigned long limit;
	unsigned long dummy = 0;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
	if (vmballoon_check_status(b, status)) {
		*new_target = target;
		return true;
	}

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about allocated page so that host can use it without
 * fear that guest will need it. Host may reject some pages, we need to
 * check the return value and maybe submit a different page.
 *
 * Returns 0 on success, -1 if the pfn cannot be expressed in 32 bits
 * (hypervisor was not contacted), or 1 if the hypervisor refused the
 * page; *hv_status then holds the raw hypervisor status.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
				unsigned int *hv_status, unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return -1;

	/* non-batched protocol only handles 4K pages, hence index [false] */
	STATS_INC(b->stats.lock[false]);

	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[false]);
	return 1;
}

466
/*
 * Lock a whole batch of pages at once: the batch page (b->page) holds the
 * page descriptors and its pfn is passed to the hypervisor together with
 * the entry count.  Returns 0 on success, 1 on hypervisor failure.
 */
static int vmballoon_send_batched_lock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = page_to_pfn(b->page);

	STATS_INC(b->stats.lock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
	return 1;
}

D
Dmitry Torokhov 已提交
489 490 491 492
/*
 * Notify the host that guest intends to release given page back into
 * the pool of available (to the guest) pages.
 *
 * Returns true on success; false if the pfn does not fit in 32 bits or
 * the hypervisor rejected the command.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
							unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

	/* non-batched protocol only handles 4K pages, hence index [false] */
	STATS_INC(b->stats.unlock[false]);

	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[false]);
	return false;
}

514
/*
 * Unlock a whole batch of pages at once (counterpart of
 * vmballoon_send_batched_lock).  Returns true on success.
 */
static bool vmballoon_send_batched_unlock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = page_to_pfn(b->page);

	STATS_INC(b->stats.unlock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
	return false;
}

537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
/* Allocate one balloon page: an order-9 compound page for 2M, else 4K. */
static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	return is_2m_page ? alloc_pages(flags, VMW_BALLOON_2M_SHIFT) :
			    alloc_page(flags);
}

/* Release one balloon page with the order matching its size. */
static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (!is_2m_page) {
		__free_page(page);
		return;
	}

	__free_pages(page, VMW_BALLOON_2M_SHIFT);
}

D
Dmitry Torokhov 已提交
553 554 555 556 557 558 559 560 561
/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	/* tear down batched-protocol state (mapping and backing page) */
	if (b->batch_page) {
		vunmap(b->batch_page);
		b->batch_page = NULL;
	}

	if (b->page) {
		__free_page(b->page);
		b->page = NULL;
	}
}

/*
 * Notify the host of a ballooned page. If host rejects the page put it on the
 * refuse list, those refused page are then released at the end of the
 * inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
				bool is_2m_pages, unsigned int *target)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	/*
	 * NOTE(review): locked == -1 (pfn does not fit in 32 bits, hv never
	 * contacted) falls through to the success path below and the page is
	 * counted as ballooned -- confirm this is the intended handling.
	 */
	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
								target);
	if (locked > 0) {
		STATS_INC(b->stats.refused_alloc[false]);

		/* these statuses mean retrying this page will not help */
		if (hv_status == VMW_BALLOON_ERROR_RESET ||
				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return -EIO;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}

638
/*
 * Submit the current batch of pages to the hypervisor.  On a whole-batch
 * failure every page is freed.  Otherwise each entry's per-page status is
 * examined: successes join the balloon, retryable rejections go to the
 * refused list (up to VMW_BALLOON_MAX_REFUSED), the rest are freed.
 */
static int vmballoon_lock_batched_page(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
			target);
	if (locked > 0) {
		/* whole batch rejected: free every page we had staged */
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}

D
Dmitry Torokhov 已提交
692 693 694 695 696
/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
		bool is_2m_pages, unsigned int *target)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
		/* host still owns the page; keep it in the balloon */
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}

720
/*
 * Release a batch of ballooned pages.  Pages the hypervisor did not
 * acknowledge (or the whole batch, if the command itself failed) stay in
 * the balloon; acknowledged pages are freed back to the guest.
 */
static int vmballoon_unlock_batched_page(struct vmballoon *b,
				unsigned int num_pages, bool is_2m_pages,
				unsigned int *target)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
			target);
	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

D
Dmitry Torokhov 已提交
760 761 762 763
/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

780 781 782 783 784 785 786 787 788 789 790 791
/* Basic protocol: stage a single page (idx unused, capacity is 1). */
static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

/* Batched protocol: record the page's physical address in batch slot idx. */
static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}

D
Dmitry Torokhov 已提交
792 793 794 795 796 797 798
/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
799
	unsigned rate;
D
Dmitry Torokhov 已提交
800
	unsigned int allocations = 0;
801
	unsigned int num_pages = 0;
D
Dmitry Torokhov 已提交
802
	int error = 0;
803
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
804
	bool is_2m_pages;
D
Dmitry Torokhov 已提交
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if balloon target is not met yet,
	 * which is a desired behavior. However, balloon driver can consume
	 * all available CPU cycles if too many pages are allocated in a
	 * second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slowdown page allocations considerably.
	 */

	/*
	 * Start with no sleep allocation rate which may be higher
	 * than sleeping allocation rate.
	 */
827 828 829 830 831 832 833 834
	if (b->slow_allocation_cycles) {
		rate = b->rate_alloc;
		is_2m_pages = false;
	} else {
		rate = UINT_MAX;
		is_2m_pages =
			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
	}
D
Dmitry Torokhov 已提交
835

836
	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
837
		 __func__, b->target - b->size, rate, b->rate_alloc);
D
Dmitry Torokhov 已提交
838

839
	while (!b->reset_required &&
840 841
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
842
		struct page *page;
D
Dmitry Torokhov 已提交
843

844
		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
845
			STATS_INC(b->stats.alloc[is_2m_pages]);
846 847
		else
			STATS_INC(b->stats.sleep_alloc);
D
Dmitry Torokhov 已提交
848

849
		page = vmballoon_alloc_page(flags, is_2m_pages);
850
		if (!page) {
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true, &b->target);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

867
			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
D
Dmitry Torokhov 已提交
868 869 870 871 872 873 874
				/*
				 * CANSLEEP page allocation failed, so guest
				 * is under severe memory pressure. Quickly
				 * decrease allocation rate.
				 */
				b->rate_alloc = max(b->rate_alloc / 2,
						    VMW_BALLOON_RATE_ALLOC_MIN);
875
				STATS_INC(b->stats.sleep_alloc_fail);
D
Dmitry Torokhov 已提交
876 877 878 879 880 881 882 883 884 885 886 887 888
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Let us slow down page
			 * allocations for next few cycles so that the guest
			 * gets out of memory pressure. Also, if we already
			 * allocated b->rate_alloc pages, let's pause,
			 * otherwise switch to sleeping allocations.
			 */
			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;

889
			if (allocations >= b->rate_alloc)
D
Dmitry Torokhov 已提交
890 891
				break;

892
			flags = VMW_PAGE_ALLOC_CANSLEEP;
D
Dmitry Torokhov 已提交
893 894
			/* Lower rate for sleeping allocations. */
			rate = b->rate_alloc;
895
			continue;
D
Dmitry Torokhov 已提交
896 897
		}

898 899
		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
900 901
			error = b->ops->lock(b, num_pages, is_2m_pages,
					&b->target);
902 903 904 905
			num_pages = 0;
			if (error)
				break;
		}
906

907
		cond_resched();
D
Dmitry Torokhov 已提交
908

909
		if (allocations >= rate) {
D
Dmitry Torokhov 已提交
910 911 912 913 914
			/* We allocated enough pages, let's take a break. */
			break;
		}
	}

915
	if (num_pages > 0)
916
		b->ops->lock(b, num_pages, is_2m_pages, &b->target);
917

D
Dmitry Torokhov 已提交
918 919 920 921
	/*
	 * We reached our goal without failures so try increasing
	 * allocation rate.
	 */
922 923
	if (error == 0 && allocations >= b->rate_alloc) {
		unsigned int mult = allocations / b->rate_alloc;
D
Dmitry Torokhov 已提交
924 925 926 927 928 929

		b->rate_alloc =
			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
			    VMW_BALLOON_RATE_ALLOC_MAX);
	}

930 931
	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
D
Dmitry Torokhov 已提交
932 933 934 935 936 937 938
}

/*
 * Decrease the size of the balloon allowing guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			/* stop once we are close enough to the target */
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						is_2m_pages, &b->target);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		/* flush any partially filled batch */
		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
	}
}

/* Ops used when the host does not support batched commands (one page per
 * hypervisor call). */
static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

/* Ops used when the host supports VMW_BALLOON_BATCHED_CMDS (many pages
 * per hypervisor call via the shared batch page). */
static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};

/*
 * Allocate and map the page used to communicate batched lock/unlock
 * requests to the hypervisor.
 *
 * Returns true on success.  On failure nothing is published into *b,
 * so a subsequent reset cannot observe stale pointers.
 *
 * Fix: the previous version assigned b->page before vmap() and only
 * freed the page (without clearing b->page) when vmap() failed.  That
 * left a dangling b->page which a later reset/teardown path could free
 * again — a double free.  Use a local and publish only on full success.
 */
static bool vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
	if (!page)
		return false;

	b->batch_page = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
	if (!b->batch_page) {
		__free_page(page);
		return false;
	}

	b->page = page;
	return true;
}

1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
/*
 * Receive notification and resize balloon
 *
 * VMCI doorbell callback: the host rang the bell because the balloon
 * target changed, so run the balloon worker immediately instead of
 * waiting for the next 1-second tick.
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	/* Pull the delayed work forward to "now". */
	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 *
 * First tells the hypervisor to stop using the doorbell (by setting an
 * invalid handle), then destroys the local doorbell object if one was
 * created.  Safe to call even when no doorbell was ever set up.
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	int error;

	/* Detach the doorbell on the host side before destroying it. */
	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
			VMCI_INVALID_ID, error);
	STATS_INC(b->stats.doorbell_unset);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 *
 * Only attempted when the host advertised VMW_BALLOON_SIGNALLED_WAKEUP_CMD.
 * Returns 0 on success (including when the capability is absent), -EIO on
 * any failure, after cleaning up whatever was partially set up.
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	int error = 0;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
		error = vmci_doorbell_create(&b->vmci_doorbell,
				VMCI_FLAG_DELAYED_CB,
				VMCI_PRIVILEGE_FLAG_RESTRICTED,
				vmballoon_doorbell, b);

		if (error == VMCI_SUCCESS) {
			/*
			 * NOTE(review): 'error' is reused here for the
			 * balloon command status, which is a different
			 * error namespace than the VMCI return codes
			 * above; both use 0/VMCI_SUCCESS for success, so
			 * the combined check below works — confirm if the
			 * status macros ever change.
			 */
			VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
					b->vmci_doorbell.context,
					b->vmci_doorbell.resource, error);
			STATS_INC(b->stats.doorbell_set);
		}
	}

	if (error != 0) {
		vmballoon_vmci_cleanup(b);

		return -EIO;
	}

	return 0;
}

1066 1067 1068 1069 1070 1071 1072
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not  empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 *
 * On success the capability-dependent ops table and batch size are
 * re-selected and the VMCI doorbell is re-armed.  On failure
 * b->reset_required stays set, so the worker retries on its next tick.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	/* Tear down the doorbell first; it references the old session. */
	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	/* Renegotiate capabilities with the host. */
	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;

	/* Doorbell failure is non-fatal: polling still works. */
	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	unsigned int target;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (b->slow_allocation_cycles > 0)
		b->slow_allocation_cycles--;

1129
	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
D
Dmitry Torokhov 已提交
1130 1131 1132 1133 1134
		/* update target, adjust size */
		b->target = target;

		if (b->size < target)
			vmballoon_inflate(b);
1135 1136
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
D
Dmitry Torokhov 已提交
1137 1138 1139
			vmballoon_deflate(b);
	}

1140 1141 1142 1143 1144 1145
	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
D
Dmitry Torokhov 已提交
1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

/*
 * seq_file show callback for /sys/kernel/debug/vmmemctl: dumps the
 * negotiated capabilities, current/target balloon size, the allocation
 * rate, and all operation/failure counters.
 */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities:   %#4x\n"
		   "used capabilities:      %#4lx\n"
		   "is resetting:           %c\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities,
		   b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		   "target:             %8d pages\n"
		   "current:            %8d pages\n",
		   b->target, b->size);

	/* format rate info */
	seq_printf(f,
		   "rateSleepAlloc:     %8d pages/sec\n",
		   b->rate_alloc);

	/*
	 * Stats arrays are indexed by the is_2m_pages flag:
	 * [true] = 2MB pages, [false] = 4KB pages.
	 */
	seq_printf(f,
		   "\n"
		   "timer:              %8u\n"
		   "doorbell:           %8u\n"
		   "start:              %8u (%4u failed)\n"
		   "guestType:          %8u (%4u failed)\n"
		   "2m-lock:            %8u (%4u failed)\n"
		   "lock:               %8u (%4u failed)\n"
		   "2m-unlock:          %8u (%4u failed)\n"
		   "unlock:             %8u (%4u failed)\n"
		   "target:             %8u (%4u failed)\n"
		   "prim2mAlloc:        %8u (%4u failed)\n"
		   "primNoSleepAlloc:   %8u (%4u failed)\n"
		   "primCanSleepAlloc:  %8u (%4u failed)\n"
		   "prim2mFree:         %8u\n"
		   "primFree:           %8u\n"
		   "err2mAlloc:         %8u\n"
		   "errAlloc:           %8u\n"
		   "err2mFree:          %8u\n"
		   "errFree:            %8u\n"
		   "doorbellSet:        %8u\n"
		   "doorbellUnset:      %8u\n",
		   stats->timer,
		   stats->doorbell,
		   stats->start, stats->start_fail,
		   stats->guest_type, stats->guest_type_fail,
		   stats->lock[true],  stats->lock_fail[true],
		   stats->lock[false],  stats->lock_fail[false],
		   stats->unlock[true], stats->unlock_fail[true],
		   stats->unlock[false], stats->unlock_fail[false],
		   stats->target, stats->target_fail,
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
		   stats->sleep_alloc, stats->sleep_alloc_fail,
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
		   stats->refused_free[true], stats->refused_free[false],
		   stats->doorbell_set, stats->doorbell_unset);

	return 0;
}

/* debugfs open: bind the seq_file to the balloon stashed in i_private. */
static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

/* Standard single-shot seq_file fops for the debugfs entry. */
static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Create the read-only "vmmemctl" debugfs entry exposing balloon stats.
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): debugfs_create_file() historically returns NULL (not an
 * ERR_PTR) on allocation failure when debugfs is enabled, which IS_ERR()
 * would not catch — a NULL dbg_entry is harmless here, but confirm the
 * intended error-handling contract.
 */
static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

/* Remove the debugfs entry (debugfs_remove tolerates NULL/error values). */
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

/* No-op stubs when CONFIG_DEBUG_FS is disabled. */
static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

/*
 * Module init: verify we run under the VMware hypervisor, set up the
 * balloon state, and kick off the periodic worker.  The worker performs
 * the first protocol handshake because reset_required starts as true.
 */
static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;
	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	/* Empty page lists for every supported page size (4KB, 2MB). */
	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	/* initialize rates */
	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	/* Force the first worker run to perform a full protocol reset. */
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}
module_init(vmballoon_init);

/*
 * Module exit: stop notifications and the worker, remove debugfs, then
 * reset the host connection and release every ballooned page.
 */
static void __exit vmballoon_exit(void)
{
	/* Stop doorbell callbacks before cancelling the worker they poke. */
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);