/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
 *			Philip Moltmann <moltmann@vmware.com>
 */

/*
 * This is VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests.  The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

/* Module metadata: author, description, version, aliases and license. */
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Various constants controlling rate of inflating/deflating balloon,
 * measured in pages.
 */

/*
 * Rates of memory allocation when guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
/* Floor applied when the allocation rate is halved after a failed sleeping allocation. */
#define VMW_BALLOON_RATE_ALLOC_MIN	512U
/* Ceiling applied when the allocation rate is raised after a successful cycle. */
#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
/* Step by which the allocation rate is raised (scaled by a multiplier in the inflate path). */
#define VMW_BALLOON_RATE_ALLOC_INC	16U

/*
 * When guest is under memory pressure, use a reduced page allocation
 * rate for next several cycles.
 *
 * This value is loaded into vmballoon::slow_allocation_cycles when a
 * non-sleeping allocation fails during inflation.
 */
#define VMW_BALLOON_SLOW_CYCLES		4

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_RECLAIM) for NOSLEEP page allocations. Use
 * __GFP_NOWARN, to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep.  This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

/*
 * Capability bits exchanged with the hypervisor by the START command.
 * Each bit gates a group of backdoor commands.
 */
enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

/* Union of all capability bits defined above. */
#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

/*
 * Allocation order of a 2m page; 1 << VMW_BALLOON_2M_SHIFT is the number
 * of 4k pages it accounts for in the balloon size.
 */
#define VMW_BALLOON_2M_SHIFT		(9)
/* Number of page sizes the driver tracks (index 0: 4k, index 1: 2m). */
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 *
 * Command number 5 is not assigned here.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10

/* error codes */
#define VMW_BALLOON_SUCCESS		        0
/* Parenthesized so the negative constant is safe inside any expression. */
#define VMW_BALLOON_FAILURE		        (-1)
#define VMW_BALLOON_ERROR_CMD_INVALID	        1
#define VMW_BALLOON_ERROR_PPN_INVALID	        2
#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
#define VMW_BALLOON_ERROR_PPN_PINNED	        5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
#define VMW_BALLOON_ERROR_RESET		        7
#define VMW_BALLOON_ERROR_BUSY		        8

/* Status returned by START when the host also reports its capabilities. */
#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT          6         0
 *
 * The reserved field should be set to 0.
 */
/* Number of 64-bit entries that fit in the single batch page. */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
/*
 * The masks are applied to u64 entries, so build them from 64-bit
 * constants: with a 32-bit unsigned long, a mask built from 1UL would be
 * zero-extended and strip the upper half of the physical address.
 */
#define VMW_BALLOON_BATCH_STATUS_MASK	((1ULL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1ULL << PAGE_SHIFT) - 1))

/* One page worth of batch entries shared with the hypervisor. */
struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

/* Return batch entry @idx with the bits below PAGE_SHIFT cleared. */
static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	u64 entry = batch->pages[idx];

	return entry & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

/* Overwrite batch entry @idx with @pa; the whole 64-bit entry is replaced. */
static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	u64 *slot = &batch->pages[idx];

	*slot = pa;
}


/*
 * Issue balloon command VMW_BALLOON_CMD_<cmd> through the hypervisor port.
 *
 * Register usage, as given by the asm constraints:
 *   in:  a = VMW_BALLOON_HV_MAGIC, c = command number,
 *        d = VMW_BALLOON_HV_PORT,  b = arg1, S = arg2
 *   out: a = status (the value of the whole expression), b = result
 *
 * For the START command the result is taken from the c output register
 * instead of b. @result must be an lvalue; it is always written.
 */
#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S" (__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4" (arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	result &= -1UL;						\
	__status & -1UL;					\
})

#ifdef CONFIG_DEBUG_FS
/*
 * Event counters, only compiled in with CONFIG_DEBUG_FS. Arrays sized
 * VMW_BALLOON_NUM_PAGE_SIZES are indexed by the is_2m_pages flag
 * (0: 4k pages, 1: 2m pages).
 */
struct vmballoon_stats {
	unsigned int timer;
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
};

/* Bump a counter; expands to nothing when statistics are compiled out. */
#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

255 256 257 258
struct vmballoon;

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
259
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
260
			bool is_2m_pages, unsigned int *target);
261
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
262
			bool is_2m_pages, unsigned int *target);
263 264
};

265
struct vmballoon_page_size {
D
Dmitry Torokhov 已提交
266 267 268 269 270
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
271
	unsigned int n_refused_pages;
272 273 274 275 276 277 278
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;
D
Dmitry Torokhov 已提交
279 280 281 282 283 284 285 286 287 288 289 290 291 292

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	/* adjustment rates (pages per second) */
	unsigned int rate_alloc;

	/* slowdown page allocations for next few cycles */
	unsigned int slow_allocation_cycles;

293 294 295 296 297 298 299 300
	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

D
Dmitry Torokhov 已提交
301 302 303 304 305 306 307 308 309 310 311
#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;
312 313

	struct vmci_handle vmci_doorbell;
D
Dmitry Torokhov 已提交
314 315 316 317 318 319 320 321
};

/* The driver's balloon state: a single file-scope, zero-initialized instance. */
static struct vmballoon balloon;

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
322
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
D
Dmitry Torokhov 已提交
323
{
324
	unsigned long status, capabilities, dummy = 0;
325
	bool success;
D
Dmitry Torokhov 已提交
326 327 328

	STATS_INC(b->stats.start);

329 330 331 332 333
	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
334 335
		success = true;
		break;
336 337
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
338 339 340 341
		success = true;
		break;
	default:
		success = false;
342
	}
D
Dmitry Torokhov 已提交
343

344 345 346 347 348 349 350 351 352 353
	if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
D
Dmitry Torokhov 已提交
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
}

/*
 * Translate a hypervisor status into success/failure.
 *
 * Returns true only for VMW_BALLOON_SUCCESS. As a side effect, a
 * VMW_BALLOON_ERROR_RESET status sets b->reset_required.
 */
static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
	if (status == VMW_BALLOON_SUCCESS)
		return true;

	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return false;
}

/*
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
379
	unsigned long status, dummy = 0;
D
Dmitry Torokhov 已提交
380

381 382
	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
				dummy);
D
Dmitry Torokhov 已提交
383 384 385 386 387 388 389 390 391 392 393

	STATS_INC(b->stats.guest_type);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

394 395 396 397 398 399 400 401
static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}

D
Dmitry Torokhov 已提交
402 403 404 405 406 407 408 409
/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
	unsigned long status;
	unsigned long target;
	unsigned long limit;
410
	unsigned long dummy = 0;
D
Dmitry Torokhov 已提交
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

429
	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
D
Dmitry Torokhov 已提交
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
	if (vmballoon_check_status(b, status)) {
		*new_target = target;
		return true;
	}

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about allocated page so that host can use it without
 * fear that guest will need it. Host may reject some pages, we need to
 * check the return value and maybe submit a different page.
 */
445
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
446
				unsigned int *hv_status, unsigned int *target)
D
Dmitry Torokhov 已提交
447
{
448
	unsigned long status, dummy = 0;
D
Dmitry Torokhov 已提交
449 450 451 452
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
453
		return -1;
D
Dmitry Torokhov 已提交
454

455
	STATS_INC(b->stats.lock[false]);
D
Dmitry Torokhov 已提交
456

457
	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
D
Dmitry Torokhov 已提交
458
	if (vmballoon_check_status(b, status))
459
		return 0;
D
Dmitry Torokhov 已提交
460 461

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
462
	STATS_INC(b->stats.lock_fail[false]);
463
	return 1;
D
Dmitry Torokhov 已提交
464 465
}

466
static int vmballoon_send_batched_lock(struct vmballoon *b,
467
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
468
{
469
	unsigned long status;
470 471
	unsigned long pfn = page_to_pfn(b->page);

472 473 474 475 476 477 478 479
	STATS_INC(b->stats.lock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
				*target);
480 481 482 483 484

	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
485
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
486 487 488
	return 1;
}

D
Dmitry Torokhov 已提交
489 490 491 492
/*
 * Notify the host that guest intends to release given page back into
 * the pool of available (to the guest) pages.
 */
493 494
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
							unsigned int *target)
D
Dmitry Torokhov 已提交
495
{
496
	unsigned long status, dummy = 0;
D
Dmitry Torokhov 已提交
497 498 499 500 501 502
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

503
	STATS_INC(b->stats.unlock[false]);
D
Dmitry Torokhov 已提交
504

505
	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
D
Dmitry Torokhov 已提交
506 507 508 509
	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
510
	STATS_INC(b->stats.unlock_fail[false]);
D
Dmitry Torokhov 已提交
511 512 513
	return false;
}

514
static bool vmballoon_send_batched_unlock(struct vmballoon *b,
515
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
516
{
517
	unsigned long status;
518 519
	unsigned long pfn = page_to_pfn(b->page);

520 521 522 523 524 525 526 527
	STATS_INC(b->stats.unlock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
				*target);
528 529 530 531 532

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
533
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
534 535 536
	return false;
}

537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);

	return alloc_page(flags);
}

/*
 * Free a page obtained from vmballoon_alloc_page(); @is_2m_page must match
 * the value used at allocation so the same order is released.
 */
static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (!is_2m_page) {
		__free_page(page);
		return;
	}

	__free_pages(page, VMW_BALLOON_2M_SHIFT);
}

D
Dmitry Torokhov 已提交
553 554 555 556 557 558 559 560 561
/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
562 563 564 565 566 567 568 569 570 571 572 573 574 575 576
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
D
Dmitry Torokhov 已提交
577 578
	}

579 580 581
	/* Clearing the batch_page unconditionally has no adverse effect */
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
D
Dmitry Torokhov 已提交
582 583 584
}

/*
585 586 587
 * Notify the host of a ballooned page. If host rejects the page put it on the
 * refuse list, those refused page are then released at the end of the
 * inflation cycle.
D
Dmitry Torokhov 已提交
588
 */
589
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
590
				bool is_2m_pages, unsigned int *target)
D
Dmitry Torokhov 已提交
591
{
592
	int locked, hv_status;
593
	struct page *page = b->page;
594 595 596
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */
D
Dmitry Torokhov 已提交
597

598 599
	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
								target);
600
	if (locked > 0) {
601
		STATS_INC(b->stats.refused_alloc[false]);
D
Dmitry Torokhov 已提交
602

603 604
		if (hv_status == VMW_BALLOON_ERROR_RESET ||
				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
605
			vmballoon_free_page(page, false);
606 607
			return -EIO;
		}
D
Dmitry Torokhov 已提交
608

609 610 611 612 613
		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
614 615 616
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
617
		} else {
618
			vmballoon_free_page(page, false);
D
Dmitry Torokhov 已提交
619
		}
620 621
		return -EIO;
	}
D
Dmitry Torokhov 已提交
622 623

	/* track allocated page */
624
	list_add(&page->lru, &page_size->pages);
D
Dmitry Torokhov 已提交
625 626 627 628 629 630 631

	/* update balloon size */
	b->size++;

	return 0;
}

632
static int vmballoon_lock_batched_page(struct vmballoon *b,
633
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
634 635
{
	int locked, i;
636
	u16 size_per_page = vmballoon_page_size(is_2m_pages);
637

638 639
	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
			target);
640 641 642 643 644
	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

645
			vmballoon_free_page(p, is_2m_pages);
646 647 648 649 650 651 652 653
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
654 655
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
656 657 658 659 660

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
661 662
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
663 664 665
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
666 667 668 669
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
670 671 672 673 674
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
675
			vmballoon_free_page(p, is_2m_pages);
676 677 678 679 680 681 682 683 684 685
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}

D
Dmitry Torokhov 已提交
686 687 688 689 690
/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
691
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
692
		bool is_2m_pages, unsigned int *target)
D
Dmitry Torokhov 已提交
693
{
694
	struct page *page = b->page;
695 696 697
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */
D
Dmitry Torokhov 已提交
698

699
	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
700
		list_add(&page->lru, &page_size->pages);
701 702
		return -EIO;
	}
D
Dmitry Torokhov 已提交
703 704

	/* deallocate page */
705 706
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);
D
Dmitry Torokhov 已提交
707 708 709 710 711 712 713

	/* update balloon size */
	b->size--;

	return 0;
}

714
static int vmballoon_unlock_batched_page(struct vmballoon *b,
715 716
				unsigned int num_pages, bool is_2m_pages,
				unsigned int *target)
717 718 719
{
	int locked, i, ret = 0;
	bool hv_success;
720
	u16 size_per_page = vmballoon_page_size(is_2m_pages);
721

722 723
	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
			target);
724 725 726 727 728 729
	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
730 731
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
732 733 734 735 736 737 738 739

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
740
			list_add(&p->lru, &page_size->pages);
741 742
		} else {
			/* deallocate page */
743 744
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
745 746

			/* update balloon size */
747
			b->size -= size_per_page;
748 749 750 751 752 753
		}
	}

	return ret;
}

D
Dmitry Torokhov 已提交
754 755 756 757
/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
758 759
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
D
Dmitry Torokhov 已提交
760 761
{
	struct page *page, *next;
762 763
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];
D
Dmitry Torokhov 已提交
764

765
	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
D
Dmitry Torokhov 已提交
766
		list_del(&page->lru);
767 768
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
D
Dmitry Torokhov 已提交
769
	}
770

771
	page_size->n_refused_pages = 0;
D
Dmitry Torokhov 已提交
772 773
}

774 775 776 777 778 779 780 781 782 783 784 785
static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}

D
Dmitry Torokhov 已提交
786 787 788 789 790 791 792
/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
793
	unsigned rate;
D
Dmitry Torokhov 已提交
794
	unsigned int allocations = 0;
795
	unsigned int num_pages = 0;
D
Dmitry Torokhov 已提交
796
	int error = 0;
797
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
798
	bool is_2m_pages;
D
Dmitry Torokhov 已提交
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if balloon target is not met yet,
	 * which is a desired behavior. However, balloon driver can consume
	 * all available CPU cycles if too many pages are allocated in a
	 * second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slowdown page allocations considerably.
	 */

	/*
	 * Start with no sleep allocation rate which may be higher
	 * than sleeping allocation rate.
	 */
821 822 823 824 825 826 827 828
	if (b->slow_allocation_cycles) {
		rate = b->rate_alloc;
		is_2m_pages = false;
	} else {
		rate = UINT_MAX;
		is_2m_pages =
			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
	}
D
Dmitry Torokhov 已提交
829

830
	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
831
		 __func__, b->target - b->size, rate, b->rate_alloc);
D
Dmitry Torokhov 已提交
832

833
	while (!b->reset_required &&
834 835
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
836
		struct page *page;
D
Dmitry Torokhov 已提交
837

838
		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
839
			STATS_INC(b->stats.alloc[is_2m_pages]);
840 841
		else
			STATS_INC(b->stats.sleep_alloc);
D
Dmitry Torokhov 已提交
842

843
		page = vmballoon_alloc_page(flags, is_2m_pages);
844
		if (!page) {
845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true, &b->target);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

861
			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
D
Dmitry Torokhov 已提交
862 863 864 865 866 867 868
				/*
				 * CANSLEEP page allocation failed, so guest
				 * is under severe memory pressure. Quickly
				 * decrease allocation rate.
				 */
				b->rate_alloc = max(b->rate_alloc / 2,
						    VMW_BALLOON_RATE_ALLOC_MIN);
869
				STATS_INC(b->stats.sleep_alloc_fail);
D
Dmitry Torokhov 已提交
870 871 872 873 874 875 876 877 878 879 880 881 882
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Let us slow down page
			 * allocations for next few cycles so that the guest
			 * gets out of memory pressure. Also, if we already
			 * allocated b->rate_alloc pages, let's pause,
			 * otherwise switch to sleeping allocations.
			 */
			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;

883
			if (allocations >= b->rate_alloc)
D
Dmitry Torokhov 已提交
884 885
				break;

886
			flags = VMW_PAGE_ALLOC_CANSLEEP;
D
Dmitry Torokhov 已提交
887 888
			/* Lower rate for sleeping allocations. */
			rate = b->rate_alloc;
889
			continue;
D
Dmitry Torokhov 已提交
890 891
		}

892 893
		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
894 895
			error = b->ops->lock(b, num_pages, is_2m_pages,
					&b->target);
896 897 898 899
			num_pages = 0;
			if (error)
				break;
		}
900

901
		cond_resched();
D
Dmitry Torokhov 已提交
902

903
		if (allocations >= rate) {
D
Dmitry Torokhov 已提交
904 905 906 907 908
			/* We allocated enough pages, let's take a break. */
			break;
		}
	}

909
	if (num_pages > 0)
910
		b->ops->lock(b, num_pages, is_2m_pages, &b->target);
911

D
Dmitry Torokhov 已提交
912 913 914 915
	/*
	 * We reached our goal without failures so try increasing
	 * allocation rate.
	 */
916 917
	if (error == 0 && allocations >= b->rate_alloc) {
		unsigned int mult = allocations / b->rate_alloc;
D
Dmitry Torokhov 已提交
918 919 920 921 922 923

		b->rate_alloc =
			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
			    VMW_BALLOON_RATE_ALLOC_MAX);
	}

924 925
	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
D
Dmitry Torokhov 已提交
926 927 928 929 930 931 932
}

/*
 * Decrease the size of the balloon allowing guest to use more memory.
 *
 * Walks the page lists of every supported page size, staging pages with
 * ops->add_page and releasing them with ops->unlock in groups of
 * batch_max_pages. For a non-zero target the walk stops once the size
 * left after the staged pages would drop below the target plus one 2m
 * page worth of 4k pages. A failing full batch aborts the whole
 * deflation.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						is_2m_pages, &b->target);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		/* Release the pages still staged for this page size. */
		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
	}
}

/* Operations for the non-batched protocol: one 4k page per command. */
static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page	= vmballoon_add_page,
	.lock		= vmballoon_lock_page,
	.unlock		= vmballoon_unlock_page,
};

/*
 * Page operations for the batched protocol. vmballoon_reset() installs this
 * table, together with a batch_max_pages of VMW_BALLOON_BATCH_MAX_PAGES,
 * when the host offers VMW_BALLOON_BATCHED_CMDS.
 */
static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page	= vmballoon_add_batched_page,
	.lock		= vmballoon_lock_batched_page,
	.unlock		= vmballoon_unlock_batched_page,
};

static bool vmballoon_init_batching(struct vmballoon *b)
{
988
	struct page *page;
989

990 991
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
992 993
		return false;

994
	b->batch_page = page_address(page);
995 996 997
	return true;
}

998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056
/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell.
 *
 * A VMCI_DOORBELL_SET command carrying VMCI_INVALID_ID for both arguments
 * is always issued and counted as a doorbell unset. The local doorbell is
 * destroyed, and its handle reset to VMCI_INVALID_HANDLE, only if the handle
 * is currently valid, so calling this repeatedly is harmless for the handle.
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	int ignored;

	VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET, VMCI_INVALID_ID,
			   VMCI_INVALID_ID, ignored);
	STATS_INC(b->stats.doorbell_unset);

	/* No doorbell was created (or it is already gone). */
	if (vmci_handle_is_invalid(b->vmci_doorbell))
		return;

	vmci_doorbell_destroy(b->vmci_doorbell);
	b->vmci_doorbell = VMCI_INVALID_HANDLE;
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	int error = 0;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) != 0) {
		error = vmci_doorbell_create(&b->vmci_doorbell,
				VMCI_FLAG_DELAYED_CB,
				VMCI_PRIVILEGE_FLAG_RESTRICTED,
				vmballoon_doorbell, b);

		if (error == VMCI_SUCCESS) {
			VMWARE_BALLOON_CMD(VMCI_DOORBELL_SET,
					b->vmci_doorbell.context,
					b->vmci_doorbell.resource, error);
			STATS_INC(b->stats.doorbell_set);
		}
	}

	if (error != 0) {
		vmballoon_vmci_cleanup(b);

		return -EIO;
	}

	return 0;
}

1057 1058 1059 1060 1061 1062 1063
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not  empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
1064 1065 1066 1067
	int error;

	vmballoon_vmci_cleanup(b);

1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092
	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;
1093 1094 1095 1096 1097

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

1098 1099
	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
D
Dmitry Torokhov 已提交
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	unsigned int target;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (b->slow_allocation_cycles > 0)
		b->slow_allocation_cycles--;

1120
	if (!b->reset_required && vmballoon_send_get_target(b, &target)) {
D
Dmitry Torokhov 已提交
1121 1122 1123 1124 1125
		/* update target, adjust size */
		b->target = target;

		if (b->size < target)
			vmballoon_inflate(b);
1126 1127
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
D
Dmitry Torokhov 已提交
1128 1129 1130
			vmballoon_deflate(b);
	}

1131 1132 1133 1134 1135 1136
	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
D
Dmitry Torokhov 已提交
1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

1149 1150 1151
	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities:   %#4x\n"
1152 1153 1154 1155
		   "used capabilities:      %#4lx\n"
		   "is resetting:           %c\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities,
		   b->reset_required ? 'y' : 'n');
1156

D
Dmitry Torokhov 已提交
1157 1158 1159 1160 1161 1162 1163 1164
	/* format size info */
	seq_printf(f,
		   "target:             %8d pages\n"
		   "current:            %8d pages\n",
		   b->target, b->size);

	/* format rate info */
	seq_printf(f,
1165 1166
		   "rateSleepAlloc:     %8d pages/sec\n",
		   b->rate_alloc);
D
Dmitry Torokhov 已提交
1167 1168 1169 1170

	seq_printf(f,
		   "\n"
		   "timer:              %8u\n"
1171
		   "doorbell:           %8u\n"
D
Dmitry Torokhov 已提交
1172 1173
		   "start:              %8u (%4u failed)\n"
		   "guestType:          %8u (%4u failed)\n"
1174
		   "2m-lock:            %8u (%4u failed)\n"
D
Dmitry Torokhov 已提交
1175
		   "lock:               %8u (%4u failed)\n"
1176
		   "2m-unlock:          %8u (%4u failed)\n"
D
Dmitry Torokhov 已提交
1177 1178
		   "unlock:             %8u (%4u failed)\n"
		   "target:             %8u (%4u failed)\n"
1179
		   "prim2mAlloc:        %8u (%4u failed)\n"
D
Dmitry Torokhov 已提交
1180 1181
		   "primNoSleepAlloc:   %8u (%4u failed)\n"
		   "primCanSleepAlloc:  %8u (%4u failed)\n"
1182
		   "prim2mFree:         %8u\n"
D
Dmitry Torokhov 已提交
1183
		   "primFree:           %8u\n"
1184
		   "err2mAlloc:         %8u\n"
D
Dmitry Torokhov 已提交
1185
		   "errAlloc:           %8u\n"
1186
		   "err2mFree:          %8u\n"
1187 1188 1189
		   "errFree:            %8u\n"
		   "doorbellSet:        %8u\n"
		   "doorbellUnset:      %8u\n",
D
Dmitry Torokhov 已提交
1190
		   stats->timer,
1191
		   stats->doorbell,
D
Dmitry Torokhov 已提交
1192 1193
		   stats->start, stats->start_fail,
		   stats->guest_type, stats->guest_type_fail,
1194 1195 1196 1197
		   stats->lock[true],  stats->lock_fail[true],
		   stats->lock[false],  stats->lock_fail[false],
		   stats->unlock[true], stats->unlock_fail[true],
		   stats->unlock[false], stats->unlock_fail[false],
D
Dmitry Torokhov 已提交
1198
		   stats->target, stats->target_fail,
1199 1200
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
D
Dmitry Torokhov 已提交
1201
		   stats->sleep_alloc, stats->sleep_alloc_fail,
1202 1203 1204
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
1205 1206
		   stats->refused_free[true], stats->refused_free[false],
		   stats->doorbell_set, stats->doorbell_unset);
D
Dmitry Torokhov 已提交
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259

	return 0;
}

/*
 * open() handler of the debugfs entry: bind vmballoon_debug_show() to the
 * file, passing along the private data stored in the inode.
 *
 * Returns whatever single_open() returns.
 */
static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	void *priv = inode->i_private;

	return single_open(file, vmballoon_debug_show, priv);
}

/*
 * File operations of the debugfs entry: a single-record seq_file whose
 * content is produced by vmballoon_debug_show() via vmballoon_debug_open().
 */
static const struct file_operations vmballoon_debug_fops = {
	.owner   = THIS_MODULE,
	.open    = vmballoon_debug_open,
	.release = single_release,
	.read    = seq_read,
	.llseek  = seq_lseek,
};

/*
 * Create the read-only "vmmemctl" debugfs file for balloon b and remember
 * its dentry in b->dbg_entry.
 *
 * Returns 0 on success, or the error encoded in the returned pointer when
 * debugfs_create_file() hands back an error pointer.
 */
static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (!IS_ERR(b->dbg_entry))
		return 0;

	error = PTR_ERR(b->dbg_entry);
	pr_err("failed to create debugfs entry, error: %d\n", error);

	return error;
}

/*
 * Remove the debugfs entry recorded in b->dbg_entry by
 * vmballoon_debugfs_init().
 */
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

/*
 * Stub used when CONFIG_DEBUG_FS is not set: there is nothing to create,
 * so report success.
 */
static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

/* Stub used when CONFIG_DEBUG_FS is not set: nothing to remove. */
static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

/*
 * Driver entry point.
 *
 * Prepares the global balloon (page lists, allocation rate, work item,
 * debugfs entry, doorbell handle and batching state) and queues the first
 * run of vmballoon_work() with reset_required set, so the protocol with the
 * host is started from the work item rather than from here.
 *
 * Returns 0 on success, -ENODEV when not running on VMware's hypervisor,
 * or the error reported by vmballoon_debugfs_init().
 */
static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	/* initialize rates */
	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use
 * the VMCI doorbell even when the balloon is built into the kernel.
 * Otherwise the VMCI is probed only after the balloon is initialized, and
 * the doorbell setup done from the first reset fails. If the balloon is
 * used as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

/*
 * Driver exit point: tear down the doorbell, stop the periodic work,
 * remove the debugfs entry and release everything held by the balloon.
 */
static void __exit vmballoon_exit(void)
{
	struct vmballoon *vb = &balloon;

	vmballoon_vmci_cleanup(vb);
	cancel_delayed_work_sync(&vb->dwork);

	vmballoon_debugfs_exit(vb);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(vb, 0);
	vmballoon_pop(vb);
}
module_exit(vmballoon_exit);