// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests.  The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Use __GFP_HIGHMEM to allow pages from the HIGHMEM zone. We don't
 * allow waiting (__GFP_RECLAIM) for NOSLEEP page allocations. Use
 * __GFP_NOWARN to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep.  This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)
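
/*
 * A 2 MB balloon page is an order-VMW_BALLOON_2M_SHIFT allocation,
 * i.e. 1 << 9 == 512 standard 4 KB pages; the two supported page sizes
 * are tracked separately throughout the driver.
 */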

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available.
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10

/* error codes */
#define VMW_BALLOON_SUCCESS		        0
#define VMW_BALLOON_FAILURE		        -1
#define VMW_BALLOON_ERROR_CMD_INVALID	        1
#define VMW_BALLOON_ERROR_PPN_INVALID	        2
#define VMW_BALLOON_ERROR_PPN_LOCKED	        3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED	        4
#define VMW_BALLOON_ERROR_PPN_PINNED	        5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED	        6
#define VMW_BALLOON_ERROR_RESET		        7
#define VMW_BALLOON_ERROR_BUSY		        8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of an entry in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT          5         0
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
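
/*
 * For example, with 4 KB pages the batch entry 0x0000000012345002
 * encodes physical address 0x12345000 together with status 2
 * (VMW_BALLOON_ERROR_PPN_INVALID).
 */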

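/*
 * Commands that, on success, also return the current balloon target in
 * the result register; __vmballoon_cmd() uses this mask to decide when
 * to update b->target.
 */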
#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))

struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	batch->pages[idx] = pa;
}

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;
	unsigned int doorbell;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
	unsigned int doorbell_set;
	unsigned int doorbell_unset;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;
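
/*
 * Page-handling operations. A basic and a batched implementation exist;
 * vmballoon_reset() selects between them according to the capabilities
 * reported by the host.
 */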

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
		    bool is_2m_pages);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
		      bool is_2m_pages);
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;

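/*
 * Communicate with the hypervisor through the backdoor I/O port: the
 * magic number is passed in %eax, the command in %ecx, the two command
 * arguments in %ebx and %esi, and the port number in %dx. The status
 * comes back in %eax and the command result in %ebx, except for the
 * START command, whose result is returned in %ecx.
 */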
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							   local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		b->target = local_result;

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}

static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
	      unsigned long arg2)
{
	unsigned long dummy;

	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}

/*
 * Send the "start" command to the host, communicating the supported
 * version of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities;
	bool success;

	STATS_INC(b->stats.start);

	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
				 &capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	/*
	 * 2MB pages are only supported with batching. If batching is for some
	 * reason disabled, do not use 2MB pages, since otherwise the legacy
	 * mechanism is used with 2MB pages, causing a failure.
	 */
	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

/*
 * Communicate the guest type to the host so that it can adjust the
 * ballooning algorithm to the one most appropriate for the guest. This
 * command is normally issued after sending the "start" command and is
 * part of the standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
			       VMW_BALLOON_GUEST_ID, 0);

	STATS_INC(b->stats.guest_type);

	if (status == VMW_BALLOON_SUCCESS)
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

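/*
 * Return the number of standard 4 KB pages covered by one balloon page:
 * 1 << VMW_BALLOON_2M_SHIFT (512) for a 2 MB page, 1 otherwise.
 */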
static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}

/*
 * Retrieve the desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b)
{
	unsigned long status;
	unsigned long limit;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

	if (status == VMW_BALLOON_SUCCESS)
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about an allocated page so that the host can use it
 * without fear that the guest will need it. The host may reject some
 * pages; we need to check the return value and maybe submit a different
 * page.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
				    unsigned int *hv_status)
{
	unsigned long status;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return -EINVAL;

	STATS_INC(b->stats.lock[false]);

	*hv_status = status = vmballoon_cmd(b, VMW_BALLOON_CMD_LOCK, pfn, 0);

	if (status == VMW_BALLOON_SUCCESS)
		return 0;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[false]);
	return -EIO;
}

static int vmballoon_send_batched_lock(struct vmballoon *b,
				       unsigned int num_pages, bool is_2m_pages)
{
	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
	unsigned long status, cmd;

	STATS_INC(b->stats.lock[is_2m_pages]);

	cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
			    VMW_BALLOON_CMD_BATCHED_LOCK;

	status = vmballoon_cmd(b, cmd, pfn, num_pages);

	if (status == VMW_BALLOON_SUCCESS)
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
	return 1;
}

/*
 * Notify the host that the guest intends to release the given page back
 * into the pool of available (to the guest) pages.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn)
{
	unsigned long status;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

	STATS_INC(b->stats.unlock[false]);

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_UNLOCK, pfn, 0);
	if (status == VMW_BALLOON_SUCCESS)
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[false]);
	return false;
}

static bool vmballoon_send_batched_unlock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages)
{
	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
	unsigned long status, cmd;

	STATS_INC(b->stats.unlock[is_2m_pages]);

	cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
			    VMW_BALLOON_CMD_BATCHED_UNLOCK;

	status = vmballoon_cmd(b, cmd, pfn, num_pages);

	if (status == VMW_BALLOON_SUCCESS)
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
	return false;
}

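/*
 * Allocate or free one balloon page: an order-VMW_BALLOON_2M_SHIFT
 * allocation for a 2 MB page, a single page otherwise.
 */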
static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);

	return alloc_page(flags);
}

static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (is_2m_page)
		__free_pages(page, VMW_BALLOON_2M_SHIFT);
	else
		__free_page(page);
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when the host decides to "reset" the balloon for one reason or
 * another. Unlike a normal "deflate" we do not (shall not) notify the
 * host of the pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	/* Clearing the batch_page unconditionally has no adverse effect */
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
}

/*
 * Notify the host of a ballooned page. If the host rejects the page, put
 * it on the list of refused pages; those pages are then released at the
 * end of the inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
				bool is_2m_pages)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status);

	if (locked) {
		STATS_INC(b->stats.refused_alloc[false]);

		if (locked == -EIO &&
		    (hv_status == VMW_BALLOON_ERROR_RESET ||
		     hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return locked;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}

static int vmballoon_lock_batched_page(struct vmballoon *b,
				       unsigned int num_pages, bool is_2m_pages)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages);

	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			WARN_ON_ONCE(true);
		}
	}

	return 0;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
				 bool is_2m_pages)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page))) {
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}

static int vmballoon_unlock_batched_page(struct vmballoon *b,
				unsigned int num_pages, bool is_2m_pages)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages);

	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

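/*
 * Queue a page for the next hypervisor call: the basic variant simply
 * remembers the page, while the batched variant records its physical
 * address in the batch page at the given index.
 */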
static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}

/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	unsigned int num_pages = 0;
	int error = 0;
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
	bool is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate the balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if the balloon target is not met
	 * yet, which is a desired behavior. However, the balloon driver
	 * can consume all available CPU cycles if too many pages are
	 * allocated in a second. Therefore, we throttle nosleep
	 * allocations even when the guest is not under memory pressure.
	 * OTOH, if we have already predicted that the guest is under
	 * memory pressure, then we slow down page allocations considerably.
	 */

	/*
	 * Start with no-sleep allocations, which may proceed at a
	 * higher rate than sleeping allocations.
	 */
	is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;

	pr_debug("%s - goal: %d\n", __func__, b->target - b->size);

	while (!b->reset_required &&
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
		struct page *page;

		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
			STATS_INC(b->stats.alloc[is_2m_pages]);
		else
			STATS_INC(b->stats.sleep_alloc);

		page = vmballoon_alloc_page(flags, is_2m_pages);
		if (!page) {
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */

				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
				/*
				 * CANSLEEP page allocation failed, so the
				 * guest is under severe memory pressure. We
				 * just log the event, but do not stop the
				 * inflation due to its negative impact on
				 * performance.
				 */
				STATS_INC(b->stats.sleep_alloc_fail);
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Slowing down page allocations
			 * seems to be reasonable, but doing so might actually
			 * cause the hypervisor to throttle us down, resulting
			 * in degraded performance. We will count on the
			 * scheduler and standard memory management mechanisms
			 * for now.
			 */
			flags = VMW_PAGE_ALLOC_CANSLEEP;
			continue;
		}

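		/*
		 * Hand the page to the current ops; once a full batch has
		 * accumulated (batch_max_pages is 1 in non-batched mode),
		 * ask the host to lock the pages down.
		 */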
		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
			error = b->ops->lock(b, num_pages, is_2m_pages);

			num_pages = 0;
			if (error)
				break;
		}

		cond_resched();
	}

	if (num_pages > 0)
		b->ops->lock(b, num_pages, is_2m_pages);

	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
}

/*
 * Decrease the size of the balloon, allowing the guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target > 0 &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						       is_2m_pages);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages);
	}
}

static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};
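
/*
 * Allocate the page shared with the hypervisor for batched commands. It
 * is allocated zeroed, so every entry starts out with its status and
 * reserved bits clear; vmballoon_send_batched_lock() and
 * vmballoon_send_batched_unlock() pass its PFN to the host.
 */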

static bool vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return false;

	b->batch_page = page_address(page);
	return true;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
		      VMCI_INVALID_ID, VMCI_INVALID_ID);

	STATS_INC(b->stats.doorbell_unset);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize the vmci doorbell, to get notified as soon as the balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	unsigned long error;

	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
		return 0;

	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
				     vmballoon_doorbell, b);

	if (error != VMCI_SUCCESS)
		goto fail;

	error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
				b->vmci_doorbell.context,
				b->vmci_doorbell.resource, NULL);

	STATS_INC(b->stats.doorbell_set);

	if (error != VMW_BALLOON_SUCCESS)
		goto fail;

	return 0;
fail:
	vmballoon_vmci_cleanup(b);
	return -EIO;
}

/*
 * Perform the standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting the protocol. This operation normally
 * happens when the host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (!b->reset_required && vmballoon_send_get_target(b)) {
		unsigned long target = b->target;

		/* update target, adjust size */
		if (b->size < target)
			vmballoon_inflate(b);
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
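
/*
 * Statistics are exported through a single read-only file, typically
 * /sys/kernel/debug/vmmemctl when debugfs is mounted in its usual
 * location.
 */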
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities:   %#4x\n"
		   "used capabilities:      %#4lx\n"
		   "is resetting:           %c\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities,
		   b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		   "target:             %8d pages\n"
		   "current:            %8d pages\n",
		   b->target, b->size);

	seq_printf(f,
		   "\n"
		   "timer:              %8u\n"
		   "doorbell:           %8u\n"
		   "start:              %8u (%4u failed)\n"
		   "guestType:          %8u (%4u failed)\n"
		   "2m-lock:            %8u (%4u failed)\n"
		   "lock:               %8u (%4u failed)\n"
		   "2m-unlock:          %8u (%4u failed)\n"
		   "unlock:             %8u (%4u failed)\n"
		   "target:             %8u (%4u failed)\n"
		   "prim2mAlloc:        %8u (%4u failed)\n"
		   "primNoSleepAlloc:   %8u (%4u failed)\n"
		   "primCanSleepAlloc:  %8u (%4u failed)\n"
		   "prim2mFree:         %8u\n"
		   "primFree:           %8u\n"
		   "err2mAlloc:         %8u\n"
		   "errAlloc:           %8u\n"
		   "err2mFree:          %8u\n"
		   "errFree:            %8u\n"
		   "doorbellSet:        %8u\n"
		   "doorbellUnset:      %8u\n",
		   stats->timer,
		   stats->doorbell,
		   stats->start, stats->start_fail,
		   stats->guest_type, stats->guest_type_fail,
		   stats->lock[true],  stats->lock_fail[true],
		   stats->lock[false],  stats->lock_fail[false],
		   stats->unlock[true], stats->unlock_fail[true],
		   stats->unlock[false], stats->unlock_fail[false],
		   stats->target, stats->target_fail,
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
		   stats->sleep_alloc, stats->sleep_alloc_fail,
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
		   stats->refused_free[true], stats->refused_free[false],
		   stats->doorbell_set, stats->doorbell_unset);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);