/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */

#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
	#define CMPXCHG "cmpxchgq"
	#else
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
	#define CMPXCHG "cmpxchgl"
#elif PTTYPE == PTTYPE_EPT
	#define pt_element_t u64
	#define guest_walker guest_walkerEPT
	#define FNAME(name) ept_##name
	#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#define PT_GUEST_DIRTY_SHIFT 9
	#define PT_GUEST_ACCESSED_SHIFT 8
	#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
	#ifdef CONFIG_X86_64
	#define CMPXCHG "cmpxchgq"
	#endif
	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#else
	#error Invalid PTTYPE value
#endif

#define PT_GUEST_DIRTY_MASK    (1 << PT_GUEST_DIRTY_SHIFT)
#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)

#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K)

/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
	unsigned max_level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
	bool pte_writable[PT_MAX_FULL_LEVELS];
	unsigned int pt_access[PT_MAX_FULL_LEVELS];
	unsigned int pte_access;
	gfn_t gfn;
	struct x86_exception fault;
};

static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}

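/*
 * Strip ACC_WRITE_MASK from the computed access rights unless the guest PTE
 * is already dirty, so that the first write still faults and the dirty bit
 * can be set.  A no-op for MMU modes without accessed/dirty bits.
 */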
static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
					     unsigned gpte)
{
	unsigned mask;

	/* dirty bit is not supported, so no need to track it */
	if (!PT_HAVE_ACCESSED_DIRTY(mmu))
		return;

	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);

	mask = (unsigned)~ACC_WRITE_MASK;
	/* Allow write access to dirty gptes */
	mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) &
		PT_WRITABLE_MASK;
	*access &= mask;
}

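/* A gpte is present if the P bit is set, or any of R/W/X for EPT. */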
static inline int FNAME(is_present_gpte)(unsigned long pte)
{
#if PTTYPE != PTTYPE_EPT
	return pte & PT_PRESENT_MASK;
#else
	return pte & 7;
#endif
}

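/* EPT-only check for an illegal memtype / XWR combination in a guest PTE. */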
static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte)
{
#if PTTYPE != PTTYPE_EPT
	return false;
#else
	return __is_bad_mt_xwr(rsvd_check, gpte);
#endif
}

static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
{
	return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) ||
	       FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte);
}

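/*
 * Atomically update a guest PTE in host user memory, e.g. to set the
 * accessed/dirty bits.  Returns 0 if the PTE was updated (it still matched
 * orig_pte), a positive value if the PTE changed under us, or -EFAULT if the
 * host address could not be accessed.
 */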
static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			       pt_element_t __user *ptep_user, unsigned index,
			       pt_element_t orig_pte, pt_element_t new_pte)
{
	signed char r;

	if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
		return -EFAULT;

#ifdef CMPXCHG
	asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
		     "setnz %b[r]\n"
		     "2:"
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r])
		     : [ptr] "+m" (*ptep_user),
		       [old] "+a" (orig_pte),
		       [r] "=q" (r)
		     : [new] "r" (new_pte)
		     : "memory");
#else
	asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
		     "setnz %b[r]\n"
		     "2:"
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r])
		     : [ptr] "+m" (*ptep_user),
		       [old] "+A" (orig_pte),
		       [r] "=q" (r)
		     : [new_lo] "b" ((u32)new_pte),
		       [new_hi] "c" ((u32)(new_pte >> 32))
		     : "memory");
#endif

	user_access_end();
	return r;
}

static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *sp, u64 *spte,
				  u64 gpte)
{
	if (!FNAME(is_present_gpte)(gpte))
		goto no_present;

	/* if accessed bit is not supported prefetch non accessed gpte */
	if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) &&
	    !(gpte & PT_GUEST_ACCESSED_MASK))
		goto no_present;

	if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K))
		goto no_present;

	return false;

no_present:
	drop_spte(vcpu->kvm, spte);
	return true;
}

/*
 * For PTTYPE_EPT, a page table can be executable but not readable
 * on supported processors. Therefore, set_spte does not automatically
 * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
 * to signify readability since it isn't used in the EPT case
 */
static inline unsigned FNAME(gpte_access)(u64 gpte)
{
	unsigned access;
#if PTTYPE == PTTYPE_EPT
	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
#else
	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
	access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
	/* Combine NX with P (which is set here) to get ACC_EXEC_MASK.  */
	access ^= (gpte >> PT64_NX_SHIFT);
#endif

	return access;
}

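/*
 * Set the accessed bit in each guest PTE on the walk path and, on a write
 * fault, the dirty bit in the final PTE, mirroring what hardware would do.
 * Returns 0 on success, a positive value if a PTE changed under us (the
 * caller should retry the walk), or a negative error code.
 */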
static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
					     struct kvm_mmu *mmu,
					     struct guest_walker *walker,
					     gpa_t addr, int write_fault)
{
	unsigned level, index;
	pt_element_t pte, orig_pte;
	pt_element_t __user *ptep_user;
	gfn_t table_gfn;
	int ret;

	/* dirty/accessed bits are not supported, so no need to update them */
	if (!PT_HAVE_ACCESSED_DIRTY(mmu))
		return 0;

	for (level = walker->max_level; level >= walker->level; --level) {
		pte = orig_pte = walker->ptes[level - 1];
		table_gfn = walker->table_gfn[level - 1];
		ptep_user = walker->ptep_user[level - 1];
		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
		if (!(pte & PT_GUEST_ACCESSED_MASK)) {
			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
			pte |= PT_GUEST_ACCESSED_MASK;
		}
		if (level == walker->level && write_fault &&
				!(pte & PT_GUEST_DIRTY_MASK)) {
			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
#if PTTYPE == PTTYPE_EPT
			if (kvm_x86_ops.nested_ops->write_log_dirty(vcpu, addr))
				return -EINVAL;
#endif
			pte |= PT_GUEST_DIRTY_MASK;
		}
		if (pte == orig_pte)
			continue;

		/*
		 * If the slot is read-only, simply do not process the accessed
		 * and dirty bits.  This is the correct thing to do if the slot
		 * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
		 * are only supported if the accessed and dirty bits are already
		 * set in the ROM (so that MMIO writes are never needed).
		 *
		 * Note that NPT does not allow this at all and faults, since
		 * it always wants nested page table entries for the guest
		 * page tables to be writable.  And EPT works but will simply
		 * overwrite the read-only memory to set the accessed and dirty
		 * bits.
		 */
		if (unlikely(!walker->pte_writable[level - 1]))
			continue;

		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
		if (ret)
			return ret;

		kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
		walker->ptes[level - 1] = pte;
	}
	return 0;
}

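/* Extract the protection-key bits from a 64-bit guest PTE (0 for other modes). */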
static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
{
	unsigned pkeys = 0;
#if PTTYPE == 64
	pte_t pte = {.pte = gpte};

	pkeys = pte_flags_pkey(pte_flags(pte));
#endif
	return pkeys;
}

static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu,
				       unsigned int level, unsigned int gpte)
{
	/*
	 * For EPT and PAE paging (both variants), bit 7 is either reserved at
	 * all levels or indicates a huge page (ignoring CR3/EPTP).  In either
	 * case, bit 7 being set terminates the walk.
	 */
#if PTTYPE == 32
	/*
	 * 32-bit paging requires special handling because bit 7 is ignored if
	 * CR4.PSE=0, not reserved.  Clear bit 7 in the gpte if the level is
	 * greater than the last level for which bit 7 is the PAGE_SIZE bit.
	 *
	 * The RHS has bit 7 set iff level < (2 + PSE).  If it is clear, bit 7
	 * is not reserved and does not indicate a large page at this level,
	 * so clear PT_PAGE_SIZE_MASK in gpte if that is the case.
	 */
	gpte &= level - (PT32_ROOT_LEVEL + mmu->mmu_role.ext.cr4_pse);
#endif
	/*
	 * PG_LEVEL_4K always terminates.  The RHS has bit 7 set
	 * iff level <= PG_LEVEL_4K, which for our purpose means
	 * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
	 */
	gpte |= level - PG_LEVEL_4K - 1;

	return gpte & PT_PAGE_SIZE_MASK;
}
/*
 * Fetch a guest pte for a guest virtual address, or for an L2's GPA.
 */
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
				    gpa_t addr, u64 access)
{
	int ret;
	pt_element_t pte;
	pt_element_t __user *ptep_user;
	gfn_t table_gfn;
	u64 pt_access, pte_access;
	unsigned index, accessed_dirty, pte_pkey;
	u64 nested_access;
	gpa_t pte_gpa;
	bool have_ad;
	int offset;
	u64 walk_nx_mask = 0;
	const int write_fault = access & PFERR_WRITE_MASK;
	const int user_fault  = access & PFERR_USER_MASK;
	const int fetch_fault = access & PFERR_FETCH_MASK;
	u16 errcode = 0;
	gpa_t real_gpa;
	gfn_t gfn;

	trace_kvm_mmu_pagetable_walk(addr, access);
retry_walk:
	walker->level = mmu->root_level;
	pte           = mmu->get_guest_pgd(vcpu);
	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);

#if PTTYPE == 64
	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
	if (walker->level == PT32E_ROOT_LEVEL) {
		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!FNAME(is_present_gpte)(pte))
			goto error;
		--walker->level;
	}
#endif
	walker->max_level = walker->level;
	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));

	/*
	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
	 * by the MOV to CR instruction are treated as reads and do not cause the
	 * processor to set the dirty flag in any EPT paging-structure entry.
	 */
	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;

	pte_access = ~0;
	++walker->level;

	do {
		unsigned long host_addr;

		pt_access = pte_access;
		--walker->level;

		index = PT_INDEX(addr, walker->level);
		table_gfn = gpte_to_gfn(pte);
		offset    = index * sizeof(pt_element_t);
		pte_gpa   = gfn_to_gpa(table_gfn) + offset;

		BUG_ON(walker->level < 1);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;

		real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn),
					     nested_access, &walker->fault);

		/*
		 * FIXME: This can happen if emulation (for an INS/OUTS
		 * instruction) triggers a nested page fault.  The exit
		 * qualification / exit info field will incorrectly have
		 * "guest page access" as the nested page fault's cause,
		 * instead of "guest page structure access".  To fix this,
		 * the x86_exception struct should be augmented with enough
		 * information to fix the exit_qualification or exit_info_1
		 * fields.
		 */
		if (unlikely(real_gpa == UNMAPPED_GVA))
			return 0;

		host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
					    &walker->pte_writable[walker->level - 1]);
		if (unlikely(kvm_is_error_hva(host_addr)))
			goto error;

		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
		if (unlikely(__get_user(pte, ptep_user)))
			goto error;
		walker->ptep_user[walker->level - 1] = ptep_user;

		trace_kvm_mmu_paging_element(pte, walker->level);

		/*
		 * Inverting the NX bit lets us AND it like other
		 * permission bits.
		 */
		pte_access = pt_access & (pte ^ walk_nx_mask);

		if (unlikely(!FNAME(is_present_gpte)(pte)))
			goto error;

		if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
			errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
			goto error;
		}

		walker->ptes[walker->level - 1] = pte;

		/* Convert to ACC_*_MASK flags for struct guest_walker.  */
		walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
	} while (!FNAME(is_last_gpte)(mmu, walker->level, pte));

	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;

	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
	walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
	if (unlikely(errcode))
		goto error;

	gfn = gpte_to_gfn_lvl(pte, walker->level);
	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;

	if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36())
		gfn += pse36_gfn_delta(pte);

	real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn), access, &walker->fault);
	if (real_gpa == UNMAPPED_GVA)
		return 0;

	walker->gfn = real_gpa >> PAGE_SHIFT;

	if (!write_fault)
		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
	else
		/*
		 * On a write fault, fold the dirty bit into accessed_dirty.
		 * For modes without A/D bits support accessed_dirty will be
		 * always clear.
		 */
		accessed_dirty &= pte >>
			(PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);

	if (unlikely(!accessed_dirty)) {
		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker,
							addr, write_fault);
		if (unlikely(ret < 0))
			goto error;
		else if (ret)
			goto retry_walk;
	}

	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, walker->pte_access,
		 walker->pt_access[walker->level - 1]);
	return 1;

error:
	errcode |= write_fault | user_fault;
	if (fetch_fault && (is_efer_nx(mmu) || is_cr4_smep(mmu)))
		errcode |= PFERR_FETCH_MASK;

	walker->fault.vector = PF_VECTOR;
	walker->fault.error_code_valid = true;
	walker->fault.error_code = errcode;

#if PTTYPE == PTTYPE_EPT
	/*
	 * Use PFERR_RSVD_MASK in error_code to tell if an EPT
	 * misconfiguration needs to be injected. The detection is
	 * done by is_rsvd_bits_set() above.
	 *
	 * We set up the value of exit_qualification to inject:
	 * [2:0] - Derive from the access bits. The exit_qualification might be
	 *         out of date if it is serving an EPT misconfiguration.
	 * [5:3] - Calculated by the page walk of the guest EPT page tables
	 * [7:8] - Derived from [7:8] of real exit_qualification
	 *
	 * The other bits are set to 0.
	 */
	if (!(errcode & PFERR_RSVD_MASK)) {
		vcpu->arch.exit_qualification &= (EPT_VIOLATION_GVA_IS_VALID |
						  EPT_VIOLATION_GVA_TRANSLATED);
		if (write_fault)
			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
		if (user_fault)
			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ;
		if (fetch_fault)
			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
	}
#endif
	walker->fault.address = addr;
	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
	walker->fault.async_page_fault = false;

	trace_kvm_mmu_walker_error(walker->fault.error_code);
	return 0;
}

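/* Walk the guest page tables rooted at the vCPU's current MMU. */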
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gpa_t addr, u64 access)
{
	return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr,
					access);
}

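/*
 * Speculatively install a shadow PTE for the given guest PTE, resolving the
 * gfn to a pfn without sleeping.  Returns false if the gpte cannot be used
 * (not present, reserved bits set, no memslot, or the pfn lookup failed).
 */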
static bool
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
		     u64 *spte, pt_element_t gpte, bool no_dirty_log)
{
	struct kvm_memory_slot *slot;
	unsigned pte_access;
	gfn_t gfn;
	kvm_pfn_t pfn;

	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
		return false;

	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);

	gfn = gpte_to_gfn(gpte);
	pte_access = sp->role.access & FNAME(gpte_access)(gpte);
	FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);

	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn,
			no_dirty_log && (pte_access & ACC_WRITE_MASK));
	if (!slot)
		return false;

	pfn = gfn_to_pfn_memslot_atomic(slot, gfn);
	if (is_error_pfn(pfn))
		return false;

	mmu_set_spte(vcpu, slot, spte, pte_access, gfn, pfn, NULL);
	kvm_release_pfn_clean(pfn);
	return true;
}

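/*
 * Re-read the guest PTE backing @level of the walk and check whether it still
 * matches the value observed during the walk.  For the 4K level the
 * surrounding guest PTEs are read as well, to seed gw->prefetch_ptes.
 */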
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
				struct guest_walker *gw, int level)
{
	pt_element_t curr_pte;
	gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
	u64 mask;
	int r, index;

	if (level == PG_LEVEL_4K) {
		mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
		base_gpa = pte_gpa & ~mask;
		index = (pte_gpa - base_gpa) / sizeof(pt_element_t);

		r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa,
				gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
		curr_pte = gw->prefetch_ptes[index];
	} else
		r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa,
				  &curr_pte, sizeof(curr_pte));

	return r || curr_pte != gw->ptes[level - 1];
}

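/*
 * Prefetch shadow PTEs around the faulting address from the cached guest
 * PTEs, so that neighbouring accesses do not each have to take a fault.
 */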
static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
				u64 *sptep)
{
	struct kvm_mmu_page *sp;
	pt_element_t *gptep = gw->prefetch_ptes;
	u64 *spte;
	int i;

	sp = sptep_to_sp(sptep);

	if (sp->role.level > PG_LEVEL_4K)
		return;

	/*
	 * If addresses are being invalidated, skip prefetching to avoid
	 * accidentally prefetching those addresses.
	 */
	if (unlikely(vcpu->kvm->mmu_notifier_count))
		return;

	if (sp->role.direct)
		return __direct_pte_prefetch(vcpu, sp, sptep);

	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
	spte = sp->spt + i;

	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
		if (spte == sptep)
			continue;

		if (is_shadow_present_pte(*spte))
			continue;

		if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
			break;
	}
}

/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 * If the guest tries to write a write-protected page, we need to
 * emulate this operation, return 1 to indicate this case.
 */
static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
			 struct guest_walker *gw)
{
	struct kvm_mmu_page *sp = NULL;
	struct kvm_shadow_walk_iterator it;
	unsigned int direct_access, access;
	int top_level, ret;
	gfn_t base_gfn = fault->gfn;

	WARN_ON_ONCE(gw->gfn != base_gfn);
	direct_access = gw->pte_access;

	top_level = vcpu->arch.mmu->root_level;
	if (top_level == PT32E_ROOT_LEVEL)
		top_level = PT32_ROOT_LEVEL;
	/*
	 * Verify that the top-level gpte is still there.  Since the page
	 * is a root page, it is either write protected (and cannot be
	 * changed from now on) or it is invalid (in which case, we don't
	 * really care if it changes underneath us after this point).
	 */
	if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;

	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
		goto out_gpte_changed;

	for (shadow_walk_init(&it, vcpu, fault->addr);
	     shadow_walk_okay(&it) && it.level > gw->level;
	     shadow_walk_next(&it)) {
		gfn_t table_gfn;

		clear_sp_write_flooding_count(it.sptep);
		drop_large_spte(vcpu, it.sptep);

		sp = NULL;
		if (!is_shadow_present_pte(*it.sptep)) {
			table_gfn = gw->table_gfn[it.level - 2];
			access = gw->pt_access[it.level - 2];
			sp = kvm_mmu_get_page(vcpu, table_gfn, fault->addr,
					      it.level-1, false, access);
			/*
			 * We must synchronize the pagetable before linking it
			 * because the guest doesn't need to flush tlb when
			 * the gpte is changed from non-present to present.
			 * Otherwise, the guest may use the wrong mapping.
			 *
			 * For PG_LEVEL_4K, kvm_mmu_get_page() has already
			 * synchronized it transiently via kvm_sync_page().
			 *
			 * For higher level pagetable, we synchronize it via
			 * the slower mmu_sync_children().  If it needs to
			 * break, some progress has been made; return
			 * RET_PF_RETRY and retry on the next #PF.
			 * KVM_REQ_MMU_SYNC is not necessary but it
			 * expedites the process.
			 */
			if (sp->unsync_children &&
			    mmu_sync_children(vcpu, sp, false))
				return RET_PF_RETRY;
		}

		/*
		 * Verify that the gpte in the page we've just write
		 * protected is still there.
		 */
		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
			goto out_gpte_changed;

		if (sp)
			link_shadow_page(vcpu, it.sptep, sp);
	}

	kvm_mmu_hugepage_adjust(vcpu, fault);

	trace_kvm_mmu_spte_requested(fault);

	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
		clear_sp_write_flooding_count(it.sptep);

		/*
		 * We cannot overwrite existing page tables with an NX
		 * large page, as the leaf could be executable.
		 */
		if (fault->nx_huge_page_workaround_enabled)
			disallowed_hugepage_adjust(fault, *it.sptep, it.level);

		base_gfn = fault->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
		if (it.level == fault->goal_level)
			break;

		validate_direct_spte(vcpu, it.sptep, direct_access);

		drop_large_spte(vcpu, it.sptep);

		if (!is_shadow_present_pte(*it.sptep)) {
			sp = kvm_mmu_get_page(vcpu, base_gfn, fault->addr,
					      it.level - 1, true, direct_access);
			link_shadow_page(vcpu, it.sptep, sp);
			if (fault->huge_page_disallowed &&
			    fault->req_level >= it.level)
				account_huge_nx_page(vcpu->kvm, sp);
		}
	}

	if (WARN_ON_ONCE(it.level != fault->goal_level))
		return -EFAULT;

	ret = mmu_set_spte(vcpu, fault->slot, it.sptep, gw->pte_access,
			   base_gfn, fault->pfn, fault);
	if (ret == RET_PF_SPURIOUS)
		return ret;

	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
	++vcpu->stat.pf_fixed;
	return ret;

out_gpte_changed:
	return RET_PF_RETRY;
}

/*
 * Check whether the mapped gfn can write its own page table through the
 * current mapping.
 *
 * This is a helper for FNAME(page_fault).  If the guest uses a large page to
 * map a writable gfn that is itself used as a page table, KVM must map it
 * with a small page: a new shadow page will be created when KVM shadows that
 * page table, which would prevent the large mapping anyway.  Detecting this
 * early avoids unnecessary #PFs and emulation.
 *
 * @write_fault_to_shadow_pgtable is set to true if the faulting gfn is
 * currently used as its own page table.
 *
 * Note: the PDPT page table is not checked for 32-bit PAE guests.  That is
 * fine because the PDPT is always shadowed, so a large page can never be
 * used to map the gfn that holds the PDPT.
 */
static bool
FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
			      struct guest_walker *walker, bool user_fault,
			      bool *write_fault_to_shadow_pgtable)
{
	int level;
	gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
	bool self_changed = false;

	if (!(walker->pte_access & ACC_WRITE_MASK ||
	    (!is_cr0_wp(vcpu->arch.mmu) && !user_fault)))
		return false;

	for (level = walker->level; level <= walker->max_level; level++) {
		gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];

		self_changed |= !(gfn & mask);
		*write_fault_to_shadow_pgtable |= !gfn;
	}

	return self_changed;
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
	struct guest_walker walker;
	int r;
	unsigned long mmu_seq;
	bool is_self_change_mapping;

	pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
	WARN_ON_ONCE(fault->is_tdp);

	/*
	 * Look up the guest pte for the faulting address.
	 * If PFEC.RSVD is set, this is a shadow page fault.
	 * The bit needs to be cleared before walking guest page tables.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, fault->addr,
			     fault->error_code & ~PFERR_RSVD_MASK);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		if (!fault->prefetch)
			kvm_inject_emulated_page_fault(vcpu, &walker.fault);

		return RET_PF_RETRY;
	}

	fault->gfn = walker.gfn;
	fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);

	if (page_fault_handle_page_track(vcpu, fault)) {
		shadow_page_table_clear_flood(vcpu, fault->addr);
		return RET_PF_EMULATE;
	}

	r = mmu_topup_memory_caches(vcpu, true);
	if (r)
		return r;

	vcpu->arch.write_fault_to_shadow_pgtable = false;

	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
	      &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);

	if (is_self_change_mapping)
		fault->max_level = PG_LEVEL_4K;
	else
		fault->max_level = walker.level;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();

	if (kvm_faultin_pfn(vcpu, fault, &r))
		return r;

	if (handle_abnormal_pfn(vcpu, fault, walker.pte_access, &r))
		return r;

	/*
	 * Do not change pte_access if the pfn is a mmio page, otherwise
	 * we will cache the incorrect access into mmio spte.
	 */
	if (fault->write && !(walker.pte_access & ACC_WRITE_MASK) &&
	    !is_cr0_wp(vcpu->arch.mmu) && !fault->user && fault->slot) {
		walker.pte_access |= ACC_WRITE_MASK;
		walker.pte_access &= ~ACC_USER_MASK;

		/*
		 * If we converted a user page to a kernel page,
		 * so that the kernel can write to it when cr0.wp=0,
		 * then we should prevent the kernel from executing it
		 * if SMEP is enabled.
		 */
		if (is_cr4_smep(vcpu->arch.mmu))
			walker.pte_access &= ~ACC_EXEC_MASK;
	}

	r = RET_PF_RETRY;
	write_lock(&vcpu->kvm->mmu_lock);

	if (is_page_fault_stale(vcpu, fault, mmu_seq))
		goto out_unlock;

	r = make_mmu_pages_available(vcpu);
	if (r)
		goto out_unlock;
	r = FNAME(fetch)(vcpu, fault, &walker);

out_unlock:
	write_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(fault->pfn);
	return r;
}

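/* Return the guest physical address of the page table shadowed by a 4K sp. */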
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
{
	int offset = 0;

	WARN_ON(sp->role.level != PG_LEVEL_4K);

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
}

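/*
 * Emulate INVLPG for a shadowed guest: zap the last-level spte that maps @gva
 * under @root_hpa (for unsync shadow pages) and opportunistically re-install
 * it from the current guest PTE.
 */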
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
{
	struct kvm_shadow_walk_iterator iterator;
	struct kvm_mmu_page *sp;
	u64 old_spte;
	int level;
	u64 *sptep;

	vcpu_clear_mmio_info(vcpu, gva);

	/*
	 * No need to check return value here, rmap_can_add() can
	 * help us to skip pte prefetch later.
	 */
	mmu_topup_memory_caches(vcpu, true);

	if (!VALID_PAGE(root_hpa)) {
		WARN_ON(1);
		return;
	}

	write_lock(&vcpu->kvm->mmu_lock);
	for_each_shadow_entry_using_root(vcpu, root_hpa, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		sp = sptep_to_sp(sptep);
		old_spte = *sptep;
		if (is_last_spte(old_spte, level)) {
			pt_element_t gpte;
			gpa_t pte_gpa;

			if (!sp->unsync)
				break;

			pte_gpa = FNAME(get_level1_sp_gpa)(sp);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL);
			if (is_shadow_present_pte(old_spte))
				kvm_flush_remote_tlbs_with_address(vcpu->kvm,
					sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));

			if (!rmap_can_add(vcpu))
				break;

			if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
						       sizeof(pt_element_t)))
				break;

			FNAME(prefetch_gpte)(vcpu, sp, sptep, gpte, false);
		}

		if (!sp->unsync_children)
			break;
	}
	write_unlock(&vcpu->kvm->mmu_lock);
}

/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			       gpa_t addr, u64 access,
			       struct x86_exception *exception)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

#ifndef CONFIG_X86_64
	/* A 64-bit GVA should be impossible on 32-bit KVM. */
	WARN_ON_ONCE((addr >> 32) && mmu == vcpu->arch.walk_mmu);
#endif

	r = FNAME(walk_addr_generic)(&walker, vcpu, mmu, addr, access);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= addr & ~PAGE_MASK;
	} else if (exception)
		*exception = walker.fault;

	return gpa;
}

/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 *
 * Returns
 * < 0: the sp should be zapped
 *   0: the sp is synced and no tlb flushing is required
 * > 0: the sp is synced and tlb flushing is required
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	union kvm_mmu_page_role mmu_role = vcpu->arch.mmu->mmu_role.base;
	int i;
	bool host_writable;
	gpa_t first_pte_gpa;
	bool flush = false;

	/*
	 * Ignore various flags when verifying that it's safe to sync a shadow
	 * page using the current MMU context.
	 *
	 *  - level: not part of the overall MMU role and will never match as the MMU's
	 *           level tracks the root level
	 *  - access: updated based on the new guest PTE
	 *  - quadrant: not part of the overall MMU role (similar to level)
	 */
	const union kvm_mmu_page_role sync_role_ign = {
		.level = 0xf,
		.access = 0x7,
		.quadrant = 0x3,
	};

	/*
	 * Direct pages can never be unsync, and KVM should never attempt to
	 * sync a shadow page for a different MMU context, e.g. if the role
	 * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the
	 * reserved bits checks will be wrong, etc...
	 */
	if (WARN_ON_ONCE(sp->role.direct ||
			 (sp->role.word ^ mmu_role.word) & ~sync_role_ign.word))
		return -1;

	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		u64 *sptep, spte;
		struct kvm_memory_slot *slot;
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn;

		if (!sp->spt[i])
			continue;

		pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);

		if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
					       sizeof(pt_element_t)))
			return -1;

		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
			flush = true;
			continue;
		}

		gfn = gpte_to_gfn(gpte);
		pte_access = sp->role.access;
		pte_access &= FNAME(gpte_access)(gpte);
		FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);

		if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access))
			continue;

		if (gfn != sp->gfns[i]) {
			drop_spte(vcpu->kvm, &sp->spt[i]);
			flush = true;
			continue;
		}

		sptep = &sp->spt[i];
		spte = *sptep;
		host_writable = spte & shadow_host_writable_mask;
		slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
		make_spte(vcpu, sp, slot, pte_access, gfn,
			  spte_to_pfn(spte), spte, true, false,
			  host_writable, &spte);

		flush |= mmu_spte_update(sptep, spte);
	}

	return flush;
}

#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_lvl
#undef CMPXCHG
#undef PT_GUEST_ACCESSED_MASK
#undef PT_GUEST_DIRTY_MASK
#undef PT_GUEST_DIRTY_SHIFT
#undef PT_GUEST_ACCESSED_SHIFT
#undef PT_HAVE_ACCESSED_DIRTY