/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */

/*
 * Per-PTTYPE configuration.  Depending on the PTTYPE chosen by the includer,
 * pick the guest PTE type, the symbol prefix generated by FNAME(), and the
 * PT_* helpers that the rest of this file is written against.
 */
#if PTTYPE == 64
	/* Naming: 64-bit guest PTEs, symbols prefixed with paging64_. */
	#define FNAME(name) paging##64_##name
	#define guest_walker guest_walker64
	#define pt_element_t u64

	/* Address/index helpers map straight onto the PT64_* variants. */
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS

	/* Accessed/dirty bits are always available in this mode. */
	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
	#define PT_HAVE_ACCESSED_DIRTY(mmu) true

	/*
	 * Walk depth and the compare-and-exchange primitive depend on whether
	 * the host kernel itself is built for 64-bit.
	 */
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	/* Naming: 32-bit guest PTEs, symbols prefixed with paging32_. */
	#define FNAME(name) paging##32_##name
	#define guest_walker guest_walker32
	#define pt_element_t u32

	/* Address/index helpers map onto the PT32_* variants. */
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2

	/* Accessed/dirty bits are always available in this mode. */
	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
	#define PT_HAVE_ACCESSED_DIRTY(mmu) true

	#define CMPXCHG cmpxchg
#elif PTTYPE == PTTYPE_EPT
	/* Naming: 64-bit EPT entries, symbols prefixed with ept_. */
	#define FNAME(name) ept_##name
	#define guest_walker guest_walkerEPT
	#define pt_element_t u64

	/* EPT entries reuse the PT64_* address/index helpers. */
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS

	/*
	 * EPT keeps its accessed/dirty flags at bits 8 and 9, and whether they
	 * are in use at all is a per-MMU property (mmu->ept_ad).
	 */
	#define PT_GUEST_DIRTY_SHIFT 9
	#define PT_GUEST_ACCESSED_SHIFT 8
	#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)

	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#else
	#error Invalid PTTYPE value
#endif

74 75 76
/* Single-bit masks built from the per-PTTYPE accessed/dirty bit positions. */
#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
#define PT_GUEST_DIRTY_MASK    (1 << PT_GUEST_DIRTY_SHIFT)

/*
 * gpte_to_gfn_lvl resolves to the PTTYPE-specific function defined below;
 * gpte_to_gfn() is the same lookup fixed at the 4K level.
 */
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K)
79

A
Avi Kivity 已提交
80 81 82 83 84 85
/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
86
	unsigned max_level;
87
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
88
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
89
	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
90
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
91
	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
92
	bool pte_writable[PT_MAX_FULL_LEVELS];
93 94
	unsigned pt_access;
	unsigned pte_access;
95
	gfn_t gfn;
96
	struct x86_exception fault;
A
Avi Kivity 已提交
97 98
};

99
/*
 * Extract the guest frame number a guest PTE points at, using the address
 * mask that applies at page-table level @lvl.
 */
static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
	gfn_t addr_bits = gpte & PT_LVL_ADDR_MASK(lvl);

	return addr_bits >> PAGE_SHIFT;
}

104 105
static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
					     unsigned gpte)
106 107 108
{
	unsigned mask;

109
	/* dirty bit is not supported, so no need to track it */
110
	if (!PT_HAVE_ACCESSED_DIRTY(mmu))
111 112
		return;

113 114 115 116
	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);

	mask = (unsigned)~ACC_WRITE_MASK;
	/* Allow write access to dirty gptes */
117 118
	mask |= (gpte >> (PT_GUEST_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) &
		PT_WRITABLE_MASK;
119 120 121 122 123
	*access &= mask;
}

static inline int FNAME(is_present_gpte)(unsigned long pte)
{
124
#if PTTYPE != PTTYPE_EPT
B
Bandan Das 已提交
125
	return pte & PT_PRESENT_MASK;
126 127 128
#else
	return pte & 7;
#endif
129 130
}

131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
/*
 * Defer to __is_bad_mt_xwr() for EPT entries; the check does not apply to
 * the other PTTYPEs, for which this always returns false.
 */
static bool FNAME(is_bad_mt_xwr)(struct rsvd_bits_validate *rsvd_check, u64 gpte)
{
#if PTTYPE == PTTYPE_EPT
	return __is_bad_mt_xwr(rsvd_check, gpte);
#else
	return false;
#endif
}

/*
 * Return true when @gpte, found at @level, either has reserved bits set or
 * fails the is_bad_mt_xwr() check, both judged against the MMU's guest
 * reserved-bit description.
 */
static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level)
{
	struct rsvd_bits_validate *rsvd_check = &mmu->guest_rsvd_check;

	if (__is_rsvd_bits_set(rsvd_check, gpte, level))
		return true;

	return FNAME(is_bad_mt_xwr)(rsvd_check, gpte);
}

146
/*
 * Atomically replace the guest PTE at slot @index of the page containing
 * @ptep_user, provided it still holds @orig_pte.
 *
 * Returns 0 when the exchange happened, 1 when the entry no longer matched
 * @orig_pte, and -EFAULT when the backing page could not be mapped.
 */
static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			       pt_element_t __user *ptep_user, unsigned index,
			       pt_element_t orig_pte, pt_element_t new_pte)
{
	struct vm_area_struct *vma;
	unsigned long vaddr, pfn, paddr;
	pt_element_t *table;
	pt_element_t old;
	struct page *page;
	int npages;

	/* Common case: the table lives in a page we can pin and map. */
	npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
	if (likely(npages == 1)) {
		table = kmap_atomic(page);
		old = CMPXCHG(&table[index], orig_pte, new_pte);
		kunmap_atomic(table);

		kvm_release_page_dirty(page);

		return (old != orig_pte);
	}

	/*
	 * Fallback: pinning failed.  Accept only a VM_PFNMAP mapping, derive
	 * the physical address from the vma and remap it for the exchange.
	 */
	vaddr = (unsigned long)ptep_user & PAGE_MASK;

	mmap_read_lock(current->mm);
	vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
	if (!vma || !(vma->vm_flags & VM_PFNMAP))
		goto out_efault;

	pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	paddr = pfn << PAGE_SHIFT;
	table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
	if (!table)
		goto out_efault;

	old = CMPXCHG(&table[index], orig_pte, new_pte);
	memunmap(table);
	mmap_read_unlock(current->mm);

	return (old != orig_pte);

out_efault:
	mmap_read_unlock(current->mm);
	return -EFAULT;
}

189 190 191 192 193 194 195
/*
 * Decide whether @gpte is unsuitable for prefetching.  If it is, the shadow
 * pte @spte is dropped and true is returned; otherwise @spte is left alone
 * and false is returned.
 */
static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
				  struct kvm_mmu_page *sp, u64 *spte,
				  u64 gpte)
{
	bool invalid;

	/*
	 * A gpte is rejected when it is not present, when accessed bits are
	 * supported but this gpte has not been accessed yet, or when it has
	 * reserved bits set.  The checks run in that order.
	 */
	invalid = !FNAME(is_present_gpte)(gpte) ||
		  (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) &&
		   !(gpte & PT_GUEST_ACCESSED_MASK)) ||
		  FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K);
	if (!invalid)
		return false;

	drop_spte(vcpu->kvm, spte);
	return true;
}

211 212 213 214 215 216
/*
 * For PTTYPE_EPT, a page table can be executable but not readable
 * on supported processors. Therefore, set_spte does not automatically
 * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
 * to signify readability since it isn't used in the EPT case
 */
217
static inline unsigned FNAME(gpte_access)(u64 gpte)
218 219
{
	unsigned access;
220 221 222
#if PTTYPE == PTTYPE_EPT
	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
223
		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
224
#else
225 226 227 228 229
	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
	access = gpte & (PT_WRITABLE_MASK | PT_USER_MASK | PT_PRESENT_MASK);
	/* Combine NX with P (which is set here) to get ACC_EXEC_MASK.  */
	access ^= (gpte >> PT64_NX_SHIFT);
230
#endif
231 232 233 234

	return access;
}

235 236 237
static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
					     struct kvm_mmu *mmu,
					     struct guest_walker *walker,
238
					     gpa_t addr, int write_fault)
239 240 241 242 243 244 245
{
	unsigned level, index;
	pt_element_t pte, orig_pte;
	pt_element_t __user *ptep_user;
	gfn_t table_gfn;
	int ret;

246
	/* dirty/accessed bits are not supported, so no need to update them */
247
	if (!PT_HAVE_ACCESSED_DIRTY(mmu))
248 249
		return 0;

250 251 252 253 254
	for (level = walker->max_level; level >= walker->level; --level) {
		pte = orig_pte = walker->ptes[level - 1];
		table_gfn = walker->table_gfn[level - 1];
		ptep_user = walker->ptep_user[level - 1];
		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
255
		if (!(pte & PT_GUEST_ACCESSED_MASK)) {
256
			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
257
			pte |= PT_GUEST_ACCESSED_MASK;
258
		}
259
		if (level == walker->level && write_fault &&
260
				!(pte & PT_GUEST_DIRTY_MASK)) {
261
			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
262
#if PTTYPE == PTTYPE_EPT
263
			if (kvm_x86_ops.nested_ops->write_log_dirty(vcpu, addr))
264 265
				return -EINVAL;
#endif
266
			pte |= PT_GUEST_DIRTY_MASK;
267 268 269 270
		}
		if (pte == orig_pte)
			continue;

271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
		/*
		 * If the slot is read-only, simply do not process the accessed
		 * and dirty bits.  This is the correct thing to do if the slot
		 * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
		 * are only supported if the accessed and dirty bits are already
		 * set in the ROM (so that MMIO writes are never needed).
		 *
		 * Note that NPT does not allow this at all and faults, since
		 * it always wants nested page table entries for the guest
		 * page tables to be writable.  And EPT works but will simply
		 * overwrite the read-only memory to set the accessed and dirty
		 * bits.
		 */
		if (unlikely(!walker->pte_writable[level - 1]))
			continue;

287 288 289 290
		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
		if (ret)
			return ret;

291
		kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
292
		walker->ptes[level - 1] = pte;
293 294 295 296
	}
	return 0;
}

297 298 299 300 301 302 303 304 305 306 307
/*
 * Return the protection key encoded in @gpte.  Only the PTTYPE == 64 build
 * extracts one; every other build returns 0.
 */
static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
{
#if PTTYPE == 64
	pte_t pte = {.pte = gpte};

	return pte_flags_pkey(pte_flags(pte));
#else
	return 0;
#endif
}

308
/*
309
 * Fetch a guest pte for a guest virtual address, or for an L2's GPA.
310
 */
311 312
static int FNAME(walk_addr_generic)(struct guest_walker *walker,
				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
313
				    gpa_t addr, u32 access)
A
Avi Kivity 已提交
314
{
315
	int ret;
316
	pt_element_t pte;
317
	pt_element_t __user *ptep_user;
318
	gfn_t table_gfn;
319 320
	u64 pt_access, pte_access;
	unsigned index, accessed_dirty, pte_pkey;
321
	unsigned nested_access;
322
	gpa_t pte_gpa;
323
	bool have_ad;
324
	int offset;
325
	u64 walk_nx_mask = 0;
326 327 328 329
	const int write_fault = access & PFERR_WRITE_MASK;
	const int user_fault  = access & PFERR_USER_MASK;
	const int fetch_fault = access & PFERR_FETCH_MASK;
	u16 errcode = 0;
330 331
	gpa_t real_gpa;
	gfn_t gfn;
A
Avi Kivity 已提交
332

333
	trace_kvm_mmu_pagetable_walk(addr, access);
334
retry_walk:
335
	walker->level = mmu->root_level;
336
	pte           = mmu->get_guest_pgd(vcpu);
337
	have_ad       = PT_HAVE_ACCESSED_DIRTY(mmu);
338

339
#if PTTYPE == 64
340
	walk_nx_mask = 1ULL << PT64_NX_SHIFT;
341
	if (walker->level == PT32E_ROOT_LEVEL) {
342
		pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3);
343
		trace_kvm_mmu_paging_element(pte, walker->level);
344
		if (!FNAME(is_present_gpte)(pte))
345
			goto error;
346 347 348
		--walker->level;
	}
#endif
349
	walker->max_level = walker->level;
350
	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
A
Avi Kivity 已提交
351

352 353 354 355 356 357 358
	/*
	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
	 * by the MOV to CR instruction are treated as reads and do not cause the
	 * processor to set the dirty flag in any EPT paging-structure entry.
	 */
	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;

359
	pte_access = ~0;
360
	++walker->level;
361

362
	do {
363 364
		unsigned long host_addr;

365
		pt_access = pte_access;
366 367
		--walker->level;

368
		index = PT_INDEX(addr, walker->level);
369
		table_gfn = gpte_to_gfn(pte);
370 371
		offset    = index * sizeof(pt_element_t);
		pte_gpa   = gfn_to_gpa(table_gfn) + offset;
372 373

		BUG_ON(walker->level < 1);
374
		walker->table_gfn[walker->level - 1] = table_gfn;
375
		walker->pte_gpa[walker->level - 1] = pte_gpa;
376

377
		real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
378
					      nested_access,
379
					      &walker->fault);
380 381 382 383 384 385 386 387 388 389 390

		/*
		 * FIXME: This can happen if emulation (for of an INS/OUTS
		 * instruction) triggers a nested page fault.  The exit
		 * qualification / exit info field will incorrectly have
		 * "guest page access" as the nested page fault's cause,
		 * instead of "guest page structure access".  To fix this,
		 * the x86_exception struct should be augmented with enough
		 * information to fix the exit_qualification or exit_info_1
		 * fields.
		 */
391
		if (unlikely(real_gpa == UNMAPPED_GVA))
392
			return 0;
393

394
		host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
395
					    &walker->pte_writable[walker->level - 1]);
396 397
		if (unlikely(kvm_is_error_hva(host_addr)))
			goto error;
398 399

		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
400
		if (unlikely(__get_user(pte, ptep_user)))
401
			goto error;
402
		walker->ptep_user[walker->level - 1] = ptep_user;
403

404
		trace_kvm_mmu_paging_element(pte, walker->level);
405

406 407 408 409 410 411
		/*
		 * Inverting the NX it lets us AND it like other
		 * permission bits.
		 */
		pte_access = pt_access & (pte ^ walk_nx_mask);

412
		if (unlikely(!FNAME(is_present_gpte)(pte)))
413
			goto error;
414

415
		if (unlikely(FNAME(is_rsvd_bits_set)(mmu, pte, walker->level))) {
416
			errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
417
			goto error;
418
		}
419

420
		walker->ptes[walker->level - 1] = pte;
A
Avi Kivity 已提交
421
	} while (!is_last_gpte(mmu, walker->level, pte));
422

423
	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
424 425 426
	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;

	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
427 428
	walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
	walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
429
	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
430
	if (unlikely(errcode))
431 432
		goto error;

433 434 435
	gfn = gpte_to_gfn_lvl(pte, walker->level);
	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;

436
	if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36())
437 438
		gfn += pse36_gfn_delta(pte);

439
	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
440 441 442 443 444
	if (real_gpa == UNMAPPED_GVA)
		return 0;

	walker->gfn = real_gpa >> PAGE_SHIFT;

445
	if (!write_fault)
446
		FNAME(protect_clean_gpte)(mmu, &walker->pte_access, pte);
447 448
	else
		/*
449 450 451
		 * On a write fault, fold the dirty bit into accessed_dirty.
		 * For modes without A/D bits support accessed_dirty will be
		 * always clear.
452
		 */
453 454
		accessed_dirty &= pte >>
			(PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);
455 456

	if (unlikely(!accessed_dirty)) {
457 458
		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker,
							addr, write_fault);
459 460 461 462 463
		if (unlikely(ret < 0))
			goto error;
		else if (ret)
			goto retry_walk;
	}
464

465
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
466
		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
467 468
	return 1;

469
error:
470
	errcode |= write_fault | user_fault;
471 472
	if (fetch_fault && (mmu->nx ||
			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
473
		errcode |= PFERR_FETCH_MASK;
474

475 476 477
	walker->fault.vector = PF_VECTOR;
	walker->fault.error_code_valid = true;
	walker->fault.error_code = errcode;
478 479 480 481 482 483 484 485

#if PTTYPE == PTTYPE_EPT
	/*
	 * Use PFERR_RSVD_MASK in error_code to to tell if EPT
	 * misconfiguration requires to be injected. The detection is
	 * done by is_rsvd_bits_set() above.
	 *
	 * We set up the value of exit_qualification to inject:
486 487
	 * [2:0] - Derive from the access bits. The exit_qualification might be
	 *         out of date if it is serving an EPT misconfiguration.
488 489 490 491 492 493
	 * [5:3] - Calculated by the page walk of the guest EPT page tables
	 * [7:8] - Derived from [7:8] of real exit_qualification
	 *
	 * The other bits are set to 0.
	 */
	if (!(errcode & PFERR_RSVD_MASK)) {
494 495 496 497 498 499 500
		vcpu->arch.exit_qualification &= 0x180;
		if (write_fault)
			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
		if (user_fault)
			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ;
		if (fetch_fault)
			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
501
		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
502 503
	}
#endif
504 505
	walker->fault.address = addr;
	walker->fault.nested_page_fault = mmu != vcpu->arch.walk_mmu;
506

507
	trace_kvm_mmu_walker_error(walker->fault.error_code);
508
	return 0;
A
Avi Kivity 已提交
509 510
}

511
/* Walk the guest page tables for @addr using the vCPU's current MMU. */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gpa_t addr, u32 access)
{
	struct kvm_mmu *mmu = vcpu->arch.mmu;

	return FNAME(walk_addr_generic)(walker, vcpu, mmu, addr, access);
}

518
#if PTTYPE != PTTYPE_EPT
/* Walk the guest page tables for @addr using the vCPU's nested MMU. */
static int FNAME(walk_addr_nested)(struct guest_walker *walker,
				   struct kvm_vcpu *vcpu, gva_t addr,
				   u32 access)
{
	struct kvm_mmu *nested = &vcpu->arch.nested_mmu;

	return FNAME(walk_addr_generic)(walker, vcpu, nested, addr, access);
}
#endif
527

528 529 530
/*
 * Try to install a 4K shadow pte at @spte for the guest pte @gpte.
 *
 * Returns false when @gpte is rejected by FNAME(prefetch_invalid_gpte) or
 * when no pfn could be obtained for it; returns true once the spte is set.
 */
static bool
FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
		     u64 *spte, pt_element_t gpte, bool no_dirty_log)
{
	unsigned pte_access;
	bool skip_dirty_logged;
	kvm_pfn_t pfn;
	gfn_t gfn;

	if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
		return false;

	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);

	/* Permissions: the shadow page's role, narrowed by the gpte itself. */
	pte_access = sp->role.access & FNAME(gpte_access)(gpte);
	FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);

	gfn = gpte_to_gfn(gpte);
	skip_dirty_logged = no_dirty_log && (pte_access & ACC_WRITE_MASK);
	pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, skip_dirty_logged);
	if (is_error_pfn(pfn))
		return false;

	/*
	 * host_writable is passed as true because pte_prefetch_gfn_to_pfn
	 * always gets a writable pfn.
	 */
	mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn,
		     true, true);

	kvm_release_pfn_clean(pfn);
	return true;
}

/*
 * Refresh the shadow pte @spte from the guest pte that @pte points at, via
 * the prefetch path with no_dirty_log set to false.
 */
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			      u64 *spte, const void *pte)
{
	const pt_element_t *gptep = pte;

	FNAME(prefetch_gpte)(vcpu, sp, spte, *gptep, false);
}

A
Avi Kivity 已提交
568 569 570 571
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
				struct guest_walker *gw, int level)
{
	pt_element_t curr_pte;
572 573 574 575
	gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
	u64 mask;
	int r, index;

576
	if (level == PG_LEVEL_4K) {
577 578 579 580
		mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
		base_gpa = pte_gpa & ~mask;
		index = (pte_gpa - base_gpa) / sizeof(pt_element_t);

581
		r = kvm_vcpu_read_guest_atomic(vcpu, base_gpa,
582 583 584
				gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
		curr_pte = gw->prefetch_ptes[index];
	} else
585
		r = kvm_vcpu_read_guest_atomic(vcpu, pte_gpa,
A
Avi Kivity 已提交
586
				  &curr_pte, sizeof(curr_pte));
587

A
Avi Kivity 已提交
588 589 590
	return r || curr_pte != gw->ptes[level - 1];
}

591 592
/*
 * Opportunistically populate the shadow ptes that share an aligned group of
 * PTE_PREFETCH_NUM entries with @sptep, using the guest ptes cached in
 * gw->prefetch_ptes.  Only 4K-level shadow pages are handled here; direct
 * shadow pages are handed to __direct_pte_prefetch().
 */
static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
				u64 *sptep)
{
	struct kvm_mmu_page *sp = sptep_to_sp(sptep);
	pt_element_t *gptep = gw->prefetch_ptes;
	u64 *group;
	int i;

	if (sp->role.level > PG_LEVEL_4K)
		return;

	if (sp->role.direct) {
		__direct_pte_prefetch(vcpu, sp, sptep);
		return;
	}

	/* First shadow pte of the aligned group containing sptep. */
	group = sp->spt + ((sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1));

	for (i = 0; i < PTE_PREFETCH_NUM; i++) {
		u64 *spte = &group[i];

		/* Skip the faulting entry itself and anything already mapped. */
		if (spte == sptep || is_shadow_present_pte(*spte))
			continue;

		if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
			break;
	}
}

A
Avi Kivity 已提交
622 623
/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
624 625
 * If the guest tries to write a write-protected page, we need to
 * emulate this operation, return 1 to indicate this case.
A
Avi Kivity 已提交
626
 */
627
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
628 629 630
			 struct guest_walker *gw, u32 error_code,
			 int max_level, kvm_pfn_t pfn, bool map_writable,
			 bool prefault)
A
Avi Kivity 已提交
631
{
632 633 634 635
	bool nx_huge_page_workaround_enabled = is_nx_huge_page_enabled();
	int write_fault = error_code & PFERR_WRITE_MASK;
	bool exec = error_code & PFERR_FETCH_MASK;
	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
636
	struct kvm_mmu_page *sp = NULL;
637
	struct kvm_shadow_walk_iterator it;
638
	unsigned direct_access, access = gw->pt_access;
639
	int top_level, level, req_level, ret;
640
	gfn_t base_gfn = gw->gfn;
641

642
	direct_access = gw->pte_access;
643

644
	top_level = vcpu->arch.mmu->root_level;
645 646 647 648 649 650 651 652 653 654 655
	if (top_level == PT32E_ROOT_LEVEL)
		top_level = PT32_ROOT_LEVEL;
	/*
	 * Verify that the top-level gpte is still there.  Since the page
	 * is a root page, it is either write protected (and cannot be
	 * changed from now on) or it is invalid (in which case, we don't
	 * really care if it changes underneath us after this point).
	 */
	if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;

656
	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
657 658
		goto out_gpte_changed;

659 660 661
	for (shadow_walk_init(&it, vcpu, addr);
	     shadow_walk_okay(&it) && it.level > gw->level;
	     shadow_walk_next(&it)) {
662 663
		gfn_t table_gfn;

664
		clear_sp_write_flooding_count(it.sptep);
665
		drop_large_spte(vcpu, it.sptep);
666

667
		sp = NULL;
668 669 670
		if (!is_shadow_present_pte(*it.sptep)) {
			table_gfn = gw->table_gfn[it.level - 2];
			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
671
					      false, access);
672
		}
673 674 675 676 677

		/*
		 * Verify that the gpte in the page we've just write
		 * protected is still there.
		 */
678
		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
679
			goto out_gpte_changed;
680

681
		if (sp)
682
			link_shadow_page(vcpu, it.sptep, sp);
683
	}
A
Avi Kivity 已提交
684

685 686
	level = kvm_mmu_hugepage_adjust(vcpu, gw->gfn, max_level, &pfn,
					huge_page_disallowed, &req_level);
687

688 689
	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);

690
	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
691
		clear_sp_write_flooding_count(it.sptep);
P
Paolo Bonzini 已提交
692 693 694 695 696

		/*
		 * We cannot overwrite existing page tables with an NX
		 * large page, as the leaf could be executable.
		 */
697 698
		if (nx_huge_page_workaround_enabled)
			disallowed_hugepage_adjust(it, gw->gfn, &pfn, &level);
P
Paolo Bonzini 已提交
699

700
		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
701
		if (it.level == level)
702 703
			break;

704
		validate_direct_spte(vcpu, it.sptep, direct_access);
705

706
		drop_large_spte(vcpu, it.sptep);
707

708 709 710 711
		if (!is_shadow_present_pte(*it.sptep)) {
			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
					      it.level - 1, true, direct_access);
			link_shadow_page(vcpu, it.sptep, sp);
712
			if (huge_page_disallowed && req_level >= it.level)
P
Paolo Bonzini 已提交
713
				account_huge_nx_page(vcpu->kvm, sp);
714
		}
715 716
	}

717
	ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
718
			   it.level, base_gfn, pfn, prefault, map_writable);
719 720 721
	if (ret == RET_PF_SPURIOUS)
		return ret;

722
	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
723
	++vcpu->stat.pf_fixed;
724
	return ret;
725 726

out_gpte_changed:
727
	return RET_PF_RETRY;
A
Avi Kivity 已提交
728 729
}

730 731 732 733 734 735 736 737 738 739
 /*
 * Check whether the gfn being mapped can be used to write one of the page
 * tables of the current mapping.
 *
 * This is a helper for FNAME(page_fault).  When the guest maps, with a large
 * page, a writable gfn that also serves as one of the page tables used for
 * this translation, kvm must be forced to map it with small pages: a new
 * shadow page is created while the shadow page table is established, and
 * that prevents kvm from using a large page.  Detecting this early avoids an
 * unnecessary #PF and emulation.
 *
 * *@write_fault_to_shadow_pgtable is set to true when the faulting gfn is
 * itself one of the page tables used for the translation.
 *
 * Note: the PDPT is not checked for PAE 32-bit guests.  That is fine because
 * the PDPT is always shadowed, so a large page can never be used to map the
 * gfn that holds the PDPT.
 */
static bool
FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
			      struct guest_walker *walker, int user_fault,
			      bool *write_fault_to_shadow_pgtable)
{
	const gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
	bool self_changed = false;
	int level;

	/*
	 * Nothing to check if the mapping is not writable, unless the write
	 * would be allowed anyway (supervisor access with write protection
	 * off).
	 */
	if (!(walker->pte_access & ACC_WRITE_MASK) &&
	    (is_write_protection(vcpu) || user_fault))
		return false;

	for (level = walker->level; level <= walker->max_level; level++) {
		/* Zero exactly when the target gfn equals this level's table gfn. */
		gfn_t diff = walker->gfn ^ walker->table_gfn[level - 1];

		/* Same huge-page-sized region as the table at this level. */
		if (!(diff & mask))
			self_changed = true;

		/* The target gfn is the table itself. */
		if (!diff)
			*write_fault_to_shadow_pgtable = true;
	}

	return self_changed;
}

A
Avi Kivity 已提交
770 771 772 773 774 775 776 777 778 779 780
/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
781 782
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
A
Avi Kivity 已提交
783
 */
784
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
785
			     bool prefault)
A
Avi Kivity 已提交
786 787 788 789
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	struct guest_walker walker;
790
	int r;
D
Dan Williams 已提交
791
	kvm_pfn_t pfn;
792
	unsigned long mmu_seq;
793
	bool map_writable, is_self_change_mapping;
794
	int max_level;
A
Avi Kivity 已提交
795

796
	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
797

798 799 800 801 802 803
	/*
	 * If PFEC.RSVD is set, this is a shadow page fault.
	 * The bit needs to be cleared before walking guest page tables.
	 */
	error_code &= ~PFERR_RSVD_MASK;

A
Avi Kivity 已提交
804
	/*
805
	 * Look up the guest pte for the faulting address.
A
Avi Kivity 已提交
806
	 */
807
	r = FNAME(walk_addr)(&walker, vcpu, addr, error_code);
A
Avi Kivity 已提交
808 809 810 811

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
812
	if (!r) {
813
		pgprintk("%s: guest page fault\n", __func__);
814
		if (!prefault)
815
			kvm_inject_emulated_page_fault(vcpu, &walker.fault);
816

817
		return RET_PF_RETRY;
A
Avi Kivity 已提交
818 819
	}

820 821
	if (page_fault_handle_page_track(vcpu, error_code, walker.gfn)) {
		shadow_page_table_clear_flood(vcpu, addr);
822
		return RET_PF_EMULATE;
823
	}
824

825
	r = mmu_topup_memory_caches(vcpu, true);
826 827 828
	if (r)
		return r;

829 830 831 832 833
	vcpu->arch.write_fault_to_shadow_pgtable = false;

	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
	      &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable);

834
	if (is_self_change_mapping)
835
		max_level = PG_LEVEL_4K;
836 837 838
	else
		max_level = walker.level;

839
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
840
	smp_rmb();
841

842
	if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
843
			 &map_writable))
844
		return RET_PF_RETRY;
845

846
	if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
847 848
		return r;

849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
	/*
	 * Do not change pte_access if the pfn is a mmio page, otherwise
	 * we will cache the incorrect access into mmio spte.
	 */
	if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
	     !is_write_protection(vcpu) && !user_fault &&
	      !is_noslot_pfn(pfn)) {
		walker.pte_access |= ACC_WRITE_MASK;
		walker.pte_access &= ~ACC_USER_MASK;

		/*
		 * If we converted a user page to a kernel page,
		 * so that the kernel can write to it when cr0.wp=0,
		 * then we should prevent the kernel from executing it
		 * if SMEP is enabled.
		 */
		if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
			walker.pte_access &= ~ACC_EXEC_MASK;
	}

869
	r = RET_PF_RETRY;
870
	spin_lock(&vcpu->kvm->mmu_lock);
871
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
872
		goto out_unlock;
873

874
	kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
875 876
	r = make_mmu_pages_available(vcpu);
	if (r)
877
		goto out_unlock;
878 879
	r = FNAME(fetch)(vcpu, addr, &walker, error_code, max_level, pfn,
			 map_writable, prefault);
880
	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
881 882 883 884

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
885
	return r;
A
Avi Kivity 已提交
886 887
}

X
Xiao Guangrong 已提交
888 889 890 891
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)
{
	int offset = 0;

892
	WARN_ON(sp->role.level != PG_LEVEL_4K);
X
Xiao Guangrong 已提交
893 894 895 896 897 898 899

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	return gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);
}

900
/*
 * Invalidate the shadow mapping for @gva under the root @root_hpa: walk the
 * shadow page table, zap the last-level spte if its shadow page is unsync,
 * and try to re-create it from the current guest pte.
 */
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
{
	struct kvm_shadow_walk_iterator it;

	vcpu_clear_mmio_info(vcpu, gva);

	/*
	 * No need to check return value here, rmap_can_add() can
	 * help us to skip pte prefetch later.
	 */
	mmu_topup_memory_caches(vcpu, true);

	if (!VALID_PAGE(root_hpa)) {
		WARN_ON(1);
		return;
	}

	spin_lock(&vcpu->kvm->mmu_lock);
	for_each_shadow_entry_using_root(vcpu, root_hpa, gva, it) {
		int level = it.level;
		u64 *sptep = it.sptep;
		struct kvm_mmu_page *sp = sptep_to_sp(sptep);
		u64 old_spte = *sptep;

		if (is_last_spte(old_spte, level)) {
			pt_element_t gpte;
			gpa_t pte_gpa;

			if (!sp->unsync)
				break;

			/* Guest physical address of the gpte behind this spte. */
			pte_gpa = FNAME(get_level1_sp_gpa)(sp) +
				  (sptep - sp->spt) * sizeof(pt_element_t);

			mmu_page_zap_pte(vcpu->kvm, sp, sptep, NULL);
			if (is_shadow_present_pte(old_spte))
				kvm_flush_remote_tlbs_with_address(vcpu->kvm,
					sp->gfn, KVM_PAGES_PER_HPAGE(sp->role.level));

			if (!rmap_can_add(vcpu))
				break;

			if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
						       sizeof(pt_element_t)))
				break;

			FNAME(update_pte)(vcpu, sp, sptep, &gpte);
		}

		/* Descend only through present entries with unsync children. */
		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
			break;
	}
	spin_unlock(&vcpu->kvm->mmu_lock);
}

959 960
/*
 * Translate @addr through the vCPU's current MMU.
 *
 * Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA.
 *
 * Returns the translated address, or UNMAPPED_GVA when the walk fails; in
 * the latter case the fault is copied to *@exception if it is non-NULL.
 */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
			       struct x86_exception *exception)
{
	struct guest_walker walker;

	if (FNAME(walk_addr)(&walker, vcpu, addr, access))
		return gfn_to_gpa(walker.gfn) | (addr & ~PAGE_MASK);

	if (exception)
		*exception = walker.fault;

	return UNMAPPED_GVA;
}

978
#if PTTYPE != PTTYPE_EPT
/*
 * Translate @vaddr to a guest physical address by walking the guest page
 * tables with FNAME(walk_addr_nested)().
 *
 * Note, gva_to_gpa_nested() is only used to translate L2 GVAs.
 *
 * @vcpu:      vCPU whose guest page tables are walked.
 * @vaddr:     address to translate.
 * @access:    access bits passed through to the walker.
 * @exception: optional; if non-NULL and the walk fails, it receives the
 *             fault recorded by the walker.
 *
 * Returns the translated GPA (frame from the walk combined with the page
 * offset of @vaddr), or UNMAPPED_GVA if the walk fails.
 */
static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
				      u32 access,
				      struct x86_exception *exception)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

#ifndef CONFIG_X86_64
	/* A 64-bit GVA should be impossible on 32-bit KVM. */
	WARN_ON_ONCE(vaddr >> 32);
#endif

	r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		/* Carry over the offset within the page. */
		gpa |= vaddr & ~PAGE_MASK;
	} else if (exception) {
		*exception = walker.fault;
	}

	return gpa;
}
#endif
1004

1005 1006 1007 1008
/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
1009 1010 1011 1012 1013 1014 1015
 *
 * Note:
 *   We should flush all tlbs if spte is dropped even though guest is
 *   responsible for it. Since if we don't, kvm_mmu_notifier_invalidate_page
 *   and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
 *   used by guest then tlbs are not flushed, so guest is allowed to access the
 *   freed pages.
1016
 *   And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
1017
 */
1018
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
1019
{
X
Xiao Guangrong 已提交
1020
	int i, nr_present = 0;
1021
	bool host_writable;
1022
	gpa_t first_pte_gpa;
1023
	int set_spte_ret = 0;
1024

1025 1026 1027
	/* direct kvm_mmu_page can not be unsync. */
	BUG_ON(sp->role.direct);

X
Xiao Guangrong 已提交
1028
	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
1029

1030 1031 1032 1033
	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
1034
		gfn_t gfn;
1035

1036
		if (!sp->spt[i])
1037 1038
			continue;

1039
		pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
1040

1041 1042
		if (kvm_vcpu_read_guest_atomic(vcpu, pte_gpa, &gpte,
					       sizeof(pt_element_t)))
1043
			return 0;
1044

1045
		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
1046 1047 1048 1049 1050 1051
			/*
			 * Update spte before increasing tlbs_dirty to make
			 * sure no tlb flush is lost after spte is zapped; see
			 * the comments in kvm_flush_remote_tlbs().
			 */
			smp_wmb();
1052
			vcpu->kvm->tlbs_dirty++;
1053 1054 1055
			continue;
		}

1056 1057
		gfn = gpte_to_gfn(gpte);
		pte_access = sp->role.access;
1058
		pte_access &= FNAME(gpte_access)(gpte);
1059
		FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
1060

1061
		if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
1062
		      &nr_present))
1063 1064
			continue;

1065
		if (gfn != sp->gfns[i]) {
1066
			drop_spte(vcpu->kvm, &sp->spt[i]);
1067 1068 1069 1070 1071
			/*
			 * The same as above where we are doing
			 * prefetch_invalid_gpte().
			 */
			smp_wmb();
1072
			vcpu->kvm->tlbs_dirty++;
1073 1074 1075 1076
			continue;
		}

		nr_present++;
1077

1078 1079
		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;

1080
		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
1081
					 pte_access, PG_LEVEL_4K,
1082 1083
					 gfn, spte_to_pfn(sp->spt[i]),
					 true, false, host_writable);
1084 1085
	}

1086 1087 1088
	if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
		kvm_flush_remote_tlbs(vcpu->kvm);

1089
	return nr_present;
1090 1091
}

A
Avi Kivity 已提交
1092 1093 1094 1095 1096
/*
 * Drop the per-PTTYPE template macros so that this file can be included
 * again with a different PTTYPE.
 */
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_lvl
#undef CMPXCHG
#undef PT_GUEST_ACCESSED_MASK
#undef PT_GUEST_DIRTY_MASK
#undef PT_GUEST_DIRTY_SHIFT
#undef PT_GUEST_ACCESSED_SHIFT
#undef PT_HAVE_ACCESSED_DIRTY