/* paging_tmpl.h */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */

#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

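/*
 * The walker helpers below are static to this file, so rename them per
 * instantiation to keep the two compilations from clashing.
 */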
#define gpte_to_gfn FNAME(gpte_to_gfn)
#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)

/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];	/* gfn of the guest page table at each level */
	pt_element_t ptes[PT_MAX_FULL_LEVELS];	/* guest pte read at each level */
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];	/* guest physical address of each pte */
	unsigned pt_access;			/* access allowed by the parent page tables */
	unsigned pte_access;			/* access allowed by the final pte */
	gfn_t gfn;				/* frame number the address translates to */
	u32 error_code;				/* page fault error code if the walk failed */
};

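/* Extract the guest frame number from a page-table gpte or a large-page pde. */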
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
{
	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

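/*
 * Atomically update a guest pte; used to set the accessed and dirty bits.
 * Returns true if the pte was changed by someone else meanwhile, in which
 * case the caller restarts the walk.
 */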
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

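/*
 * Convert a guest pte's protection bits into an ACC_* access mask, dropping
 * execute permission when the pte's NX bit is set.
 */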
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu))
		access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}

/*
 * Fetch a guest pte for a guest virtual address.
 *
 * Returns 1 if the walk succeeded, with the translation in walker->gfn and
 * the allowed access in walker->pt_access/pte_access; returns 0 if the
 * guest would fault, with the error code in walker->error_code.
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, pte_access;
	gpa_t pte_gpa;
	int rsvd_fault = 0;

	pgprintk("%s: addr %lx\n", __func__, addr);
walk:
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		/* PAE: the walk starts from one of the four pdptrs, not cr3 */
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
		if (!is_present_pte(pte))
			goto not_present;
		--walker->level;
	}
#endif
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		table_gfn = gpte_to_gfn(pte);
		pte_gpa = gfn_to_gpa(table_gfn);
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;
		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
			 walker->level - 1, table_gfn);

		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));

		if (!is_present_pte(pte))
			goto not_present;

		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
		if (rsvd_fault)
			goto access_error;

		if (write_fault && !is_writeble_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

		if (user_fault && !(pte & PT_USER_MASK))
			goto access_error;

#if PTTYPE == 64
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
			goto access_error;
#endif

		if (!(pte & PT_ACCESSED_MASK)) {
			/* set the accessed bit; restart if the gpte changed under us */
			mark_page_dirty(vcpu->kvm, table_gfn);
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if (walker->level == PT_PAGE_TABLE_LEVEL) {
			walker->gfn = gpte_to_gfn(pte);
			break;
		}

		if (walker->level == PT_DIRECTORY_LEVEL
		    && (pte & PT_PAGE_SIZE_MASK)
		    && (PTTYPE == 64 || is_pse(vcpu))) {
			walker->gfn = gpte_to_gfn_pde(pte);
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
			if (PTTYPE == 32 && is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);
			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

	if (write_fault && !is_dirty_pte(pte)) {
		bool ret;

		/* set the dirty bit; restart if the gpte changed under us */
		mark_page_dirty(vcpu->kvm, table_gfn);
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
		pte |= PT_DIRTY_MASK;
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pt_access, pte_access);
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;
	return 0;
}

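/*
 * Propagate a guest pte write into the corresponding shadow pte, using the
 * gfn/pfn prefetched into vcpu->arch.update_pte by the pte-write path.
 */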
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
			      u64 *spte, const void *pte)
{
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	int largepage = vcpu->arch.update_pte.largepage;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_pte(gpte))
			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
		return;
	}
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
		return;
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
	kvm_get_pfn(pfn);
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
		     gpte & PT_DIRTY_MASK, NULL, largepage,
		     gpte_to_gfn(gpte), pfn, true);
}

/*
 * Fetch a shadow pte for a specific level in the paging hierarchy,
 * allocating any missing intermediate shadow pages and installing the
 * final spte for the faulting address.
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int largepage,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *shadow_page;
	u64 spte, *sptep = NULL;
	int direct;
	gfn_t table_gfn;
	int r;
	int level;
	pt_element_t curr_pte;
	struct kvm_shadow_walk_iterator iterator;

	if (!is_present_pte(gw->ptes[gw->level - 1]))
		return NULL;

	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
		if (level == PT_PAGE_TABLE_LEVEL
		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
			/* reached the leaf level: install the final spte */
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
				     ptwrite, largepage,
				     gw->gfn, pfn, false);
			break;
		}

		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
			continue;

		if (is_large_pte(*sptep)) {
			rmap_remove(vcpu->kvm, sptep);
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			kvm_flush_remote_tlbs(vcpu->kvm);
		}

		if (level == PT_DIRECTORY_LEVEL
		    && gw->level == PT_DIRECTORY_LEVEL) {
			direct = 1;
			if (!is_dirty_pte(gw->ptes[level - 1]))
				access &= ~ACC_WRITE_MASK;
			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
		} else {
			direct = 0;
			table_gfn = gw->table_gfn[level - 2];
		}
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
					       direct, access, sptep);
		if (!direct) {
			r = kvm_read_guest_atomic(vcpu->kvm,
						  gw->pte_gpa[level - 2],
						  &curr_pte, sizeof(curr_pte));
			if (r || curr_pte != gw->ptes[level - 2]) {
				kvm_mmu_put_page(shadow_page, sptep);
				kvm_release_pfn_clean(pfn);
				sptep = NULL;
				break;
			}
		}

		spte = __pa(shadow_page->spt)
			| PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
		*sptep = spte;
	}

	return sptep;
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *shadow_pte;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int largepage = 0;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the guest pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	/*
	 * If the guest maps the address with a large pde and the host backing
	 * allows it, shadow it with a large page as well.
	 */
	if (walker.level == PT_DIRECTORY_LEVEL) {
		gfn_t large_gfn;
		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
		if (is_largepage_backed(vcpu, large_gfn)) {
			walker.gfn = large_gfn;
			largepage = 1;
		}
	}
	/*
	 * Snapshot the mmu notifier sequence before translating the gfn, so
	 * the fault can be retried if an invalidation races with it (checked
	 * by mmu_notifier_retry() under mmu_lock below).
	 */
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn)) {
		pgprintk("gfn %lx is mmio\n", walker.gfn);
		kvm_release_pfn_clean(pfn);
		return 1;
	}

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
				  largepage, &write_pt, pfn);

	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 shadow_pte, *shadow_pte, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

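/*
 * Handle guest invlpg: zap the shadow pte mapping gva and, if the guest pte
 * is still present and accessed, push its new value back into the shadow
 * page tables.
 */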
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	struct kvm_shadow_walk_iterator iterator;
	pt_element_t gpte;
	gpa_t pte_gpa = -1;
	int level;
	u64 *sptep;
	int need_flush = 0;

	spin_lock(&vcpu->kvm->mmu_lock);

	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		/* FIXME: properly handle invlpg on large guest pages */
		if (level == PT_PAGE_TABLE_LEVEL ||
		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
			struct kvm_mmu_page *sp = page_header(__pa(sptep));

			pte_gpa = (sp->gfn << PAGE_SHIFT);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				rmap_remove(vcpu->kvm, sptep);
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
				need_flush = 1;
			}
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

		if (!is_shadow_present_pte(*sptep))
			break;
	}

	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);
	spin_unlock(&vcpu->kvm->mmu_lock);

	if (pte_gpa == -1)
		return;
	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
				  sizeof(pt_element_t)))
		return;
	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
		if (mmu_topup_memory_caches(vcpu))
			return;
		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
				  sizeof(pt_element_t), 0);
	}
}

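/*
 * Translate a guest virtual address to a guest physical address by walking
 * the guest page tables; returns UNMAPPED_GVA if there is no translation.
 */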
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	}

	return gpa;
}

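/*
 * Pre-populate a new shadow page: entries whose guest pte is present are
 * left trapping so the first access faults into kvm, while entries whose
 * guest pte is not present can reflect the fault straight to the guest.
 */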
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;

	if (sp->role.direct
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
		pte_gpa += offset * sizeof(pt_element_t);
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_pte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
	}
}

/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	int i, offset, nr_present;

	offset = nr_present = 0;

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn = sp->gfns[i];

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = gfn_to_gpa(sp->gfn);
		pte_gpa += (i+offset) * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
			if (is_present_pte(gpte))
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			set_shadow_pte(&sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_pte(gpte), 0, gfn,
			 spte_to_pfn(sp->spt[i]), true, false);
	}

	return !nr_present;
}

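/*
 * Undo the per-PTTYPE definitions so this file can be included again with a
 * different PTTYPE.
 */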
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_pde
#undef CMPXCHG