paging_tmpl.h 15.3 KB
Newer Older
A
Avi Kivity 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */

#if PTTYPE == 64
	/* Instantiate the template for 64-bit guest page table entries. */
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	/* Without CONFIG_X86_64 the 64-bit pte is exchanged with cmpxchg64. */
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	/* Instantiate the template for 32-bit guest page table entries. */
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

/* Give the two gfn helpers a per-PTTYPE name, like every other FNAME() symbol. */
#define gpte_to_gfn FNAME(gpte_to_gfn)
#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)

A
Avi Kivity 已提交
59 60 61 62 63 64
/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
65
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
66 67
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
68 69
	unsigned pt_access;
	unsigned pte_access;
70
	gfn_t gfn;
71
	u32 error_code;
A
Avi Kivity 已提交
72 73
};

74 75 76 77 78 79 80 81 82 83
/* Return the guest frame number encoded in a guest page table entry. */
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
	pt_element_t frame_bits = gpte & PT_BASE_ADDR_MASK;

	return frame_bits >> PAGE_SHIFT;
}

/*
 * Return the guest frame number encoded in a guest page directory entry,
 * using the directory-level base address mask.
 */
static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
{
	pt_element_t dir_frame_bits;

	dir_frame_bits = gpte & PT_DIR_BASE_ADDR_MASK;
	return dir_frame_bits >> PAGE_SHIFT;
}

84 85 86 87 88 89 90 91 92
/*
 * Atomically replace entry @index of the guest page table at @table_gfn
 * with @new_pte, provided it still holds @orig_pte.
 *
 * The guest page is mapped for the duration of the exchange and released
 * as dirty afterwards.
 *
 * Returns true when the exchange did NOT happen (the entry no longer held
 * @orig_pte), false when the entry was updated.  Callers restart their
 * walk on a true return.
 */
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

103 104 105 106 107 108 109 110 111 112 113 114
/*
 * Translate a guest pte into an ACC_* style access mask: the pte's
 * writable and user bits are carried over and execute permission is
 * granted, unless (64-bit ptes only, with NX enabled on the vcpu) the
 * pte has its NX bit set.
 */
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned perms = gpte & (PT_WRITABLE_MASK | PT_USER_MASK);

	perms |= ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu)) {
		/* Shifting the NX bit down yields the mask to strip. */
		perms &= ~(gpte >> PT64_NX_SHIFT);
	}
#endif
	return perms;
}

115 116 117
/*
 * Fetch a guest pte for a guest virtual address
 */
118 119
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
120
			    int write_fault, int user_fault, int fetch_fault)
A
Avi Kivity 已提交
121
{
122
	pt_element_t pte;
123
	gfn_t table_gfn;
124
	unsigned index, pt_access, pte_access;
125
	gpa_t pte_gpa;
A
Avi Kivity 已提交
126

127
	pgprintk("%s: addr %lx\n", __func__, addr);
128
walk:
129 130
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
131 132
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
133
		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
134
		if (!is_present_pte(pte))
135
			goto not_present;
136 137 138
		--walker->level;
	}
#endif
A
Avi Kivity 已提交
139
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
140
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
A
Avi Kivity 已提交
141

142
	pt_access = ACC_ALL;
143 144

	for (;;) {
145
		index = PT_INDEX(addr, walker->level);
146

147
		table_gfn = gpte_to_gfn(pte);
A
Avi Kivity 已提交
148
		pte_gpa = gfn_to_gpa(table_gfn);
149
		pte_gpa += index * sizeof(pt_element_t);
150
		walker->table_gfn[walker->level - 1] = table_gfn;
151
		walker->pte_gpa[walker->level - 1] = pte_gpa;
152
		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
153 154
			 walker->level - 1, table_gfn);

155
		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
156 157

		if (!is_present_pte(pte))
158 159
			goto not_present;

160
		if (write_fault && !is_writeble_pte(pte))
161 162 163
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

164
		if (user_fault && !(pte & PT_USER_MASK))
165 166
			goto access_error;

167
#if PTTYPE == 64
168
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
169 170 171
			goto access_error;
#endif

172
		if (!(pte & PT_ACCESSED_MASK)) {
173
			mark_page_dirty(vcpu->kvm, table_gfn);
174 175 176
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
177
			pte |= PT_ACCESSED_MASK;
178
		}
179

180
		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
181

182 183
		walker->ptes[walker->level - 1] = pte;

184
		if (walker->level == PT_PAGE_TABLE_LEVEL) {
185
			walker->gfn = gpte_to_gfn(pte);
186 187 188 189
			break;
		}

		if (walker->level == PT_DIRECTORY_LEVEL
190
		    && (pte & PT_PAGE_SIZE_MASK)
191
		    && (PTTYPE == 64 || is_pse(vcpu))) {
192
			walker->gfn = gpte_to_gfn_pde(pte);
193
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
194 195
			if (PTTYPE == 32 && is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);
196
			break;
197
		}
198

199
		pt_access = pte_access;
200 201
		--walker->level;
	}
202 203

	if (write_fault && !is_dirty_pte(pte)) {
204 205
		bool ret;

206
		mark_page_dirty(vcpu->kvm, table_gfn);
207 208 209 210
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
211
		pte |= PT_DIRTY_MASK;
212
		walker->ptes[walker->level - 1] = pte;
213 214
	}

215 216 217
	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
218
		 __func__, (u64)pte, pt_access, pte_access);
219 220 221 222 223 224 225 226 227 228 229 230 231 232
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
233 234
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
235
	return 0;
A
Avi Kivity 已提交
236 237
}

238
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
239
			      u64 *spte, const void *pte)
240 241
{
	pt_element_t gpte;
242
	unsigned pte_access;
243
	pfn_t pfn;
M
Marcelo Tosatti 已提交
244
	int largepage = vcpu->arch.update_pte.largepage;
245 246

	gpte = *(const pt_element_t *)pte;
247
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
248
		if (!is_present_pte(gpte))
249 250 251
			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
		return;
	}
252
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
253
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
254 255
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
256 257
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
258
		return;
259 260
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
261
	kvm_get_pfn(pfn);
262
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
263 264
		     gpte & PT_DIRTY_MASK, NULL, largepage,
		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
265
		     pfn, true);
266 267
}

A
Avi Kivity 已提交
268 269 270
/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
271 272 273 274
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int largepage,
			 int *ptwrite, pfn_t pfn)
A
Avi Kivity 已提交
275
{
276 277
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *shadow_page;
278
	u64 spte, *sptep;
279
	int direct;
280 281
	gfn_t table_gfn;
	int r;
282
	int level;
283
	pt_element_t curr_pte;
284
	struct kvm_shadow_walk_iterator iterator;
285

286 287
	if (!is_present_pte(gw->ptes[gw->level - 1]))
		return NULL;
A
Avi Kivity 已提交
288

289 290 291 292 293 294 295 296 297 298 299 300 301 302
	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
		if (level == PT_PAGE_TABLE_LEVEL
		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
				     ptwrite, largepage,
				     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
				     gw->gfn, pfn, false);
			break;
		}
A
Avi Kivity 已提交
303

304 305
		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
			continue;
306

307
		if (is_large_pte(*sptep)) {
308
			rmap_remove(vcpu->kvm, sptep);
309 310
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			kvm_flush_remote_tlbs(vcpu->kvm);
311
		}
312

313 314
		if (level == PT_DIRECTORY_LEVEL
		    && gw->level == PT_DIRECTORY_LEVEL) {
315
			direct = 1;
316 317 318 319
			if (!is_dirty_pte(gw->ptes[level - 1]))
				access &= ~ACC_WRITE_MASK;
			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
		} else {
320
			direct = 0;
321 322 323
			table_gfn = gw->table_gfn[level - 2];
		}
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
324 325
					       direct, access, sptep);
		if (!direct) {
326 327 328 329 330 331 332 333 334 335
			r = kvm_read_guest_atomic(vcpu->kvm,
						  gw->pte_gpa[level - 2],
						  &curr_pte, sizeof(curr_pte));
			if (r || curr_pte != gw->ptes[level - 2]) {
				kvm_mmu_put_page(shadow_page, sptep);
				kvm_release_pfn_clean(pfn);
				sptep = NULL;
				break;
			}
		}
336

337 338 339 340 341
		spte = __pa(shadow_page->spt)
			| PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
		*sptep = spte;
	}
A
Avi Kivity 已提交
342

343
	return sptep;
A
Avi Kivity 已提交
344 345 346 347 348 349 350 351 352 353 354 355 356
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *shadow_pte;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int largepage = 0;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the shadow pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	/* Guest maps a large page: use one on the host too if it is backed. */
	if (walker.level == PT_DIRECTORY_LEVEL) {
		gfn_t large_gfn;
		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
		if (is_largepage_backed(vcpu, large_gfn)) {
			walker.gfn = large_gfn;
			largepage = 1;
		}
	}
	/* Sample the notifier sequence before looking up the pfn. */
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn)) {
		pgprintk("gfn %lx is mmio\n", walker.gfn);
		kvm_release_pfn_clean(pfn);
		return 1;
	}

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
				  largepage, &write_pt, pfn);

	/*
	 * FNAME(fetch) returns NULL when it abandons the walk, so the
	 * pointer must not be dereferenced unconditionally here.
	 */
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 shadow_pte, shadow_pte ? *shadow_pte : (u64)0, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

441
/*
 * Handle a guest invlpg of @gva.
 *
 * Under mmu_lock, walk the shadow entries for @gva.  At the last level
 * (page table level, or a large shadow pte at directory level) the shadow
 * pte is replaced by shadow_trap_nonpresent_pte; if it was present its
 * rmap entry is removed first and remote TLBs are flushed.
 *
 * After dropping the lock, the corresponding guest pte is re-read and, if
 * it is present and accessed, handed to kvm_mmu_pte_write().
 *
 * NOTE(review): FNAME(prefetch_page) and FNAME(sync_page) add a
 * role.quadrant based offset for PTTYPE == 32 when computing a guest pte
 * address; no such offset is applied to pte_gpa here.  Confirm whether
 * that is intentional.
 */
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	struct kvm_shadow_walk_iterator iterator;
	pt_element_t gpte;
	gpa_t pte_gpa = -1;	/* -1 means no last-level entry was found */
	int level;
	u64 *sptep;
	int need_flush = 0;

	spin_lock(&vcpu->kvm->mmu_lock);

	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		/* FIXME: properly handle invlpg on large guest pages */
		if (level == PT_PAGE_TABLE_LEVEL ||
		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
			struct kvm_mmu_page *sp = page_header(__pa(sptep));

			/* Address of the guest pte behind this shadow pte. */
			pte_gpa = (sp->gfn << PAGE_SHIFT);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				rmap_remove(vcpu->kvm, sptep);
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
				need_flush = 1;
			}
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

		if (!is_shadow_present_pte(*sptep))
			break;
	}

	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);
	spin_unlock(&vcpu->kvm->mmu_lock);

	if (pte_gpa == -1)
		return;
	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
				  sizeof(pt_element_t)))
		return;
	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
		if (mmu_topup_memory_caches(vcpu))
			return;
		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
				  sizeof(pt_element_t), 0);
	}
}

A
Avi Kivity 已提交
495 496 497
/*
 * Translate guest virtual address @vaddr to a guest physical address by
 * walking the guest page tables as a plain read access (no write, user or
 * fetch fault flags).
 *
 * Returns the guest physical address, or UNMAPPED_GVA if the walk fails.
 */
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		/* Carry over the offset within the page. */
		gpa |= vaddr & ~PAGE_MASK;
	}

	return gpa;
}

511 512 513
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
A
Avi Kivity 已提交
514 515 516
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;
517

518
	if (sp->role.direct
519
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
520 521 522 523
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

A
Avi Kivity 已提交
524 525
	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
526
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
A
Avi Kivity 已提交
527 528
		pte_gpa += offset * sizeof(pt_element_t);
	}
529

A
Avi Kivity 已提交
530 531 532 533 534 535 536 537
	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_pte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
538
	}
539 540
}

541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587
/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	int i, offset, nr_present;

	offset = nr_present = 0;

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn = sp->gfns[i];

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = gfn_to_gpa(sp->gfn);
		pte_gpa += (i+offset) * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
			if (is_present_pte(gpte))
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			set_shadow_pte(&sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
588
			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
589
			 spte_to_pfn(sp->spt[i]), true, false);
590 591 592 593 594
	}

	return !nr_present;
}

A
Avi Kivity 已提交
595 596 597 598 599 600 601
/* Drop the per-PTTYPE definitions so the file can be included again. */
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_pde
#undef CMPXCHG