/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */
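/*
 * Illustrative sketch (not part of the original file): the MMU code is
 * expected to include this template once per guest pte width, roughly as
 * in mmu.c:
 *
 *	#define PTTYPE 64
 *	#include "paging_tmpl.h"
 *	#undef PTTYPE
 *
 *	#define PTTYPE 32
 *	#include "paging_tmpl.h"
 *	#undef PTTYPE
 */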

#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define shadow_walker shadow_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define shadow_walker shadow_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

#define gpte_to_gfn FNAME(gpte_to_gfn)
#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)

/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
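/*
 * FNAME(walk_addr) fills in one entry per level walked, indexed by
 * [level - 1]: the gfn and gpa of each guest page table visited and the
 * pte read from it.  pt_access/pte_access are ACC_* masks; error_code is
 * a PFERR_* mask ready to be injected if the walk fails.
 */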
struct guest_walker {
	int level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
	unsigned pt_access;
	unsigned pte_access;
	gfn_t gfn;
	u32 error_code;
};

struct shadow_walker {
	struct kvm_shadow_walk walker;
	struct guest_walker *guest_walker;
	int user_fault;
	int write_fault;
	int largepage;
	int *ptwrite;
	pfn_t pfn;
	u64 *sptep;
	gpa_t pte_gpa;
};

static gfn_t gpte_to_gfn(pt_element_t gpte)
{
	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
{
	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

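/*
 * Atomically update a guest pte (used below to set accessed/dirty bits).
 * Returns true if the pte changed under us, in which case the caller
 * restarts the walk.
 */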
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

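/*
 * Translate guest pte bits into an ACC_* permission mask; exec permission
 * is removed when the guest has NX enabled and the pte has the NX bit set
 * (64-bit ptes only).
 */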
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu))
		access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}

/*
 * Fetch a guest pte for a guest virtual address
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, pte_access;
	gpa_t pte_gpa;

	pgprintk("%s: addr %lx\n", __func__, addr);
walk:
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		pte = vcpu->arch.pdptrs[(addr >> 30) & 3];
		if (!is_present_pte(pte))
			goto not_present;
		--walker->level;
	}
#endif
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		table_gfn = gpte_to_gfn(pte);
		pte_gpa = gfn_to_gpa(table_gfn);
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;
		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
			 walker->level - 1, table_gfn);

		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));

		if (!is_present_pte(pte))
			goto not_present;

		if (write_fault && !is_writeble_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

		if (user_fault && !(pte & PT_USER_MASK))
			goto access_error;

#if PTTYPE == 64
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
			goto access_error;
#endif

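		/*
		 * Set the accessed bit the way hardware would: atomically,
		 * restarting the whole walk if the pte changed under us.
		 */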
		if (!(pte & PT_ACCESSED_MASK)) {
			mark_page_dirty(vcpu->kvm, table_gfn);
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if (walker->level == PT_PAGE_TABLE_LEVEL) {
			walker->gfn = gpte_to_gfn(pte);
			break;
		}

		if (walker->level == PT_DIRECTORY_LEVEL
		    && (pte & PT_PAGE_SIZE_MASK)
		    && (PTTYPE == 64 || is_pse(vcpu))) {
			walker->gfn = gpte_to_gfn_pde(pte);
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
			if (PTTYPE == 32 && is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);
			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

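	/*
	 * For a write fault, set the dirty bit in the final guest pte
	 * (again restarting on a race) and push the new value through
	 * kvm_mmu_pte_write() so any shadow of that pte stays coherent.
	 */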
	if (write_fault && !is_dirty_pte(pte)) {
		bool ret;

		mark_page_dirty(vcpu->kvm, table_gfn);
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
		pte |= PT_DIRTY_MASK;
		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pt_access, pte_access);
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
	return 0;
}

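/*
 * Called from the guest pte-write path: if the written gpte still matches
 * the gfn/pfn that path prefetched, install the new mapping right away
 * instead of waiting for the next fault on it.
 */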
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
			      u64 *spte, const void *pte)
{
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	int largepage = vcpu->arch.update_pte.largepage;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_pte(gpte))
			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
		return;
	}
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
		return;
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
	kvm_get_pfn(pfn);
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
		     gpte & PT_DIRTY_MASK, NULL, largepage,
		     gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
		     pfn, true);
}

/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int largepage,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *shadow_page;
	u64 spte, *sptep;
	int metaphysical;
	gfn_t table_gfn;
	int r;
	int level;
	pt_element_t curr_pte;
	struct kvm_shadow_walk_iterator iterator;

	if (!is_present_pte(gw->ptes[gw->level - 1]))
		return NULL;

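	/*
	 * Descend the shadow page table in step with the guest walk,
	 * creating missing shadow pages on the way down; the leaf spte is
	 * installed with the access rights the guest walk computed.
	 */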
	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
		if (level == PT_PAGE_TABLE_LEVEL
		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
				     ptwrite, largepage,
				     gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
				     gw->gfn, pfn, false);
			break;
		}

		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
			continue;

		if (is_large_pte(*sptep)) {
			set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
			kvm_flush_remote_tlbs(vcpu->kvm);
			rmap_remove(vcpu->kvm, sptep);
		}

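		/*
		 * A new shadow page is needed here.  A guest large page is
		 * shadowed by a metaphysical (direct-mapped) page; otherwise
		 * re-read the guest pte and bail out if it changed since the
		 * walk, so a stale entry is never shadowed.
		 */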
		if (level == PT_DIRECTORY_LEVEL
		    && gw->level == PT_DIRECTORY_LEVEL) {
			metaphysical = 1;
			if (!is_dirty_pte(gw->ptes[level - 1]))
				access &= ~ACC_WRITE_MASK;
			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
		} else {
			metaphysical = 0;
			table_gfn = gw->table_gfn[level - 2];
		}
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
					       metaphysical, access, sptep);
		if (!metaphysical) {
			r = kvm_read_guest_atomic(vcpu->kvm,
						  gw->pte_gpa[level - 2],
						  &curr_pte, sizeof(curr_pte));
			if (r || curr_pte != gw->ptes[level - 2]) {
				kvm_mmu_put_page(shadow_page, sptep);
				kvm_release_pfn_clean(pfn);
				sptep = NULL;
				break;
			}
		}

		spte = __pa(shadow_page->spt)
			| PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
		*sptep = spte;
	}

	return sptep;
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *shadow_pte;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int largepage = 0;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the shadow pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	if (walker.level == PT_DIRECTORY_LEVEL) {
		gfn_t large_gfn;
		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
		if (is_largepage_backed(vcpu, large_gfn)) {
			walker.gfn = large_gfn;
			largepage = 1;
		}
	}
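	/*
	 * Sample the mmu notifier sequence count before translating the
	 * gfn; mmu_notifier_retry() below rechecks it under mmu_lock and
	 * makes us drop the fault rather than install a spte made stale by
	 * a concurrent invalidation.
	 */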
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn)) {
		pgprintk("gfn %lx is mmio\n", walker.gfn);
		kvm_release_pfn_clean(pfn);
		return 1;
	}

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
				  largepage, &write_pt, pfn);

	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 shadow_pte, *shadow_pte, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

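	/*
	 * write_pt is set by mmu_set_spte() when the faulting write hit a
	 * guest page table that is itself shadowed; returning 1 then tells
	 * the caller to emulate the instruction instead of retrying it.
	 */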
	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
				      struct kvm_vcpu *vcpu, u64 addr,
				      u64 *sptep, int level)
{
	struct shadow_walker *sw =
		container_of(_sw, struct shadow_walker, walker);

	/* FIXME: properly handle invlpg on large guest pages */
	if (level == PT_PAGE_TABLE_LEVEL ||
	    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
		struct kvm_mmu_page *sp = page_header(__pa(sptep));

		sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
		sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

		if (is_shadow_present_pte(*sptep)) {
			rmap_remove(vcpu->kvm, sptep);
			if (is_large_pte(*sptep))
				--vcpu->kvm->stat.lpages;
		}
		set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
		return 1;
	}
	if (!is_shadow_present_pte(*sptep))
		return 1;
	return 0;
}

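/*
 * invlpg: zap the shadow pte backing gva under mmu_lock, then, outside the
 * lock, re-read the guest pte that was shadowed and pre-populate it again
 * through kvm_mmu_pte_write() if the guest still maps it present and
 * accessed.
 */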
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	pt_element_t gpte;
	struct shadow_walker walker = {
		.walker = { .entry = FNAME(shadow_invlpg_entry), },
		.pte_gpa = -1,
	};

	spin_lock(&vcpu->kvm->mmu_lock);
	walk_shadow(&walker.walker, vcpu, gva);
	spin_unlock(&vcpu->kvm->mmu_lock);
	if (walker.pte_gpa == -1)
		return;
	if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
				  sizeof(pt_element_t)))
		return;
	if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
		if (mmu_topup_memory_caches(vcpu))
			return;
		kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
				  sizeof(pt_element_t), 0);
	}
}

static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	}

	return gpa;
}

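/*
 * Pre-fill a freshly allocated shadow page: entries whose guest pte is not
 * present get the "notrap" nonpresent spte, so faults on them go straight
 * to the guest; everything else gets the trapping nonpresent spte.
 */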
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;

	if (sp->role.metaphysical
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
		pte_gpa += offset * sizeof(pt_element_t);
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_pte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
	}
}

/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	int i, offset, nr_present;

	offset = nr_present = 0;

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn = sp->gfns[i];

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = gfn_to_gpa(sp->gfn);
		pte_gpa += (i+offset) * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) ||
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
			if (is_present_pte(gpte))
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			set_shadow_pte(&sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
			 spte_to_pfn(sp->spt[i]), true, false);
	}

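	/* A nonzero return (no sptes remained present) signals that the
	 * caller can zap this shadow page. */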
	return !nr_present;
}

#undef pt_element_t
#undef guest_walker
#undef shadow_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_pde
#undef CMPXCHG