/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */
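/*
 * A minimal sketch of how the including translation unit (mmu.c) is expected
 * to instantiate this template; shown here for illustration only:
 *
 *	#define PTTYPE 64
 *	#include "paging_tmpl.h"
 *	#undef PTTYPE
 *
 *	#define PTTYPE 32
 *	#include "paging_tmpl.h"
 *	#undef PTTYPE
 *
 * Each inclusion produces a full set of pagingNN_*() functions via the
 * FNAME() macro below.
 */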

#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

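/*
 * gpte_to_gfn() and gpte_to_gfn_pde() below are static helpers rather than
 * macros, so give them per-PTTYPE names to keep the two instantiations of
 * this file from clashing in the same translation unit.
 */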
#define gpte_to_gfn FNAME(gpte_to_gfn)
#define gpte_to_gfn_pde FNAME(gpte_to_gfn_pde)

/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
	unsigned pt_access;
	unsigned pte_access;
	gfn_t gfn;
	u32 error_code;
};

static gfn_t gpte_to_gfn(pt_element_t gpte)
{
	return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

static gfn_t gpte_to_gfn_pde(pt_element_t gpte)
{
	return (gpte & PT_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
}

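/*
 * Atomically update a guest pte in place: map the guest page table page,
 * cmpxchg the entry from orig_pte to new_pte, and report whether the entry
 * had changed under us (in which case the caller restarts the walk).
 */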
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

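/*
 * Derive the ACC_* permission bits for a guest pte: write/user come straight
 * from the pte, and execute is allowed unless NX is enabled and the NX bit
 * is set (64-bit ptes only).
 */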
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu))
		access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}

/*
 * Fetch a guest pte for a guest virtual address
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, pte_access;
	gpa_t pte_gpa;
	int rsvd_fault = 0;

	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
				     fetch_fault);
walk:
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!is_present_gpte(pte))
			goto not_present;
		--walker->level;
	}
#endif
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		table_gfn = gpte_to_gfn(pte);
		pte_gpa = gfn_to_gpa(table_gfn);
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;

		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
		trace_kvm_mmu_paging_element(pte, walker->level);

		if (!is_present_gpte(pte))
			goto not_present;

		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
		if (rsvd_fault)
			goto access_error;

		if (write_fault && !is_writeble_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

		if (user_fault && !(pte & PT_USER_MASK))
			goto access_error;

#if PTTYPE == 64
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
			goto access_error;
#endif

		if (!(pte & PT_ACCESSED_MASK)) {
			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
						       sizeof(pte));
			mark_page_dirty(vcpu->kvm, table_gfn);
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if (walker->level == PT_PAGE_TABLE_LEVEL) {
			walker->gfn = gpte_to_gfn(pte);
			break;
		}

		if (walker->level == PT_DIRECTORY_LEVEL
		    && (pte & PT_PAGE_SIZE_MASK)
		    && (PTTYPE == 64 || is_pse(vcpu))) {
			walker->gfn = gpte_to_gfn_pde(pte);
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
			if (PTTYPE == 32 && is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);
			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

	if (write_fault && !is_dirty_gpte(pte)) {
		bool ret;

		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
		mark_page_dirty(vcpu->kvm, table_gfn);
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
		pte |= PT_DIRTY_MASK;
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pt_access, pte_access);
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;
	trace_kvm_mmu_walker_error(walker->error_code);
	return 0;
}

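/*
 * Propagate a guest pte write into the corresponding shadow pte, using the
 * gfn, pfn and largepage state cached in vcpu->arch.update_pte by the
 * pte-write path.
 */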
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
			      u64 *spte, const void *pte)
{
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	int largepage = vcpu->arch.update_pte.largepage;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_gpte(gpte))
			__set_spte(spte, shadow_notrap_nonpresent_pte);
		return;
	}
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
		return;
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
	kvm_get_pfn(pfn);
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
		     gpte & PT_DIRTY_MASK, NULL, largepage,
		     gpte_to_gfn(gpte), pfn, true);
}

/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int largepage,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *shadow_page;
	u64 spte, *sptep = NULL;
	int direct;
	gfn_t table_gfn;
	int r;
	int level;
	pt_element_t curr_pte;
	struct kvm_shadow_walk_iterator iterator;

	if (!is_present_gpte(gw->ptes[gw->level - 1]))
		return NULL;

	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
		if (level == PT_PAGE_TABLE_LEVEL
		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
				     ptwrite, largepage,
				     gw->gfn, pfn, false);
			break;
		}

		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
			continue;

		if (is_large_pte(*sptep)) {
			rmap_remove(vcpu->kvm, sptep);
			__set_spte(sptep, shadow_trap_nonpresent_pte);
			kvm_flush_remote_tlbs(vcpu->kvm);
		}

		if (level == PT_DIRECTORY_LEVEL
		    && gw->level == PT_DIRECTORY_LEVEL) {
			direct = 1;
			if (!is_dirty_gpte(gw->ptes[level - 1]))
				access &= ~ACC_WRITE_MASK;
			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
		} else {
			direct = 0;
			table_gfn = gw->table_gfn[level - 2];
		}
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
					       direct, access, sptep);
		if (!direct) {
			r = kvm_read_guest_atomic(vcpu->kvm,
						  gw->pte_gpa[level - 2],
						  &curr_pte, sizeof(curr_pte));
			if (r || curr_pte != gw->ptes[level - 2]) {
				kvm_mmu_put_page(shadow_page, sptep);
				kvm_release_pfn_clean(pfn);
				sptep = NULL;
				break;
			}
		}

		spte = __pa(shadow_page->spt)
			| PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
		*sptep = spte;
	}

	return sptep;
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *sptep;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int largepage = 0;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the guest pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	if (walker.level == PT_DIRECTORY_LEVEL) {
		gfn_t large_gfn;
		large_gfn = walker.gfn &
			    ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
		if (mapping_level(vcpu, large_gfn) == PT_DIRECTORY_LEVEL) {
			walker.gfn = large_gfn;
			largepage = 1;
		}
	}
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn)) {
		pgprintk("gfn %lx is mmio\n", walker.gfn);
		kvm_release_pfn_clean(pfn);
		return 1;
	}

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
			     largepage, &write_pt, pfn);

	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 sptep, *sptep, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

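/*
 * Handle a guest invlpg: drop the shadow pte that maps gva, flush remote
 * TLBs if a mapping was actually removed, and re-read the guest pte so a
 * still-valid translation can be re-instantiated via kvm_mmu_pte_write().
 */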
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	struct kvm_shadow_walk_iterator iterator;
	pt_element_t gpte;
	gpa_t pte_gpa = -1;
	int level;
	u64 *sptep;
	int need_flush = 0;

	spin_lock(&vcpu->kvm->mmu_lock);

	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		/* FIXME: properly handle invlpg on large guest pages */
		if (level == PT_PAGE_TABLE_LEVEL ||
		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
			struct kvm_mmu_page *sp = page_header(__pa(sptep));

			pte_gpa = (sp->gfn << PAGE_SHIFT);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				rmap_remove(vcpu->kvm, sptep);
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
				need_flush = 1;
			}
			__set_spte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

		if (!is_shadow_present_pte(*sptep))
			break;
	}

	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);
	spin_unlock(&vcpu->kvm->mmu_lock);

	if (pte_gpa == -1)
		return;
	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
				  sizeof(pt_element_t)))
		return;
	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
		if (mmu_topup_memory_caches(vcpu))
			return;
		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
				  sizeof(pt_element_t), 0);
	}
}

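/*
 * Translate a guest virtual address to a guest physical address by walking
 * the guest page tables; returns UNMAPPED_GVA if the address is not mapped.
 */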
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	}

	return gpa;
}

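/*
 * Pre-fill a shadow page's sptes from its guest page table: entries whose
 * guest pte is not present become shadow_notrap_nonpresent_pte, everything
 * else (including read failures) stays shadow_trap_nonpresent_pte.
 */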
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;

	if (sp->role.direct
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
		pte_gpa += offset * sizeof(pt_element_t);
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_gpte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
	}
}

/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	int i, offset, nr_present;

	offset = nr_present = 0;

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn = sp->gfns[i];

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = gfn_to_gpa(sp->gfn);
		pte_gpa += (i+offset) * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
			if (is_present_gpte(gpte))
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			__set_spte(&sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_gpte(gpte), 0, gfn,
			 spte_to_pfn(sp->spt[i]), true, false);
	}

	return !nr_present;
}

#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_pde
#undef CMPXCHG