/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */

#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)

/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
	unsigned pt_access;
	unsigned pte_access;
	gfn_t gfn;
	u32 error_code;
};

static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}

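/*
 * Atomically update a guest pte in place: map the guest page table page,
 * cmpxchg the entry, and report whether the pte changed under us (in which
 * case callers restart the walk).
 */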
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

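/*
 * Extract the ACC_* permission bits granted by a guest pte: write and user
 * come straight from the pte, and execute permission is removed when NX is
 * enabled and the pte's NX bit is set.
 */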
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu))
		access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}

/*
 * Fetch a guest pte for a guest virtual address
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, pte_access;
	gpa_t pte_gpa;
	int rsvd_fault = 0;

	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
				     fetch_fault);
walk:
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!is_present_gpte(pte))
			goto not_present;
		--walker->level;
	}
#endif
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		table_gfn = gpte_to_gfn(pte);
		pte_gpa = gfn_to_gpa(table_gfn);
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;

		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
		trace_kvm_mmu_paging_element(pte, walker->level);

		if (!is_present_gpte(pte))
			goto not_present;

		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
		if (rsvd_fault)
			goto access_error;

		if (write_fault && !is_writeble_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

		if (user_fault && !(pte & PT_USER_MASK))
			goto access_error;

#if PTTYPE == 64
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
			goto access_error;
#endif

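		/*
		 * Set the accessed bit in the guest pte if it is clear; the
		 * update must be atomic (cmpxchg) because the guest may be
		 * modifying its page tables concurrently, and a lost race
		 * restarts the whole walk.
		 */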
		if (!(pte & PT_ACCESSED_MASK)) {
			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
						       sizeof(pte));
			mark_page_dirty(vcpu->kvm, table_gfn);
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
		    ((walker->level == PT_DIRECTORY_LEVEL) &&
				(pte & PT_PAGE_SIZE_MASK)  &&
				(PTTYPE == 64 || is_pse(vcpu))) ||
		    ((walker->level == PT_PDPE_LEVEL) &&
				(pte & PT_PAGE_SIZE_MASK)  &&
				is_long_mode(vcpu))) {
			int lvl = walker->level;

			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
					>> PAGE_SHIFT;

			if (PTTYPE == 32 &&
			    walker->level == PT_DIRECTORY_LEVEL &&
			    is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);

			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

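	/*
	 * The walk found a valid mapping; on a write fault, also set the
	 * dirty bit in the leaf gpte (again atomically, restarting the walk
	 * if the gpte changed under us).
	 */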
	if (write_fault && !is_dirty_gpte(pte)) {
		bool ret;

		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
		mark_page_dirty(vcpu->kvm, table_gfn);
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
		pte |= PT_DIRTY_MASK;
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pt_access, pte_access);
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;
	trace_kvm_mmu_walker_error(walker->error_code);
	return 0;
}

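/*
 * Propagate a trapped guest pte write into the corresponding shadow pte.
 * The pfn for the new gpte has already been looked up and cached in
 * vcpu->arch.update_pte by the pte-write path.
 */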
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
			      u64 *spte, const void *pte)
{
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	int level = vcpu->arch.update_pte.level;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_gpte(gpte))
			__set_spte(spte, shadow_notrap_nonpresent_pte);
		return;
	}
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
		return;
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
	kvm_get_pfn(pfn);
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
		     gpte & PT_DIRTY_MASK, NULL, level,
		     gpte_to_gfn(gpte), pfn, true);
}

/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int largepage,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *shadow_page;
	u64 spte, *sptep = NULL;
	int direct;
	gfn_t table_gfn;
	int r;
	int level;
	pt_element_t curr_pte;
	struct kvm_shadow_walk_iterator iterator;

	if (!is_present_gpte(gw->ptes[gw->level - 1]))
		return NULL;

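	/*
	 * Walk the shadow page table alongside the completed guest walk,
	 * filling in any missing intermediate shadow pages, until the leaf
	 * spte for this translation can be installed.
	 */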
	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
		if (level == PT_PAGE_TABLE_LEVEL
		    || (largepage && level == PT_DIRECTORY_LEVEL)) {
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
				     ptwrite, level,
				     gw->gfn, pfn, false);
			break;
		}

		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
			continue;

		if (is_large_pte(*sptep)) {
			rmap_remove(vcpu->kvm, sptep);
			__set_spte(sptep, shadow_trap_nonpresent_pte);
			kvm_flush_remote_tlbs(vcpu->kvm);
		}

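		/*
		 * A shadow page is missing at this level.  A large guest page
		 * shadowed by 4K sptes gets a direct-mapped shadow page;
		 * otherwise the shadow page mirrors a guest page table, so
		 * re-check that the gpte we walked is still current before
		 * linking it in.
		 */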
		if (level == PT_DIRECTORY_LEVEL
		    && gw->level == PT_DIRECTORY_LEVEL) {
			direct = 1;
			if (!is_dirty_gpte(gw->ptes[level - 1]))
				access &= ~ACC_WRITE_MASK;
			table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
		} else {
			direct = 0;
			table_gfn = gw->table_gfn[level - 2];
		}
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
					       direct, access, sptep);
		if (!direct) {
			r = kvm_read_guest_atomic(vcpu->kvm,
						  gw->pte_gpa[level - 2],
						  &curr_pte, sizeof(curr_pte));
			if (r || curr_pte != gw->ptes[level - 2]) {
				kvm_mmu_put_page(shadow_page, sptep);
				kvm_release_pfn_clean(pfn);
				sptep = NULL;
				break;
			}
		}

		spte = __pa(shadow_page->spt)
			| PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
		*sptep = spte;
	}

	return sptep;
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *sptep;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int largepage = 0;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the guest pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

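	/*
	 * If the guest maps this address with a large page and the backing
	 * host memory can also be mapped large, map at the directory level.
	 */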
	if (walker.level == PT_DIRECTORY_LEVEL) {
		gfn_t large_gfn;
		large_gfn = walker.gfn &
			    ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
		if (mapping_level(vcpu, large_gfn) == PT_DIRECTORY_LEVEL) {
			walker.gfn = large_gfn;
			largepage = 1;
		}
	}
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn)) {
		pgprintk("gfn %lx is mmio\n", walker.gfn);
		kvm_release_pfn_clean(pfn);
		return 1;
	}

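	/*
	 * If an mmu notifier invalidation ran after mmu_seq was sampled, the
	 * pfn looked up above may already be stale; drop the fault and let
	 * the guest retry.
	 */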
	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
			     largepage, &write_pt, pfn);

	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 sptep, *sptep, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

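/*
 * Handle a guest invlpg: zap the shadow pte that maps gva, then re-read the
 * guest pte and, if it is present and accessed, feed it back through
 * kvm_mmu_pte_write() so the shadow stays in sync.
 */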
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	struct kvm_shadow_walk_iterator iterator;
	pt_element_t gpte;
	gpa_t pte_gpa = -1;
	int level;
	u64 *sptep;
	int need_flush = 0;

	spin_lock(&vcpu->kvm->mmu_lock);

	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		/* FIXME: properly handle invlpg on large guest pages */
		if (level == PT_PAGE_TABLE_LEVEL ||
		    ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
			struct kvm_mmu_page *sp = page_header(__pa(sptep));

			pte_gpa = (sp->gfn << PAGE_SHIFT);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				rmap_remove(vcpu->kvm, sptep);
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
				need_flush = 1;
			}
			__set_spte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

		if (!is_shadow_present_pte(*sptep))
			break;
	}

	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);
	spin_unlock(&vcpu->kvm->mmu_lock);

	if (pte_gpa == -1)
		return;
	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
				  sizeof(pt_element_t)))
		return;
	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
		if (mmu_topup_memory_caches(vcpu))
			return;
		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
				  sizeof(pt_element_t), 0);
	}
}

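/*
 * Translate a guest virtual address to a guest physical address by walking
 * the guest page tables; returns UNMAPPED_GVA on failure.
 */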
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	}

	return gpa;
}

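/*
 * Pre-fill the sptes of a newly shadowed guest page table: entries whose
 * guest ptes are not present are marked notrap so faults on them can be
 * reflected directly to the guest; everything else keeps trapping.
 */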
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;

	if (sp->role.direct
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
		pte_gpa += offset * sizeof(pt_element_t);
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_gpte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
	}
}

/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	int i, offset, nr_present;

	offset = nr_present = 0;

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn = sp->gfns[i];

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = gfn_to_gpa(sp->gfn);
		pte_gpa += (i+offset) * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
			if (is_present_gpte(gpte))
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			__set_spte(&sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_gpte(gpte), 0, gfn,
			 spte_to_pfn(sp->spt[i]), true, false);
	}

	return !nr_present;
}

#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_lvl
#undef CMPXCHG