/* paging_tmpl.h */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */

/*
 * Map the width-neutral names used throughout this template onto the
 * 64-bit or 32-bit guest pte variants, as selected by PTTYPE.
 */
#if PTTYPE == 64

/* entry type, walker structure name and function-name prefix */
#define pt_element_t u64
#define guest_walker guest_walker64
#define FNAME(name) paging##64_##name

/* address masks and index helpers */
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS

/* walk depth and the compare-and-exchange primitive for a 64-bit entry */
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define CMPXCHG cmpxchg
#else /* built without CONFIG_X86_64 */
#define PT_MAX_FULL_LEVELS 2
#define CMPXCHG cmpxchg64
#endif

#elif PTTYPE == 32

/* entry type, walker structure name and function-name prefix */
#define pt_element_t u32
#define guest_walker guest_walker32
#define FNAME(name) paging##32_##name

/* address masks and index helpers */
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS

/* walk depth and the compare-and-exchange primitive for a 32-bit entry */
#define PT_MAX_FULL_LEVELS 2
#define CMPXCHG cmpxchg

#else
#error Invalid PTTYPE value
#endif

/*
 * gpte_to_gfn_lvl() is instantiated per PTTYPE through FNAME();
 * gpte_to_gfn() is the same lookup fixed at PT_PAGE_TABLE_LEVEL.
 */
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)

A
Avi Kivity 已提交
61 62 63 64 65 66
/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
67
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
68 69
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
70 71
	unsigned pt_access;
	unsigned pte_access;
72
	gfn_t gfn;
73
	u32 error_code;
A
Avi Kivity 已提交
74 75
};

76
static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
77
{
78
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
79 80
}

81 82 83 84 85 86 87 88 89
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);
90

91 92 93 94 95 96 97 98 99
	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

100 101 102 103 104 105 106 107 108 109 110 111
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu))
		access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}

112 113 114
/*
 * Fetch a guest pte for a guest virtual address
 */
115 116
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
117
			    int write_fault, int user_fault, int fetch_fault)
A
Avi Kivity 已提交
118
{
119
	pt_element_t pte;
120
	gfn_t table_gfn;
121
	unsigned index, pt_access, pte_access;
122
	gpa_t pte_gpa;
123
	int rsvd_fault = 0;
A
Avi Kivity 已提交
124

125 126
	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
				     fetch_fault);
127
walk:
128 129
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
130 131
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
A
Avi Kivity 已提交
132
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
133
		trace_kvm_mmu_paging_element(pte, walker->level);
134
		if (!is_present_gpte(pte))
135
			goto not_present;
136 137 138
		--walker->level;
	}
#endif
A
Avi Kivity 已提交
139
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
140
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
A
Avi Kivity 已提交
141

142
	pt_access = ACC_ALL;
143 144

	for (;;) {
145
		index = PT_INDEX(addr, walker->level);
146

147
		table_gfn = gpte_to_gfn(pte);
A
Avi Kivity 已提交
148
		pte_gpa = gfn_to_gpa(table_gfn);
149
		pte_gpa += index * sizeof(pt_element_t);
150
		walker->table_gfn[walker->level - 1] = table_gfn;
151
		walker->pte_gpa[walker->level - 1] = pte_gpa;
152

153
		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
154
		trace_kvm_mmu_paging_element(pte, walker->level);
155

156
		if (!is_present_gpte(pte))
157 158
			goto not_present;

159 160 161 162
		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
		if (rsvd_fault)
			goto access_error;

163
		if (write_fault && !is_writeble_pte(pte))
164 165 166
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

167
		if (user_fault && !(pte & PT_USER_MASK))
168 169
			goto access_error;

170
#if PTTYPE == 64
171
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
172 173 174
			goto access_error;
#endif

175
		if (!(pte & PT_ACCESSED_MASK)) {
176 177
			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
						       sizeof(pte));
178
			mark_page_dirty(vcpu->kvm, table_gfn);
179 180 181
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
182
			pte |= PT_ACCESSED_MASK;
183
		}
184

185
		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
186

187 188
		walker->ptes[walker->level - 1] = pte;

189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
		    ((walker->level == PT_DIRECTORY_LEVEL) &&
				(pte & PT_PAGE_SIZE_MASK)  &&
				(PTTYPE == 64 || is_pse(vcpu))) ||
		    ((walker->level == PT_PDPE_LEVEL) &&
				(pte & PT_PAGE_SIZE_MASK)  &&
				is_long_mode(vcpu))) {
			int lvl = walker->level;

			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
					>> PAGE_SHIFT;

			if (PTTYPE == 32 &&
			    walker->level == PT_DIRECTORY_LEVEL &&
			    is_cpuid_PSE36())
205
				walker->gfn += pse36_gfn_delta(pte);
206

207
			break;
208
		}
209

210
		pt_access = pte_access;
211 212
		--walker->level;
	}
213

214
	if (write_fault && !is_dirty_gpte(pte)) {
215 216
		bool ret;

217
		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
218
		mark_page_dirty(vcpu->kvm, table_gfn);
219 220 221 222
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
223
		pte |= PT_DIRTY_MASK;
224
		walker->ptes[walker->level - 1] = pte;
225 226
	}

227 228 229
	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
230
		 __func__, (u64)pte, pt_access, pte_access);
231 232 233 234 235 236 237 238 239 240 241 242 243 244
	return 1;

not_present:
	walker->error_code = 0;
	goto err;

access_error:
	walker->error_code = PFERR_PRESENT_MASK;

err:
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
245 246
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
247 248
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;
249
	trace_kvm_mmu_walker_error(walker->error_code);
250
	return 0;
A
Avi Kivity 已提交
251 252
}

253
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
254
			      u64 *spte, const void *pte)
255 256
{
	pt_element_t gpte;
257
	unsigned pte_access;
258
	pfn_t pfn;
259 260

	gpte = *(const pt_element_t *)pte;
261
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
262
		if (!is_present_gpte(gpte))
A
Avi Kivity 已提交
263
			__set_spte(spte, shadow_notrap_nonpresent_pte);
264 265
		return;
	}
266
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
267
	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
268 269
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
270 271
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
272
		return;
273 274
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
275
	kvm_get_pfn(pfn);
276
	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
277
		     gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
278
		     gpte_to_gfn(gpte), pfn, true);
279 280
}

A
Avi Kivity 已提交
281 282 283
/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
284 285
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
286
			 int user_fault, int write_fault, int hlevel,
287
			 int *ptwrite, pfn_t pfn)
A
Avi Kivity 已提交
288
{
289 290
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *shadow_page;
291
	u64 spte, *sptep = NULL;
292
	int direct;
293 294
	gfn_t table_gfn;
	int r;
295
	int level;
296
	pt_element_t curr_pte;
297
	struct kvm_shadow_walk_iterator iterator;
298

299
	if (!is_present_gpte(gw->ptes[gw->level - 1]))
300
		return NULL;
A
Avi Kivity 已提交
301

302 303 304
	for_each_shadow_entry(vcpu, addr, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
305
		if (iterator.level == hlevel) {
306 307 308 309
			mmu_set_spte(vcpu, sptep, access,
				     gw->pte_access & access,
				     user_fault, write_fault,
				     gw->ptes[gw->level-1] & PT_DIRTY_MASK,
310
				     ptwrite, level,
311 312 313
				     gw->gfn, pfn, false);
			break;
		}
A
Avi Kivity 已提交
314

315 316
		if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
			continue;
317

318
		if (is_large_pte(*sptep)) {
319
			rmap_remove(vcpu->kvm, sptep);
A
Avi Kivity 已提交
320
			__set_spte(sptep, shadow_trap_nonpresent_pte);
321
			kvm_flush_remote_tlbs(vcpu->kvm);
322
		}
323

324 325
		if (level <= gw->level) {
			int delta = level - gw->level + 1;
326
			direct = 1;
327
			if (!is_dirty_gpte(gw->ptes[level - delta]))
328
				access &= ~ACC_WRITE_MASK;
329 330 331 332
			table_gfn = gpte_to_gfn(gw->ptes[level - delta]);
			/* advance table_gfn when emulating 1gb pages with 4k */
			if (delta == 0)
				table_gfn += PT_INDEX(addr, level);
333
		} else {
334
			direct = 0;
335 336 337
			table_gfn = gw->table_gfn[level - 2];
		}
		shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
338 339
					       direct, access, sptep);
		if (!direct) {
340 341 342 343 344 345 346 347 348 349
			r = kvm_read_guest_atomic(vcpu->kvm,
						  gw->pte_gpa[level - 2],
						  &curr_pte, sizeof(curr_pte));
			if (r || curr_pte != gw->ptes[level - 2]) {
				kvm_mmu_put_page(shadow_page, sptep);
				kvm_release_pfn_clean(pfn);
				sptep = NULL;
				break;
			}
		}
350

351 352 353 354 355
		spte = __pa(shadow_page->spt)
			| PT_PRESENT_MASK | PT_ACCESSED_MASK
			| PT_WRITABLE_MASK | PT_USER_MASK;
		*sptep = spte;
	}
A
Avi Kivity 已提交
356

357
	return sptep;
A
Avi Kivity 已提交
358 359 360 361 362 363 364 365 366 367 368 369 370
}

/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  @addr is the faulting guest virtual address and @error_code the page
 *  fault error code; its write/user/fetch bits select the access checks
 *  performed by the guest page table walk.
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *sptep;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int level = PT_PAGE_TABLE_LEVEL;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the guest pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	/*
	 * The guest maps this address with a large page: pick the mapping
	 * level and align the gfn down to that level's page size.
	 */
	if (walker.level >= PT_DIRECTORY_LEVEL) {
		level = min(walker.level, mapping_level(vcpu, walker.gfn));
		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
	}

	/*
	 * Sample the notifier sequence before translating the gfn so that
	 * mmu_notifier_retry() below can be checked against it.
	 */
	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn)) {
		pgprintk("gfn %lx is mmio\n", walker.gfn);
		kvm_release_pfn_clean(pfn);
		return 1;
	}

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
			     level, &write_pt, pfn);
	/* fetch() may return NULL; do not dereference it for the debug print */
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 sptep, sptep ? *sptep : 0ULL, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

451
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
M
Marcelo Tosatti 已提交
452
{
453 454 455 456 457
	struct kvm_shadow_walk_iterator iterator;
	pt_element_t gpte;
	gpa_t pte_gpa = -1;
	int level;
	u64 *sptep;
458
	int need_flush = 0;
459 460

	spin_lock(&vcpu->kvm->mmu_lock);
M
Marcelo Tosatti 已提交
461

462 463 464
	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;
465

466
		/* FIXME: properly handle invlpg on large guest pages */
467 468 469
		if (level == PT_PAGE_TABLE_LEVEL  ||
		    ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) ||
		    ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) {
470
			struct kvm_mmu_page *sp = page_header(__pa(sptep));
471

472 473 474 475 476 477 478
			pte_gpa = (sp->gfn << PAGE_SHIFT);
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				rmap_remove(vcpu->kvm, sptep);
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
479
				need_flush = 1;
480
			}
A
Avi Kivity 已提交
481
			__set_spte(sptep, shadow_trap_nonpresent_pte);
482
			break;
483
		}
M
Marcelo Tosatti 已提交
484

485 486 487
		if (!is_shadow_present_pte(*sptep))
			break;
	}
M
Marcelo Tosatti 已提交
488

489 490
	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);
491
	spin_unlock(&vcpu->kvm->mmu_lock);
492 493

	if (pte_gpa == -1)
494
		return;
495
	if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
496 497
				  sizeof(pt_element_t)))
		return;
498
	if (is_present_gpte(gpte) && (gpte & PT_ACCESSED_MASK)) {
499 500
		if (mmu_topup_memory_caches(vcpu))
			return;
501
		kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
502 503
				  sizeof(pt_element_t), 0);
	}
M
Marcelo Tosatti 已提交
504 505
}

A
Avi Kivity 已提交
506 507 508
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
	struct guest_walker walker;
A
Avi Kivity 已提交
509 510
	gpa_t gpa = UNMAPPED_GVA;
	int r;
A
Avi Kivity 已提交
511

A
Avi Kivity 已提交
512
	r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
A
Avi Kivity 已提交
513

A
Avi Kivity 已提交
514
	if (r) {
A
Avi Kivity 已提交
515
		gpa = gfn_to_gpa(walker.gfn);
A
Avi Kivity 已提交
516
		gpa |= vaddr & ~PAGE_MASK;
A
Avi Kivity 已提交
517 518 519 520 521
	}

	return gpa;
}

522 523 524
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
A
Avi Kivity 已提交
525 526 527
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;
528

529
	if (sp->role.direct
530
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
531 532 533 534
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

A
Avi Kivity 已提交
535 536
	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
537
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
A
Avi Kivity 已提交
538 539
		pte_gpa += offset * sizeof(pt_element_t);
	}
540

A
Avi Kivity 已提交
541 542 543 544
	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
545
			if (r || is_present_gpte(pt[j]))
A
Avi Kivity 已提交
546 547 548
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
549
	}
550 551
}

552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 * - Alias changes zap the entire shadow cache.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
	int i, offset, nr_present;

	offset = nr_present = 0;

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn = sp->gfns[i];

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = gfn_to_gpa(sp->gfn);
		pte_gpa += (i+offset) * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

583
		if (gpte_to_gfn(gpte) != gfn || !is_present_gpte(gpte) ||
584 585 586 587
		    !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			rmap_remove(vcpu->kvm, &sp->spt[i]);
588
			if (is_present_gpte(gpte))
589 590 591
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
A
Avi Kivity 已提交
592
			__set_spte(&sp->spt[i], nonpresent);
593 594 595 596 597 598
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
599
			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
600
			 spte_to_pfn(sp->spt[i]), true, false);
601 602 603 604 605
	}

	return !nr_present;
}

/*
 * Drop every per-PTTYPE alias defined by this template so that it can be
 * included again with a different PTTYPE.
 */
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef CMPXCHG
#undef gpte_to_gfn_lvl
#undef gpte_to_gfn