/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * We need the mmu code to access both 32-bit and 64-bit guest ptes,
 * so the code in this file is compiled twice, once per pte size.
 */
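/*
 * For example, when this file is included with PTTYPE == 64, FNAME(walk_addr)
 * expands to paging64_walk_addr() and pt_element_t to u64; with PTTYPE == 32
 * the same source yields paging32_walk_addr() operating on u32 ptes.  mmu.c
 * picks the proper variant at runtime based on the guest paging mode.
 */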

#if PTTYPE == 64
	#define pt_element_t u64
	#define guest_walker guest_walker64
	#define FNAME(name) paging##64_##name
	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT64_LEVEL_BITS
	#ifdef CONFIG_X86_64
	#define PT_MAX_FULL_LEVELS 4
	#define CMPXCHG cmpxchg
	#else
	#define CMPXCHG cmpxchg64
	#define PT_MAX_FULL_LEVELS 2
	#endif
#elif PTTYPE == 32
	#define pt_element_t u32
	#define guest_walker guest_walker32
	#define FNAME(name) paging##32_##name
	#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
	#define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl)
	#define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl)
	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
	#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
	#define PT_LEVEL_BITS PT32_LEVEL_BITS
	#define PT_MAX_FULL_LEVELS 2
	#define CMPXCHG cmpxchg
#else
	#error Invalid PTTYPE value
#endif

#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)

/*
 * The guest_walker structure emulates the behavior of the hardware page
 * table walker.
 */
struct guest_walker {
	int level;
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
	pt_element_t ptes[PT_MAX_FULL_LEVELS];
	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
	unsigned pt_access;
	unsigned pte_access;
	gfn_t gfn;
	u32 error_code;
};

static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
{
	return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}

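/*
 * Atomically update a guest pte in place (used below to set the accessed
 * and dirty bits).  Returns true if the pte was changed by the guest in
 * the meantime, in which case the caller restarts the walk.
 */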
static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
			 gfn_t table_gfn, unsigned index,
			 pt_element_t orig_pte, pt_element_t new_pte)
{
	pt_element_t ret;
	pt_element_t *table;
	struct page *page;

	page = gfn_to_page(kvm, table_gfn);

	table = kmap_atomic(page, KM_USER0);
	ret = CMPXCHG(&table[index], orig_pte, new_pte);
	kunmap_atomic(table, KM_USER0);

	kvm_release_page_dirty(page);

	return (ret != orig_pte);
}

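/*
 * Derive the ACC_* permission mask for a guest pte: write and user come
 * straight from the pte, exec is assumed and then masked off by the NX bit
 * (shifted down so that it lines up with ACC_EXEC_MASK) when the guest has
 * NX enabled.
 */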
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
{
	unsigned access;

	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
#if PTTYPE == 64
	if (is_nx(vcpu))
		access &= ~(gpte >> PT64_NX_SHIFT);
#endif
	return access;
}

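/*
 * On success the walker is filled in with the gfn, the gptes of each level
 * and the accumulated access bits, and 1 is returned; on failure 0 is
 * returned and walker->error_code holds the #PF error code to inject.
 * The walk restarts from scratch whenever an accessed/dirty bit update
 * loses a race with the guest.
 */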
/*
 * Fetch a guest pte for a guest virtual address
 */
static int FNAME(walk_addr)(struct guest_walker *walker,
			    struct kvm_vcpu *vcpu, gva_t addr,
			    int write_fault, int user_fault, int fetch_fault)
{
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index, pt_access, uninitialized_var(pte_access);
	gpa_t pte_gpa;
	bool eperm, present, rsvd_fault;

	trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault,
				     fetch_fault);
walk:
	present = true;
	eperm = rsvd_fault = false;
	walker->level = vcpu->arch.mmu.root_level;
	pte = vcpu->arch.cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3);
		trace_kvm_mmu_paging_element(pte, walker->level);
		if (!is_present_gpte(pte)) {
			present = false;
			goto error;
		}
		--walker->level;
	}
#endif
	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	pt_access = ACC_ALL;

	for (;;) {
		index = PT_INDEX(addr, walker->level);

		table_gfn = gpte_to_gfn(pte);
		pte_gpa = gfn_to_gpa(table_gfn);
		pte_gpa += index * sizeof(pt_element_t);
		walker->table_gfn[walker->level - 1] = table_gfn;
		walker->pte_gpa[walker->level - 1] = pte_gpa;

		if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) {
			present = false;
			break;
		}

		trace_kvm_mmu_paging_element(pte, walker->level);

		if (!is_present_gpte(pte)) {
			present = false;
			break;
		}

		if (is_rsvd_bits_set(vcpu, pte, walker->level)) {
			rsvd_fault = true;
			break;
		}

		if (write_fault && !is_writable_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				eperm = true;

		if (user_fault && !(pte & PT_USER_MASK))
			eperm = true;

#if PTTYPE == 64
		if (fetch_fault && (pte & PT64_NX_MASK))
			eperm = true;
#endif

		if (!eperm && !rsvd_fault && !(pte & PT_ACCESSED_MASK)) {
			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
						       sizeof(pte));
			if (FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn,
			    index, pte, pte|PT_ACCESSED_MASK))
				goto walk;
			mark_page_dirty(vcpu->kvm, table_gfn);
			pte |= PT_ACCESSED_MASK;
		}

		pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);

		walker->ptes[walker->level - 1] = pte;

		if ((walker->level == PT_PAGE_TABLE_LEVEL) ||
		    ((walker->level == PT_DIRECTORY_LEVEL) &&
				is_large_pte(pte) &&
				(PTTYPE == 64 || is_pse(vcpu))) ||
		    ((walker->level == PT_PDPE_LEVEL) &&
				is_large_pte(pte) &&
				is_long_mode(vcpu))) {
			int lvl = walker->level;

			walker->gfn = gpte_to_gfn_lvl(pte, lvl);
			walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl))
					>> PAGE_SHIFT;

			if (PTTYPE == 32 &&
			    walker->level == PT_DIRECTORY_LEVEL &&
			    is_cpuid_PSE36())
				walker->gfn += pse36_gfn_delta(pte);

			break;
		}

		pt_access = pte_access;
		--walker->level;
	}

	if (!present || eperm || rsvd_fault)
		goto error;

	if (write_fault && !is_dirty_gpte(pte)) {
		bool ret;

		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
		ret = FNAME(cmpxchg_gpte)(vcpu->kvm, table_gfn, index, pte,
			    pte|PT_DIRTY_MASK);
		if (ret)
			goto walk;
		mark_page_dirty(vcpu->kvm, table_gfn);
		pte |= PT_DIRTY_MASK;
		walker->ptes[walker->level - 1] = pte;
	}

	walker->pt_access = pt_access;
	walker->pte_access = pte_access;
	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
		 __func__, (u64)pte, pte_access, pt_access);
	return 1;

error:
	walker->error_code = 0;
	if (present)
		walker->error_code |= PFERR_PRESENT_MASK;
	if (write_fault)
		walker->error_code |= PFERR_WRITE_MASK;
	if (user_fault)
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault && is_nx(vcpu))
		walker->error_code |= PFERR_FETCH_MASK;
	if (rsvd_fault)
		walker->error_code |= PFERR_RSVD_MASK;
	trace_kvm_mmu_walker_error(walker->error_code);
	return 0;
}

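/*
 * Speculatively map the page a written gpte points at, invoked from the
 * pte-write path.  The pfn was looked up ahead of time and stashed in
 * vcpu->arch.update_pte; if the gpte does not match it, or the mmu notifier
 * has run since, the update is simply skipped.
 */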
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			      u64 *spte, const void *pte)
{
	pt_element_t gpte;
	unsigned pte_access;
	pfn_t pfn;
	u64 new_spte;

	gpte = *(const pt_element_t *)pte;
	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
		if (!is_present_gpte(gpte)) {
			if (sp->unsync)
				new_spte = shadow_trap_nonpresent_pte;
			else
				new_spte = shadow_notrap_nonpresent_pte;
			__set_spte(spte, new_spte);
		}
		return;
	}
	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
		return;
	pfn = vcpu->arch.update_pte.pfn;
	if (is_error_pfn(pfn))
		return;
	if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
		return;
	kvm_get_pfn(pfn);
	/*
	 * we call mmu_set_spte() with reset_host_protection = true because
	 * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
	 */
	mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
		     is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL,
		     gpte_to_gfn(gpte), pfn, true, true);
}

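/*
 * Re-read a gpte recorded during the walk and report whether it has changed
 * (or can no longer be read), so the caller can bail out instead of building
 * shadow entries on top of stale guest data.
 */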
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
				struct guest_walker *gw, int level)
{
	int r;
	pt_element_t curr_pte;

	r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1],
				  &curr_pte, sizeof(curr_pte));
	return r || curr_pte != gw->ptes[level - 1];
}

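/*
 * In outline: the first loop below walks the shadow hierarchy level by level
 * alongside the guest levels, creating write-protected indirect shadow pages
 * as needed; the second loop covers the levels below the guest's last level
 * (when the guest uses a large page but the host maps it with smaller pages)
 * with direct shadow pages; finally the leaf spte is set.
 */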
/*
 * Fetch a shadow pte for a specific level in the paging hierarchy.
 */
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
			 struct guest_walker *gw,
			 int user_fault, int write_fault, int hlevel,
			 int *ptwrite, pfn_t pfn)
{
	unsigned access = gw->pt_access;
	struct kvm_mmu_page *sp = NULL;
	u64 *sptep = NULL;
	int uninitialized_var(level);
	bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]);
	int top_level;
	unsigned direct_access;
	struct kvm_shadow_walk_iterator iterator;

	if (!is_present_gpte(gw->ptes[gw->level - 1]))
		return NULL;

	direct_access = gw->pt_access & gw->pte_access;
	if (!dirty)
		direct_access &= ~ACC_WRITE_MASK;

	top_level = vcpu->arch.mmu.root_level;
	if (top_level == PT32E_ROOT_LEVEL)
		top_level = PT32_ROOT_LEVEL;
	/*
	 * Verify that the top-level gpte is still there.  Since the page
	 * is a root page, it is either write protected (and cannot be
	 * changed from now on) or it is invalid (in which case, we don't
	 * really care if it changes underneath us after this point).
	 */
	if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;

	for (shadow_walk_init(&iterator, vcpu, addr);
	     shadow_walk_okay(&iterator) && iterator.level > gw->level;
	     shadow_walk_next(&iterator)) {
		gfn_t table_gfn;

		level = iterator.level;
		sptep = iterator.sptep;

		drop_large_spte(vcpu, sptep);

		sp = NULL;
		if (!is_shadow_present_pte(*sptep)) {
			table_gfn = gw->table_gfn[level - 2];
			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
					      false, access, sptep);
		}

		/*
		 * Verify that the gpte in the page we've just write
		 * protected is still there.
		 */
		if (FNAME(gpte_changed)(vcpu, gw, level - 1))
			goto out_gpte_changed;

		if (sp)
			link_shadow_page(sptep, sp);
	}

	for (;
	     shadow_walk_okay(&iterator) && iterator.level > hlevel;
	     shadow_walk_next(&iterator)) {
		gfn_t direct_gfn;

		level = iterator.level;
		sptep = iterator.sptep;

		validate_direct_spte(vcpu, sptep, direct_access);

		drop_large_spte(vcpu, sptep);

		if (is_shadow_present_pte(*sptep))
			continue;

		direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);

		sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, level-1,
				      true, direct_access, sptep);
		link_shadow_page(sptep, sp);
	}

	sptep = iterator.sptep;
	level = iterator.level;

	mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
		     user_fault, write_fault, dirty, ptwrite, level,
		     gw->gfn, pfn, false, true);

	return sptep;

out_gpte_changed:
	if (sp)
		kvm_mmu_put_page(sp, sptep);
	kvm_release_pfn_clean(pfn);
	return NULL;
}

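/*
 * Note on ordering in the fault path below: the guest walk and gfn_to_pfn()
 * run outside mmu_lock; mmu_notifier_seq is sampled (with a read barrier)
 * before the pfn lookup and re-checked under mmu_lock via
 * mmu_notifier_retry(), so a page invalidated in the window is never mapped
 * into the shadow page tables.
 */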
/*
 * Page fault handler.  There are several causes for a page fault:
 *   - there is no shadow pte for the guest pte
 *   - write access through a shadow pte marked read only so that we can set
 *     the dirty bit
 *   - write access to a shadow pte marked read only so we can update the page
 *     dirty bitmap, when userspace requests it
 *   - mmio access; in this case we will never install a present shadow pte
 *   - normal guest page fault due to the guest pte marked not present, not
 *     writable, or not executable
 *
 *  Returns: 1 if we need to emulate the instruction, 0 otherwise, or
 *           a negative value on error.
 */
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
			       u32 error_code)
{
	int write_fault = error_code & PFERR_WRITE_MASK;
	int user_fault = error_code & PFERR_USER_MASK;
	int fetch_fault = error_code & PFERR_FETCH_MASK;
	struct guest_walker walker;
	u64 *sptep;
	int write_pt = 0;
	int r;
	pfn_t pfn;
	int level = PT_PAGE_TABLE_LEVEL;
	unsigned long mmu_seq;

	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
	kvm_mmu_audit(vcpu, "pre page fault");

	r = mmu_topup_memory_caches(vcpu);
	if (r)
		return r;

	/*
	 * Look up the guest pte for the faulting address.
	 */
	r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
			     fetch_fault);

	/*
	 * The page is not mapped by the guest.  Let the guest handle it.
	 */
	if (!r) {
		pgprintk("%s: guest page fault\n", __func__);
		inject_page_fault(vcpu, addr, walker.error_code);
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
		return 0;
	}

	if (walker.level >= PT_DIRECTORY_LEVEL) {
		level = min(walker.level, mapping_level(vcpu, walker.gfn));
		walker.gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE(level) - 1);
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	smp_rmb();
	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);

	/* mmio */
	if (is_error_pfn(pfn))
		return kvm_handle_bad_page(vcpu->kvm, walker.gfn, pfn);

	spin_lock(&vcpu->kvm->mmu_lock);
	if (mmu_notifier_retry(vcpu, mmu_seq))
		goto out_unlock;
	kvm_mmu_free_some_pages(vcpu);
	sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
			     level, &write_pt, pfn);
	(void)sptep;
	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
		 sptep, *sptep, write_pt);

	if (!write_pt)
		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */

	++vcpu->stat.pf_fixed;
	kvm_mmu_audit(vcpu, "post page fault (fixed)");
	spin_unlock(&vcpu->kvm->mmu_lock);

	return write_pt;

out_unlock:
	spin_unlock(&vcpu->kvm->mmu_lock);
	kvm_release_pfn_clean(pfn);
	return 0;
}

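/*
 * Handle a guest INVLPG: walk the shadow page tables for gva and, if the
 * leaf spte belongs to an unsync shadow page, zap it (flushing remote TLBs
 * if it was present) and re-read the guest pte through kvm_mmu_pte_write()
 * so the mapping can be rebuilt from fresh data.
 */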
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
	struct kvm_shadow_walk_iterator iterator;
	struct kvm_mmu_page *sp;
	gpa_t pte_gpa = -1;
	int level;
	u64 *sptep;
	int need_flush = 0;

	spin_lock(&vcpu->kvm->mmu_lock);

	for_each_shadow_entry(vcpu, gva, iterator) {
		level = iterator.level;
		sptep = iterator.sptep;

		sp = page_header(__pa(sptep));
		if (is_last_spte(*sptep, level)) {
			int offset, shift;

			if (!sp->unsync)
				break;

			shift = PAGE_SHIFT -
				  (PT_LEVEL_BITS - PT64_LEVEL_BITS) * level;
			offset = sp->role.quadrant << shift;

			pte_gpa = (sp->gfn << PAGE_SHIFT) + offset;
			pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

			if (is_shadow_present_pte(*sptep)) {
				if (is_large_pte(*sptep))
					--vcpu->kvm->stat.lpages;
				drop_spte(vcpu->kvm, sptep,
					  shadow_trap_nonpresent_pte);
				need_flush = 1;
			} else
				__set_spte(sptep, shadow_trap_nonpresent_pte);
			break;
		}

		if (!is_shadow_present_pte(*sptep) || !sp->unsync_children)
			break;
	}

	if (need_flush)
		kvm_flush_remote_tlbs(vcpu->kvm);

	atomic_inc(&vcpu->kvm->arch.invlpg_counter);

	spin_unlock(&vcpu->kvm->mmu_lock);

	if (pte_gpa == -1)
		return;

	if (mmu_topup_memory_caches(vcpu))
		return;
	kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0);
}

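/*
 * Translate a guest virtual address to a guest physical address by running
 * the software walker with the requested access type (e.g. for instruction
 * emulation).  On failure UNMAPPED_GVA is returned and, if @error is
 * non-NULL, the #PF error code is stored there.
 */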
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
			       u32 *error)
{
	struct guest_walker walker;
	gpa_t gpa = UNMAPPED_GVA;
	int r;

	r = FNAME(walk_addr)(&walker, vcpu, vaddr,
			     !!(access & PFERR_WRITE_MASK),
			     !!(access & PFERR_USER_MASK),
			     !!(access & PFERR_FETCH_MASK));

	if (r) {
		gpa = gfn_to_gpa(walker.gfn);
		gpa |= vaddr & ~PAGE_MASK;
	} else if (error)
		*error = walker.error_code;

	return gpa;
}

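/*
 * Pre-fill a newly shadowed page table: entries whose guest pte is not
 * present get shadow_notrap_nonpresent_pte, everything else (including
 * entries we failed to read) gets shadow_trap_nonpresent_pte so the first
 * access takes the full page fault path.
 */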
static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
				 struct kvm_mmu_page *sp)
{
	int i, j, offset, r;
	pt_element_t pt[256 / sizeof(pt_element_t)];
	gpa_t pte_gpa;

	if (sp->role.direct
	    || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
		nonpaging_prefetch_page(vcpu, sp);
		return;
	}

	pte_gpa = gfn_to_gpa(sp->gfn);
	if (PTTYPE == 32) {
		offset = sp->role.quadrant << PT64_LEVEL_BITS;
		pte_gpa += offset * sizeof(pt_element_t);
	}

	for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) {
		r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt);
		pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t);
		for (j = 0; j < ARRAY_SIZE(pt); ++j)
			if (r || is_present_gpte(pt[j]))
				sp->spt[i+j] = shadow_trap_nonpresent_pte;
			else
				sp->spt[i+j] = shadow_notrap_nonpresent_pte;
	}
}

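/*
 * Resynchronize an unsync shadow page with the guest page table it shadows:
 * each present spte is compared against a fresh copy of its gpte, dropped if
 * the gpte is gone, not accessed, or now points at a different gfn, and
 * otherwise re-created with the current permissions via set_spte().
 */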
/*
 * Using the cached information from sp->gfns is safe because:
 * - The spte has a reference to the struct page, so the pfn for a given gfn
 *   can't change unless all sptes pointing to it are nuked first.
 */
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    bool clear_unsync)
{
	int i, offset, nr_present;
	bool reset_host_protection;
	gpa_t first_pte_gpa;

	offset = nr_present = 0;

	/* direct kvm_mmu_page can not be unsync. */
	BUG_ON(sp->role.direct);

	if (PTTYPE == 32)
		offset = sp->role.quadrant << PT64_LEVEL_BITS;

	first_pte_gpa = gfn_to_gpa(sp->gfn) + offset * sizeof(pt_element_t);

	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
		unsigned pte_access;
		pt_element_t gpte;
		gpa_t pte_gpa;
		gfn_t gfn;

		if (!is_shadow_present_pte(sp->spt[i]))
			continue;

		pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);

		if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
					  sizeof(pt_element_t)))
			return -EINVAL;

		gfn = gpte_to_gfn(gpte);
		if (gfn != sp->gfns[i] ||
		      !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) {
			u64 nonpresent;

			if (is_present_gpte(gpte) || !clear_unsync)
				nonpresent = shadow_trap_nonpresent_pte;
			else
				nonpresent = shadow_notrap_nonpresent_pte;
			drop_spte(vcpu->kvm, &sp->spt[i], nonpresent);
			continue;
		}

		nr_present++;
		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
		if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
			pte_access &= ~ACC_WRITE_MASK;
			reset_host_protection = 0;
		} else {
			reset_host_protection = 1;
		}
		set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
			 is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
			 spte_to_pfn(sp->spt[i]), true, false,
			 reset_host_protection);
	}

	return !nr_present;
}

#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_LVL_ADDR_MASK
#undef PT_LVL_OFFSET_MASK
#undef PT_LEVEL_BITS
#undef PT_MAX_FULL_LEVELS
#undef gpte_to_gfn
#undef gpte_to_gfn_lvl
#undef CMPXCHG