svm.c 84.2 KB
Newer Older
A
Avi Kivity 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
16 17
#include <linux/kvm_host.h>

18
#include "irq.h"
19
#include "mmu.h"
20
#include "kvm_cache_regs.h"
21
#include "x86.h"
A
Avi Kivity 已提交
22

A
Avi Kivity 已提交
23
#include <linux/module.h>
24
#include <linux/kernel.h>
A
Avi Kivity 已提交
25 26
#include <linux/vmalloc.h>
#include <linux/highmem.h>
A
Alexey Dobriyan 已提交
27
#include <linux/sched.h>
28
#include <linux/ftrace_event.h>
29
#include <linux/slab.h>
A
Avi Kivity 已提交
30

A
Avi Kivity 已提交
31
#include <asm/desc.h>
A
Avi Kivity 已提交
32

33
#include <asm/virtext.h>
34
#include "trace.h"
35

36 37
#define __ex(x) __kvm_handle_fault_on_reboot(x)

A
Avi Kivity 已提交
38 39 40 41 42 43 44 45 46
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

47 48
#define SVM_FEATURE_NPT  (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
A
Amit Shah 已提交
49
#define SVM_FEATURE_SVML (1 << 2)
50
#define SVM_FEATURE_NRIP (1 << 3)
51
#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
52

53 54 55 56
#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

57 58
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

A
Avi Kivity 已提交
59 60 61 62 63 64 65 66 67 68 69 70
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_vcpu;

71 72 73
/* Per-vcpu state for emulating nested SVM (running an L2 guest). */
struct nested_state {
	struct vmcb *hsave;	/* host-state save area used while L2 runs */
	u64 hsave_msr;		/* guest-visible MSR_VM_HSAVE_PA value — presumably; confirm against MSR handlers */
	u64 vm_cr_msr;		/* guest-visible MSR_VM_CR value */
	u64 vmcb;		/* active nested vmcb; 0 when no L2 is active (see is_nested()) */

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u16 intercept_cr_read;
	u16 intercept_cr_write;
	u16 intercept_dr_read;
	u16 intercept_dr_write;
	u32 intercept_exceptions;
	u64 intercept;

};

96 97 98
#define MSRPM_OFFSETS	16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

A
Avi Kivity 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
/* SVM-specific vcpu container; to_svm() recovers it from the embedded vcpu. */
struct vcpu_svm {
	struct kvm_vcpu vcpu;		/* generic KVM vcpu state (embedded) */
	struct vmcb *vmcb;		/* VMCB, host virtual address */
	unsigned long vmcb_pa;		/* VMCB physical address (VMRUN operand) */
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;	/* compared with per-cpu generation to decide ASID reuse */
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;

	u64 next_rip;			/* rip past the current instruction, 0 if unknown */

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];	/* host MSRs saved in vcpu_load, restored in vcpu_put */
	u64 host_gs_base;

	u32 *msrpm;			/* per-vcpu MSR permission bitmap */

	struct nested_state nested;	/* nested-SVM (L2) bookkeeping */

	bool nmi_singlestep;		/* single-stepping out of an NMI-blocked window */

	/* bookkeeping for re-injecting a guest INT3 at the right rip */
	unsigned int3_injected;
	unsigned long int3_rip;
};

123 124
#define MSR_INVALID			0xffffffffU

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
static struct svm_direct_access_msrs {
	u32 index;   /* Index of the MSR */
	bool always; /* True if intercept is always on */
} direct_access_msrs[] = {
	{ .index = MSR_K6_STAR,				.always = true  },
	{ .index = MSR_IA32_SYSENTER_CS,		.always = true  },
#ifdef CONFIG_X86_64
	{ .index = MSR_GS_BASE,				.always = true  },
	{ .index = MSR_FS_BASE,				.always = true  },
	{ .index = MSR_KERNEL_GS_BASE,			.always = true  },
	{ .index = MSR_LSTAR,				.always = true  },
	{ .index = MSR_CSTAR,				.always = true  },
	{ .index = MSR_SYSCALL_MASK,			.always = true  },
#endif
	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
	{ .index = MSR_IA32_LASTINTTOIP,		.always = false },
	{ .index = MSR_INVALID,				.always = false },
};

146 147 148 149
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
#else
J
Joerg Roedel 已提交
150
static bool npt_enabled;
151
#endif
152 153 154
static int npt = 1;

module_param(npt, int, S_IRUGO);
155

156
static int nested = 1;
157 158
module_param(nested, int, S_IRUGO);

159
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
160
static void svm_complete_interrupts(struct vcpu_svm *svm);
161

162
static int nested_svm_exit_handled(struct vcpu_svm *svm);
163
static int nested_svm_intercept(struct vcpu_svm *svm);
164 165 166 167
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code);

168 169
/* Convert a generic kvm_vcpu pointer into its enclosing vcpu_svm. */
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_svm, vcpu);
}

A
Alexander Graf 已提交
173 174
/* True when an L2 guest is active (nested.vmcb is non-zero). */
static inline bool is_nested(struct vcpu_svm *svm)
{
	return svm->nested.vmcb;
}

178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
/* Set the virtual Global Interrupt Flag in the vcpu's hflags cache. */
static inline void enable_gif(struct vcpu_svm *svm)
{
	svm->vcpu.arch.hflags |= HF_GIF_MASK;
}

/* Clear the virtual Global Interrupt Flag. */
static inline void disable_gif(struct vcpu_svm *svm)
{
	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}

/* Query the cached virtual GIF state. */
static inline bool gif_set(struct vcpu_svm *svm)
{
	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}

193
static unsigned long iopm_base;
A
Avi Kivity 已提交
194 195 196 197

struct kvm_ldttss_desc {
	u16 limit0;
	u16 base0;
J
Joerg Roedel 已提交
198 199
	unsigned base1:8, type:5, dpl:2, p:1;
	unsigned limit1:4, zero0:3, g:1, base2:8;
A
Avi Kivity 已提交
200 201 202 203 204 205 206
	u32 base3;
	u32 zero1;
} __attribute__((packed));

struct svm_cpu_data {
	int cpu;

A
Avi Kivity 已提交
207 208 209
	u64 asid_generation;
	u32 max_asid;
	u32 next_asid;
A
Avi Kivity 已提交
210 211 212 213 214 215
	struct kvm_ldttss_desc *tss_desc;

	struct page *save_area;
};

static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
216
static uint32_t svm_features;
A
Avi Kivity 已提交
217 218 219 220 221 222 223 224

struct svm_init_data {
	int cpu;
	int r;
};

static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};

225
#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
A
Avi Kivity 已提交
226 227 228
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
/*
 * Translate an MSR number into the u32-granular offset of its permission
 * bits inside the MSR permission map, or MSR_INVALID if the MSR falls
 * outside all mapped ranges.
 */
static u32 svm_msrpm_offset(u32 msr)
{
	int i;

	for (i = 0; i < NUM_MSR_MAPS; i++) {
		u32 base = msrpm_ranges[i];

		if (msr >= base && msr < base + MSRS_IN_RANGE) {
			/* 4 MSRs share each byte; ranges are laid out back to back. */
			u32 byte_offset = (msr - base) / 4 + i * MSRS_RANGE_SIZE;

			/* Convert the byte offset into a u32 offset. */
			return byte_offset / 4;
		}
	}

	return MSR_INVALID;
}

A
Avi Kivity 已提交
250 251
#define MAX_INST_SIZE 15

252 253 254 255 256
/*
 * Test a feature bit against the cached SVM feature flags
 * (filled from cpuid_edx(SVM_CPUID_FUNC) in svm_hardware_setup()).
 */
static inline u32 svm_has(u32 feat)
{
	return svm_features & feat;
}

A
Avi Kivity 已提交
257 258
/* CLGI: clear the host Global Interrupt Flag. */
static inline void clgi(void)
{
	asm volatile (__ex(SVM_CLGI));
}

/* STGI: set the host Global Interrupt Flag. */
static inline void stgi(void)
{
	asm volatile (__ex(SVM_STGI));
}

/* INVLPGA: invalidate the TLB entry for @addr under the given @asid. */
static inline void invlpga(unsigned long addr, u32 asid)
{
	asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
}

/*
 * Invalidate this vcpu's ASID: dropping it behind the per-cpu generation
 * forces allocation of a fresh ASID (and thus a guest TLB flush) on the
 * next VMRUN.
 */
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
	to_svm(vcpu)->asid_generation--;
}

/* A guest TLB flush on SVM is implemented by forcing a new ASID. */
static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
{
	force_new_asid(vcpu);
}

/*
 * Write the guest EFER. The hardware copy always carries EFER_SVME
 * (required while SVM is enabled). Without NPT, LME is stripped until
 * the guest has actually activated long mode (LMA set).
 */
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (!npt_enabled && !(efer & EFER_LMA))
		efer &= ~EFER_LME;

	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
	vcpu->arch.efer = efer;
}

/* True when an event-injection word describes a valid external interrupt. */
static int is_external_interrupt(u32 info)
{
	u32 masked = info & (SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID);

	return masked == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
}

297 298 299 300 301 302
static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ret = 0;

	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
303
		ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
304 305 306 307 308 309 310 311 312 313 314 315 316 317
	return ret & mask;
}

/* Set or clear the VMCB's single interrupt-shadow bit from @mask. */
static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (mask)
		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
	else
		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
}

A
Avi Kivity 已提交
318 319
/*
 * Advance the guest rip past the instruction that just exited. Uses the
 * hardware-provided next_rip when available; otherwise falls back to the
 * instruction emulator in skip mode.
 */
static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!svm->next_rip) {
		/* No next_rip from hardware: let the emulator decode and skip. */
		if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
				EMULATE_DONE)
			printk(KERN_DEBUG "%s: NOP\n", __func__);
		return;
	}
	/* Sanity check: no x86 instruction is longer than MAX_INST_SIZE. */
	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
		       __func__, kvm_rip_read(vcpu), svm->next_rip);

	kvm_rip_write(vcpu, svm->next_rip);
	/* Skipping an instruction always leaves any interrupt shadow. */
	svm_set_interrupt_shadow(vcpu, 0);
}

J
Jan Kiszka 已提交
336 337 338 339 340
/*
 * Queue an exception for injection into the guest on the next VMRUN.
 * Nested guests may instead take a #VMEXIT; #BP needs special rip
 * bookkeeping on hardware without the NRIP feature.
 */
static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
				bool has_error_code, u32 error_code)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * If we are within a nested VM we'd better #VMEXIT and let the guest
	 * handle the exception
	 */
	if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
		return;

	if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);

		/*
		 * For guest debugging where we have to reinject #BP if some
		 * INT3 is guest-owned:
		 * Emulate nRIP by moving RIP forward. Will fail if injection
		 * raises a fault that is not intercepted. Still better than
		 * failing in all cases.
		 */
		skip_emulated_instruction(&svm->vcpu);
		rip = kvm_rip_read(&svm->vcpu);
		svm->int3_rip = rip + svm->vmcb->save.cs.base;
		svm->int3_injected = rip - old_rip;
	}

	svm->vmcb->control.event_inj = nr
		| SVM_EVTINJ_VALID
		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
		| SVM_EVTINJ_TYPE_EXEPT;
	svm->vmcb->control.event_inj_err = error_code;
}

A
Avi Kivity 已提交
371 372
static int has_svm(void)
{
373
	const char *msg;
A
Avi Kivity 已提交
374

375
	if (!cpu_has_svm(&msg)) {
J
Joe Perches 已提交
376
		printk(KERN_INFO "has_svm: %s\n", msg);
A
Avi Kivity 已提交
377 378 379 380 381 382 383 384
		return 0;
	}

	return 1;
}

/* Per-cpu teardown: turn off SVM on the current CPU. */
static void svm_hardware_disable(void *garbage)
{
	cpu_svm_disable();
}

388
/*
 * Per-cpu setup: enable SVM on the current CPU. Initializes the CPU's
 * ASID bookkeeping, locates its TSS descriptor, sets EFER.SVME and
 * programs the host save area. Returns 0 or a negative errno.
 */
static int svm_hardware_enable(void *garbage)
{

	struct svm_cpu_data *sd;
	uint64_t efer;
	struct desc_ptr gdt_descr;
	struct desc_struct *gdt;
	int me = raw_smp_processor_id();

	rdmsrl(MSR_EFER, efer);
	if (efer & EFER_SVME)
		return -EBUSY;	/* SVM already enabled by someone else */

	if (!has_svm()) {
		printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
		       me);
		return -EINVAL;
	}
	sd = per_cpu(svm_data, me);

	if (!sd) {
		printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
		       me);
		return -EINVAL;
	}

	/* Start a fresh ASID space; max_asid comes from CPUID. */
	sd->asid_generation = 1;
	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
	sd->next_asid = sd->max_asid + 1;

	/* Remember this CPU's TSS descriptor for host state reloads. */
	native_store_gdt(&gdt_descr);
	gdt = (struct desc_struct *)gdt_descr.address;
	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);

	wrmsrl(MSR_EFER, efer | EFER_SVME);

	/* Tell the CPU where to save host state across VMRUN. */
	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);

	return 0;
}

429 430
static void svm_cpu_uninit(int cpu)
{
431
	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
432

433
	if (!sd)
434 435 436
		return;

	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
437 438
	__free_page(sd->save_area);
	kfree(sd);
439 440
}

A
Avi Kivity 已提交
441 442
/*
 * Allocate the per-cpu SVM data for @cpu: the svm_cpu_data struct plus
 * one page used as the hardware host-state save area. Returns 0 or
 * -ENOMEM; on failure nothing is published in per_cpu(svm_data).
 */
static int svm_cpu_init(int cpu)
{
	struct svm_cpu_data *sd;
	int r;

	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
	if (!sd)
		return -ENOMEM;
	sd->cpu = cpu;
	sd->save_area = alloc_page(GFP_KERNEL);
	r = -ENOMEM;
	if (!sd->save_area)
		goto err_1;

	per_cpu(svm_data, cpu) = sd;

	return 0;

err_1:
	kfree(sd);
	return r;

}

465 466 467 468 469 470 471 472 473 474 475
/* True when @index appears in the direct_access_msrs table. */
static bool valid_msr_intercept(u32 index)
{
	const struct svm_direct_access_msrs *p = direct_access_msrs;

	while (p->index != MSR_INVALID) {
		if (p->index == index)
			return true;
		p++;
	}

	return false;
}

476 477
/*
 * Program the read/write intercept bits for @msr in the permission map
 * @msrpm. A set bit means "intercept"; read/write == 1 grants the guest
 * direct access.
 *
 * Fix: the original loaded msrpm[offset] *before* checking
 * BUG_ON(offset == MSR_INVALID), so an unmapped MSR caused an
 * out-of-bounds read at offset 0xffffffff before the sanity check could
 * fire. Validate the offset first.
 */
static void set_msr_interception(u32 *msrpm, unsigned msr,
				 int read, int write)
{
	u8 bit_read, bit_write;
	unsigned long tmp;
	u32 offset;

	/*
	 * If this warning triggers extend the direct_access_msrs list at the
	 * beginning of the file
	 */
	WARN_ON(!valid_msr_intercept(msr));

	offset    = svm_msrpm_offset(msr);

	/* Must be checked before msrpm[offset] is dereferenced. */
	BUG_ON(offset == MSR_INVALID);

	bit_read  = 2 * (msr & 0x0f);		/* even bit: read intercept  */
	bit_write = 2 * (msr & 0x0f) + 1;	/* odd bit:  write intercept */
	tmp       = msrpm[offset];

	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);

	msrpm[offset] = tmp;
}

502 503
/*
 * Initialize a vcpu MSR permission map: intercept everything, then open
 * up direct access for the MSRs flagged "always" in direct_access_msrs.
 */
static void svm_vcpu_init_msrpm(u32 *msrpm)
{
	int i;

	/* All bits set == intercept every MSR by default. */
	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
		if (direct_access_msrs[i].always)
			set_msr_interception(msrpm, direct_access_msrs[i].index,
					     1, 1);
}

516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
/*
 * Record @offset in the msrpm_offsets table (used when merging nested
 * MSR permission maps). Duplicates are ignored; unused slots hold
 * MSR_INVALID (0xff-filled by init_msrpm_offsets()).
 */
static void add_msr_offset(u32 offset)
{
	int i;

	for (i = 0; i < MSRPM_OFFSETS; ++i) {

		/* Offset already in list? */
		if (msrpm_offsets[i] == offset)
			return;

		/* Slot used by another offset? */
		if (msrpm_offsets[i] != MSR_INVALID)
			continue;

		/* Add offset to list */
		msrpm_offsets[i] = offset;

		return;
	}

	/*
	 * If this BUG triggers the msrpm_offsets table has an overflow. Just
	 * increase MSRPM_OFFSETS in this case.
	 */
	BUG();
}

/* Pre-compute the msrpm offsets of all directly accessed MSRs. */
static void init_msrpm_offsets(void)
{
	int i;

	/* 0xff fill marks every slot as MSR_INVALID (unused). */
	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		u32 off = svm_msrpm_offset(direct_access_msrs[i].index);

		BUG_ON(off == MSR_INVALID);
		add_msr_offset(off);
	}
}

559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
/* Turn on LBR virtualization and give the guest direct access to the LBR MSRs. */
static void svm_enable_lbrv(struct vcpu_svm *svm)
{
	static const u32 lbr_msrs[] = {
		MSR_IA32_LASTBRANCHFROMIP,
		MSR_IA32_LASTBRANCHTOIP,
		MSR_IA32_LASTINTFROMIP,
		MSR_IA32_LASTINTTOIP,
	};
	int i;

	svm->vmcb->control.lbr_ctl = 1;
	for (i = 0; i < ARRAY_SIZE(lbr_msrs); i++)
		set_msr_interception(svm->msrpm, lbr_msrs[i], 1, 1);
}

/* Turn off LBR virtualization and intercept the LBR MSRs again. */
static void svm_disable_lbrv(struct vcpu_svm *svm)
{
	static const u32 lbr_msrs[] = {
		MSR_IA32_LASTBRANCHFROMIP,
		MSR_IA32_LASTBRANCHTOIP,
		MSR_IA32_LASTINTFROMIP,
		MSR_IA32_LASTINTTOIP,
	};
	int i;

	svm->vmcb->control.lbr_ctl = 0;
	for (i = 0; i < ARRAY_SIZE(lbr_msrs); i++)
		set_msr_interception(svm->msrpm, lbr_msrs[i], 0, 0);
}

A
Avi Kivity 已提交
581 582 583 584
/*
 * Module-init setup: allocate the global IOPM, compute msrpm offsets,
 * advertise optional EFER bits, allocate per-cpu data and decide
 * whether nested paging (NPT) is used. Returns 0 or a negative errno.
 */
static __init int svm_hardware_setup(void)
{
	int cpu;
	struct page *iopm_pages;
	void *iopm_va;
	int r;

	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);

	if (!iopm_pages)
		return -ENOMEM;

	/* 0xff == intercept all I/O ports. */
	iopm_va = page_address(iopm_pages);
	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;

	init_msrpm_offsets();

	if (boot_cpu_has(X86_FEATURE_NX))
		kvm_enable_efer_bits(EFER_NX);

	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
		kvm_enable_efer_bits(EFER_FFXSR);

	if (nested) {
		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
		kvm_enable_efer_bits(EFER_SVME);
	}

	/*
	 * NOTE(review): on a mid-loop failure the CPUs already initialized
	 * are not uninitialized here — looks like a small leak on this
	 * error path; confirm whether svm_cpu_uninit() should be run.
	 */
	for_each_possible_cpu(cpu) {
		r = svm_cpu_init(cpu);
		if (r)
			goto err;
	}

	svm_features = cpuid_edx(SVM_CPUID_FUNC);

	if (!svm_has(SVM_FEATURE_NPT))
		npt_enabled = false;

	if (npt_enabled && !npt) {
		printk(KERN_INFO "kvm: Nested Paging disabled\n");
		npt_enabled = false;
	}

	if (npt_enabled) {
		printk(KERN_INFO "kvm: Nested Paging enabled\n");
		kvm_enable_tdp();
	} else
		kvm_disable_tdp();

	return 0;

err:
	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
	iopm_base = 0;
	return r;
}

/* Module-exit teardown: free all per-cpu data and the global IOPM. */
static __exit void svm_hardware_unsetup(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		svm_cpu_uninit(cpu);

	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
	iopm_base = 0;
}

static void init_seg(struct vmcb_seg *seg)
{
	seg->selector = 0;
	seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
J
Joerg Roedel 已提交
655
		      SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
A
Avi Kivity 已提交
656 657 658 659 660 661 662 663 664 665 666 667
	seg->limit = 0xffff;
	seg->base = 0;
}

/* Reset a VMCB system segment (LDT/TSS) to a present segment of @type. */
static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
{
	seg->base = 0;
	seg->limit = 0xffff;
	seg->selector = 0;
	seg->attrib = SVM_SELECTOR_P_MASK | type;
}

668
/*
 * Initialize a VMCB to architectural reset state: program all CR/DR,
 * exception and instruction intercepts, point it at the global IOPM and
 * the vcpu's MSRPM, and load the x86 power-on segment/register values.
 * When NPT is enabled, paging-related intercepts are relaxed since the
 * hardware walks the guest page tables itself.
 */
static void init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct vmcb_save_area *save = &svm->vmcb->save;

	svm->vcpu.fpu_active = 1;

	control->intercept_cr_read =	INTERCEPT_CR0_MASK |
					INTERCEPT_CR3_MASK |
					INTERCEPT_CR4_MASK;

	control->intercept_cr_write =	INTERCEPT_CR0_MASK |
					INTERCEPT_CR3_MASK |
					INTERCEPT_CR4_MASK |
					INTERCEPT_CR8_MASK;

	control->intercept_dr_read =	INTERCEPT_DR0_MASK |
					INTERCEPT_DR1_MASK |
					INTERCEPT_DR2_MASK |
					INTERCEPT_DR3_MASK |
					INTERCEPT_DR4_MASK |
					INTERCEPT_DR5_MASK |
					INTERCEPT_DR6_MASK |
					INTERCEPT_DR7_MASK;

	control->intercept_dr_write =	INTERCEPT_DR0_MASK |
					INTERCEPT_DR1_MASK |
					INTERCEPT_DR2_MASK |
					INTERCEPT_DR3_MASK |
					INTERCEPT_DR4_MASK |
					INTERCEPT_DR5_MASK |
					INTERCEPT_DR6_MASK |
					INTERCEPT_DR7_MASK;

	control->intercept_exceptions = (1 << PF_VECTOR) |
					(1 << UD_VECTOR) |
					(1 << MC_VECTOR);


	control->intercept =	(1ULL << INTERCEPT_INTR) |
				(1ULL << INTERCEPT_NMI) |
				(1ULL << INTERCEPT_SMI) |
				(1ULL << INTERCEPT_SELECTIVE_CR0) |
				(1ULL << INTERCEPT_CPUID) |
				(1ULL << INTERCEPT_INVD) |
				(1ULL << INTERCEPT_HLT) |
				(1ULL << INTERCEPT_INVLPG) |
				(1ULL << INTERCEPT_INVLPGA) |
				(1ULL << INTERCEPT_IOIO_PROT) |
				(1ULL << INTERCEPT_MSR_PROT) |
				(1ULL << INTERCEPT_TASK_SWITCH) |
				(1ULL << INTERCEPT_SHUTDOWN) |
				(1ULL << INTERCEPT_VMRUN) |
				(1ULL << INTERCEPT_VMMCALL) |
				(1ULL << INTERCEPT_VMLOAD) |
				(1ULL << INTERCEPT_VMSAVE) |
				(1ULL << INTERCEPT_STGI) |
				(1ULL << INTERCEPT_CLGI) |
				(1ULL << INTERCEPT_SKINIT) |
				(1ULL << INTERCEPT_WBINVD) |
				(1ULL << INTERCEPT_MONITOR) |
				(1ULL << INTERCEPT_MWAIT);

	control->iopm_base_pa = iopm_base;
	control->msrpm_base_pa = __pa(svm->msrpm);
	control->tsc_offset = 0;
	control->int_ctl = V_INTR_MASKING_MASK;

	init_seg(&save->es);
	init_seg(&save->ss);
	init_seg(&save->ds);
	init_seg(&save->fs);
	init_seg(&save->gs);

	save->cs.selector = 0xf000;
	/* Executable/Readable Code Segment */
	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
	save->cs.limit = 0xffff;
	/*
	 * cs.base should really be 0xffff0000, but vmx can't handle that, so
	 * be consistent with it.
	 *
	 * Replace when we have real mode working for vmx.
	 */
	save->cs.base = 0xf0000;

	save->gdtr.limit = 0xffff;
	save->idtr.limit = 0xffff;

	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);

	save->efer = EFER_SVME;
	save->dr6 = 0xffff0ff0;
	save->dr7 = 0x400;
	save->rflags = 2;
	save->rip = 0x0000fff0;
	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;

	/*
	 * This is the guest-visible cr0 value.
	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
	 */
	svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
	kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);

	save->cr4 = X86_CR4_PAE;
	/* rdx = ?? */

	if (npt_enabled) {
		/* Setup VMCB for Nested Paging */
		control->nested_ctl = 1;
		control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
					(1ULL << INTERCEPT_INVLPG));
		control->intercept_exceptions &= ~(1 << PF_VECTOR);
		control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
		control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
		save->g_pat = 0x0007040600070406ULL;
		save->cr3 = 0;
		save->cr4 = 0;
	}
	force_new_asid(&svm->vcpu);

	svm->nested.vmcb = 0;
	svm->vcpu.arch.hflags = 0;

	if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
		control->pause_filter_count = 3000;
		control->intercept |= (1ULL << INTERCEPT_PAUSE);
	}

	enable_gif(svm);
}

803
/*
 * Reset a vcpu by re-initializing its VMCB. Application processors
 * (non-BSP) start at the SIPI vector instead of the reset vector.
 */
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	init_vmcb(svm);

	if (!kvm_vcpu_is_bsp(vcpu)) {
		/* AP startup: cs:ip derived from the received SIPI vector. */
		kvm_rip_write(vcpu, 0);
		svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
		svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
	}
	/* Invalidate all cached registers so they are re-read from the VMCB. */
	vcpu->arch.regs_avail = ~0;
	vcpu->arch.regs_dirty = ~0;

	return 0;
}

R
Rusty Russell 已提交
820
/*
 * Allocate and initialize one SVM vcpu: the vcpu_svm container, its
 * VMCB page, the MSR permission maps (native and nested) and the nested
 * host-save page. Errors unwind via the goto chain in reverse order of
 * allocation. Returns the embedded kvm_vcpu or ERR_PTR(-errno).
 */
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
{
	struct vcpu_svm *svm;
	struct page *page;
	struct page *msrpm_pages;
	struct page *hsave_page;
	struct page *nested_msrpm_pages;
	int err;

	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!svm) {
		err = -ENOMEM;
		goto out;
	}

	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
	if (err)
		goto free_svm;

	err = -ENOMEM;
	page = alloc_page(GFP_KERNEL);		/* VMCB */
	if (!page)
		goto uninit;

	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
	if (!msrpm_pages)
		goto free_page1;

	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
	if (!nested_msrpm_pages)
		goto free_page2;

	hsave_page = alloc_page(GFP_KERNEL);	/* nested host-save area */
	if (!hsave_page)
		goto free_page3;

	svm->nested.hsave = page_address(hsave_page);

	svm->msrpm = page_address(msrpm_pages);
	svm_vcpu_init_msrpm(svm->msrpm);

	svm->nested.msrpm = page_address(nested_msrpm_pages);
	svm_vcpu_init_msrpm(svm->nested.msrpm);

	svm->vmcb = page_address(page);
	clear_page(svm->vmcb);
	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
	svm->asid_generation = 0;
	init_vmcb(svm);

	fx_init(&svm->vcpu);
	svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
	if (kvm_vcpu_is_bsp(&svm->vcpu))
		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;

	return &svm->vcpu;

free_page3:
	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
free_page1:
	__free_page(page);
uninit:
	kvm_vcpu_uninit(&svm->vcpu);
free_svm:
	kmem_cache_free(kvm_vcpu_cache, svm);
out:
	return ERR_PTR(err);
}

/* Free everything svm_create_vcpu() allocated, then the vcpu itself. */
static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
	__free_page(virt_to_page(svm->nested.hsave));
	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, svm);
}

903
/*
 * Called when the vcpu is scheduled onto @cpu. On a CPU migration,
 * compensate an unstable TSC so the guest never observes it going
 * backwards, migrate timers and force a new ASID (the old one belonged
 * to the previous CPU). Finally snapshot the host MSRs that the guest
 * may clobber.
 */
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	if (unlikely(cpu != vcpu->cpu)) {
		u64 delta;

		if (check_tsc_unstable()) {
			/*
			 * Make sure that the guest sees a monotonically
			 * increasing TSC.
			 */
			delta = vcpu->arch.host_tsc - native_read_tsc();
			svm->vmcb->control.tsc_offset += delta;
			if (is_nested(svm))
				svm->nested.hsave->control.tsc_offset += delta;
		}
		vcpu->cpu = cpu;
		kvm_migrate_timers(vcpu);
		svm->asid_generation = 0;
	}

	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}

/*
 * Called when the vcpu is descheduled: restore the host MSRs saved in
 * svm_vcpu_load() and record the host TSC for migration compensation.
 */
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	++vcpu->stat.host_state_reload;
	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);

	vcpu->arch.host_tsc = native_read_tsc();
}

/* Read the guest RFLAGS straight from the VMCB save area. */
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
	return to_svm(vcpu)->vmcb->save.rflags;
}

/* Write the guest RFLAGS straight into the VMCB save area. */
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	to_svm(vcpu)->vmcb->save.rflags = rflags;
}

A
Avi Kivity 已提交
952 953 954 955 956 957 958 959 960 961 962 963
/*
 * Refresh a lazily-cached register. On SVM only the PDPTRs need this
 * (reloaded from guest cr3, NPT only); anything else is a bug.
 */
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
	switch (reg) {
	case VCPU_EXREG_PDPTR:
		BUG_ON(!npt_enabled);
		load_pdptrs(vcpu, vcpu->arch.cr3);
		break;
	default:
		BUG();
	}
}

964 965 966 967 968 969 970 971 972 973
/* Enable the virtual-interrupt (VINTR) intercept. */
static void svm_set_vintr(struct vcpu_svm *svm)
{
	svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
}

/* Disable the virtual-interrupt (VINTR) intercept. */
static void svm_clear_vintr(struct vcpu_svm *svm)
{
	svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
}

A
Avi Kivity 已提交
974 975
/* Map a VCPU_SREG_* index to the corresponding VMCB segment field. */
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;

	switch (seg) {
	case VCPU_SREG_CS: return &save->cs;
	case VCPU_SREG_DS: return &save->ds;
	case VCPU_SREG_ES: return &save->es;
	case VCPU_SREG_FS: return &save->fs;
	case VCPU_SREG_GS: return &save->gs;
	case VCPU_SREG_SS: return &save->ss;
	case VCPU_SREG_TR: return &save->tr;
	case VCPU_SREG_LDTR: return &save->ldtr;
	}
	BUG();
	return NULL;	/* not reached; keeps the compiler happy */
}

/* Return the base address of the given guest segment. */
static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	struct vmcb_seg *s = svm_seg(vcpu, seg);

	return s->base;
}

/*
 * Export a guest segment from the VMCB into the generic kvm_segment
 * format, unpacking the SVM attrib bits and applying several fixups so
 * the result also satisfies Intel VMX consistency checks (needed for
 * cross-vendor migration).
 */
static void svm_get_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
	struct vmcb_seg *s = svm_seg(vcpu, seg);

	var->base = s->base;
	var->limit = s->limit;
	var->selector = s->selector;
	var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
	var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
	var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
	var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
	var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;

	/*
	 * AMD's VMCB does not have an explicit unusable field, so emulate it
	 * for cross vendor migration purposes by "not present"
	 */
	var->unusable = !var->present || (var->type == 0);

	switch (seg) {
	case VCPU_SREG_CS:
		/*
		 * SVM always stores 0 for the 'G' bit in the CS selector in
		 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
		 * Intel's VMENTRY has a check on the 'G' bit.
		 */
		var->g = s->limit > 0xfffff;
		break;
	case VCPU_SREG_TR:
		/*
		 * Work around a bug where the busy flag in the tr selector
		 * isn't exposed
		 */
		var->type |= 0x2;
		break;
	case VCPU_SREG_DS:
	case VCPU_SREG_ES:
	case VCPU_SREG_FS:
	case VCPU_SREG_GS:
		/*
		 * The accessed bit must always be set in the segment
		 * descriptor cache, although it can be cleared in the
		 * descriptor, the cached bit always remains at 1. Since
		 * Intel has a check on this, set it here to support
		 * cross-vendor migration.
		 */
		if (!var->unusable)
			var->type |= 0x1;
		break;
	case VCPU_SREG_SS:
		/*
		 * On AMD CPUs sometimes the DB bit in the segment
		 * descriptor is left as 1, although the whole segment has
		 * been made unusable. Clear it here to pass an Intel VMX
		 * entry check when cross vendor migrating.
		 */
		if (var->unusable)
			var->db = 0;
		break;
	}
}

1065 1066 1067 1068 1069 1070 1071
/* Return the guest's current privilege level as cached in the VMCB. */
static int svm_get_cpl(struct kvm_vcpu *vcpu)
{
	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;

	return save->cpl;
}

1072
static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
A
Avi Kivity 已提交
1073
{
1074 1075
	struct vcpu_svm *svm = to_svm(vcpu);

1076 1077
	dt->size = svm->vmcb->save.idtr.limit;
	dt->address = svm->vmcb->save.idtr.base;
A
Avi Kivity 已提交
1078 1079
}

1080
static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
A
Avi Kivity 已提交
1081
{
1082 1083
	struct vcpu_svm *svm = to_svm(vcpu);

1084 1085
	svm->vmcb->save.idtr.limit = dt->size;
	svm->vmcb->save.idtr.base = dt->address ;
A
Avi Kivity 已提交
1086 1087
}

1088
static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
A
Avi Kivity 已提交
1089
{
1090 1091
	struct vcpu_svm *svm = to_svm(vcpu);

1092 1093
	dt->size = svm->vmcb->save.gdtr.limit;
	dt->address = svm->vmcb->save.gdtr.base;
A
Avi Kivity 已提交
1094 1095
}

1096
static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
A
Avi Kivity 已提交
1097
{
1098 1099
	struct vcpu_svm *svm = to_svm(vcpu);

1100 1101
	svm->vmcb->save.gdtr.limit = dt->size;
	svm->vmcb->save.gdtr.base = dt->address ;
A
Avi Kivity 已提交
1102 1103
}

/* SVM caches no CR0 guest bits outside the VMCB; nothing to decache. */
static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}

/* SVM caches no CR4 guest bits outside the VMCB; nothing to decache. */
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}

A
Avi Kivity 已提交
1112 1113
static void update_cr0_intercept(struct vcpu_svm *svm)
{
1114
	struct vmcb *vmcb = svm->vmcb;
A
Avi Kivity 已提交
1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125
	ulong gcr0 = svm->vcpu.arch.cr0;
	u64 *hcr0 = &svm->vmcb->save.cr0;

	if (!svm->vcpu.fpu_active)
		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
	else
		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
			| (gcr0 & SVM_CR0_SELECTIVE_MASK);


	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1126 1127 1128 1129 1130 1131 1132 1133 1134 1135
		vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
		vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
		if (is_nested(svm)) {
			struct vmcb *hsave = svm->nested.hsave;

			hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
			hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
			vmcb->control.intercept_cr_read  |= svm->nested.intercept_cr_read;
			vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write;
		}
A
Avi Kivity 已提交
1136 1137 1138
	} else {
		svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1139 1140 1141 1142 1143 1144
		if (is_nested(svm)) {
			struct vmcb *hsave = svm->nested.hsave;

			hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
			hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
		}
A
Avi Kivity 已提交
1145 1146 1147
	}
}

A
Avi Kivity 已提交
1148 1149
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
1150 1151
	struct vcpu_svm *svm = to_svm(vcpu);

1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172
	if (is_nested(svm)) {
		/*
		 * We are here because we run in nested mode, the host kvm
		 * intercepts cr0 writes but the l1 hypervisor does not.
		 * But the L1 hypervisor may intercept selective cr0 writes.
		 * This needs to be checked here.
		 */
		unsigned long old, new;

		/* Remove bits that would trigger a real cr0 write intercept */
		old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
		new = cr0 & SVM_CR0_SELECTIVE_MASK;

		if (old == new) {
			/* cr0 write with ts and mp unchanged */
			svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
			if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
				return;
		}
	}

1173
#ifdef CONFIG_X86_64
1174
	if (vcpu->arch.efer & EFER_LME) {
1175
		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1176
			vcpu->arch.efer |= EFER_LMA;
1177
			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
A
Avi Kivity 已提交
1178 1179
		}

M
Mike Day 已提交
1180
		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1181
			vcpu->arch.efer &= ~EFER_LMA;
1182
			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
A
Avi Kivity 已提交
1183 1184 1185
		}
	}
#endif
1186
	vcpu->arch.cr0 = cr0;
1187 1188 1189

	if (!npt_enabled)
		cr0 |= X86_CR0_PG | X86_CR0_WP;
1190 1191

	if (!vcpu->fpu_active)
J
Joerg Roedel 已提交
1192
		cr0 |= X86_CR0_TS;
1193 1194 1195 1196 1197 1198
	/*
	 * re-enable caching here because the QEMU bios
	 * does not do it - this results in some delay at
	 * reboot
	 */
	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1199
	svm->vmcb->save.cr0 = cr0;
A
Avi Kivity 已提交
1200
	update_cr0_intercept(svm);
A
Avi Kivity 已提交
1201 1202 1203 1204
}

static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
1205
	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1206 1207 1208 1209
	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;

	if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
		force_new_asid(vcpu);
1210

1211 1212 1213
	vcpu->arch.cr4 = cr4;
	if (!npt_enabled)
		cr4 |= X86_CR4_PAE;
1214
	cr4 |= host_cr4_mce;
1215
	to_svm(vcpu)->vmcb->save.cr4 = cr4;
A
Avi Kivity 已提交
1216 1217 1218 1219 1220
}

static void svm_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
1221
	struct vcpu_svm *svm = to_svm(vcpu);
A
Avi Kivity 已提交
1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239
	struct vmcb_seg *s = svm_seg(vcpu, seg);

	s->base = var->base;
	s->limit = var->limit;
	s->selector = var->selector;
	if (var->unusable)
		s->attrib = 0;
	else {
		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
	}
	if (seg == VCPU_SREG_CS)
1240 1241
		svm->vmcb->save.cpl
			= (svm->vmcb->save.cs.attrib
A
Avi Kivity 已提交
1242 1243 1244 1245
			   >> SVM_SELECTOR_DPL_SHIFT) & 3;

}

1246
static void update_db_intercept(struct kvm_vcpu *vcpu)
A
Avi Kivity 已提交
1247
{
J
Jan Kiszka 已提交
1248 1249 1250 1251
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.intercept_exceptions &=
		~((1 << DB_VECTOR) | (1 << BP_VECTOR));
1252

J
Jan Kiszka 已提交
1253
	if (svm->nmi_singlestep)
1254 1255
		svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);

J
Jan Kiszka 已提交
1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
		if (vcpu->guest_debug &
		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
			svm->vmcb->control.intercept_exceptions |=
				1 << DB_VECTOR;
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
			svm->vmcb->control.intercept_exceptions |=
				1 << BP_VECTOR;
	} else
		vcpu->guest_debug = 0;
1266 1267
}

1268
static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1269 1270 1271
{
	struct vcpu_svm *svm = to_svm(vcpu);

1272 1273 1274 1275 1276
	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
	else
		svm->vmcb->save.dr7 = vcpu->arch.dr7;

1277
	update_db_intercept(vcpu);
A
Avi Kivity 已提交
1278 1279 1280 1281
}

/* Restore the host MSR state saved by save_host_msrs(). */
static void load_host_msrs(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
#endif
}

/* Save host MSR state that the guest run may clobber. */
static void save_host_msrs(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
#endif
}

1294
static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
A
Avi Kivity 已提交
1295
{
1296 1297 1298
	if (sd->next_asid > sd->max_asid) {
		++sd->asid_generation;
		sd->next_asid = 1;
1299
		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
A
Avi Kivity 已提交
1300 1301
	}

1302 1303
	svm->asid_generation = sd->asid_generation;
	svm->vmcb->control.asid = sd->next_asid++;
A
Avi Kivity 已提交
1304 1305
}

1306
static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
A
Avi Kivity 已提交
1307
{
1308 1309 1310 1311
	struct vcpu_svm *svm = to_svm(vcpu);

	switch (dr) {
	case 0 ... 3:
1312
		*dest = vcpu->arch.db[dr];
1313
		break;
1314 1315 1316 1317
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return EMULATE_FAIL; /* will re-inject UD */
		/* fall through */
1318 1319
	case 6:
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1320
			*dest = vcpu->arch.dr6;
1321
		else
1322
			*dest = svm->vmcb->save.dr6;
1323
		break;
1324 1325 1326 1327
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return EMULATE_FAIL; /* will re-inject UD */
		/* fall through */
1328 1329
	case 7:
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1330
			*dest = vcpu->arch.dr7;
1331
		else
1332
			*dest = svm->vmcb->save.dr7;
1333 1334 1335
		break;
	}

1336
	return EMULATE_DONE;
A
Avi Kivity 已提交
1337 1338
}

1339
static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
A
Avi Kivity 已提交
1340
{
1341 1342
	struct vcpu_svm *svm = to_svm(vcpu);

A
Avi Kivity 已提交
1343 1344
	switch (dr) {
	case 0 ... 3:
1345 1346 1347
		vcpu->arch.db[dr] = value;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = value;
1348 1349 1350 1351 1352
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return EMULATE_FAIL; /* will re-inject UD */
		/* fall through */
1353 1354
	case 6:
		vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
1355 1356 1357 1358 1359
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return EMULATE_FAIL; /* will re-inject UD */
		/* fall through */
1360 1361 1362 1363 1364 1365
	case 7:
		vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
			svm->vmcb->save.dr7 = vcpu->arch.dr7;
			vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
		}
1366
		break;
A
Avi Kivity 已提交
1367
	}
1368 1369

	return EMULATE_DONE;
A
Avi Kivity 已提交
1370 1371
}

A
Avi Kivity 已提交
1372
static int pf_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
1373 1374 1375 1376
{
	u64 fault_address;
	u32 error_code;

1377 1378
	fault_address  = svm->vmcb->control.exit_info_2;
	error_code = svm->vmcb->control.exit_info_1;
1379

1380
	trace_kvm_page_fault(fault_address, error_code);
1381 1382
	if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
		kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1383
	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
A
Avi Kivity 已提交
1384 1385
}

A
Avi Kivity 已提交
1386
static int db_interception(struct vcpu_svm *svm)
J
Jan Kiszka 已提交
1387
{
A
Avi Kivity 已提交
1388 1389
	struct kvm_run *kvm_run = svm->vcpu.run;

J
Jan Kiszka 已提交
1390
	if (!(svm->vcpu.guest_debug &
1391
	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
J
Jan Kiszka 已提交
1392
		!svm->nmi_singlestep) {
J
Jan Kiszka 已提交
1393 1394 1395
		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
		return 1;
	}
1396

J
Jan Kiszka 已提交
1397 1398
	if (svm->nmi_singlestep) {
		svm->nmi_singlestep = false;
1399 1400 1401 1402 1403 1404 1405
		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
			svm->vmcb->save.rflags &=
				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
		update_db_intercept(&svm->vcpu);
	}

	if (svm->vcpu.guest_debug &
J
Joerg Roedel 已提交
1406
	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1407 1408 1409 1410 1411 1412 1413 1414
		kvm_run->exit_reason = KVM_EXIT_DEBUG;
		kvm_run->debug.arch.pc =
			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
		kvm_run->debug.arch.exception = DB_VECTOR;
		return 0;
	}

	return 1;
J
Jan Kiszka 已提交
1415 1416
}

A
Avi Kivity 已提交
1417
static int bp_interception(struct vcpu_svm *svm)
J
Jan Kiszka 已提交
1418
{
A
Avi Kivity 已提交
1419 1420
	struct kvm_run *kvm_run = svm->vcpu.run;

J
Jan Kiszka 已提交
1421 1422 1423 1424 1425 1426
	kvm_run->exit_reason = KVM_EXIT_DEBUG;
	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
	kvm_run->debug.arch.exception = BP_VECTOR;
	return 0;
}

A
Avi Kivity 已提交
1427
static int ud_interception(struct vcpu_svm *svm)
1428 1429 1430
{
	int er;

A
Avi Kivity 已提交
1431
	er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
1432
	if (er != EMULATE_DONE)
1433
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1434 1435 1436
	return 1;
}

A
Avi Kivity 已提交
1437
static void svm_fpu_activate(struct kvm_vcpu *vcpu)
A
Anthony Liguori 已提交
1438
{
A
Avi Kivity 已提交
1439
	struct vcpu_svm *svm = to_svm(vcpu);
1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450
	u32 excp;

	if (is_nested(svm)) {
		u32 h_excp, n_excp;

		h_excp  = svm->nested.hsave->control.intercept_exceptions;
		n_excp  = svm->nested.intercept_exceptions;
		h_excp &= ~(1 << NM_VECTOR);
		excp    = h_excp | n_excp;
	} else {
		excp  = svm->vmcb->control.intercept_exceptions;
J
Joerg Roedel 已提交
1451
		excp &= ~(1 << NM_VECTOR);
1452 1453 1454 1455
	}

	svm->vmcb->control.intercept_exceptions = excp;

R
Rusty Russell 已提交
1456
	svm->vcpu.fpu_active = 1;
A
Avi Kivity 已提交
1457
	update_cr0_intercept(svm);
A
Avi Kivity 已提交
1458
}
1459

A
Avi Kivity 已提交
1460 1461 1462
static int nm_interception(struct vcpu_svm *svm)
{
	svm_fpu_activate(&svm->vcpu);
1463
	return 1;
A
Anthony Liguori 已提交
1464 1465
}

A
Avi Kivity 已提交
1466
static int mc_interception(struct vcpu_svm *svm)
1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478
{
	/*
	 * On an #MC intercept the MCE handler is not called automatically in
	 * the host. So do it by hand here.
	 */
	asm volatile (
		"int $0x12\n");
	/* not sure if we ever come back to this point */

	return 1;
}

A
Avi Kivity 已提交
1479
static int shutdown_interception(struct vcpu_svm *svm)
1480
{
A
Avi Kivity 已提交
1481 1482
	struct kvm_run *kvm_run = svm->vcpu.run;

1483 1484 1485 1486
	/*
	 * VMCB is undefined after a SHUTDOWN intercept
	 * so reinitialize it.
	 */
1487
	clear_page(svm->vmcb);
1488
	init_vmcb(svm);
1489 1490 1491 1492 1493

	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
	return 0;
}

A
Avi Kivity 已提交
1494
static int io_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
1495
{
M
Mike Day 已提交
1496
	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1497
	int size, in, string;
1498
	unsigned port;
A
Avi Kivity 已提交
1499

R
Rusty Russell 已提交
1500
	++svm->vcpu.stat.io_exits;
A
Avi Kivity 已提交
1501

1502
	svm->next_rip = svm->vmcb->control.exit_info_2;
A
Avi Kivity 已提交
1503

1504 1505 1506
	string = (io_info & SVM_IOIO_STR_MASK) != 0;

	if (string) {
1507
		if (emulate_instruction(&svm->vcpu,
A
Avi Kivity 已提交
1508
					0, 0, 0) == EMULATE_DO_MMIO)
1509 1510 1511 1512
			return 0;
		return 1;
	}

1513 1514 1515
	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
	port = io_info >> 16;
	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
A
Avi Kivity 已提交
1516

1517
	skip_emulated_instruction(&svm->vcpu);
A
Avi Kivity 已提交
1518
	return kvm_emulate_pio(&svm->vcpu, in, size, port);
A
Avi Kivity 已提交
1519 1520
}

/* NMI intercept: nothing to do, the host already handled the NMI. */
static int nmi_interception(struct vcpu_svm *svm)
{
	return 1;
}

A
Avi Kivity 已提交
1526
static int intr_interception(struct vcpu_svm *svm)
1527 1528 1529 1530 1531
{
	++svm->vcpu.stat.irq_exits;
	return 1;
}

/* Intercepts that need no handling at all resume the guest directly. */
static int nop_on_interception(struct vcpu_svm *svm)
{
	return 1;
}

A
Avi Kivity 已提交
1537
static int halt_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
1538
{
1539
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
R
Rusty Russell 已提交
1540 1541
	skip_emulated_instruction(&svm->vcpu);
	return kvm_emulate_halt(&svm->vcpu);
A
Avi Kivity 已提交
1542 1543
}

A
Avi Kivity 已提交
1544
static int vmmcall_interception(struct vcpu_svm *svm)
1545
{
1546
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
R
Rusty Russell 已提交
1547
	skip_emulated_instruction(&svm->vcpu);
1548 1549
	kvm_emulate_hypercall(&svm->vcpu);
	return 1;
1550 1551
}

1552 1553
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
1554
	if (!(svm->vcpu.arch.efer & EFER_SVME)
1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567
	    || !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

       return 0;
}

1568 1569 1570
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code)
{
1571 1572
	int vmexit;

1573 1574
	if (!is_nested(svm))
		return 0;
1575

1576 1577 1578 1579 1580
	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;
	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

1581 1582 1583 1584 1585
	vmexit = nested_svm_intercept(svm);
	if (vmexit == NESTED_EXIT_DONE)
		svm->nested.exit_required = true;

	return vmexit;
1586 1587
}

1588 1589
/* This function returns true if it is save to enable the irq window */
static inline bool nested_svm_intr(struct vcpu_svm *svm)
1590
{
1591
	if (!is_nested(svm))
1592
		return true;
1593

1594
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1595
		return true;
1596

1597
	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1598
		return false;
1599

1600 1601 1602
	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;
1603

1604 1605 1606 1607 1608 1609 1610 1611
	if (svm->nested.intercept & 1ULL) {
		/*
		 * The #vmexit can't be emulated here directly because this
		 * code path runs with irqs and preemtion disabled. A
		 * #vmexit emulation might sleep. Only signal request for
		 * the #vmexit here.
		 */
		svm->nested.exit_required = true;
1612
		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1613
		return false;
1614 1615
	}

1616
	return true;
1617 1618
}

1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
/* This function returns true if it is safe to enable the nmi window */
static inline bool nested_svm_nmi(struct vcpu_svm *svm)
{
	if (!is_nested(svm))
		return true;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
		return true;

	/* L1 intercepts NMIs: request a nested #vmexit instead */
	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
	svm->nested.exit_required = true;

	return false;
}

1634
static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
1635 1636 1637
{
	struct page *page;

1638 1639
	might_sleep();

1640 1641 1642 1643
	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		goto error;

1644 1645 1646
	*_page = page;

	return kmap(page);
1647 1648 1649 1650 1651 1652 1653 1654

error:
	kvm_release_page_clean(page);
	kvm_inject_gp(&svm->vcpu, 0);

	return NULL;
}

/* Undo nested_svm_map(): unmap and release the (dirtied) guest page. */
static void nested_svm_unmap(struct page *page)
{
	kunmap(page);
	kvm_release_page_dirty(page);
}

1661
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1662 1663
{
	u32 param = svm->vmcb->control.exit_info_1 & 1;
1664 1665
	u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	u32 t0, t1;
1666
	int ret;
1667
	u8 val;
1668

1669
	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1670
		return NESTED_EXIT_HOST;
1671

1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687
	switch (msr) {
	case 0 ... 0x1fff:
		t0 = (msr * 2) % 8;
		t1 = msr / 8;
		break;
	case 0xc0000000 ... 0xc0001fff:
		t0 = (8192 + msr - 0xc0000000) * 2;
		t1 = (t0 / 8);
		t0 %= 8;
		break;
	case 0xc0010000 ... 0xc0011fff:
		t0 = (16384 + msr - 0xc0010000) * 2;
		t1 = (t0 / 8);
		t0 %= 8;
		break;
	default:
1688
		ret = NESTED_EXIT_DONE;
1689
		goto out;
1690 1691
	}

1692
	if (!kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + t1, &val, 1))
1693
		ret = val & ((1 << param) << t0) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1694 1695 1696

out:
	return ret;
1697 1698
}

1699
static int nested_svm_exit_special(struct vcpu_svm *svm)
1700 1701
{
	u32 exit_code = svm->vmcb->control.exit_code;
1702

1703 1704 1705 1706 1707
	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
J
Joerg Roedel 已提交
1708
		/* For now we are always handling NPFs when using them */
1709 1710 1711 1712
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
J
Joerg Roedel 已提交
1713
		/* When we're shadowing, trap PFs */
1714 1715 1716
		if (!npt_enabled)
			return NESTED_EXIT_HOST;
		break;
1717 1718 1719
	case SVM_EXIT_EXCP_BASE + NM_VECTOR:
		nm_interception(svm);
		break;
1720 1721
	default:
		break;
1722 1723
	}

1724 1725 1726 1727 1728 1729
	return NESTED_EXIT_CONTINUE;
}

/*
 * If this function returns true, this #vmexit was already handled
 */
1730
static int nested_svm_intercept(struct vcpu_svm *svm)
1731 1732 1733 1734
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

1735
	switch (exit_code) {
1736
	case SVM_EXIT_MSR:
1737
		vmexit = nested_svm_exit_handled_msr(svm);
1738
		break;
1739 1740
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
		u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
J
Joerg Roedel 已提交
1741
		if (svm->nested.intercept_cr_read & cr_bits)
1742
			vmexit = NESTED_EXIT_DONE;
1743 1744 1745 1746
		break;
	}
	case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
J
Joerg Roedel 已提交
1747
		if (svm->nested.intercept_cr_write & cr_bits)
1748
			vmexit = NESTED_EXIT_DONE;
1749 1750 1751 1752
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
		u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
J
Joerg Roedel 已提交
1753
		if (svm->nested.intercept_dr_read & dr_bits)
1754
			vmexit = NESTED_EXIT_DONE;
1755 1756 1757 1758
		break;
	}
	case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
J
Joerg Roedel 已提交
1759
		if (svm->nested.intercept_dr_write & dr_bits)
1760
			vmexit = NESTED_EXIT_DONE;
1761 1762 1763 1764
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
J
Joerg Roedel 已提交
1765
		if (svm->nested.intercept_exceptions & excp_bits)
1766
			vmexit = NESTED_EXIT_DONE;
1767 1768 1769 1770
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
J
Joerg Roedel 已提交
1771
		if (svm->nested.intercept & exit_bits)
1772
			vmexit = NESTED_EXIT_DONE;
1773 1774 1775
	}
	}

1776 1777 1778 1779 1780 1781 1782 1783 1784 1785
	return vmexit;
}

/* Check the nested intercepts and emulate the #vmexit when L1 owns it. */
static int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822
/* Field-by-field copy of a VMCB control area (used for the host-save VMCB). */
static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst  = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr_read    = from->intercept_cr_read;
	dst->intercept_cr_write   = from->intercept_cr_write;
	dst->intercept_dr_read    = from->intercept_dr_read;
	dst->intercept_dr_write   = from->intercept_dr_write;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept            = from->intercept;
	dst->iopm_base_pa         = from->iopm_base_pa;
	dst->msrpm_base_pa        = from->msrpm_base_pa;
	dst->tsc_offset           = from->tsc_offset;
	dst->asid                 = from->asid;
	dst->tlb_ctl              = from->tlb_ctl;
	dst->int_ctl              = from->int_ctl;
	dst->int_vector           = from->int_vector;
	dst->int_state            = from->int_state;
	dst->exit_code            = from->exit_code;
	dst->exit_code_hi         = from->exit_code_hi;
	dst->exit_info_1          = from->exit_info_1;
	dst->exit_info_2          = from->exit_info_2;
	dst->exit_int_info        = from->exit_int_info;
	dst->exit_int_info_err    = from->exit_int_info_err;
	dst->nested_ctl           = from->nested_ctl;
	dst->event_inj            = from->event_inj;
	dst->event_inj_err        = from->event_inj_err;
	dst->nested_cr3           = from->nested_cr3;
	dst->lbr_ctl              = from->lbr_ctl;
}

1823
static int nested_svm_vmexit(struct vcpu_svm *svm)
1824
{
1825
	struct vmcb *nested_vmcb;
1826
	struct vmcb *hsave = svm->nested.hsave;
J
Joerg Roedel 已提交
1827
	struct vmcb *vmcb = svm->vmcb;
1828
	struct page *page;
1829

1830 1831 1832 1833 1834 1835
	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err);

1836
	nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
1837 1838 1839
	if (!nested_vmcb)
		return 1;

1840 1841 1842
	/* Exit nested SVM mode */
	svm->nested.vmcb = 0;

1843
	/* Give the current vmcb to the guest */
J
Joerg Roedel 已提交
1844 1845 1846 1847 1848 1849 1850 1851
	disable_gif(svm);

	nested_vmcb->save.es     = vmcb->save.es;
	nested_vmcb->save.cs     = vmcb->save.cs;
	nested_vmcb->save.ss     = vmcb->save.ss;
	nested_vmcb->save.ds     = vmcb->save.ds;
	nested_vmcb->save.gdtr   = vmcb->save.gdtr;
	nested_vmcb->save.idtr   = vmcb->save.idtr;
1852
	nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
J
Joerg Roedel 已提交
1853 1854
	if (npt_enabled)
		nested_vmcb->save.cr3    = vmcb->save.cr3;
1855 1856
	else
		nested_vmcb->save.cr3    = svm->vcpu.arch.cr3;
J
Joerg Roedel 已提交
1857
	nested_vmcb->save.cr2    = vmcb->save.cr2;
1858
	nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
J
Joerg Roedel 已提交
1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875
	nested_vmcb->save.rflags = vmcb->save.rflags;
	nested_vmcb->save.rip    = vmcb->save.rip;
	nested_vmcb->save.rsp    = vmcb->save.rsp;
	nested_vmcb->save.rax    = vmcb->save.rax;
	nested_vmcb->save.dr7    = vmcb->save.dr7;
	nested_vmcb->save.dr6    = vmcb->save.dr6;
	nested_vmcb->save.cpl    = vmcb->save.cpl;

	nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector        = vmcb->control.int_vector;
	nested_vmcb->control.int_state         = vmcb->control.int_state;
	nested_vmcb->control.exit_code         = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events. So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * Exit_int_info and event_inj can't be both valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info     = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

J
Joerg Roedel 已提交
1892 1893 1894
	nested_vmcb->control.tlb_ctl           = 0;
	nested_vmcb->control.event_inj         = 0;
	nested_vmcb->control.event_inj_err     = 0;
1895 1896 1897 1898 1899 1900

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
1901
	copy_vmcb_control_area(vmcb, hsave);
1902

1903 1904
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);
1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	svm->vmcb->save.rflags = hsave->save.rflags;
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else {
		kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
	}
	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

1930
	nested_svm_unmap(page);
1931 1932 1933 1934 1935 1936

	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	return 0;
}
A
Alexander Graf 已提交
1937

1938
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
A
Alexander Graf 已提交
1939
{
1940 1941 1942 1943 1944
	/*
	 * This function merges the msr permission bitmaps of kvm and the
	 * nested vmcb. It is omptimized in that it only merges the parts where
	 * the kvm msr permission bitmap may contain zero bits
	 */
A
Alexander Graf 已提交
1945
	int i;
1946

1947 1948
	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;
1949

1950 1951 1952
	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;
1953

1954 1955
		if (msrpm_offsets[i] == 0xffffffff)
			break;
A
Alexander Graf 已提交
1956

1957 1958 1959 1960 1961 1962 1963 1964 1965 1966
		offset = svm->nested.vmcb_msrpm + msrpm_offsets[i];
		p      = msrpm_offsets[i] / 4;

		if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
1967 1968

	return true;
A
Alexander Graf 已提交
1969 1970
}

/*
 * Emulate VMRUN: save the current (L1) state into the host-save area and
 * load the nested (L2) guest state from the VMCB whose guest-physical
 * address is in rAX.  Returns false if the nested VMCB cannot be mapped.
 */
static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct page *page;
	u64 vmcb_gpa;

	/* rAX holds the guest-physical address of the nested VMCB. */
	vmcb_gpa = svm->vmcb->save.rax;

	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
	if (!nested_vmcb)
		return false;

	/* VMRUN is a 3-byte opcode, hence rip - 3 for the tracepoint. */
	trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
				    nested_vmcb->control.intercept_cr_write,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the old vmcb, so we don't need to pick what we save, but can
	 * restore everything when a VMEXIT occurs
	 */
	hsave->save.es     = vmcb->save.es;
	hsave->save.cs     = vmcb->save.cs;
	hsave->save.ss     = vmcb->save.ss;
	hsave->save.ds     = vmcb->save.ds;
	hsave->save.gdtr   = vmcb->save.gdtr;
	hsave->save.idtr   = vmcb->save.idtr;
	hsave->save.efer   = svm->vcpu.arch.efer;
	hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4    = svm->vcpu.arch.cr4;
	hsave->save.rflags = vmcb->save.rflags;
	hsave->save.rip    = svm->next_rip;
	hsave->save.rsp    = vmcb->save.rsp;
	hsave->save.rax    = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3    = vmcb->save.cr3;
	else
		hsave->save.cr3    = svm->vcpu.arch.cr3;

	copy_vmcb_control_area(hsave, vmcb);

	/* Remember L1's IF so interrupt decisions can honor it later. */
	if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	svm->vmcb->save.rflags = nested_vmcb->save.rflags;
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;

	/* cache intercepts */
	svm->nested.intercept_cr_read    = nested_vmcb->control.intercept_cr_read;
	svm->nested.intercept_cr_write   = nested_vmcb->control.intercept_cr_write;
	svm->nested.intercept_dr_read    = nested_vmcb->control.intercept_dr_read;
	svm->nested.intercept_dr_write   = nested_vmcb->control.intercept_dr_write;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept            = nested_vmcb->control.intercept;

	force_new_asid(&svm->vcpu);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of the guest */
		svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
	}

	/*
	 * We don't want a nested guest to be more powerful than the guest, so
	 * all intercepts are ORed
	 */
	svm->vmcb->control.intercept_cr_read |=
		nested_vmcb->control.intercept_cr_read;
	svm->vmcb->control.intercept_cr_write |=
		nested_vmcb->control.intercept_cr_write;
	svm->vmcb->control.intercept_dr_read |=
		nested_vmcb->control.intercept_dr_read;
	svm->vmcb->control.intercept_dr_write |=
		nested_vmcb->control.intercept_dr_write;
	svm->vmcb->control.intercept_exceptions |=
		nested_vmcb->control.intercept_exceptions;

	svm->vmcb->control.intercept |= nested_vmcb->control.intercept;

	svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	nested_svm_unmap(page);

	/* nested_vmcb is our indicator if nested SVM is activated */
	svm->nested.vmcb = vmcb_gpa;

	enable_gif(svm);

	return true;
}

/*
 * Copy the state handled by VMLOAD/VMSAVE (FS/GS/TR/LDTR plus the
 * syscall/sysenter MSR shadow fields) from one VMCB to another.
 */
static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

A
Avi Kivity 已提交
2135
static int vmload_interception(struct vcpu_svm *svm)
2136
{
2137
	struct vmcb *nested_vmcb;
2138
	struct page *page;
2139

2140 2141 2142 2143 2144 2145
	if (nested_svm_check_permissions(svm))
		return 1;

	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);

2146
	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2147 2148 2149 2150
	if (!nested_vmcb)
		return 1;

	nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2151
	nested_svm_unmap(page);
2152 2153 2154 2155

	return 1;
}

A
Avi Kivity 已提交
2156
static int vmsave_interception(struct vcpu_svm *svm)
2157
{
2158
	struct vmcb *nested_vmcb;
2159
	struct page *page;
2160

2161 2162 2163 2164 2165 2166
	if (nested_svm_check_permissions(svm))
		return 1;

	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);

2167
	nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2168 2169 2170 2171
	if (!nested_vmcb)
		return 1;

	nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2172
	nested_svm_unmap(page);
2173 2174 2175 2176

	return 1;
}

/* Handle an intercepted VMRUN: enter the nested guest described by rAX. */
static int vmrun_interception(struct vcpu_svm *svm)
{
	if (nested_svm_check_permissions(svm))
		return 1;

	/* VMRUN is a 3-byte opcode. */
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);

	if (!nested_svm_vmrun(svm))
		return 1;

	/* The MSR bitmap merge can fail on a bad guest read; unwind then. */
	if (!nested_svm_vmrun_msrpm(svm))
		goto failed;

	return 1;

failed:

	/* Report VMEXIT_INVALID-style failure back to L1 via a #VMEXIT. */
	svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1  = 0;
	svm->vmcb->control.exit_info_2  = 0;

	nested_svm_vmexit(svm);

	return 1;
}

A
Avi Kivity 已提交
2205
static int stgi_interception(struct vcpu_svm *svm)
2206 2207 2208 2209 2210 2211 2212
{
	if (nested_svm_check_permissions(svm))
		return 1;

	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);

2213
	enable_gif(svm);
2214 2215 2216 2217

	return 1;
}

A
Avi Kivity 已提交
2218
static int clgi_interception(struct vcpu_svm *svm)
2219 2220 2221 2222 2223 2224 2225
{
	if (nested_svm_check_permissions(svm))
		return 1;

	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);

2226
	disable_gif(svm);
2227 2228 2229 2230 2231 2232 2233 2234

	/* After a CLGI no interrupts should come */
	svm_clear_vintr(svm);
	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;

	return 1;
}

/* Handle an intercepted INVLPGA (flush a page for a specific ASID). */
static int invlpga_interception(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	/* rAX = linear address, rCX = ASID. */
	trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
			  vcpu->arch.regs[VCPU_REGS_RAX]);

	/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
	kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);

	/* INVLPGA is a 3-byte opcode. */
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	skip_emulated_instruction(&svm->vcpu);
	return 1;
}

2250 2251 2252 2253 2254 2255 2256 2257
static int skinit_interception(struct vcpu_svm *svm)
{
	trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);

	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
	return 1;
}

A
Avi Kivity 已提交
2258
static int invalid_op_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
2259
{
2260
	kvm_queue_exception(&svm->vcpu, UD_VECTOR);
A
Avi Kivity 已提交
2261 2262 2263
	return 1;
}

/*
 * Handle an intercepted task switch: decode the reason and the pending
 * event from exit_int_info, then hand off to the generic emulation.
 */
static int task_switch_interception(struct vcpu_svm *svm)
{
	u16 tss_selector;
	int reason;
	int int_type = svm->vmcb->control.exit_int_info &
		SVM_EXITINTINFO_TYPE_MASK;
	int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
	uint32_t type =
		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
	uint32_t idt_v =
		svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;

	/* exit_info_1 carries the target TSS selector. */
	tss_selector = (u16)svm->vmcb->control.exit_info_1;

	/* Decode why the task switch happened from exit_info_2. */
	if (svm->vmcb->control.exit_info_2 &
	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
		reason = TASK_SWITCH_IRET;
	else if (svm->vmcb->control.exit_info_2 &
		 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
		reason = TASK_SWITCH_JMP;
	else if (idt_v)
		reason = TASK_SWITCH_GATE;
	else
		reason = TASK_SWITCH_CALL;

	/*
	 * A switch through a task gate consumes the pending event; drop it
	 * from our queues so it is not re-injected afterwards.
	 */
	if (reason == TASK_SWITCH_GATE) {
		switch (type) {
		case SVM_EXITINTINFO_TYPE_NMI:
			svm->vcpu.arch.nmi_injected = false;
			break;
		case SVM_EXITINTINFO_TYPE_EXEPT:
			kvm_clear_exception_queue(&svm->vcpu);
			break;
		case SVM_EXITINTINFO_TYPE_INTR:
			kvm_clear_interrupt_queue(&svm->vcpu);
			break;
		default:
			break;
		}
	}

	/*
	 * Skip the instruction unless the switch was caused by a hardware
	 * event (then rIP must stay on the faulting instruction); software
	 * interrupts and INT3/INTO are skipped.
	 */
	if (reason != TASK_SWITCH_GATE ||
	    int_type == SVM_EXITINTINFO_TYPE_SOFT ||
	    (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
	     (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
		skip_emulated_instruction(&svm->vcpu);

	return kvm_task_switch(&svm->vcpu, tss_selector, reason);
}

A
Avi Kivity 已提交
2314
static int cpuid_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
2315
{
2316
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
R
Rusty Russell 已提交
2317
	kvm_emulate_cpuid(&svm->vcpu);
2318
	return 1;
A
Avi Kivity 已提交
2319 2320
}

/* The guest is about to execute IRET while NMIs are masked. */
static int iret_interception(struct vcpu_svm *svm)
{
	++svm->vcpu.stat.nmi_window_exits;
	/* One-shot: stop intercepting IRET again. */
	svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
	/* Record that an IRET is pending; see enable_nmi_window(). */
	svm->vcpu.arch.hflags |= HF_IRET_MASK;
	return 1;
}

A
Avi Kivity 已提交
2329
static int invlpg_interception(struct vcpu_svm *svm)
M
Marcelo Tosatti 已提交
2330
{
A
Avi Kivity 已提交
2331
	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
M
Marcelo Tosatti 已提交
2332 2333 2334 2335
		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
	return 1;
}

A
Avi Kivity 已提交
2336
static int emulate_on_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
2337
{
A
Avi Kivity 已提交
2338
	if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
2339
		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
A
Avi Kivity 已提交
2340 2341 2342
	return 1;
}

/* Handle a write to CR8 (TPR). */
static int cr8_write_interception(struct vcpu_svm *svm)
{
	struct kvm_run *kvm_run = svm->vcpu.run;

	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
	/* instruction emulation calls kvm_set_cr8() */
	emulate_instruction(&svm->vcpu, 0, 0, 0);
	if (irqchip_in_kernel(svm->vcpu.kvm)) {
		/* In-kernel APIC tracks TPR itself; stop intercepting CR8. */
		svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
		return 1;
	}
	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
		return 1;
	/* TPR was lowered: exit so userspace can re-evaluate interrupts. */
	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
	return 0;
}

/*
 * Read an MSR on behalf of the guest.  SVM-specific MSRs are served from
 * the VMCB / vcpu_svm state; everything else falls through to the common
 * x86 code.  Returns 0 on success, non-zero to raise #GP.
 */
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	switch (ecx) {
	case MSR_IA32_TSC: {
		u64 tsc_offset;

		/* Use L1's saved offset while running a nested guest. */
		if (is_nested(svm))
			tsc_offset = svm->nested.hsave->control.tsc_offset;
		else
			tsc_offset = svm->vmcb->control.tsc_offset;

		*data = tsc_offset + native_read_tsc();
		break;
	}
	case MSR_K6_STAR:
		*data = svm->vmcb->save.star;
		break;
#ifdef CONFIG_X86_64
	case MSR_LSTAR:
		*data = svm->vmcb->save.lstar;
		break;
	case MSR_CSTAR:
		*data = svm->vmcb->save.cstar;
		break;
	case MSR_KERNEL_GS_BASE:
		*data = svm->vmcb->save.kernel_gs_base;
		break;
	case MSR_SYSCALL_MASK:
		*data = svm->vmcb->save.sfmask;
		break;
#endif
	case MSR_IA32_SYSENTER_CS:
		*data = svm->vmcb->save.sysenter_cs;
		break;
	case MSR_IA32_SYSENTER_EIP:
		*data = svm->sysenter_eip;
		break;
	case MSR_IA32_SYSENTER_ESP:
		*data = svm->sysenter_esp;
		break;
	/*
	 * Nobody will change the following 5 values in the VMCB so we can
	 * safely return them on rdmsr. They will always be 0 until LBRV is
	 * implemented.
	 */
	case MSR_IA32_DEBUGCTLMSR:
		*data = svm->vmcb->save.dbgctl;
		break;
	case MSR_IA32_LASTBRANCHFROMIP:
		*data = svm->vmcb->save.br_from;
		break;
	case MSR_IA32_LASTBRANCHTOIP:
		*data = svm->vmcb->save.br_to;
		break;
	case MSR_IA32_LASTINTFROMIP:
		*data = svm->vmcb->save.last_excp_from;
		break;
	case MSR_IA32_LASTINTTOIP:
		*data = svm->vmcb->save.last_excp_to;
		break;
	case MSR_VM_HSAVE_PA:
		*data = svm->nested.hsave_msr;
		break;
	case MSR_VM_CR:
		*data = svm->nested.vm_cr_msr;
		break;
	case MSR_IA32_UCODE_REV:
		/* Report a fixed microcode revision. */
		*data = 0x01000065;
		break;
	default:
		return kvm_get_msr_common(vcpu, ecx, data);
	}
	return 0;
}

/* Handle an intercepted RDMSR: ECX selects the MSR, EDX:EAX get the value. */
static int rdmsr_interception(struct vcpu_svm *svm)
{
	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	u64 data;

	if (svm_get_msr(&svm->vcpu, ecx, &data)) {
		/* Unknown/invalid MSR: inject #GP, do not advance rIP. */
		trace_kvm_msr_read_ex(ecx);
		kvm_inject_gp(&svm->vcpu, 0);
	} else {
		trace_kvm_msr_read(ecx, data);

		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
		/* RDMSR is a 2-byte opcode. */
		svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
		skip_emulated_instruction(&svm->vcpu);
	}
	return 1;
}

2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int svm_dis, chg_mask;

	if (data & ~SVM_VM_CR_VALID_MASK)
		return 1;

	chg_mask = SVM_VM_CR_VALID_MASK;

	if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
		chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);

	svm->nested.vm_cr_msr &= ~chg_mask;
	svm->nested.vm_cr_msr |= (data & chg_mask);

	svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;

	/* check for svm_disable while efer.svme is set */
	if (svm_dis && (vcpu->arch.efer & EFER_SVME))
		return 1;

	return 0;
}

/*
 * Write an MSR on behalf of the guest.  SVM-specific MSRs update the
 * VMCB / vcpu_svm state; everything else falls through to the common
 * x86 code.  Returns 0 on success, non-zero to raise #GP.
 */
static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	switch (ecx) {
	case MSR_IA32_TSC: {
		u64 tsc_offset = data - native_read_tsc();
		u64 g_tsc_offset = 0;

		/*
		 * While nested, keep the L1<->L2 delta and adjust the saved
		 * L1 offset instead of clobbering it.
		 */
		if (is_nested(svm)) {
			g_tsc_offset = svm->vmcb->control.tsc_offset -
				       svm->nested.hsave->control.tsc_offset;
			svm->nested.hsave->control.tsc_offset = tsc_offset;
		}

		svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;

		break;
	}
	case MSR_K6_STAR:
		svm->vmcb->save.star = data;
		break;
#ifdef CONFIG_X86_64
	case MSR_LSTAR:
		svm->vmcb->save.lstar = data;
		break;
	case MSR_CSTAR:
		svm->vmcb->save.cstar = data;
		break;
	case MSR_KERNEL_GS_BASE:
		svm->vmcb->save.kernel_gs_base = data;
		break;
	case MSR_SYSCALL_MASK:
		svm->vmcb->save.sfmask = data;
		break;
#endif
	case MSR_IA32_SYSENTER_CS:
		svm->vmcb->save.sysenter_cs = data;
		break;
	case MSR_IA32_SYSENTER_EIP:
		svm->sysenter_eip = data;
		svm->vmcb->save.sysenter_eip = data;
		break;
	case MSR_IA32_SYSENTER_ESP:
		svm->sysenter_esp = data;
		svm->vmcb->save.sysenter_esp = data;
		break;
	case MSR_IA32_DEBUGCTLMSR:
		/* Writes are only meaningful with LBR virtualization. */
		if (!svm_has(SVM_FEATURE_LBRV)) {
			pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
					__func__, data);
			break;
		}
		if (data & DEBUGCTL_RESERVED_BITS)
			return 1;

		svm->vmcb->save.dbgctl = data;
		/* Bit 0 is the LBR enable bit. */
		if (data & (1ULL<<0))
			svm_enable_lbrv(svm);
		else
			svm_disable_lbrv(svm);
		break;
	case MSR_VM_HSAVE_PA:
		svm->nested.hsave_msr = data;
		break;
	case MSR_VM_CR:
		return svm_set_vm_cr(vcpu, data);
	case MSR_VM_IGNNE:
		pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
		break;
	default:
		return kvm_set_msr_common(vcpu, ecx, data);
	}
	return 0;
}

/* Handle an intercepted WRMSR: ECX selects the MSR, EDX:EAX hold the value. */
static int wrmsr_interception(struct vcpu_svm *svm)
{
	u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);


	/* WRMSR is a 2-byte opcode. */
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
	if (svm_set_msr(&svm->vcpu, ecx, data)) {
		/* Invalid write: inject #GP, do not advance rIP. */
		trace_kvm_msr_write_ex(ecx, data);
		kvm_inject_gp(&svm->vcpu, 0);
	} else {
		trace_kvm_msr_write(ecx, data);
		skip_emulated_instruction(&svm->vcpu);
	}
	return 1;
}

A
Avi Kivity 已提交
2575
static int msr_interception(struct vcpu_svm *svm)
A
Avi Kivity 已提交
2576
{
R
Rusty Russell 已提交
2577
	if (svm->vmcb->control.exit_info_1)
A
Avi Kivity 已提交
2578
		return wrmsr_interception(svm);
A
Avi Kivity 已提交
2579
	else
A
Avi Kivity 已提交
2580
		return rdmsr_interception(svm);
A
Avi Kivity 已提交
2581 2582
}

/* The virtual interrupt window opened: stop requesting VINTR exits. */
static int interrupt_window_interception(struct vcpu_svm *svm)
{
	struct kvm_run *kvm_run = svm->vcpu.run;

	svm_clear_vintr(svm);
	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
	/*
	 * If the user space waits to inject interrupts, exit as soon as
	 * possible
	 */
	if (!irqchip_in_kernel(svm->vcpu.kvm) &&
	    kvm_run->request_interrupt_window &&
	    !kvm_cpu_has_interrupt(&svm->vcpu)) {
		++svm->vcpu.stat.irq_window_exits;
		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
		return 0;
	}

	return 1;
}

2604 2605 2606 2607 2608 2609
static int pause_interception(struct vcpu_svm *svm)
{
	kvm_vcpu_on_spin(&(svm->vcpu));
	return 1;
}

/*
 * Dispatch table mapping SVM exit codes to their handlers.  Indexed by
 * exit_code in handle_exit(); a NULL or out-of-range entry is reported
 * to userspace as KVM_EXIT_UNKNOWN.
 */
static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
	[SVM_EXIT_READ_CR0]			= emulate_on_interception,
	[SVM_EXIT_READ_CR3]			= emulate_on_interception,
	[SVM_EXIT_READ_CR4]			= emulate_on_interception,
	[SVM_EXIT_READ_CR8]			= emulate_on_interception,
	[SVM_EXIT_CR0_SEL_WRITE]		= emulate_on_interception,
	[SVM_EXIT_WRITE_CR0]			= emulate_on_interception,
	[SVM_EXIT_WRITE_CR3]			= emulate_on_interception,
	[SVM_EXIT_WRITE_CR4]			= emulate_on_interception,
	[SVM_EXIT_WRITE_CR8]			= cr8_write_interception,
	[SVM_EXIT_READ_DR0]			= emulate_on_interception,
	[SVM_EXIT_READ_DR1]			= emulate_on_interception,
	[SVM_EXIT_READ_DR2]			= emulate_on_interception,
	[SVM_EXIT_READ_DR3]			= emulate_on_interception,
	[SVM_EXIT_READ_DR4]			= emulate_on_interception,
	[SVM_EXIT_READ_DR5]			= emulate_on_interception,
	[SVM_EXIT_READ_DR6]			= emulate_on_interception,
	[SVM_EXIT_READ_DR7]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR0]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR1]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR2]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR3]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR4]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR5]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR6]			= emulate_on_interception,
	[SVM_EXIT_WRITE_DR7]			= emulate_on_interception,
	[SVM_EXIT_EXCP_BASE + DB_VECTOR]	= db_interception,
	[SVM_EXIT_EXCP_BASE + BP_VECTOR]	= bp_interception,
	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
	[SVM_EXIT_EXCP_BASE + NM_VECTOR]	= nm_interception,
	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
	[SVM_EXIT_INTR]				= intr_interception,
	[SVM_EXIT_NMI]				= nmi_interception,
	[SVM_EXIT_SMI]				= nop_on_interception,
	[SVM_EXIT_INIT]				= nop_on_interception,
	[SVM_EXIT_VINTR]			= interrupt_window_interception,
	[SVM_EXIT_CPUID]			= cpuid_interception,
	[SVM_EXIT_IRET]				= iret_interception,
	[SVM_EXIT_INVD]				= emulate_on_interception,
	[SVM_EXIT_PAUSE]			= pause_interception,
	[SVM_EXIT_HLT]				= halt_interception,
	[SVM_EXIT_INVLPG]			= invlpg_interception,
	[SVM_EXIT_INVLPGA]			= invlpga_interception,
	[SVM_EXIT_IOIO]				= io_interception,
	[SVM_EXIT_MSR]				= msr_interception,
	[SVM_EXIT_TASK_SWITCH]			= task_switch_interception,
	[SVM_EXIT_SHUTDOWN]			= shutdown_interception,
	[SVM_EXIT_VMRUN]			= vmrun_interception,
	[SVM_EXIT_VMMCALL]			= vmmcall_interception,
	[SVM_EXIT_VMLOAD]			= vmload_interception,
	[SVM_EXIT_VMSAVE]			= vmsave_interception,
	[SVM_EXIT_STGI]				= stgi_interception,
	[SVM_EXIT_CLGI]				= clgi_interception,
	[SVM_EXIT_SKINIT]			= skinit_interception,
	[SVM_EXIT_WBINVD]			= emulate_on_interception,
	[SVM_EXIT_MONITOR]			= invalid_op_interception,
	[SVM_EXIT_MWAIT]			= invalid_op_interception,
	[SVM_EXIT_NPF]				= pf_interception,
};

A
Avi Kivity 已提交
2671
static int handle_exit(struct kvm_vcpu *vcpu)
A
Avi Kivity 已提交
2672
{
2673
	struct vcpu_svm *svm = to_svm(vcpu);
A
Avi Kivity 已提交
2674
	struct kvm_run *kvm_run = vcpu->run;
2675
	u32 exit_code = svm->vmcb->control.exit_code;
A
Avi Kivity 已提交
2676

2677
	trace_kvm_exit(exit_code, svm->vmcb->save.rip);
2678

2679 2680 2681 2682 2683 2684 2685
	if (unlikely(svm->nested.exit_required)) {
		nested_svm_vmexit(svm);
		svm->nested.exit_required = false;

		return 1;
	}

2686
	if (is_nested(svm)) {
2687 2688
		int vmexit;

2689 2690 2691 2692 2693 2694
		trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
					svm->vmcb->control.exit_info_1,
					svm->vmcb->control.exit_info_2,
					svm->vmcb->control.exit_int_info,
					svm->vmcb->control.exit_int_info_err);

2695 2696 2697 2698 2699 2700
		vmexit = nested_svm_exit_special(svm);

		if (vmexit == NESTED_EXIT_CONTINUE)
			vmexit = nested_svm_exit_handled(svm);

		if (vmexit == NESTED_EXIT_DONE)
2701 2702 2703
			return 1;
	}

2704 2705
	svm_complete_interrupts(svm);

2706
	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2707
		vcpu->arch.cr0 = svm->vmcb->save.cr0;
2708
	if (npt_enabled)
2709
		vcpu->arch.cr3 = svm->vmcb->save.cr3;
2710 2711 2712 2713 2714 2715 2716 2717

	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		kvm_run->fail_entry.hardware_entry_failure_reason
			= svm->vmcb->control.exit_code;
		return 0;
	}

2718
	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
2719
	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
2720
	    exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
A
Avi Kivity 已提交
2721 2722
		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
		       "exit_code 0x%x\n",
2723
		       __func__, svm->vmcb->control.exit_int_info,
A
Avi Kivity 已提交
2724 2725
		       exit_code);

2726
	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
J
Joe Perches 已提交
2727
	    || !svm_exit_handlers[exit_code]) {
A
Avi Kivity 已提交
2728
		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
2729
		kvm_run->hw.hardware_exit_reason = exit_code;
A
Avi Kivity 已提交
2730 2731 2732
		return 0;
	}

A
Avi Kivity 已提交
2733
	return svm_exit_handlers[exit_code](svm);
A
Avi Kivity 已提交
2734 2735 2736 2737 2738 2739
}

/* Re-mark the host TSS descriptor as available and reload TR. */
static void reload_tss(struct kvm_vcpu *vcpu)
{
	int cpu = raw_smp_processor_id();

	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	sd->tss_desc->type = 9; /* available 32/64-bit TSS */
	load_TR_desc();
}

/* Per-entry housekeeping: assign a fresh ASID if this CPU rolled over. */
static void pre_svm_run(struct vcpu_svm *svm)
{
	int cpu = raw_smp_processor_id();

	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);

	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
	/* FIXME: handle wraparound of asid_generation */
	if (svm->asid_generation != sd->asid_generation)
		new_asid(svm, sd);
}

2757 2758 2759 2760 2761 2762 2763 2764 2765
static void svm_inject_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
	vcpu->arch.hflags |= HF_NMI_MASK;
	svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
	++vcpu->stat.nmi_injections;
}
A
Avi Kivity 已提交
2766

2767
static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
A
Avi Kivity 已提交
2768 2769 2770
{
	struct vmcb_control_area *control;

2771
	trace_kvm_inj_virq(irq);
2772

2773
	++svm->vcpu.stat.irq_injections;
R
Rusty Russell 已提交
2774
	control = &svm->vmcb->control;
2775
	control->int_vector = irq;
A
Avi Kivity 已提交
2776 2777 2778 2779 2780
	control->int_ctl &= ~V_INTR_PRIO_MASK;
	control->int_ctl |= V_IRQ_MASK |
		((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
}

2781
static void svm_set_irq(struct kvm_vcpu *vcpu)
E
Eddie Dong 已提交
2782 2783 2784
{
	struct vcpu_svm *svm = to_svm(vcpu);

2785
	BUG_ON(!(gif_set(svm)));
2786

2787 2788
	svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
E
Eddie Dong 已提交
2789 2790
}

2791
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
2792 2793 2794
{
	struct vcpu_svm *svm = to_svm(vcpu);

2795 2796 2797
	if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
		return;

2798
	if (irr == -1)
2799 2800
		return;

2801 2802 2803
	if (tpr >= irr)
		svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
}
2804

2805 2806 2807 2808 2809 2810
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
		!(svm->vcpu.arch.hflags & HF_NMI_MASK);
2811 2812
}

/* Return true while NMIs are masked (between injection and IRET). */
static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
}

/* Set or clear NMI masking; the IRET intercept tracks the mask state. */
static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (masked) {
		svm->vcpu.arch.hflags |= HF_NMI_MASK;
		svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
	} else {
		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
		svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
	}
}

/* Report whether an external interrupt can be injected right now. */
static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	int ret;

	/* No injection with GIF clear or inside an interrupt shadow. */
	if (!gif_set(svm) ||
	     (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
		return 0;

	ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);

	/* While nested with V_INTR_MASKING, L1 owns interrupt delivery. */
	if (is_nested(svm))
		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);

	return ret;
}

/* Request a VINTR exit as soon as the guest can take an interrupt. */
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
	 * 1, because that's a separate STGI/VMRUN intercept.  The next time we
	 * get that intercept, this function will be called again though and
	 * we'll get the vintr intercept.
	 */
	if (gif_set(svm) && nested_svm_intr(svm)) {
		svm_set_vintr(svm);
		svm_inject_irq(svm, 0x0);
	}
}

/* Arrange to be notified as soon as an NMI can be injected. */
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
	    == HF_NMI_MASK)
		return; /* IRET will cause a vm exit */

	/*
	 * Something prevents NMI from been injected. Single step over possible
	 * problem (IRET or exception injection or interrupt shadow)
	 */
	if (gif_set(svm) && nested_svm_nmi(svm)) {
		svm->nmi_singlestep = true;
		svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
		update_db_intercept(vcpu);
	}
}

/*
 * No per-VM TSS address setup is required on SVM; always succeed.
 */
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
	return 0;
}

/*
 * Flush the guest TLB by assigning a fresh ASID on the next VMRUN
 * (see force_new_asid).
 */
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
	force_new_asid(vcpu);
}

/* Nothing to prepare on SVM before switching to guest context. */
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
}

2900 2901 2902 2903
static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

2904 2905 2906
	if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
		return;

2907 2908
	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
2909
		kvm_set_cr8(vcpu, cr8);
2910 2911 2912
	}
}

2913 2914 2915 2916 2917
static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr8;

2918 2919 2920
	if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
		return;

2921 2922 2923 2924 2925
	cr8 = kvm_get_cr8(vcpu);
	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
}

/*
 * After a #VMEXIT, re-queue any event that was being delivered when the
 * exit occurred (reported in EXITINTINFO) so it is re-injected on the
 * next entry.  Also clears stale NMI masking once an IRET has completed
 * (HF_IRET_MASK) and empties the exception/interrupt queues first.
 */
static void svm_complete_interrupts(struct vcpu_svm *svm)
{
	u8 vector;
	int type;
	u32 exitintinfo = svm->vmcb->control.exit_int_info;
	/* Length of an INT3 we emulated earlier; consumed below. */
	unsigned int3_injected = svm->int3_injected;

	svm->int3_injected = 0;

	/* The guest finished its IRET: NMI blocking is over. */
	if (svm->vcpu.arch.hflags & HF_IRET_MASK)
		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);

	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/* Nothing was in flight at exit time. */
	if (!(exitintinfo & SVM_EXITINTINFO_VALID))
		return;

	vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
	type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;

	switch (type) {
	case SVM_EXITINTINFO_TYPE_NMI:
		svm->vcpu.arch.nmi_injected = true;
		break;
	case SVM_EXITINTINFO_TYPE_EXEPT:
		/* Nested hypervisor handles its own exception reinjection. */
		if (is_nested(svm))
			break;
		/*
		 * In case of software exceptions, do not reinject the vector,
		 * but re-execute the instruction instead. Rewind RIP first
		 * if we emulated INT3 before.
		 */
		if (kvm_exception_is_soft(vector)) {
			if (vector == BP_VECTOR && int3_injected &&
			    kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
				kvm_rip_write(&svm->vcpu,
					      kvm_rip_read(&svm->vcpu) -
					      int3_injected);
			break;
		}
		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
			u32 err = svm->vmcb->control.exit_int_info_err;
			kvm_queue_exception_e(&svm->vcpu, vector, err);

		} else
			kvm_queue_exception(&svm->vcpu, vector);
		break;
	case SVM_EXITINTINFO_TYPE_INTR:
		kvm_queue_interrupt(&svm->vcpu, vector, false);
		break;
	default:
		break;
	}
}

/*
 * Register-name prefix for the inline assembly in svm_vcpu_run():
 * "r" on 64-bit (%rax, %rbx, ...) and "e" on 32-bit (%eax, %ebx, ...).
 * Undefined again right after the function.
 */
#ifdef CONFIG_X86_64
#define R "r"
#else
#define R "e"
#endif

/*
 * The world switch: load guest GPRs, execute VMLOAD/VMRUN/VMSAVE, then
 * restore host state and pull the exit state back out of the VMCB.
 * RAX/RSP/RIP travel through the VMCB save area rather than through the
 * GPR array; the remaining GPRs are moved by the inline assembly below.
 */
static void svm_vcpu_run(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u16 fs_selector;
	u16 gs_selector;
	u16 ldt_selector;

	/*
	 * A vmexit emulation is required before the vcpu can be executed
	 * again.
	 */
	if (unlikely(svm->nested.exit_required))
		return;

	/* These three registers live in the VMCB save area. */
	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
	svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];

	pre_svm_run(svm);

	sync_lapic_to_cr8(vcpu);

	/* Save host MSRs and segment selectors clobbered by the switch. */
	save_host_msrs(vcpu);
	fs_selector = kvm_read_fs();
	gs_selector = kvm_read_gs();
	ldt_selector = kvm_read_ldt();
	svm->vmcb->save.cr2 = vcpu->arch.cr2;
	/* required for live migration with NPT */
	if (npt_enabled)
		svm->vmcb->save.cr3 = vcpu->arch.cr3;

	/* GIF=0 keeps interrupts pended; safe to re-enable host IRQ flag. */
	clgi();

	local_irq_enable();

	asm volatile (
		"push %%"R"bp; \n\t"
		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
		"mov %c[rsi](%[svm]), %%"R"si \n\t"
		"mov %c[rdi](%[svm]), %%"R"di \n\t"
		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
#ifdef CONFIG_X86_64
		"mov %c[r8](%[svm]),  %%r8  \n\t"
		"mov %c[r9](%[svm]),  %%r9  \n\t"
		"mov %c[r10](%[svm]), %%r10 \n\t"
		"mov %c[r11](%[svm]), %%r11 \n\t"
		"mov %c[r12](%[svm]), %%r12 \n\t"
		"mov %c[r13](%[svm]), %%r13 \n\t"
		"mov %c[r14](%[svm]), %%r14 \n\t"
		"mov %c[r15](%[svm]), %%r15 \n\t"
#endif

		/* Enter guest mode */
		"push %%"R"ax \n\t"
		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
		__ex(SVM_VMLOAD) "\n\t"
		__ex(SVM_VMRUN) "\n\t"
		__ex(SVM_VMSAVE) "\n\t"
		"pop %%"R"ax \n\t"

		/* Save guest registers, load host registers */
		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
#ifdef CONFIG_X86_64
		"mov %%r8,  %c[r8](%[svm]) \n\t"
		"mov %%r9,  %c[r9](%[svm]) \n\t"
		"mov %%r10, %c[r10](%[svm]) \n\t"
		"mov %%r11, %c[r11](%[svm]) \n\t"
		"mov %%r12, %c[r12](%[svm]) \n\t"
		"mov %%r13, %c[r13](%[svm]) \n\t"
		"mov %%r14, %c[r14](%[svm]) \n\t"
		"mov %%r15, %c[r15](%[svm]) \n\t"
#endif
		"pop %%"R"bp"
		:
		: [svm]"a"(svm),
		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
		  [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
		  [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
		  [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
		  [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
		  [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
		  [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
#ifdef CONFIG_X86_64
		  , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
		  [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
		  [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
		  [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
		  [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
		  [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
		  [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
#endif
		: "cc", "memory"
		, R"bx", R"cx", R"dx", R"si", R"di"
#ifdef CONFIG_X86_64
		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
#endif
		);

	/* Pull exit state out of the VMCB. */
	vcpu->arch.cr2 = svm->vmcb->save.cr2;
	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;

	/* Restore host segment/MSR state saved above. */
	kvm_load_fs(fs_selector);
	kvm_load_gs(gs_selector);
	kvm_load_ldt(ldt_selector);
	load_host_msrs(vcpu);

	reload_tss(vcpu);

	local_irq_disable();

	stgi();

	sync_cr8_to_lapic(vcpu);

	svm->next_rip = 0;

	/* With NPT the cached PDPTRs may be stale after the run. */
	if (npt_enabled) {
		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
	}
}

#undef R

A
Avi Kivity 已提交
3123 3124
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
3125 3126
	struct vcpu_svm *svm = to_svm(vcpu);

3127 3128 3129 3130 3131 3132
	if (npt_enabled) {
		svm->vmcb->control.nested_cr3 = root;
		force_new_asid(vcpu);
		return;
	}

3133
	svm->vmcb->save.cr3 = root;
A
Avi Kivity 已提交
3134 3135 3136 3137 3138
	force_new_asid(vcpu);
}

static int is_disabled(void)
{
3139 3140 3141 3142 3143 3144
	u64 vm_cr;

	rdmsrl(MSR_VM_CR, vm_cr);
	if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
		return 1;

A
Avi Kivity 已提交
3145 3146 3147
	return 0;
}

static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	/*
	 * Patch in the VMMCALL instruction:
	 * (opcode bytes 0f 01 d9)
	 */
	hypercall[0] = 0x0f;
	hypercall[1] = 0x01;
	hypercall[2] = 0xd9;
}

/* No extra per-CPU compatibility checks are needed; always report 0. */
static void svm_check_processor_compat(void *rtn)
{
	*(int *)rtn = 0;
}

/* SVM does not provide the accelerated-TPR facility this hook asks about. */
static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}

/*
 * Paging depth used for nested page tables: 4-level on 64-bit hosts,
 * 3-level PAE on 32-bit hosts.
 */
static int get_npt_level(void)
{
#ifdef CONFIG_X86_64
	return PT64_ROOT_LEVEL;
#else
	return PT32E_ROOT_LEVEL;
#endif
}

/* SVM forces no memory-type bits on guest mappings; always 0. */
static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	return 0;
}

/* Nothing to adjust on SVM when the guest's CPUID configuration changes. */
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}

/*
 * Human-readable names for SVM exit codes, used by the kvm_exit
 * tracepoint.  Terminated by a { -1, NULL } sentinel.
 */
static const struct trace_print_flags svm_exit_reasons_str[] = {
	{ SVM_EXIT_READ_CR0,			"read_cr0" },
	{ SVM_EXIT_READ_CR3,			"read_cr3" },
	{ SVM_EXIT_READ_CR4,			"read_cr4" },
	{ SVM_EXIT_READ_CR8,			"read_cr8" },
	{ SVM_EXIT_WRITE_CR0,			"write_cr0" },
	{ SVM_EXIT_WRITE_CR3,			"write_cr3" },
	{ SVM_EXIT_WRITE_CR4,			"write_cr4" },
	{ SVM_EXIT_WRITE_CR8,			"write_cr8" },
	{ SVM_EXIT_READ_DR0,			"read_dr0" },
	{ SVM_EXIT_READ_DR1,			"read_dr1" },
	{ SVM_EXIT_READ_DR2,			"read_dr2" },
	{ SVM_EXIT_READ_DR3,			"read_dr3" },
	{ SVM_EXIT_WRITE_DR0,			"write_dr0" },
	{ SVM_EXIT_WRITE_DR1,			"write_dr1" },
	{ SVM_EXIT_WRITE_DR2,			"write_dr2" },
	{ SVM_EXIT_WRITE_DR3,			"write_dr3" },
	{ SVM_EXIT_WRITE_DR5,			"write_dr5" },
	{ SVM_EXIT_WRITE_DR7,			"write_dr7" },
	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" },
	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" },
	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" },
	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" },
	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" },
	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" },
	{ SVM_EXIT_INTR,			"interrupt" },
	{ SVM_EXIT_NMI,				"nmi" },
	{ SVM_EXIT_SMI,				"smi" },
	{ SVM_EXIT_INIT,			"init" },
	{ SVM_EXIT_VINTR,			"vintr" },
	{ SVM_EXIT_CPUID,			"cpuid" },
	{ SVM_EXIT_INVD,			"invd" },
	{ SVM_EXIT_HLT,				"hlt" },
	{ SVM_EXIT_INVLPG,			"invlpg" },
	{ SVM_EXIT_INVLPGA,			"invlpga" },
	{ SVM_EXIT_IOIO,			"io" },
	{ SVM_EXIT_MSR,				"msr" },
	{ SVM_EXIT_TASK_SWITCH,			"task_switch" },
	{ SVM_EXIT_SHUTDOWN,			"shutdown" },
	{ SVM_EXIT_VMRUN,			"vmrun" },
	{ SVM_EXIT_VMMCALL,			"hypercall" },
	{ SVM_EXIT_VMLOAD,			"vmload" },
	{ SVM_EXIT_VMSAVE,			"vmsave" },
	{ SVM_EXIT_STGI,			"stgi" },
	{ SVM_EXIT_CLGI,			"clgi" },
	{ SVM_EXIT_SKINIT,			"skinit" },
	{ SVM_EXIT_WBINVD,			"wbinvd" },
	{ SVM_EXIT_MONITOR,			"monitor" },
	{ SVM_EXIT_MWAIT,			"mwait" },
	{ SVM_EXIT_NPF,				"npf" },
	{ -1, NULL }
};

/* Largest page level supported for guest mappings (1GB / PDPE level). */
static int svm_get_lpage_level(void)
{
	return PT_PDPE_LEVEL;
}

/* RDTSCP exposure to guests is not supported by this backend. */
static bool svm_rdtscp_supported(void)
{
	return false;
}

3250 3251 3252 3253 3254
static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
3255 3256 3257
	if (is_nested(svm))
		svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR;
	update_cr0_intercept(svm);
3258 3259
}

/* SVM implementation of the kvm_x86_ops backend callback table. */
static struct kvm_x86_ops svm_x86_ops = {
	/* Hardware probing, setup and teardown. */
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.check_processor_compatibility = svm_check_processor_compat,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,
	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,

	/* vcpu lifecycle. */
	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,
	.vcpu_reset = svm_vcpu_reset,

	.prepare_guest_switch = svm_prepare_guest_switch,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,

	/* Guest register / control-register state accessors. */
	.set_guest_debug = svm_guest_debug,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.get_dr = svm_get_dr,
	.set_dr = svm_set_dr,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,
	.fpu_activate = svm_fpu_activate,
	.fpu_deactivate = svm_fpu_deactivate,

	.tlb_flush = svm_flush_tlb,

	/* Guest entry/exit and event injection. */
	.run = svm_vcpu_run,
	.handle_exit = handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.set_irq = svm_set_irq,
	.set_nmi = svm_inject_nmi,
	.queue_exception = svm_queue_exception,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = enable_nmi_window,
	.enable_irq_window = enable_irq_window,
	.update_cr8_intercept = update_cr8_intercept,

	/* Memory management hooks. */
	.set_tss_addr = svm_set_tss_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.exit_reasons_str = svm_exit_reasons_str,
	.get_lpage_level = svm_get_lpage_level,

	.cpuid_update = svm_cpuid_update,

	.rdtscp_supported = svm_rdtscp_supported,
};

/* Module entry: register this SVM backend with the generic KVM core. */
static int __init svm_init(void)
{
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
			      THIS_MODULE);
}

/* Module exit: unregister from the generic KVM core. */
static void __exit svm_exit(void)
{
	kvm_exit();
}

module_init(svm_init)
module_exit(svm_exit)