svm.h 14.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#ifndef __SVM_SVM_H
#define __SVM_SVM_H

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>

#include <asm/svm.h>

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_TSC_AUX,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

34
#define MAX_DIRECT_ACCESS_MSRS	15
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
#define MSRPM_OFFSETS	16
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;

enum {
	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
			    pause filter count */
	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
	VMCB_ASID,	 /* ASID */
	VMCB_INTR,	 /* int_ctl, int_vector */
	VMCB_NPT,        /* npt_en, nCR3, gPAT */
	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
	VMCB_DR,         /* DR6, DR7 */
	VMCB_DT,         /* GDT, IDT */
	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
	VMCB_CR2,        /* CR2 only */
	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_AVIC,       /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
			  * AVIC PHYSICAL_TABLE pointer,
			  * AVIC LOGICAL_TABLE pointer
			  */
	VMCB_DIRTY_MAX,
};

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))

struct kvm_sev_info {
	bool active;		/* SEV enabled guest */
64
	bool es_active;		/* SEV-ES enabled guest */
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
	unsigned int asid;	/* ASID used for this guest */
	unsigned int handle;	/* SEV firmware handle */
	int fd;			/* SEV device fd */
	unsigned long pages_locked; /* Number of pages locked */
	struct list_head regions_list;  /* List of registered regions */
};

struct kvm_svm {
	struct kvm kvm;

	/* Struct members for AVIC */
	u32 avic_vm_id;
	struct page *avic_logical_id_table_page;
	struct page *avic_physical_id_table_page;
	struct hlist_node hnode;

	struct kvm_sev_info sev_info;
};

struct kvm_vcpu;

86
struct svm_nested_state {
87 88 89
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
90
	u64 vmcb12_gpa;
91 92 93 94

	/* These are the merged vectors */
	u32 *msrpm;

95 96 97 98
	/* A VMRUN has started but has not yet been performed, so
	 * we cannot inject a nested vmexit yet.  */
	bool nested_run_pending;

99 100
	/* cache for control fields of the guest */
	struct vmcb_control_area ctl;
101 102

	bool initialized;
103 104 105 106 107 108 109
};

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
C
Cathy Avery 已提交
110
	u32 asid;
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t tsc_aux;

	u64 msr_decfg;

	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u64 spec_ctrl;
	/*
	 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
	 * translated into the appropriate L2_CFG bits on the host to
	 * perform speculative control.
	 */
	u64 virt_spec_ctrl;

	u32 *msrpm;

	ulong nmi_iret_rip;

140
	struct svm_nested_state nested;
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164

	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	unsigned int3_injected;
	unsigned long int3_rip;

	/* cached guest cpuid flags for faster access */
	bool nrips_enabled	: 1;

	u32 ldr_reg;
	u32 dfr_reg;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	bool avic_is_running;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when update the vcpu affinity. This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;
165 166 167 168 169 170

	/* Save desired MSR intercept (read: pass-through) state */
	struct {
		DECLARE_BITMAP(read, MAX_DIRECT_ACCESS_MSRS);
		DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
	} shadow_msr_intercept;
171 172 173 174

	/* SEV-ES support */
	struct vmcb_save_area *vmsa;
	struct ghcb *ghcb;
175 176
};

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
struct svm_cpu_data {
	int cpu;

	u64 asid_generation;
	u32 max_asid;
	u32 next_asid;
	u32 min_asid;
	struct kvm_ldttss_desc *tss_desc;

	struct page *save_area;
	struct vmcb *current_vmcb;

	/* index = sev_asid, value = vmcb pointer */
	struct vmcb **sev_vmcbs;
};

DECLARE_PER_CPU(struct svm_cpu_data *, svm_data);

195 196
void recalc_intercepts(struct vcpu_svm *svm);

197 198 199 200 201
static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_svm, kvm);
}

202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
static inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->active;
#else
	return false;
#endif
}

static inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev_guest(kvm) && sev->es_active;
#else
	return false;
#endif
}

224
static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
225 226 227 228
{
	vmcb->control.clean = 0;
}

229
static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
230 231 232 233 234
{
	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
			       & ~VMCB_ALWAYS_DIRTY_MASK;
}

235
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
{
	vmcb->control.clean &= ~(1 << bit);
}

static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_svm, vcpu);
}

static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
{
	if (is_guest_mode(&svm->vcpu))
		return svm->nested.hsave;
	else
		return svm->vmcb;
}

253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	__set_bit(bit, (unsigned long *)&control->intercepts);
}

static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	__clear_bit(bit, (unsigned long *)&control->intercepts);
}

static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
{
	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
	return test_bit(bit, (unsigned long *)&control->intercepts);
}

271 272 273 274
static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	if (!sev_es_guest(svm->vcpu.kvm)) {
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
	}

292 293
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
294 295 296 297 298 299 300 301

	recalc_intercepts(svm);
}

static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

302
	vmcb->control.intercepts[INTERCEPT_DR] = 0;
303

304 305 306 307 308 309
	/* DR7 access must remain intercepted for an SEV-ES guest */
	if (sev_es_guest(svm->vcpu.kvm)) {
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
		vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
	}

310 311 312
	recalc_intercepts(svm);
}

313
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
314 315 316
{
	struct vmcb *vmcb = get_host_vmcb(svm);

317 318
	WARN_ON_ONCE(bit >= 32);
	vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
319 320 321 322

	recalc_intercepts(svm);
}

323
static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
324 325 326
{
	struct vmcb *vmcb = get_host_vmcb(svm);

327 328
	WARN_ON_ONCE(bit >= 32);
	vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
329 330 331 332

	recalc_intercepts(svm);
}

333
static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
334 335 336
{
	struct vmcb *vmcb = get_host_vmcb(svm);

337
	vmcb_set_intercept(&vmcb->control, bit);
338 339 340 341

	recalc_intercepts(svm);
}

342
static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
343 344 345
{
	struct vmcb *vmcb = get_host_vmcb(svm);

346
	vmcb_clr_intercept(&vmcb->control, bit);
347 348 349 350

	recalc_intercepts(svm);
}

351
static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
352
{
353
	return vmcb_is_intercept(&svm->vmcb->control, bit);
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
}

static inline bool vgif_enabled(struct vcpu_svm *svm)
{
	return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
}

static inline void enable_gif(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		svm->vmcb->control.int_ctl |= V_GIF_MASK;
	else
		svm->vcpu.arch.hflags |= HF_GIF_MASK;
}

static inline void disable_gif(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}

static inline bool gif_set(struct vcpu_svm *svm)
{
	if (vgif_enabled(svm))
		return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
	else
		return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}

/* svm.c */
386 387
#define MSR_CR3_LEGACY_RESERVED_MASK		0xfe7U
#define MSR_CR3_LEGACY_PAE_RESERVED_MASK	0x7U
388
#define MSR_CR3_LONG_MBZ_MASK			0xfff0000000000000U
389
#define MSR_INVALID				0xffffffffU
390

391 392 393
extern int sev;
extern int sev_es;

394
u32 svm_msrpm_offset(u32 msr);
395 396 397 398
u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);

399
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
400
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
401
void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
402
void svm_flush_tlb(struct kvm_vcpu *vcpu);
403
void disable_nmi_singlestep(struct vcpu_svm *svm);
404 405 406
bool svm_smi_blocked(struct kvm_vcpu *vcpu);
bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
P
Paolo Bonzini 已提交
407
void svm_set_gif(struct vcpu_svm *svm, bool value);
408 409 410 411 412 413 414

/* nested.c */

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

415
static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
416
{
P
Paolo Bonzini 已提交
417 418 419
	struct vcpu_svm *svm = to_svm(vcpu);

	return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK);
420 421
}

422 423
static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
{
424
	return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
425 426
}

427 428
static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
{
429
	return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
430 431
}

432 433
static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
{
434
	return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
435 436
}

437 438
int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			 struct vmcb *nested_vmcb);
439
void svm_leave_nested(struct vcpu_svm *svm);
440 441
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
442 443 444 445 446 447 448 449
int nested_svm_vmrun(struct vcpu_svm *svm);
void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
int nested_svm_vmexit(struct vcpu_svm *svm);
int nested_svm_exit_handled(struct vcpu_svm *svm);
int nested_svm_check_permissions(struct vcpu_svm *svm);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
450
void sync_nested_vmcb_control(struct vcpu_svm *svm);
451

452 453
extern struct kvm_x86_nested_ops svm_nested_ops;

454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/* avic.c */

#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK	(0xFF)
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT			31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)

#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	(0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)

#define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL

extern int avic;

static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
472
	vmcb_mark_dirty(svm->vmcb, VMCB_AVIC);
473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
}

static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 *entry = svm->avic_physical_id_cache;

	if (!entry)
		return false;

	return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
}

int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm);
void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
int avic_incomplete_ipi_interception(struct vcpu_svm *svm);
int avic_unaccelerated_access_interception(struct vcpu_svm *svm);
int avic_init_vcpu(struct vcpu_svm *svm);
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_post_state_restore(struct kvm_vcpu *vcpu);
void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
bool svm_check_apicv_inhibit_reasons(ulong bit);
void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate);
void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr);
int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
		       uint32_t guest_irq, bool set);
void svm_vcpu_blocking(struct kvm_vcpu *vcpu);
void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);

511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
/* sev.c */

extern unsigned int max_sev_asid;

static inline bool svm_sev_enabled(void)
{
	return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
}

void sev_vm_destroy(struct kvm *kvm);
int svm_mem_enc_op(struct kvm *kvm, void __user *argp);
int svm_register_enc_region(struct kvm *kvm,
			    struct kvm_enc_region *range);
int svm_unregister_enc_region(struct kvm *kvm,
			      struct kvm_enc_region *range);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
527
void __init sev_hardware_setup(void);
528
void sev_hardware_teardown(void);
529
void sev_free_vcpu(struct kvm_vcpu *vcpu);
530

531
#endif