cpuid.c 27.4 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
A
Avi Kivity 已提交
2 3 4 5 6 7 8 9 10 11 12
/*
 * Kernel-based Virtual Machine driver for Linux
 * cpuid support routines
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 * Copyright IBM Corporation, 2008
 */

#include <linux/kvm_host.h>
13
#include <linux/export.h>
14 15
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
16 17
#include <linux/sched/stat.h>

18
#include <asm/processor.h>
A
Avi Kivity 已提交
19
#include <asm/user.h>
20
#include <asm/fpu/xstate.h>
A
Avi Kivity 已提交
21 22 23 24
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
#include "trace.h"
25
#include "pmu.h"
A
Avi Kivity 已提交
26

27
/*
 * Compute the XSAVE buffer size needed for the extended state components
 * enabled in @xstate_bv.  In the compacted format components are packed
 * back to back starting after the header; in the standard format each
 * component sits at its architectural offset (CPUID.0xD.<i>.EBX).
 */
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
	u32 size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
	u64 remaining = xstate_bv & XFEATURE_MASK_EXTEND;
	int bit;

	for (bit = 0; remaining; remaining >>= 1, bit++) {
		u32 eax, ebx, ecx, edx, base;

		if (!(remaining & 0x1))
			continue;

		cpuid_count(0xD, bit, &eax, &ebx, &ecx, &edx);

		/* EAX is the component's size, EBX its standard offset. */
		base = compacted ? size : ebx;
		size = max(size, base + eax);
	}

	return size;
}

48
#define F feature_bit
49

50
/*
 * Recompute vCPU state that is derived from the guest CPUID table and
 * from current guest register state (CR4, APIC base, MISC_ENABLE).
 * Called after the table or the relevant registers change.
 *
 * Returns 0 on success, -EINVAL if the advertised virtual address width
 * is one the canonical-address checks cannot handle.
 */
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;
	struct kvm_lapic *apic = vcpu->arch.apic;

	/* Without a leaf 1 there is nothing to derive; not an error. */
	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	if (!best)
		return 0;

	/* Update OSXSAVE bit (mirrors the guest's CR4.OSXSAVE). */
	/* NOTE(review): best->function == 0x1 is redundant after a lookup
	 * by function 1 — kept as written. */
	if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
		cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));

	/* APIC feature bit tracks the enable bit in the APIC base MSR. */
	cpuid_entry_change(best, X86_FEATURE_APIC,
			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);

	/* Allow TSC-deadline timer mode only if advertised to the guest. */
	if (apic) {
		if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
			apic->lapic_timer.timer_mode_mask = 3 << 17;
		else
			apic->lapic_timer.timer_mode_mask = 1 << 17;
	}

	/* OSPKE mirrors the guest's CR4.PKE. */
	best = kvm_find_cpuid_entry(vcpu, 7, 0);
	if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
		cpuid_entry_change(best, X86_FEATURE_OSPKE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_PKE));

	/*
	 * Derive the guest's permitted XCR0 and the standard-format xstate
	 * size; the latter is also written back into leaf 0xD.0 EBX.
	 */
	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
	if (!best) {
		vcpu->arch.guest_supported_xcr0 = 0;
		vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
	} else {
		vcpu->arch.guest_supported_xcr0 =
			(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
		vcpu->arch.guest_xstate_size = best->ebx =
			xstate_required_size(vcpu->arch.xcr0, false);
	}

	/* Leaf 0xD.1 EBX is the compacted-format size when XSAVES/XSAVEC. */
	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

	/*
	 * The existing code assumes virtual address is 48-bit or 57-bit in the
	 * canonical address checks; exit if it is ever changed.
	 */
	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
	if (best) {
		int vaddr_bits = (best->eax & 0xff00) >> 8;

		if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
			return -EINVAL;
	}

	/* PV_UNHALT makes no sense when HLT is not intercepted. */
	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

	/* Unless quirked off, MWAIT availability tracks MISC_ENABLE. */
	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
		if (best)
			cpuid_entry_change(best, X86_FEATURE_MWAIT,
					   vcpu->arch.ia32_misc_enable_msr &
					   MSR_IA32_MISC_ENABLE_MWAIT);
	}

	/* Update physical-address width */
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
	kvm_mmu_reset_context(vcpu);

	kvm_pmu_refresh(vcpu);
	return 0;
}

/* Report whether the host has EFER.NX enabled (0 if the MSR is absent). */
static int is_efer_nx(void)
{
	unsigned long long host_efer = 0;

	rdmsrl_safe(MSR_EFER, &host_efer);

	return host_efer & EFER_NX;
}

static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_cpuid_entry2 *e, *entry;

	entry = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
149
	if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
150
		cpuid_entry_clear(entry, X86_FEATURE_NX);
A
Avi Kivity 已提交
151 152 153 154
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
/*
 * Return the guest's physical address width, CPUID.0x80000008.EAX[7:0],
 * or the architectural default of 36 bits when the leaf is absent.
 */
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
	if (best && best->eax >= 0x80000008) {
		best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
		if (best)
			return best->eax & 0xff;
	}

	return 36;
}
EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);

A
Avi Kivity 已提交
170 171 172 173 174 175
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
			     struct kvm_cpuid *cpuid,
			     struct kvm_cpuid_entry __user *entries)
{
	struct kvm_cpuid_entry *cpuid_entries = NULL;
	int r, i;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;

	r = -ENOMEM;
	if (cpuid->nent) {
		cpuid_entries =
			vmalloc(array_size(sizeof(struct kvm_cpuid_entry),
					   cpuid->nent));
		if (!cpuid_entries)
			goto out;

		r = -EFAULT;
		if (copy_from_user(cpuid_entries, entries,
				   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
			goto out;
	}

	/* Widen each legacy entry into the kvm_cpuid_entry2 format. */
	for (i = 0; i < cpuid->nent; i++) {
		struct kvm_cpuid_entry2 *dst = &vcpu->arch.cpuid_entries[i];
		struct kvm_cpuid_entry *src = &cpuid_entries[i];

		dst->function = src->function;
		dst->eax = src->eax;
		dst->ebx = src->ebx;
		dst->ecx = src->ecx;
		dst->edx = src->edx;
		dst->index = 0;
		dst->flags = 0;
		dst->padding[0] = 0;
		dst->padding[1] = 0;
		dst->padding[2] = 0;
	}
	vcpu->arch.cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	r = kvm_update_cpuid(vcpu);

out:
	vfree(cpuid_entries);
	return r;
}

/* Install a guest CPUID table supplied in the modern entry2 format. */
int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
			      struct kvm_cpuid2 *cpuid,
			      struct kvm_cpuid_entry2 __user *entries)
{
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		return -E2BIG;

	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
		return -EFAULT;

	vcpu->arch.cpuid_nent = cpuid->nent;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);

	return kvm_update_cpuid(vcpu);
}

/*
 * Copy the vCPU's CPUID table out to userspace.  On failure (including a
 * too-small buffer) cpuid->nent is updated to the required entry count.
 */
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
			      struct kvm_cpuid2 *cpuid,
			      struct kvm_cpuid_entry2 __user *entries)
{
	int r = 0;

	if (cpuid->nent < vcpu->arch.cpuid_nent)
		r = -E2BIG;
	else if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
			      vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
		r = -EFAULT;

	if (r)
		cpuid->nent = vcpu->arch.cpuid_nent;

	return r;
}

257 258 259 260 261 262 263
/*
 * Scratch state for building a CPUID table for userspace: @entries is
 * the output buffer, @maxnent its capacity, and @nent the number of
 * entries filled so far.
 */
struct kvm_cpuid_array {
	struct kvm_cpuid_entry2 *entries;
	const int maxnent;	/* fixed at initialization */
	int nent;
};

/*
 * Append one entry to @array, populated by executing CPUID on the host
 * for (@function, @index), and tag it with the flags KVM's lookup code
 * needs.  Returns NULL when the array is full.
 */
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
					      u32 function, u32 index)
{
	struct kvm_cpuid_entry2 *entry;

	if (array->nent >= array->maxnent)
		return NULL;

	entry = &array->entries[array->nent++];

	entry->function = function;
	entry->index = index;
	entry->flags = 0;

	cpuid_count(function, index,
		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);

	/* Leaf 2 is stateful; these leaves have meaningful sub-indices. */
	if (function == 2)
		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
	else if (function == 4 || function == 7 || function == 0xb ||
		 function == 0xd || function == 0xf || function == 0x10 ||
		 function == 0x12 || function == 0x14 || function == 0x17 ||
		 function == 0x18 || function == 0x1f ||
		 function == 0x8000001d)
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;

	return entry;
}

303
/*
 * Fill in the KVM_GET_EMULATED_CPUID entry for @func: features KVM
 * emulates in software even when the host CPU lacks them.  Leaves with
 * nothing emulated produce no entry (nent is not advanced).  Always
 * returns 0; the caller has already bounds-checked the array.
 */
static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
{
	struct kvm_cpuid_entry2 *entry = &array->entries[array->nent];

	entry->function = func;
	entry->index = 0;
	entry->flags = 0;

	switch (func) {
	case 0:
		/* Highest leaf with emulated features is 7. */
		entry->eax = 7;
		++array->nent;
		break;
	case 1:
		/* MOVBE is emulated by the instruction decoder. */
		entry->ecx = F(MOVBE);
		++array->nent;
		break;
	case 7:
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		entry->eax = 0;
		entry->ecx = F(RDPID);
		++array->nent;
		/* Explicit break: was an unannotated fall-through into
		 * default, harmless only because default is empty. */
		break;
	default:
		break;
	}

	return 0;
}

332
/*
 * Sanitize a host-populated CPUID leaf 7 entry (any sub-index) down to
 * what KVM can expose to a guest: apply the static allow-lists, mask
 * against host support, then force on the bits KVM emulates itself.
 */
static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
{
	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
	unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
	unsigned f_la57;
	unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0;

	/* cpuid 7.0.ebx */
	const u32 kvm_cpuid_7_0_ebx_x86_features =
		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;

	/* cpuid 7.0.ecx*/
	const u32 kvm_cpuid_7_0_ecx_x86_features =
		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;

	/* cpuid 7.0.edx*/
	const u32 kvm_cpuid_7_0_edx_x86_features =
		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
		F(MD_CLEAR);

	/* cpuid 7.1.eax */
	const u32 kvm_cpuid_7_1_eax_x86_features =
		F(AVX512_BF16);

	switch (entry->index) {
	case 0:
		/* Only sub-leaf 1 beyond this one is supported. */
		entry->eax = min(entry->eax, 1u);
		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
		cpuid_entry_mask(entry, CPUID_7_0_EBX);
		/* TSC_ADJUST is emulated */
		cpuid_entry_set(entry, X86_FEATURE_TSC_ADJUST);

		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
		/* Capture LA57 before the host mask clears it; the host
		 * kernel may not use 5-level paging even if the CPU can. */
		f_la57 = cpuid_entry_get(entry, X86_FEATURE_LA57);
		cpuid_entry_mask(entry, CPUID_7_ECX);
		/* Set LA57 based on hardware capability. */
		entry->ecx |= f_la57;
		entry->ecx |= f_umip;
		entry->ecx |= f_pku;
		/* PKU is not yet implemented for shadow paging. */
		if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
			cpuid_entry_clear(entry, X86_FEATURE_PKU);

		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_7_EDX);
		if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL);
		if (boot_cpu_has(X86_FEATURE_STIBP))
			cpuid_entry_set(entry, X86_FEATURE_INTEL_STIBP);
		if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL_SSBD);
		/*
		 * We emulate ARCH_CAPABILITIES in software even
		 * if the host doesn't support it.
		 */
		cpuid_entry_set(entry, X86_FEATURE_ARCH_CAPABILITIES);
		break;
	case 1:
		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	default:
		/* Callers only generate sub-leaves 0 and 1. */
		WARN_ON_ONCE(1);
		entry->eax = 0;
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	}
}

414
/*
 * Populate @array with the KVM_GET_SUPPORTED_CPUID entry (or entries,
 * for leaves with sub-indices) for @function: host CPUID output masked
 * down to what KVM supports, with software-emulated features forced on.
 * Returns 0 on success, -E2BIG if the array fills up.
 */
static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
{
	struct kvm_cpuid_entry2 *entry;
	int r, i, max_idx;
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;

	/* cpuid 1.edx */
	const u32 kvm_cpuid_1_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
		0 /* Reserved, DS, ACPI */ | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 /* HTT, TM, Reserved, PBE */;
	/* cpuid 0x80000001.edx */
	const u32 kvm_cpuid_8000_0001_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
	/* cpuid 1.ecx */
	const u32 kvm_cpuid_1_ecx_x86_features =
		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
		 * but *not* advertised to guests via CPUID ! */
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
		F(F16C) | F(RDRAND);
	/* cpuid 0x80000001.ecx */
	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
		F(TOPOEXT) | F(PERFCTR_CORE);

	/* cpuid 0x80000008.ebx */
	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
		F(CLZERO) | F(XSAVEERPTR) |
		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);

	/* cpuid 0xC0000001.edx */
	const u32 kvm_cpuid_C000_0001_edx_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);

	/* cpuid 0xD.1.eax */
	const u32 kvm_cpuid_D_1_eax_x86_features =
		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();

	r = -E2BIG;

	/* The primary entry for @function, sub-index 0. */
	entry = do_host_cpuid(array, function, 0);
	if (WARN_ON(!entry))
		goto out;

	switch (function) {
	case 0:
		/* Limited to the highest leaf implemented in KVM. */
		entry->eax = min(entry->eax, 0x1fU);
		break;
	case 1:
		entry->edx &= kvm_cpuid_1_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_1_EDX);
		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
		cpuid_entry_mask(entry, CPUID_1_ECX);
		/* we support x2apic emulation even if host does not support
		 * it since we emulate x2apic in software */
		cpuid_entry_set(entry, X86_FEATURE_X2APIC);
		break;
	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
	 * may return different values. This forces us to get_cpu() before
	 * issuing the first command, and also to emulate this annoying behavior
	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
	case 2:
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;

		/* AL of the first read is the number of iterations needed. */
		for (i = 1, max_idx = entry->eax & 0xff; i < max_idx; ++i) {
			entry = do_host_cpuid(array, function, 0);
			if (!entry)
				goto out;
		}
		break;
	/* functions 4 and 0x8000001d have additional index. */
	case 4:
	case 0x8000001d:
		/*
		 * Read entries until the cache type in the previous entry is
		 * zero, i.e. indicates an invalid entry.
		 */
		for (i = 1; entry->eax & 0x1f; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 6: /* Thermal management */
		entry->eax = 0x4; /* allow ARAT */
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	/* function 7 has additional index. */
	case 7:
		do_cpuid_7_mask(entry);

		/* Sub-leaf 0 EAX reports the maximum sub-leaf. */
		for (i = 1, max_idx = entry->eax; i <= max_idx; i++) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			do_cpuid_7_mask(entry);
		}
		break;
	case 9:
		break;
	case 0xa: { /* Architectural Performance Monitoring */
		struct x86_pmu_capability cap;
		union cpuid10_eax eax;
		union cpuid10_edx edx;

		perf_get_x86_pmu_capability(&cap);

		/*
		 * Only support guest architectural pmu on a host
		 * with architectural pmu.
		 */
		if (!cap.version)
			memset(&cap, 0, sizeof(cap));

		eax.split.version_id = min(cap.version, 2);
		eax.split.num_counters = cap.num_counters_gp;
		eax.split.bit_width = cap.bit_width_gp;
		eax.split.mask_length = cap.events_mask_len;

		edx.split.num_counters_fixed = cap.num_counters_fixed;
		edx.split.bit_width_fixed = cap.bit_width_fixed;
		edx.split.reserved = 0;

		entry->eax = eax.full;
		entry->ebx = cap.events_mask;
		entry->ecx = 0;
		entry->edx = edx.full;
		break;
	}
	/*
	 * Per Intel's SDM, the 0x1f is a superset of 0xb,
	 * thus they can be handled by common code.
	 */
	case 0x1f:
	case 0xb:
		/*
		 * Populate entries until the level type (ECX[15:8]) of the
		 * previous entry is zero.  Note, CPUID EAX.{0x1f,0xb}.0 is
		 * the starting entry, filled by the primary do_host_cpuid().
		 */
		for (i = 1; entry->ecx & 0xff00; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 0xd:
		entry->eax &= supported_xcr0;
		entry->ebx = xstate_required_size(supported_xcr0, false);
		entry->ecx = entry->ebx;
		entry->edx &= supported_xcr0 >> 32;
		if (!supported_xcr0)
			break;

		entry = do_host_cpuid(array, function, 1);
		if (!entry)
			goto out;

		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
		cpuid_entry_mask(entry, CPUID_D_1_EAX);
		if (entry->eax & (F(XSAVES)|F(XSAVEC)))
			entry->ebx = xstate_required_size(supported_xcr0, true);
		else
			entry->ebx = 0;
		/* Saving XSS controlled state via XSAVES isn't supported. */
		entry->ecx = 0;
		entry->edx = 0;

		/* One sub-leaf per XCR0-managed extended state component. */
		for (i = 2; i < 64; ++i) {
			if (!(supported_xcr0 & BIT_ULL(i)))
				continue;

			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			/*
			 * The supported check above should have filtered out
			 * invalid sub-leafs as well as sub-leafs managed by
			 * IA32_XSS MSR.  Only XCR0-managed sub-leafs should
			 * reach this point, and they should have a non-zero
			 * save state size.
			 */
			if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 1))) {
				--array->nent;
				continue;
			}

			entry->ecx = 0;
			entry->edx = 0;
		}
		break;
	/* Intel PT */
	case 0x14:
		if (!f_intel_pt) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
			if (!do_host_cpuid(array, function, i))
				goto out;
		}
		break;
	case KVM_CPUID_SIGNATURE: {
		static const char signature[12] = "KVMKVMKVM\0\0";
		const u32 *sigptr = (const u32 *)signature;
		entry->eax = KVM_CPUID_FEATURES;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_PV_EOI) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
			     (1 << KVM_FEATURE_PV_UNHALT) |
			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
			     (1 << KVM_FEATURE_PV_SEND_IPI) |
			     (1 << KVM_FEATURE_POLL_CONTROL) |
			     (1 << KVM_FEATURE_PV_SCHED_YIELD);

		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		entry->eax = min(entry->eax, 0x8000001f);
		break;
	case 0x80000001:
		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0001_EDX);
		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0001_ECX);
		break;
	case 0x80000007: /* Advanced power management */
		/* invariant TSC is CPUID.80000007H:EDX[8] */
		entry->edx &= (1 << 8);
		/* mask against host */
		entry->edx &= boot_cpu_data.x86_power;
		entry->eax = entry->ebx = entry->ecx = 0;
		break;
	case 0x80000008: {
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		if (!g_phys_as)
			g_phys_as = phys_as;
		entry->eax = g_phys_as | (virt_as << 8);
		entry->edx = 0;
		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0008_EBX);
		/*
		 * AMD has separate bits for each SPEC_CTRL bit.
		 * arch/x86/kernel/cpu/bugs.c is kind enough to
		 * record that in cpufeatures so use them.
		 */
		if (boot_cpu_has(X86_FEATURE_IBPB))
			cpuid_entry_set(entry, X86_FEATURE_AMD_IBPB);
		if (boot_cpu_has(X86_FEATURE_IBRS))
			cpuid_entry_set(entry, X86_FEATURE_AMD_IBRS);
		if (boot_cpu_has(X86_FEATURE_STIBP))
			cpuid_entry_set(entry, X86_FEATURE_AMD_STIBP);
		if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_AMD_SSBD);
		if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
			cpuid_entry_set(entry, X86_FEATURE_AMD_SSB_NO);
		/*
		 * The preference is to use SPEC CTRL MSR instead of the
		 * VIRT_SPEC MSR.
		 */
		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
		    !boot_cpu_has(X86_FEATURE_AMD_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_VIRT_SSBD);
		break;
	}
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
	case 0x8000001e:
		break;
	/* Support memory encryption cpuid if host supports it */
	case 0x8000001F:
		if (!boot_cpu_has(X86_FEATURE_SEV))
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	/*Add support for Centaur's CPUID instruction*/
	case 0xC0000000:
		/*Just support up to 0xC0000004 now*/
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_C000_0001_EDX);
		break;
	case 3: /* Processor serial number */
	case 5: /* MONITOR/MWAIT */
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	/* Let the vendor module (VMX/SVM) apply its own adjustments. */
	kvm_x86_ops->set_supported_cpuid(entry);

	r = 0;

out:
	put_cpu();

	return r;
}

781 782
/*
 * Dispatch one top-level CPUID function to the supported or emulated
 * builder depending on @type; -E2BIG when the output array is full.
 */
static int do_cpuid_func(struct kvm_cpuid_array *array, u32 func,
			 unsigned int type)
{
	if (array->nent >= array->maxnent)
		return -E2BIG;

	return (type == KVM_GET_EMULATED_CPUID)
		? __do_cpuid_func_emulated(array, func)
		: __do_cpuid_func(array, func);
}

793
#define CENTAUR_CPUID_SIGNATURE 0xC0000000
794

795 796
/*
 * Build entries for the range starting at base leaf @func and for every
 * leaf below the limit that the base leaf reports in EAX.
 */
static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
			  unsigned int type)
{
	u32 leaf, limit;
	int r;

	/* The Centaur range is only populated on Centaur hardware. */
	if (func == CENTAUR_CPUID_SIGNATURE &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
		return 0;

	r = do_cpuid_func(array, func, type);
	if (r)
		return r;

	/* The entry just built reports the range's highest leaf in EAX. */
	limit = array->entries[array->nent - 1].eax;
	for (leaf = func + 1; leaf <= limit && !r; ++leaf)
		r = do_cpuid_func(array, leaf, type);

	return r;
}

B
Borislav Petkov 已提交
819 820 821 822
/*
 * Returns true (i.e. "reject") when a KVM_GET_EMULATED_CPUID caller has
 * non-zero padding in any entry, or when the padding cannot be read.
 */
static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
				 __u32 num_entries, unsigned int ioctl_type)
{
	__u32 pad[3];
	int i;

	if (ioctl_type != KVM_GET_EMULATED_CPUID)
		return false;

	/*
	 * We want to make sure that ->padding is being passed clean from
	 * userspace in case we want to use it for something in the future.
	 *
	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
	 * have to give ourselves satisfied only with the emulated side. /me
	 * sheds a tear.
	 */
	for (i = 0; i < num_entries; i++) {
		if (copy_from_user(pad, entries[i].padding, sizeof(pad)) ||
		    pad[0] || pad[1] || pad[2])
			return true;
	}

	return false;
}

/*
 * Implement KVM_GET_SUPPORTED_CPUID / KVM_GET_EMULATED_CPUID: build the
 * full CPUID table (basic, extended, Centaur and KVM ranges) and copy it
 * to userspace.  On success cpuid->nent is updated to the count produced.
 */
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
			    struct kvm_cpuid_entry2 __user *entries,
			    unsigned int type)
{
	static const u32 funcs[] = {
		0, 0x80000000, CENTAUR_CPUID_SIGNATURE, KVM_CPUID_SIGNATURE,
	};

	/*
	 * maxnent must be clamped to the capacity actually allocated below.
	 * Initializing it from the raw, userspace-controlled cpuid->nent
	 * while the buffer is sized with the clamped value would let
	 * do_host_cpuid() write past the end of the allocation.
	 */
	struct kvm_cpuid_array array = {
		.nent = 0,
		.maxnent = min_t(int, cpuid->nent, KVM_MAX_CPUID_ENTRIES),
	};
	int r, i;

	if (cpuid->nent < 1)
		return -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		cpuid->nent = KVM_MAX_CPUID_ENTRIES;

	if (sanity_check_entries(entries, cpuid->nent, type))
		return -EINVAL;

	array.entries = vzalloc(array_size(sizeof(struct kvm_cpuid_entry2),
					   cpuid->nent));
	if (!array.entries)
		return -ENOMEM;

	/* Fill each of the four top-level ranges and everything below it. */
	for (i = 0; i < ARRAY_SIZE(funcs); i++) {
		r = get_cpuid_func(&array, funcs[i], type);
		if (r)
			goto out_free;
	}
	cpuid->nent = array.nent;

	if (copy_to_user(entries, array.entries,
			 array.nent * sizeof(struct kvm_cpuid_entry2)))
		r = -EFAULT;

out_free:
	vfree(array.entries);
	return r;
}

static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
892 893 894
	struct kvm_cpuid_entry2 *ej;
	int j = i;
	int nent = vcpu->arch.cpuid_nent;
A
Avi Kivity 已提交
895 896 897

	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
	/* when no next entry is found, the current entry[i] is reselected */
898 899 900 901 902 903 904 905
	do {
		j = (j + 1) % nent;
		ej = &vcpu->arch.cpuid_entries[j];
	} while (ej->function != e->function);

	ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;

	return j;
A
Avi Kivity 已提交
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
}

/* find an entry with matching function, matching index (if needed), and that
 * should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
	u32 function, u32 index)
{
	bool index_ok, state_ok;

	if (e->function != function)
		return 0;

	/* The index only matters for leaves that declare it significant. */
	index_ok = !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) ||
		   e->index == index;
	/* Stateful leaves are consumed in order via STATE_READ_NEXT. */
	state_ok = !(e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) ||
		   (e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT);

	return index_ok && state_ok;
}

/*
 * Look up the guest CPUID entry for (@function, @index); NULL if absent.
 * For stateful functions this also advances the READ_NEXT marker.
 */
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index)
{
	struct kvm_cpuid_entry2 *e;
	int i;

	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];

		if (!is_matching_cpuid_entry(e, function, index))
			continue;

		if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
			move_to_next_stateful_cpuid_entry(vcpu, i);
		return e;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);

/*
945 946 947
 * If the basic or extended CPUID leaf requested is higher than the
 * maximum supported basic or extended leaf, respectively, then it is
 * out of range.
A
Avi Kivity 已提交
948
 */
949
static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
A
Avi Kivity 已提交
950
{
951 952 953 954
	struct kvm_cpuid_entry2 *max;

	max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
	return max && function <= max->eax;
A
Avi Kivity 已提交
955 956
}

957 958
/*
 * Resolve a guest CPUID query.  On entry *eax/*ecx hold the requested
 * function/index; on exit all four registers hold the result.  When
 * @check_limit is set, out-of-range leaves follow vendor semantics:
 * Intel redirects to the highest basic leaf, AMD returns zeroes.
 * Returns true iff an exact entry was found.
 */
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
	       u32 *ecx, u32 *edx, bool check_limit)
{
	u32 function = *eax, index = *ecx;
	struct kvm_cpuid_entry2 *entry;
	struct kvm_cpuid_entry2 *max;
	bool found;

	entry = kvm_find_cpuid_entry(vcpu, function, index);
	found = entry;
	/*
	 * Intel CPUID semantics treats any query for an out-of-range
	 * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
	 * requested. AMD CPUID semantics returns all zeroes for any
	 * undefined leaf, whether or not the leaf is in range.
	 */
	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
	    !cpuid_function_in_range(vcpu, function)) {
		max = kvm_find_cpuid_entry(vcpu, 0, 0);
		if (max) {
			function = max->eax;
			entry = kvm_find_cpuid_entry(vcpu, function, index);
		}
	}
	if (entry) {
		*eax = entry->eax;
		*ebx = entry->ebx;
		*ecx = entry->ecx;
		*edx = entry->edx;
		/* TSX_CTRL can hide RTM/HLE from CPUID at runtime. */
		if (function == 7 && index == 0) {
			u64 data;
		        if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
			    (data & TSX_CTRL_CPUID_CLEAR))
				*ebx &= ~(F(RTM) | F(HLE));
		}
	} else {
		*eax = *ebx = *ecx = *edx = 0;
		/*
		 * When leaf 0BH or 1FH is defined, CL is pass-through
		 * and EDX is always the x2APIC ID, even for undefined
		 * subleaves. Index 1 will exist iff the leaf is
		 * implemented, so we pass through CL iff leaf 1
		 * exists. EDX can be copied from any existing index.
		 */
		if (function == 0xb || function == 0x1f) {
			entry = kvm_find_cpuid_entry(vcpu, function, 1);
			if (entry) {
				*ecx = index & 0xff;
				*edx = entry->edx;
			}
		}
	}
	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
	return found;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
1013

1014
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1015
{
1016
	u32 eax, ebx, ecx, edx;
1017

K
Kyle Huey 已提交
1018 1019 1020
	if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
		return 1;

1021 1022
	eax = kvm_rax_read(vcpu);
	ecx = kvm_rcx_read(vcpu);
1023
	kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
1024 1025 1026 1027
	kvm_rax_write(vcpu, eax);
	kvm_rbx_write(vcpu, ebx);
	kvm_rcx_write(vcpu, ecx);
	kvm_rdx_write(vcpu, edx);
1028
	return kvm_skip_emulated_instruction(vcpu);
A
Avi Kivity 已提交
1029 1030
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);