// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 * cpuid support routines
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 * Copyright IBM Corporation, 2008
 */

#include <linux/kvm_host.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/sched/stat.h>

#include <asm/processor.h>
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
#include "trace.h"
#include "pmu.h"

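/*
 * Compute the size of the XSAVE area needed to hold the extended state
 * components set in @xstate_bv.  Each component's size and offset come
 * from the corresponding CPUID.0xD subleaf; in the compacted format the
 * components are packed back to back, so the running total is used as
 * the offset instead of the EBX value reported by the host.
 */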
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
	int feature_bit = 0;
	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;

	xstate_bv &= XFEATURE_MASK_EXTEND;
	while (xstate_bv) {
		if (xstate_bv & 0x1) {
			u32 eax, ebx, ecx, edx, offset;
			cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
			offset = compacted ? ret : ebx;
			ret = max(ret, offset + eax);
		}

		xstate_bv >>= 1;
		feature_bit++;
	}

	return ret;
}

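/*
 * Shorthand for the feature-word tables below: F(X) expands to
 * feature_bit(X), the mask of X86_FEATURE_<X> within its CPUID output
 * register (see cpuid.h).
 */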
#define F feature_bit

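/*
 * Re-derive vCPU state that depends on the CPUID entries: OS-managed
 * feature bits (OSXSAVE/OSPKE), the APIC timer mode mask, the guest's
 * supported XCR0 and XSAVE area size, MAXPHYADDR and the vPMU.
 */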
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;
	struct kvm_lapic *apic = vcpu->arch.apic;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	if (!best)
		return 0;

	/* Update OSXSAVE bit */
	if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
		cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));

	cpuid_entry_change(best, X86_FEATURE_APIC,
			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);

	if (apic) {
		if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
			apic->lapic_timer.timer_mode_mask = 3 << 17;
		else
			apic->lapic_timer.timer_mode_mask = 1 << 17;
	}

	best = kvm_find_cpuid_entry(vcpu, 7, 0);
	if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
		cpuid_entry_change(best, X86_FEATURE_OSPKE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_PKE));

	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
	if (!best) {
		vcpu->arch.guest_supported_xcr0 = 0;
		vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
	} else {
		vcpu->arch.guest_supported_xcr0 =
			(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
		vcpu->arch.guest_xstate_size = best->ebx =
			xstate_required_size(vcpu->arch.xcr0, false);
	}

	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

	/*
	 * The existing code assumes virtual address is 48-bit or 57-bit in the
	 * canonical address checks; exit if it is ever changed.
	 */
	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
	if (best) {
		int vaddr_bits = (best->eax & 0xff00) >> 8;

		if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
			return -EINVAL;
	}

	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
		if (best)
			cpuid_entry_change(best, X86_FEATURE_MWAIT,
					   vcpu->arch.ia32_misc_enable_msr &
					   MSR_IA32_MISC_ENABLE_MWAIT);
	}

	/* Update physical-address width */
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
	kvm_mmu_reset_context(vcpu);

	kvm_pmu_refresh(vcpu);
	return 0;
}

static int is_efer_nx(void)
{
	unsigned long long efer = 0;

	rdmsrl_safe(MSR_EFER, &efer);
	return efer & EFER_NX;
}

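/*
 * If the host is running without EFER.NX (e.g. NX disabled on the
 * kernel command line), hide the NX bit in the guest's 0x80000001 leaf
 * so the guest does not rely on a feature the host cannot provide.
 */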
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_cpuid_entry2 *e, *entry;

	entry = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
		cpuid_entry_clear(entry, X86_FEATURE_NX);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

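/*
 * Derive the guest's physical address width from CPUID.0x80000008.EAX,
 * falling back to the architectural default of 36 bits when the
 * extended leaf is absent.
 */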
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
	if (!best || best->eax < 0x80000008)
		goto not_found;
	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
	if (best)
		return best->eax & 0xff;
not_found:
	return 36;
}
EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);

/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
			     struct kvm_cpuid *cpuid,
			     struct kvm_cpuid_entry __user *entries)
{
	int r, i;
	struct kvm_cpuid_entry *cpuid_entries = NULL;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -ENOMEM;
	if (cpuid->nent) {
		cpuid_entries =
			vmalloc(array_size(sizeof(struct kvm_cpuid_entry),
					   cpuid->nent));
		if (!cpuid_entries)
			goto out;
		r = -EFAULT;
		if (copy_from_user(cpuid_entries, entries,
				   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
			goto out;
	}
	for (i = 0; i < cpuid->nent; i++) {
		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
		vcpu->arch.cpuid_entries[i].index = 0;
		vcpu->arch.cpuid_entries[i].flags = 0;
		vcpu->arch.cpuid_entries[i].padding[0] = 0;
		vcpu->arch.cpuid_entries[i].padding[1] = 0;
		vcpu->arch.cpuid_entries[i].padding[2] = 0;
	}
	vcpu->arch.cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	r = kvm_update_cpuid(vcpu);

out:
	vfree(cpuid_entries);
	return r;
}

int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
			      struct kvm_cpuid2 *cpuid,
			      struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	vcpu->arch.cpuid_nent = cpuid->nent;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	r = kvm_update_cpuid(vcpu);
out:
	return r;
}

int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
			      struct kvm_cpuid2 *cpuid,
			      struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent < vcpu->arch.cpuid_nent)
		goto out;
	r = -EFAULT;
	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	return 0;

out:
	cpuid->nent = vcpu->arch.cpuid_nent;
	return r;
}

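/* Scratch state for building a CPUID table: nent of maxnent entries used. */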
struct kvm_cpuid_array {
	struct kvm_cpuid_entry2 *entries;
	const int maxnent;
	int nent;
};

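/*
 * Claim the next free entry in the array and fill it with the host's
 * values for @function/@index, flagging leaves whose output depends on
 * ECX as index-significant (and leaf 2 as stateful).
 */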
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
					      u32 function, u32 index)
{
	struct kvm_cpuid_entry2 *entry;

	if (array->nent >= array->maxnent)
		return NULL;

	entry = &array->entries[array->nent++];

	entry->function = function;
	entry->index = index;
	entry->flags = 0;

	cpuid_count(entry->function, entry->index,
		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);

	switch (function) {
	case 2:
		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		break;
	case 4:
	case 7:
	case 0xb:
	case 0xd:
	case 0xf:
	case 0x10:
	case 0x12:
	case 0x14:
	case 0x17:
	case 0x18:
	case 0x1f:
	case 0x8000001d:
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		break;
	}

	return entry;
}

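/*
 * KVM_GET_EMULATED_CPUID: report only features that KVM emulates in
 * software (e.g. MOVBE, RDPID) and can therefore offer even when the
 * host CPU lacks them.
 */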
static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
{
	struct kvm_cpuid_entry2 *entry = &array->entries[array->nent];

	entry->function = func;
	entry->index = 0;
	entry->flags = 0;

	switch (func) {
	case 0:
		entry->eax = 7;
		++array->nent;
		break;
	case 1:
		entry->ecx = F(MOVBE);
		++array->nent;
		break;
	case 7:
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		entry->eax = 0;
		entry->ecx = F(RDPID);
		++array->nent;
		break;
	default:
		break;
	}

	return 0;
}

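/*
 * Mask a leaf 7 entry down to what KVM supports: subleaf 0 is filtered
 * against the feature lists below plus per-host capability checks;
 * subleaf 1 currently carries only the AVX512_BF16 bit.
 */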
static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
{
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
	unsigned f_la57;

	/* cpuid 7.0.ebx */
	const u32 kvm_cpuid_7_0_ebx_x86_features =
		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;

	/* cpuid 7.0.ecx*/
	const u32 kvm_cpuid_7_0_ecx_x86_features =
		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;

	/* cpuid 7.0.edx*/
	const u32 kvm_cpuid_7_0_edx_x86_features =
		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
		F(MD_CLEAR);

	/* cpuid 7.1.eax */
	const u32 kvm_cpuid_7_1_eax_x86_features =
		F(AVX512_BF16);

	switch (entry->index) {
	case 0:
		entry->eax = min(entry->eax, 1u);
		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
		cpuid_entry_mask(entry, CPUID_7_0_EBX);
		/* TSC_ADJUST is emulated */
		cpuid_entry_set(entry, X86_FEATURE_TSC_ADJUST);

		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
		f_la57 = cpuid_entry_get(entry, X86_FEATURE_LA57);
		cpuid_entry_mask(entry, CPUID_7_ECX);
		/* Set LA57 based on hardware capability. */
		entry->ecx |= f_la57;

		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_7_EDX);
		if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL);
		if (boot_cpu_has(X86_FEATURE_STIBP))
			cpuid_entry_set(entry, X86_FEATURE_INTEL_STIBP);
		if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL_SSBD);
		/*
		 * We emulate ARCH_CAPABILITIES in software even
		 * if the host doesn't support it.
		 */
		cpuid_entry_set(entry, X86_FEATURE_ARCH_CAPABILITIES);
		break;
	case 1:
		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	default:
		WARN_ON_ONCE(1);
		entry->eax = 0;
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	}
}

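/*
 * Worker for KVM_GET_SUPPORTED_CPUID: populate @function from the host
 * CPUID (including any subleaves), then apply per-leaf masking and
 * fixups so that only KVM-supported features are advertised.
 */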
static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
{
	struct kvm_cpuid_entry2 *entry;
	int r, i, max_idx;
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;

	/* cpuid 1.edx */
	const u32 kvm_cpuid_1_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
		0 /* Reserved, DS, ACPI */ | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 /* HTT, TM, Reserved, PBE */;
	/* cpuid 0x80000001.edx */
	const u32 kvm_cpuid_8000_0001_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
	/* cpuid 1.ecx */
	const u32 kvm_cpuid_1_ecx_x86_features =
		/*
		 * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
		 * advertised to guests via CPUID!
		 */
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
		F(F16C) | F(RDRAND);
	/* cpuid 0x80000001.ecx */
	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
		F(TOPOEXT) | F(PERFCTR_CORE);

	/* cpuid 0x80000008.ebx */
	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
		F(CLZERO) | F(XSAVEERPTR) |
		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);

	/* cpuid 0xC0000001.edx */
	const u32 kvm_cpuid_C000_0001_edx_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);

	/* cpuid 0xD.1.eax */
	const u32 kvm_cpuid_D_1_eax_x86_features =
		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();

	r = -E2BIG;

	entry = do_host_cpuid(array, function, 0);
	if (WARN_ON(!entry))
		goto out;

	switch (function) {
	case 0:
		/* Limited to the highest leaf implemented in KVM. */
		entry->eax = min(entry->eax, 0x1fU);
		break;
	case 1:
		entry->edx &= kvm_cpuid_1_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_1_EDX);
		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
		cpuid_entry_mask(entry, CPUID_1_ECX);
		/*
		 * We support x2apic emulation even if the host does not
		 * support it, since we emulate x2apic in software.
		 */
		cpuid_entry_set(entry, X86_FEATURE_X2APIC);
		break;
	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
	 * may return different values. This forces us to get_cpu() before
	 * issuing the first command, and also to emulate this annoying behavior
	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
	case 2:
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;

		for (i = 1, max_idx = entry->eax & 0xff; i < max_idx; ++i) {
			entry = do_host_cpuid(array, function, 0);
			if (!entry)
				goto out;
		}
		break;
	/* functions 4 and 0x8000001d have additional index. */
	case 4:
	case 0x8000001d:
		/*
		 * Read entries until the cache type in the previous entry is
		 * zero, i.e. indicates an invalid entry.
		 */
		for (i = 1; entry->eax & 0x1f; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 6: /* Thermal management */
		entry->eax = 0x4; /* allow ARAT */
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	/* function 7 has additional index. */
	case 7:
		do_cpuid_7_mask(entry);

		for (i = 1, max_idx = entry->eax; i <= max_idx; i++) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			do_cpuid_7_mask(entry);
		}
		break;
	case 9:
		break;
	case 0xa: { /* Architectural Performance Monitoring */
		struct x86_pmu_capability cap;
		union cpuid10_eax eax;
		union cpuid10_edx edx;

		perf_get_x86_pmu_capability(&cap);

		/*
		 * Only support guest architectural pmu on a host
		 * with architectural pmu.
		 */
		if (!cap.version)
			memset(&cap, 0, sizeof(cap));

		eax.split.version_id = min(cap.version, 2);
		eax.split.num_counters = cap.num_counters_gp;
		eax.split.bit_width = cap.bit_width_gp;
		eax.split.mask_length = cap.events_mask_len;

		edx.split.num_counters_fixed = cap.num_counters_fixed;
		edx.split.bit_width_fixed = cap.bit_width_fixed;
		edx.split.reserved = 0;

		entry->eax = eax.full;
		entry->ebx = cap.events_mask;
		entry->ecx = 0;
		entry->edx = edx.full;
		break;
	}
	/*
	 * Per Intel's SDM, leaf 0x1f is a superset of leaf 0xb; thus the
	 * two can be handled by common code.
	 */
	case 0x1f:
	case 0xb:
		/*
		 * Populate entries until the level type (ECX[15:8]) of the
		 * previous entry is zero.  Note, CPUID EAX.{0x1f,0xb}.0 is
		 * the starting entry, filled by the primary do_host_cpuid().
		 */
		for (i = 1; entry->ecx & 0xff00; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 0xd:
		entry->eax &= supported_xcr0;
		entry->ebx = xstate_required_size(supported_xcr0, false);
		entry->ecx = entry->ebx;
		entry->edx &= supported_xcr0 >> 32;
		if (!supported_xcr0)
			break;

		entry = do_host_cpuid(array, function, 1);
		if (!entry)
			goto out;

		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
		cpuid_entry_mask(entry, CPUID_D_1_EAX);
		if (entry->eax & (F(XSAVES)|F(XSAVEC)))
			entry->ebx = xstate_required_size(supported_xcr0, true);
		else
			entry->ebx = 0;
		/* Saving XSS controlled state via XSAVES isn't supported. */
		entry->ecx = 0;
		entry->edx = 0;

		for (i = 2; i < 64; ++i) {
			if (!(supported_xcr0 & BIT_ULL(i)))
				continue;

			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			/*
			 * The supported check above should have filtered out
			 * invalid sub-leafs as well as sub-leafs managed by
			 * IA32_XSS MSR.  Only XCR0-managed sub-leafs should
			 * reach this point, and they should have a non-zero
			 * save state size.
			 */
			if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 1))) {
				--array->nent;
				continue;
			}

			entry->ecx = 0;
			entry->edx = 0;
		}
		break;
	/* Intel PT */
	case 0x14:
		if (!f_intel_pt) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
			if (!do_host_cpuid(array, function, i))
				goto out;
		}
		break;
	case KVM_CPUID_SIGNATURE: {
		static const char signature[12] = "KVMKVMKVM\0\0";
		const u32 *sigptr = (const u32 *)signature;
		entry->eax = KVM_CPUID_FEATURES;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_PV_EOI) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
			     (1 << KVM_FEATURE_PV_UNHALT) |
			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
			     (1 << KVM_FEATURE_PV_SEND_IPI) |
			     (1 << KVM_FEATURE_POLL_CONTROL) |
			     (1 << KVM_FEATURE_PV_SCHED_YIELD);

		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		entry->eax = min(entry->eax, 0x8000001f);
		break;
	case 0x80000001:
		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0001_EDX);
		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0001_ECX);
		break;
	case 0x80000007: /* Advanced power management */
		/* invariant TSC is CPUID.80000007H:EDX[8] */
		entry->edx &= (1 << 8);
		/* mask against host */
		entry->edx &= boot_cpu_data.x86_power;
		entry->eax = entry->ebx = entry->ecx = 0;
		break;
	case 0x80000008: {
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		if (!g_phys_as)
			g_phys_as = phys_as;
		entry->eax = g_phys_as | (virt_as << 8);
		entry->edx = 0;
		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0008_EBX);
		/*
		 * AMD has separate bits for each SPEC_CTRL bit.
		 * arch/x86/kernel/cpu/bugs.c is kind enough to
		 * record that in cpufeatures so use them.
		 */
		if (boot_cpu_has(X86_FEATURE_IBPB))
			cpuid_entry_set(entry, X86_FEATURE_AMD_IBPB);
		if (boot_cpu_has(X86_FEATURE_IBRS))
			cpuid_entry_set(entry, X86_FEATURE_AMD_IBRS);
		if (boot_cpu_has(X86_FEATURE_STIBP))
			cpuid_entry_set(entry, X86_FEATURE_AMD_STIBP);
		if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_AMD_SSBD);
		if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
			cpuid_entry_set(entry, X86_FEATURE_AMD_SSB_NO);
		/*
		 * The preference is to use SPEC CTRL MSR instead of the
		 * VIRT_SPEC MSR.
		 */
		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
		    !boot_cpu_has(X86_FEATURE_AMD_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_VIRT_SSBD);
		break;
	}
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
	case 0x8000001e:
		break;
	/* Support memory encryption cpuid if host supports it */
	case 0x8000001F:
		if (!boot_cpu_has(X86_FEATURE_SEV))
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	/* Add support for Centaur's CPUID instruction. */
	case 0xC0000000:
		/* Just support up to 0xC0000004 now. */
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_C000_0001_EDX);
		break;
	case 3: /* Processor serial number */
	case 5: /* MONITOR/MWAIT */
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	kvm_x86_ops->set_supported_cpuid(entry);

	r = 0;

out:
	put_cpu();

	return r;
}

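/* Dispatch to the emulated or supported variant based on the ioctl type. */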
static int do_cpuid_func(struct kvm_cpuid_array *array, u32 func,
			 unsigned int type)
{
	if (array->nent >= array->maxnent)
		return -E2BIG;

	if (type == KVM_GET_EMULATED_CPUID)
		return __do_cpuid_func_emulated(array, func);

	return __do_cpuid_func(array, func);
}

#define CENTAUR_CPUID_SIGNATURE 0xC0000000

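/*
 * Enumerate one function range: fill in the base leaf, then every
 * function up to the limit it reports in EAX.  The Centaur range is
 * only probed on Centaur hardware.
 */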
static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
			  unsigned int type)
{
	u32 limit;
	int r;

	if (func == CENTAUR_CPUID_SIGNATURE &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
		return 0;

	r = do_cpuid_func(array, func, type);
	if (r)
		return r;

	limit = array->entries[array->nent - 1].eax;
	for (func = func + 1; func <= limit; ++func) {
		r = do_cpuid_func(array, func, type);
		if (r)
			break;
	}

	return r;
}

static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
				 __u32 num_entries, unsigned int ioctl_type)
{
	int i;
	__u32 pad[3];

	if (ioctl_type != KVM_GET_EMULATED_CPUID)
		return false;

	/*
	 * We want to make sure that ->padding is being passed clean from
	 * userspace in case we want to use it for something in the future.
	 *
	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
	 * have to give ourselves satisfied only with the emulated side. /me
	 * sheds a tear.
	 */
	for (i = 0; i < num_entries; i++) {
		if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
			return true;

		if (pad[0] || pad[1] || pad[2])
			return true;
	}
	return false;
}

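/*
 * Top-level worker for KVM_GET_SUPPORTED_CPUID and KVM_GET_EMULATED_CPUID:
 * build the basic, extended, Centaur and KVM ranges into a temporary
 * array and copy the result out to userspace.
 */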
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
			    struct kvm_cpuid_entry2 __user *entries,
			    unsigned int type)
{
	static const u32 funcs[] = {
		0, 0x80000000, CENTAUR_CPUID_SIGNATURE, KVM_CPUID_SIGNATURE,
	};

	struct kvm_cpuid_array array = {
		.nent = 0,
		.maxnent = cpuid->nent,
	};
	int r, i;

	if (cpuid->nent < 1)
		return -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		cpuid->nent = KVM_MAX_CPUID_ENTRIES;

	if (sanity_check_entries(entries, cpuid->nent, type))
		return -EINVAL;

	array.entries = vzalloc(array_size(sizeof(struct kvm_cpuid_entry2),
					   cpuid->nent));
	if (!array.entries)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(funcs); i++) {
		r = get_cpuid_func(&array, funcs[i], type);
		if (r)
			goto out_free;
	}
	cpuid->nent = array.nent;

	if (copy_to_user(entries, array.entries,
			 array.nent * sizeof(struct kvm_cpuid_entry2)))
		r = -EFAULT;

out_free:
	vfree(array.entries);
	return r;
}

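/*
 * Rotate the READ_NEXT flag to the next entry of the same stateful
 * function (CPUID leaf 2), wrapping around the table, so that repeated
 * reads step through the entries the way hardware would.
 */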
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
	struct kvm_cpuid_entry2 *ej;
	int j = i;
	int nent = vcpu->arch.cpuid_nent;

	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
	/* when no next entry is found, the current entry[i] is reselected */
	do {
		j = (j + 1) % nent;
		ej = &vcpu->arch.cpuid_entries[j];
	} while (ej->function != e->function);

	ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;

	return j;
}

/* find an entry with matching function, matching index (if needed), and that
 * should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
	u32 function, u32 index)
{
	if (e->function != function)
		return 0;
	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
		return 0;
	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
		return 0;
	return 1;
}

struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index)
{
	int i;
	struct kvm_cpuid_entry2 *best = NULL;

	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		struct kvm_cpuid_entry2 *e;

		e = &vcpu->arch.cpuid_entries[i];
		if (is_matching_cpuid_entry(e, function, index)) {
			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
				move_to_next_stateful_cpuid_entry(vcpu, i);
			best = e;
			break;
		}
	}
	return best;
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);

/*
 * If the basic or extended CPUID leaf requested is higher than the
 * maximum supported basic or extended leaf, respectively, then it is
 * out of range.
 */
static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
{
	struct kvm_cpuid_entry2 *max;

	max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
	return max && function <= max->eax;
}

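/*
 * Resolve a guest CPUID query from the cached entries.  With
 * @check_limit, an out-of-range leaf follows Intel semantics (fall back
 * to the highest basic leaf) unless the guest is AMD, which gets all
 * zeroes for undefined leaves.
 */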
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
	       u32 *ecx, u32 *edx, bool check_limit)
{
	u32 function = *eax, index = *ecx;
	struct kvm_cpuid_entry2 *entry;
	struct kvm_cpuid_entry2 *max;
	bool found;

	entry = kvm_find_cpuid_entry(vcpu, function, index);
	found = entry;
	/*
	 * Intel CPUID semantics treats any query for an out-of-range
	 * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
	 * requested. AMD CPUID semantics returns all zeroes for any
	 * undefined leaf, whether or not the leaf is in range.
	 */
	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
	    !cpuid_function_in_range(vcpu, function)) {
		max = kvm_find_cpuid_entry(vcpu, 0, 0);
		if (max) {
			function = max->eax;
			entry = kvm_find_cpuid_entry(vcpu, function, index);
		}
	}
	if (entry) {
		*eax = entry->eax;
		*ebx = entry->ebx;
		*ecx = entry->ecx;
		*edx = entry->edx;
		if (function == 7 && index == 0) {
			u64 data;
			if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
			    (data & TSX_CTRL_CPUID_CLEAR))
				*ebx &= ~(F(RTM) | F(HLE));
		}
	} else {
		*eax = *ebx = *ecx = *edx = 0;
		/*
		 * When leaf 0BH or 1FH is defined, CL is pass-through
		 * and EDX is always the x2APIC ID, even for undefined
		 * subleaves. Index 1 will exist iff the leaf is
		 * implemented, so we pass through CL iff leaf 1
		 * exists. EDX can be copied from any existing index.
		 */
		if (function == 0xb || function == 0x1f) {
			entry = kvm_find_cpuid_entry(vcpu, function, 1);
			if (entry) {
				*ecx = index & 0xff;
				*edx = entry->edx;
			}
		}
	}
	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
	return found;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);

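/*
 * Handle a CPUID intercept: honor CPUID faulting (#GP if enabled and
 * CPL > 0), otherwise run the query with RAX/RCX and write the results
 * back to the guest's GPRs before skipping the instruction.
 */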
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
	u32 eax, ebx, ecx, edx;

	if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
		return 1;

	eax = kvm_rax_read(vcpu);
	ecx = kvm_rcx_read(vcpu);
	kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
	kvm_rax_write(vcpu, eax);
	kvm_rbx_write(vcpu, ebx);
	kvm_rcx_write(vcpu, ecx);
	kvm_rdx_write(vcpu, edx);
	return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);