// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 * cpuid support routines
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 * Copyright IBM Corporation, 2008
 */

#include <linux/kvm_host.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/sched/stat.h>

#include <asm/processor.h>
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
#include "trace.h"
#include "pmu.h"

/*
 * Compute the buffer size needed to hold the XSAVE state components in
 * @xstate_bv.  For the compacted (XSAVES) format components are packed
 * back to back; for the standard format each component lives at the
 * host-reported offset (CPUID.0xD.n:EBX).
 */
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
	u32 size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
	int bit;

	/* The legacy region + header are always present; only walk the
	 * extended (supervisor-independent) components. */
	xstate_bv &= XFEATURE_MASK_EXTEND;
	for (bit = 0; xstate_bv; xstate_bv >>= 1, bit++) {
		u32 eax, ebx, ecx, edx, offset;

		if (!(xstate_bv & 0x1))
			continue;

		cpuid_count(0xD, bit, &eax, &ebx, &ecx, &edx);
		offset = compacted ? size : ebx;
		size = max(size, offset + eax);
	}

	return size;
}

#define F feature_bit

int kvm_update_cpuid(struct kvm_vcpu *vcpu)
A
Avi Kivity 已提交
51 52 53 54 55 56
{
	struct kvm_cpuid_entry2 *best;
	struct kvm_lapic *apic = vcpu->arch.apic;

	best = kvm_find_cpuid_entry(vcpu, 1, 0);
	if (!best)
57
		return 0;
A
Avi Kivity 已提交
58 59

	/* Update OSXSAVE bit */
60 61 62
	if (boot_cpu_has(X86_FEATURE_XSAVE) && best->function == 0x1)
		cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
A
Avi Kivity 已提交
63

64 65
	cpuid_entry_change(best, X86_FEATURE_APIC,
			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
66

A
Avi Kivity 已提交
67
	if (apic) {
68
		if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
A
Avi Kivity 已提交
69 70 71 72
			apic->lapic_timer.timer_mode_mask = 3 << 17;
		else
			apic->lapic_timer.timer_mode_mask = 1 << 17;
	}
73

74
	best = kvm_find_cpuid_entry(vcpu, 7, 0);
75 76 77
	if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
		cpuid_entry_change(best, X86_FEATURE_OSPKE,
				   kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
78

79
	best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
80
	if (!best) {
81
		vcpu->arch.guest_supported_xcr0 = 0;
82 83
		vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
	} else {
84
		vcpu->arch.guest_supported_xcr0 =
85
			(best->eax | ((u64)best->edx << 32)) & supported_xcr0;
86
		vcpu->arch.guest_xstate_size = best->ebx =
87
			xstate_required_size(vcpu->arch.xcr0, false);
88
	}
89

90
	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
91 92
	if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) ||
		     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
93 94
		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);

95
	/*
96 97
	 * The existing code assumes virtual address is 48-bit or 57-bit in the
	 * canonical address checks; exit if it is ever changed.
98 99
	 */
	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
100 101 102 103 104 105
	if (best) {
		int vaddr_bits = (best->eax & 0xff00) >> 8;

		if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
			return -EINVAL;
	}
106

107 108 109 110 111
	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);

112 113
	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
		best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
114 115 116 117
		if (best)
			cpuid_entry_change(best, X86_FEATURE_MWAIT,
					   vcpu->arch.ia32_misc_enable_msr &
					   MSR_IA32_MISC_ENABLE_MWAIT);
118 119
	}

120 121
	/* Update physical-address width */
	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
122
	kvm_mmu_reset_context(vcpu);
123

124
	kvm_pmu_refresh(vcpu);
125
	return 0;
A
Avi Kivity 已提交
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
}

/* Return non-zero iff the host's EFER.NX bit is set. */
static int is_efer_nx(void)
{
	unsigned long long host_efer = 0;

	/* host_efer stays 0 if the RDMSR faults. */
	rdmsrl_safe(MSR_EFER, &host_efer);

	return host_efer & EFER_NX;
}

static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_cpuid_entry2 *e, *entry;

	entry = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
149
	if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) {
150
		cpuid_entry_clear(entry, X86_FEATURE_NX);
A
Avi Kivity 已提交
151 152 153 154
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
/*
 * Return the guest's physical-address width from CPUID.0x80000008:EAX[7:0],
 * or the architectural default of 36 bits when the leaf is not exposed.
 */
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *e;

	e = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
	if (e && e->eax >= 0x80000008) {
		e = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
		if (e)
			return e->eax & 0xff;
	}

	return 36;
}
EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);

A
Avi Kivity 已提交
170 171 172 173 174 175
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
			     struct kvm_cpuid *cpuid,
			     struct kvm_cpuid_entry __user *entries)
{
	int r, i;
176
	struct kvm_cpuid_entry *cpuid_entries = NULL;
A
Avi Kivity 已提交
177 178 179 180 181

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -ENOMEM;
182
	if (cpuid->nent) {
183 184 185
		cpuid_entries =
			vmalloc(array_size(sizeof(struct kvm_cpuid_entry),
					   cpuid->nent));
186 187 188 189 190 191 192
		if (!cpuid_entries)
			goto out;
		r = -EFAULT;
		if (copy_from_user(cpuid_entries, entries,
				   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
			goto out;
	}
A
Avi Kivity 已提交
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
	for (i = 0; i < cpuid->nent; i++) {
		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
		vcpu->arch.cpuid_entries[i].index = 0;
		vcpu->arch.cpuid_entries[i].flags = 0;
		vcpu->arch.cpuid_entries[i].padding[0] = 0;
		vcpu->arch.cpuid_entries[i].padding[1] = 0;
		vcpu->arch.cpuid_entries[i].padding[2] = 0;
	}
	vcpu->arch.cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
209
	r = kvm_update_cpuid(vcpu);
A
Avi Kivity 已提交
210 211

out:
212
	vfree(cpuid_entries);
A
Avi Kivity 已提交
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
	return r;
}

/*
 * Install a CPUID table supplied by userspace in the native
 * kvm_cpuid_entry2 format and refresh all derived vCPU state.
 */
int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
			      struct kvm_cpuid2 *cpuid,
			      struct kvm_cpuid_entry2 __user *entries)
{
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		return -E2BIG;

	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
		return -EFAULT;

	vcpu->arch.cpuid_nent = cpuid->nent;
	kvm_apic_set_version(vcpu);
	kvm_x86_ops->cpuid_update(vcpu);
	return kvm_update_cpuid(vcpu);
}

/*
 * Copy the vCPU's current CPUID table out to userspace.  On failure,
 * cpuid->nent is updated so userspace can learn the required capacity.
 */
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
			      struct kvm_cpuid2 *cpuid,
			      struct kvm_cpuid_entry2 __user *entries)
{
	int r = 0;

	if (cpuid->nent < vcpu->arch.cpuid_nent)
		r = -E2BIG;
	else if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
			      vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
		r = -EFAULT;

	if (!r)
		return 0;

	cpuid->nent = vcpu->arch.cpuid_nent;
	return r;
}

257 258 259 260 261 262 263
/*
 * Scratch state used while building a CPUID table for userspace:
 * a fixed-capacity entry buffer plus the number of entries filled so far.
 */
struct kvm_cpuid_array {
	struct kvm_cpuid_entry2 *entries;	/* backing storage */
	const int maxnent;			/* capacity of @entries */
	int nent;				/* entries filled so far */
};

/*
 * Append one entry to @array, populated by executing CPUID on the host
 * for (@function, @index), and tag it with the flags KVM's lookup code
 * needs.  Returns the new entry, or NULL if the array is full.
 */
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
					      u32 function, u32 index)
{
	struct kvm_cpuid_entry2 *e;

	if (array->nent >= array->maxnent)
		return NULL;

	e = &array->entries[array->nent++];

	e->function = function;
	e->index = index;
	e->flags = 0;

	cpuid_count(e->function, e->index,
		    &e->eax, &e->ebx, &e->ecx, &e->edx);

	switch (function) {
	case 2:
		/* Leaf 2 may return different values on repeated reads. */
		e->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		break;
	case 4:
	case 7:
	case 0xb:
	case 0xd:
	case 0xf:
	case 0x10:
	case 0x12:
	case 0x14:
	case 0x17:
	case 0x18:
	case 0x1f:
	case 0x8000001d:
		/* These leaves have meaningful sub-leaves (ECX index). */
		e->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		break;
	}

	return e;
}

303
/*
 * Fill in the next array entry for KVM_GET_EMULATED_CPUID: features KVM
 * emulates in software regardless of host support.  Only leaves 0, 1 and
 * 7 produce an entry (array->nent is only advanced for those); all other
 * leaves are silently skipped.  The caller (do_cpuid_func) guarantees
 * there is room for at least one more entry.  Always returns 0.
 */
static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
{
	struct kvm_cpuid_entry2 *entry = &array->entries[array->nent];

	entry->function = func;
	entry->index = 0;
	entry->flags = 0;

	switch (func) {
	case 0:
		/* Highest emulated leaf. */
		entry->eax = 7;
		++array->nent;
		break;
	case 1:
		entry->ecx = F(MOVBE);
		++array->nent;
		break;
	case 7:
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		entry->eax = 0;
		entry->ecx = F(RDPID);
		++array->nent;
		/* Explicit break: previously fell through into "default". */
		break;
	default:
		break;
	}

	return 0;
}

332
/*
 * Sanitize a CPUID leaf 7 entry (sub-leaf selected by entry->index):
 * mask the raw host bits down to the features KVM supports, then force
 * on the bits KVM emulates in software.
 */
static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
{
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
	unsigned f_la57;

	/* cpuid 7.0.ebx */
	const u32 kvm_cpuid_7_0_ebx_x86_features =
		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;

	/* cpuid 7.0.ecx*/
	const u32 kvm_cpuid_7_0_ecx_x86_features =
		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;

	/* cpuid 7.0.edx*/
	const u32 kvm_cpuid_7_0_edx_x86_features =
		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
		F(MD_CLEAR);

	/* cpuid 7.1.eax */
	const u32 kvm_cpuid_7_1_eax_x86_features =
		F(AVX512_BF16);

	switch (entry->index) {
	case 0:
		/* Only sub-leaf 1 is supported beyond this one. */
		entry->eax = min(entry->eax, 1u);
		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
		cpuid_entry_mask(entry, CPUID_7_0_EBX);
		/* TSC_ADJUST is emulated */
		cpuid_entry_set(entry, X86_FEATURE_TSC_ADJUST);

		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
		/* Save LA57 before masking: the host kernel may run with
		 * 4-level paging on LA57-capable hardware. */
		f_la57 = cpuid_entry_get(entry, X86_FEATURE_LA57);
		cpuid_entry_mask(entry, CPUID_7_ECX);
		/* Set LA57 based on hardware capability. */
		entry->ecx |= f_la57;

		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_7_EDX);
		if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL);
		if (boot_cpu_has(X86_FEATURE_STIBP))
			cpuid_entry_set(entry, X86_FEATURE_INTEL_STIBP);
		if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL_SSBD);
		/*
		 * We emulate ARCH_CAPABILITIES in software even
		 * if the host doesn't support it.
		 */
		cpuid_entry_set(entry, X86_FEATURE_ARCH_CAPABILITIES);
		break;
	case 1:
		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	default:
		WARN_ON_ONCE(1);
		entry->eax = 0;
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	}
}

406
/*
 * Populate @array with the sanitized entry (and any sub-leaf entries) for
 * CPUID leaf @function, for KVM_GET_SUPPORTED_CPUID: host CPUID output
 * masked down to what KVM can expose, plus software-emulated bits.
 * Returns 0 on success, -E2BIG if the array runs out of space.
 */
static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
{
	struct kvm_cpuid_entry2 *entry;
	int r, i, max_idx;
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;

	/* cpuid 1.edx */
	const u32 kvm_cpuid_1_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
		0 /* Reserved, DS, ACPI */ | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 /* HTT, TM, Reserved, PBE */;
	/* cpuid 0x80000001.edx */
	const u32 kvm_cpuid_8000_0001_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
	/* cpuid 1.ecx */
	const u32 kvm_cpuid_1_ecx_x86_features =
		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
		 * but *not* advertised to guests via CPUID ! */
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
		F(F16C) | F(RDRAND);
	/* cpuid 0x80000001.ecx */
	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
		F(TOPOEXT) | F(PERFCTR_CORE);

	/* cpuid 0x80000008.ebx */
	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
		F(CLZERO) | F(XSAVEERPTR) |
		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);

	/* cpuid 0xC0000001.edx */
	const u32 kvm_cpuid_C000_0001_edx_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);

	/* cpuid 0xD.1.eax */
	const u32 kvm_cpuid_D_1_eax_x86_features =
		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();

	r = -E2BIG;

	/* The primary entry for @function, sub-leaf 0. */
	entry = do_host_cpuid(array, function, 0);
	if (WARN_ON(!entry))
		goto out;

	switch (function) {
	case 0:
		/* Limited to the highest leaf implemented in KVM. */
		entry->eax = min(entry->eax, 0x1fU);
		break;
	case 1:
		entry->edx &= kvm_cpuid_1_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_1_EDX);
		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
		cpuid_entry_mask(entry, CPUID_1_ECX);
		/* we support x2apic emulation even if host does not support
		 * it since we emulate x2apic in software */
		cpuid_entry_set(entry, X86_FEATURE_X2APIC);
		break;
	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
	 * may return different values. This forces us to get_cpu() before
	 * issuing the first command, and also to emulate this annoying behavior
	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
	case 2:
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;

		/* AL reports how many times CPUID.2 must be executed. */
		for (i = 1, max_idx = entry->eax & 0xff; i < max_idx; ++i) {
			entry = do_host_cpuid(array, function, 0);
			if (!entry)
				goto out;
		}
		break;
	/* functions 4 and 0x8000001d have additional index. */
	case 4:
	case 0x8000001d:
		/*
		 * Read entries until the cache type in the previous entry is
		 * zero, i.e. indicates an invalid entry.
		 */
		for (i = 1; entry->eax & 0x1f; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 6: /* Thermal management */
		entry->eax = 0x4; /* allow ARAT */
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	/* function 7 has additional index. */
	case 7:
		do_cpuid_7_mask(entry);

		/* EAX of sub-leaf 0 reports the maximum sub-leaf. */
		for (i = 1, max_idx = entry->eax; i <= max_idx; i++) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			do_cpuid_7_mask(entry);
		}
		break;
	case 9:
		break;
	case 0xa: { /* Architectural Performance Monitoring */
		struct x86_pmu_capability cap;
		union cpuid10_eax eax;
		union cpuid10_edx edx;

		perf_get_x86_pmu_capability(&cap);

		/*
		 * Only support guest architectural pmu on a host
		 * with architectural pmu.
		 */
		if (!cap.version)
			memset(&cap, 0, sizeof(cap));

		eax.split.version_id = min(cap.version, 2);
		eax.split.num_counters = cap.num_counters_gp;
		eax.split.bit_width = cap.bit_width_gp;
		eax.split.mask_length = cap.events_mask_len;

		edx.split.num_counters_fixed = cap.num_counters_fixed;
		edx.split.bit_width_fixed = cap.bit_width_fixed;
		edx.split.reserved = 0;

		entry->eax = eax.full;
		entry->ebx = cap.events_mask;
		entry->ecx = 0;
		entry->edx = edx.full;
		break;
	}
	/*
	 * Per Intel's SDM, the 0x1f is a superset of 0xb,
	 * thus they can be handled by common code.
	 */
	case 0x1f:
	case 0xb:
		/*
		 * Populate entries until the level type (ECX[15:8]) of the
		 * previous entry is zero.  Note, CPUID EAX.{0x1f,0xb}.0 is
		 * the starting entry, filled by the primary do_host_cpuid().
		 */
		for (i = 1; entry->ecx & 0xff00; ++i) {
			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;
		}
		break;
	case 0xd:
		entry->eax &= supported_xcr0;
		entry->ebx = xstate_required_size(supported_xcr0, false);
		entry->ecx = entry->ebx;
		entry->edx &= supported_xcr0 >> 32;
		if (!supported_xcr0)
			break;

		entry = do_host_cpuid(array, function, 1);
		if (!entry)
			goto out;

		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
		cpuid_entry_mask(entry, CPUID_D_1_EAX);
		if (entry->eax & (F(XSAVES)|F(XSAVEC)))
			entry->ebx = xstate_required_size(supported_xcr0, true);
		else
			entry->ebx = 0;
		/* Saving XSS controlled state via XSAVES isn't supported. */
		entry->ecx = 0;
		entry->edx = 0;

		for (i = 2; i < 64; ++i) {
			if (!(supported_xcr0 & BIT_ULL(i)))
				continue;

			entry = do_host_cpuid(array, function, i);
			if (!entry)
				goto out;

			/*
			 * The supported check above should have filtered out
			 * invalid sub-leafs as well as sub-leafs managed by
			 * IA32_XSS MSR.  Only XCR0-managed sub-leafs should
			 * reach this point, and they should have a non-zero
			 * save state size.
			 */
			if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 1))) {
				--array->nent;
				continue;
			}

			entry->ecx = 0;
			entry->edx = 0;
		}
		break;
	/* Intel PT */
	case 0x14:
		if (!f_intel_pt) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		for (i = 1, max_idx = entry->eax; i <= max_idx; ++i) {
			if (!do_host_cpuid(array, function, i))
				goto out;
		}
		break;
	case KVM_CPUID_SIGNATURE: {
		static const char signature[12] = "KVMKVMKVM\0\0";
		const u32 *sigptr = (const u32 *)signature;
		entry->eax = KVM_CPUID_FEATURES;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_PV_EOI) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
			     (1 << KVM_FEATURE_PV_UNHALT) |
			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
			     (1 << KVM_FEATURE_PV_SEND_IPI) |
			     (1 << KVM_FEATURE_POLL_CONTROL) |
			     (1 << KVM_FEATURE_PV_SCHED_YIELD);

		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		entry->eax = min(entry->eax, 0x8000001f);
		break;
	case 0x80000001:
		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0001_EDX);
		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0001_ECX);
		break;
	case 0x80000007: /* Advanced power management */
		/* invariant TSC is CPUID.80000007H:EDX[8] */
		entry->edx &= (1 << 8);
		/* mask against host */
		entry->edx &= boot_cpu_data.x86_power;
		entry->eax = entry->ebx = entry->ecx = 0;
		break;
	case 0x80000008: {
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		/* Guest MAXPHYADDR defaults to the host value. */
		if (!g_phys_as)
			g_phys_as = phys_as;
		entry->eax = g_phys_as | (virt_as << 8);
		entry->edx = 0;
		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
		cpuid_entry_mask(entry, CPUID_8000_0008_EBX);
		/*
		 * AMD has separate bits for each SPEC_CTRL bit.
		 * arch/x86/kernel/cpu/bugs.c is kind enough to
		 * record that in cpufeatures so use them.
		 */
		if (boot_cpu_has(X86_FEATURE_IBPB))
			cpuid_entry_set(entry, X86_FEATURE_AMD_IBPB);
		if (boot_cpu_has(X86_FEATURE_IBRS))
			cpuid_entry_set(entry, X86_FEATURE_AMD_IBRS);
		if (boot_cpu_has(X86_FEATURE_STIBP))
			cpuid_entry_set(entry, X86_FEATURE_AMD_STIBP);
		if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_AMD_SSBD);
		if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
			cpuid_entry_set(entry, X86_FEATURE_AMD_SSB_NO);
		/*
		 * The preference is to use SPEC CTRL MSR instead of the
		 * VIRT_SPEC MSR.
		 */
		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
		    !boot_cpu_has(X86_FEATURE_AMD_SSBD))
			cpuid_entry_set(entry, X86_FEATURE_VIRT_SSBD);
		break;
	}
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
	case 0x8000001e:
		break;
	/* Support memory encryption cpuid if host supports it */
	case 0x8000001F:
		if (!boot_cpu_has(X86_FEATURE_SEV))
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	/*Add support for Centaur's CPUID instruction*/
	case 0xC0000000:
		/*Just support up to 0xC0000004 now*/
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
		cpuid_entry_mask(entry, CPUID_C000_0001_EDX);
		break;
	case 3: /* Processor serial number */
	case 5: /* MONITOR/MWAIT */
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	/* Let the vendor module (VMX/SVM) apply its own adjustments. */
	kvm_x86_ops->set_supported_cpuid(entry);

	r = 0;

out:
	put_cpu();

	return r;
}

773 774
/*
 * Dispatch leaf construction to the supported or emulated variant based on
 * the ioctl @type, after verifying there is room for at least one entry.
 */
static int do_cpuid_func(struct kvm_cpuid_array *array, u32 func,
			 unsigned int type)
{
	if (array->nent >= array->maxnent)
		return -E2BIG;

	return (type == KVM_GET_EMULATED_CPUID)
		? __do_cpuid_func_emulated(array, func)
		: __do_cpuid_func(array, func);
}

785
#define CENTAUR_CPUID_SIGNATURE 0xC0000000
786

787 788
static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
			  unsigned int type)
789 790 791 792
{
	u32 limit;
	int r;

793 794 795 796
	if (func == CENTAUR_CPUID_SIGNATURE &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
		return 0;

797
	r = do_cpuid_func(array, func, type);
798 799 800
	if (r)
		return r;

801
	limit = array->entries[array->nent - 1].eax;
802
	for (func = func + 1; func <= limit; ++func) {
803
		r = do_cpuid_func(array, func, type);
804 805 806 807 808 809 810
		if (r)
			break;
	}

	return r;
}

B
Borislav Petkov 已提交
811 812 813 814
/* Return true if the userspace entries are malformed for this ioctl. */
static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
				 __u32 num_entries, unsigned int ioctl_type)
{
	__u32 pad[3];
	int i;

	if (ioctl_type != KVM_GET_EMULATED_CPUID)
		return false;

	/*
	 * We want to make sure that ->padding is being passed clean from
	 * userspace in case we want to use it for something in the future.
	 *
	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
	 * have to give ourselves satisfied only with the emulated side. /me
	 * sheds a tear.
	 */
	for (i = 0; i < num_entries; i++) {
		if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
			return true;

		if (pad[0] || pad[1] || pad[2])
			return true;
	}

	return false;
}

/*
 * KVM_GET_{SUPPORTED,EMULATED}_CPUID: build the full CPUID table KVM can
 * offer and copy it back to userspace.  On success cpuid->nent is updated
 * to the number of entries actually produced.
 */
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
			    struct kvm_cpuid_entry2 __user *entries,
			    unsigned int type)
{
	/* Root leaves of each CPUID class, walked in order. */
	static const u32 funcs[] = {
		0, 0x80000000, CENTAUR_CPUID_SIGNATURE, KVM_CPUID_SIGNATURE,
	};

	struct kvm_cpuid_array array = {
		.nent = 0,
		.maxnent = cpuid->nent,
	};
	int r, i;

	if (cpuid->nent < 1)
		return -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		cpuid->nent = KVM_MAX_CPUID_ENTRIES;

	if (sanity_check_entries(entries, cpuid->nent, type))
		return -EINVAL;

	array.entries = vzalloc(array_size(sizeof(struct kvm_cpuid_entry2),
					   cpuid->nent));
	if (!array.entries)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(funcs); i++) {
		r = get_cpuid_func(&array, funcs[i], type);
		if (r)
			goto out_free;
	}
	cpuid->nent = array.nent;

	if (copy_to_user(entries, array.entries,
			 array.nent * sizeof(struct kvm_cpuid_entry2)))
		r = -EFAULT;

out_free:
	vfree(array.entries);
	return r;
}

static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
884 885 886
	struct kvm_cpuid_entry2 *ej;
	int j = i;
	int nent = vcpu->arch.cpuid_nent;
A
Avi Kivity 已提交
887 888 889

	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
	/* when no next entry is found, the current entry[i] is reselected */
890 891 892 893 894 895 896 897
	do {
		j = (j + 1) % nent;
		ej = &vcpu->arch.cpuid_entries[j];
	} while (ej->function != e->function);

	ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;

	return j;
A
Avi Kivity 已提交
898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
}

/*
 * Does entry @e satisfy a lookup for (@function, @index)?  The index only
 * matters when the entry declares it significant, and a stateful entry
 * only matches when it is the one marked to be read next.
 */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
	u32 function, u32 index)
{
	return e->function == function &&
	       (!(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) ||
		e->index == index) &&
	       (!(e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) ||
		(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT));
}

/*
 * Look up the vCPU's CPUID entry for (@function, @index), advancing the
 * stateful read pointer as a side effect for stateful leaves.  Returns
 * NULL when no entry matches.
 */
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
					      u32 function, u32 index)
{
	struct kvm_cpuid_entry2 *e;
	int i;

	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (!is_matching_cpuid_entry(e, function, index))
			continue;
		if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
			move_to_next_stateful_cpuid_entry(vcpu, i);
		return e;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);

/*
937 938 939
 * If the basic or extended CPUID leaf requested is higher than the
 * maximum supported basic or extended leaf, respectively, then it is
 * out of range.
A
Avi Kivity 已提交
940
 */
941
static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
A
Avi Kivity 已提交
942
{
943 944 945 946
	struct kvm_cpuid_entry2 *max;

	max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
	return max && function <= max->eax;
A
Avi Kivity 已提交
947 948
}

949 950
/*
 * Resolve a guest CPUID query.  On entry *eax/*ecx hold the requested
 * function/index; on exit all four registers hold the result.  Returns
 * true iff an exact table entry was found (also reported via tracing).
 */
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
	       u32 *ecx, u32 *edx, bool check_limit)
{
	u32 function = *eax, index = *ecx;
	struct kvm_cpuid_entry2 *entry;
	struct kvm_cpuid_entry2 *max;
	bool found;

	entry = kvm_find_cpuid_entry(vcpu, function, index);
	found = entry;
	/*
	 * Intel CPUID semantics treats any query for an out-of-range
	 * leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
	 * requested. AMD CPUID semantics returns all zeroes for any
	 * undefined leaf, whether or not the leaf is in range.
	 */
	if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
	    !cpuid_function_in_range(vcpu, function)) {
		max = kvm_find_cpuid_entry(vcpu, 0, 0);
		if (max) {
			function = max->eax;
			entry = kvm_find_cpuid_entry(vcpu, function, index);
		}
	}
	if (entry) {
		*eax = entry->eax;
		*ebx = entry->ebx;
		*ecx = entry->ecx;
		*edx = entry->edx;
		/* Honor IA32_TSX_CTRL.CPUID_CLEAR by hiding RTM/HLE. */
		if (function == 7 && index == 0) {
			u64 data;
			if (!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
			    (data & TSX_CTRL_CPUID_CLEAR))
				*ebx &= ~(F(RTM) | F(HLE));
		}
	} else {
		*eax = *ebx = *ecx = *edx = 0;
		/*
		 * When leaf 0BH or 1FH is defined, CL is pass-through
		 * and EDX is always the x2APIC ID, even for undefined
		 * subleaves. Index 1 will exist iff the leaf is
		 * implemented, so we pass through CL iff leaf 1
		 * exists. EDX can be copied from any existing index.
		 */
		if (function == 0xb || function == 0x1f) {
			entry = kvm_find_cpuid_entry(vcpu, function, 1);
			if (entry) {
				*ecx = index & 0xff;
				*edx = entry->edx;
			}
		}
	}
	trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
	return found;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
1005

1006
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
1007
{
1008
	u32 eax, ebx, ecx, edx;
1009

K
Kyle Huey 已提交
1010 1011 1012
	if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
		return 1;

1013 1014
	eax = kvm_rax_read(vcpu);
	ecx = kvm_rcx_read(vcpu);
1015
	kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
1016 1017 1018 1019
	kvm_rax_write(vcpu, eax);
	kvm_rbx_write(vcpu, ebx);
	kvm_rcx_write(vcpu, ecx);
	kvm_rdx_write(vcpu, edx);
1020
	return kvm_skip_emulated_instruction(vcpu);
A
Avi Kivity 已提交
1021 1022
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);