diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c index 15e1203faef03097d08766b6a2653909fa62a1d1..7237cb25f77d27030ffe3f160320a52346938a3b 100644 --- a/drivers/kvm/x86.c +++ b/drivers/kvm/x86.c @@ -646,6 +646,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: + case KVM_CAP_EXT_CPUID: r = 1; break; default: @@ -708,13 +709,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_put_guest_fpu(vcpu); } -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +static int is_efer_nx(void) { u64 efer; - int i; - struct kvm_cpuid_entry *e, *entry; rdmsrl(MSR_EFER, efer); + return efer & EFER_NX; +} + +static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_cpuid_entry2 *e, *entry; + entry = NULL; for (i = 0; i < vcpu->cpuid_nent; ++i) { e = &vcpu->cpuid_entries[i]; @@ -723,15 +730,56 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { + if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { entry->edx &= ~(1 << 20); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } +/* when an old userspace process fills a new kernel module */ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) +{ + int r, i; + struct kvm_cpuid_entry *cpuid_entries; + + r = -E2BIG; + if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) + goto out; + r = -ENOMEM; + cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent); + if (!cpuid_entries) + goto out; + r = -EFAULT; + if (copy_from_user(cpuid_entries, entries, + cpuid->nent * sizeof(struct kvm_cpuid_entry))) + goto out_free; + for (i = 0; i < cpuid->nent; i++) { + vcpu->cpuid_entries[i].function = cpuid_entries[i].function; + vcpu->cpuid_entries[i].eax = cpuid_entries[i].eax; + vcpu->cpuid_entries[i].ebx = cpuid_entries[i].ebx; + vcpu->cpuid_entries[i].ecx = cpuid_entries[i].ecx; + vcpu->cpuid_entries[i].edx = cpuid_entries[i].edx; + vcpu->cpuid_entries[i].index = 0; + vcpu->cpuid_entries[i].flags = 0; + vcpu->cpuid_entries[i].padding[0] = 0; + vcpu->cpuid_entries[i].padding[1] = 0; + vcpu->cpuid_entries[i].padding[2] = 0; + } + vcpu->cpuid_nent = cpuid->nent; + cpuid_fix_nx_cap(vcpu); + r = 0; + +out_free: + vfree(cpuid_entries); +out: + return r; +} + +static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) { int r; @@ -740,16 +788,198 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, goto out; r = -EFAULT; if (copy_from_user(&vcpu->cpuid_entries, entries, - cpuid->nent * sizeof(struct kvm_cpuid_entry))) + cpuid->nent * sizeof(struct kvm_cpuid_entry2))) goto out; vcpu->cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); return 0; out: return r; } +static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) +{ + int r; + + r = -E2BIG; + if (cpuid->nent < vcpu->cpuid_nent) + goto out; + r = -EFAULT; + if (copy_to_user(entries, &vcpu->cpuid_entries, + vcpu->cpuid_nent * sizeof(struct kvm_cpuid_entry2))) + goto out; + return 0; + +out: + cpuid->nent = vcpu->cpuid_nent; + return r; +} + +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + +static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index) +{ + entry->function = function; + entry->index = index; + cpuid_count(entry->function, entry->index, + &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); + entry->flags = 0; +} + +static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) +{ + const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | + bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | + bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | + bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | + bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | + bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | + bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | + bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | + bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | + bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); + const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) | + bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | + bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | + bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | + bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | + bit(X86_FEATURE_PGE) | + bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | + bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) | + bit(X86_FEATURE_SYSCALL) | + (bit(X86_FEATURE_NX) && is_efer_nx()) | +#ifdef CONFIG_X86_64 + bit(X86_FEATURE_LM) | +#endif + bit(X86_FEATURE_MMXEXT) | + bit(X86_FEATURE_3DNOWEXT) | + bit(X86_FEATURE_3DNOW); + const u32 kvm_supported_word3_x86_features = + bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); + const u32 kvm_supported_word6_x86_features = + bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY); + + /* all func 2 cpuid_count() should be called on the same cpu */ + get_cpu(); + do_cpuid_1_ent(entry, function, index); + ++*nent; + + switch (function) { + case 0: + entry->eax = min(entry->eax, (u32)0xb); + break; + case 1: + entry->edx &= kvm_supported_word0_x86_features; + entry->ecx &= kvm_supported_word3_x86_features; + break; + /* function 2 entries are STATEFUL. That is, repeated cpuid commands + * may return different values. This forces us to get_cpu() before + * issuing the first command, and also to emulate this annoying behavior + * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ + case 2: { + int t, times = entry->eax & 0xff; + + entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + for (t = 1; t < times && *nent < maxnent; ++t) { + do_cpuid_1_ent(&entry[t], function, 0); + entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + ++*nent; + } + break; + } + /* function 4 and 0xb have additional index. */ + case 4: { + int index, cache_type; + + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + /* read more entries until cache_type is zero */ + for (index = 1; *nent < maxnent; ++index) { + cache_type = entry[index - 1].eax & 0x1f; + if (!cache_type) + break; + do_cpuid_1_ent(&entry[index], function, index); + entry[index].flags |= + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + ++*nent; + } + break; + } + case 0xb: { + int index, level_type; + + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + /* read more entries until level_type is zero */ + for (index = 1; *nent < maxnent; ++index) { + level_type = entry[index - 1].ecx & 0xff; + if (!level_type) + break; + do_cpuid_1_ent(&entry[index], function, index); + entry[index].flags |= + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + ++*nent; + } + break; + } + case 0x80000000: + entry->eax = min(entry->eax, 0x8000001a); + break; + case 0x80000001: + entry->edx &= kvm_supported_word1_x86_features; + entry->ecx &= kvm_supported_word6_x86_features; + break; + } + put_cpu(); +} + +static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm, + struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries) +{ + struct kvm_cpuid_entry2 *cpuid_entries; + int limit, nent = 0, r = -E2BIG; + u32 func; + + if (cpuid->nent < 1) + goto out; + r = -ENOMEM; + cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); + if (!cpuid_entries) + goto out; + + do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent); + limit = cpuid_entries[0].eax; + for (func = 1; func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + r = -E2BIG; + if (nent >= cpuid->nent) + goto out_free; + + do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent); + limit = cpuid_entries[nent - 1].eax; + for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func) + do_cpuid_ent(&cpuid_entries[nent], func, 0, + &nent, cpuid->nent); + r = -EFAULT; + if (copy_to_user(entries, cpuid_entries, + nent * sizeof(struct kvm_cpuid_entry2))) + goto out_free; + cpuid->nent = nent; + r = 0; + +out_free: + vfree(cpuid_entries); +out: + return r; +} + static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { @@ -816,6 +1046,36 @@ long kvm_arch_vcpu_ioctl(struct file *filp, goto out; break; } + case KVM_SET_CPUID2: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid, + cpuid_arg->entries); + if (r) + goto out; + break; + } + case KVM_GET_CPUID2: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid, + cpuid_arg->entries); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } case KVM_GET_MSRS: r = msr_io(vcpu, argp, kvm_get_msr, 1); break; @@ -1111,6 +1371,24 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_SUPPORTED_CPUID: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid, + cpuid_arg->entries); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } default: ; } @@ -1908,14 +2186,47 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, } } +static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) +{ + struct kvm_cpuid_entry2 *e = &vcpu->cpuid_entries[i]; + int j, nent = vcpu->cpuid_nent; + + e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; + /* when no next entry is found, the current entry[i] is reselected */ + for (j = i + 1; j == i; j = (j + 1) % nent) { + struct kvm_cpuid_entry2 *ej = &vcpu->cpuid_entries[j]; + if (ej->function == e->function) { + ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; + return j; + } + } + return 0; /* silence gcc, even though control never reaches here */ +} + +/* find an entry with matching function, matching index (if needed), and that + * should be read next (if it's stateful) */ +static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e, + u32 function, u32 index) +{ + if (e->function != function) + return 0; + if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index) + return 0; + if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) && + !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT)) + return 0; + return 1; +} + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { int i; - u32 function; - struct kvm_cpuid_entry *e, *best; + u32 function, index; + struct kvm_cpuid_entry2 *e, *best; kvm_x86_ops->cache_regs(vcpu); function = vcpu->regs[VCPU_REGS_RAX]; + index = vcpu->regs[VCPU_REGS_RCX]; vcpu->regs[VCPU_REGS_RAX] = 0; vcpu->regs[VCPU_REGS_RBX] = 0; vcpu->regs[VCPU_REGS_RCX] = 0; @@ -1923,7 +2234,9 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) best = NULL; for (i = 0; i < vcpu->cpuid_nent; ++i) { e = &vcpu->cpuid_entries[i]; - if (e->function == function) { + if (is_matching_cpuid_entry(e, function, index)) { + if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) + move_to_next_stateful_cpuid_entry(vcpu, i); best = e; break; } diff --git a/drivers/kvm/x86.h b/drivers/kvm/x86.h index b1528c9f566fbe50b91c81f9c1861d2d60fbdb96..78ab1e108d8b3fead568cb38728e660197a4d6bb 100644 --- a/drivers/kvm/x86.h +++ b/drivers/kvm/x86.h @@ -149,7 +149,7 @@ struct kvm_vcpu { int halt_request; /* real mode on Intel only */ int cpuid_nent; - struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; /* emulate context */ diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 4837d759bec09ab49e5cba30c2af8b8597a75d7c..17afa81d6d6657c83c0c457a7807744044354215 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h @@ -151,5 +151,26 @@ struct kvm_cpuid { struct kvm_cpuid_entry entries[0]; }; +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 +#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 +#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 + +/* for KVM_SET_CPUID2 */ +struct kvm_cpuid2 { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry2 entries[0]; +}; #endif diff --git a/include/linux/kvm.h b/include/linux/kvm.h index fd4f900fcce3b512c1b8927f3bb8a6f12fce0692..b751552f2e302206f3e32b88acf65c8ddab960c6 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -224,6 +224,7 @@ struct kvm_signal_mask { #define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2 #define KVM_CAP_USER_MEMORY 3 #define KVM_CAP_SET_TSS_ADDR 4 +#define KVM_CAP_EXT_CPUID 5 /* * ioctls for VM fds @@ -241,6 +242,7 @@ struct kvm_signal_mask { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x48, struct kvm_cpuid2) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) @@ -266,5 +268,7 @@ struct kvm_signal_mask { #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) #define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) #define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) +#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) +#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) #endif