提交 97fcc07b 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Mostly bugfixes; plus, support for XMM arguments to Hyper-V hypercalls
  now obeys KVM_CAP_HYPERV_ENFORCE_CPUID.

  Both the XMM arguments feature and KVM_CAP_HYPERV_ENFORCE_CPUID are
  new in 5.14, and each did not know of the other"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: Fix per-cpu counter corruption on 32-bit builds
  KVM: selftests: fix hyperv_clock test
  KVM: SVM: improve the code readability for ASID management
  KVM: SVM: Fix off-by-one indexing when nullifying last used SEV VMCB
  KVM: Do not leak memory for duplicate debugfs directories
  KVM: selftests: Test access to XMM fast hypercalls
  KVM: x86: hyper-v: Check if guest is allowed to use XMM registers for hypercall input
  KVM: x86: Introduce trace_kvm_hv_hypercall_done()
  KVM: x86: hyper-v: Check access to hypercall before reading XMM registers
  KVM: x86: accept userspace interrupt only if no event is injected
......@@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
trace_kvm_hv_hypercall_done(result);
kvm_hv_hypercall_set_result(vcpu, result);
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
......@@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct kvm_hv_hcall hc;
u64 ret = HV_STATUS_SUCCESS;
......@@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
hc.rep = !!(hc.rep_cnt || hc.rep_idx);
if (hc.fast && is_xmm_fast_hypercall(&hc))
kvm_hv_hypercall_read_xmm(&hc);
trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
hc.ingpa, hc.outgpa);
if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
ret = HV_STATUS_ACCESS_DENIED;
goto hypercall_complete;
}
if (hc.fast && is_xmm_fast_hypercall(&hc)) {
if (unlikely(hv_vcpu->enforce_cpuid &&
!(hv_vcpu->cpuid_cache.features_edx &
HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
kvm_hv_hypercall_read_xmm(&hc);
}
switch (hc.code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
if (unlikely(hc.rep)) {
......
......@@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt)
* aggregate version in order to make the slab shrinker
* faster
*/
static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
{
kvm->arch.n_used_mmu_pages += nr;
percpu_counter_add(&kvm_total_used_mmu_pages, nr);
......
......@@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;
......@@ -78,11 +79,11 @@ struct enc_region {
/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
int ret, pos, error = 0;
int ret, asid, error = 0;
/* Check if there are any ASIDs to reclaim before performing a flush */
pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
if (pos >= max_asid)
asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
if (asid > max_asid)
return -EBUSY;
/*
......@@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
max_sev_asid);
bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
nr_asids);
bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
return true;
}
static int sev_asid_new(struct kvm_sev_info *sev)
{
int pos, min_asid, max_asid, ret;
int asid, min_asid, max_asid, ret;
bool retry = true;
enum misc_res_type type;
......@@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
*/
min_asid = sev->es_active ? 0 : min_sev_asid - 1;
min_asid = sev->es_active ? 1 : min_sev_asid;
max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
if (pos >= max_asid) {
asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
if (asid > max_asid) {
if (retry && __sev_recycle_asids(min_asid, max_asid)) {
retry = false;
goto again;
......@@ -157,11 +158,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
goto e_uncharge;
}
__set_bit(pos, sev_asid_bitmap);
__set_bit(asid, sev_asid_bitmap);
mutex_unlock(&sev_bitmap_lock);
return pos + 1;
return asid;
e_uncharge:
misc_cg_uncharge(type, sev->misc_cg, 1);
put_misc_cg(sev->misc_cg);
......@@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm)
static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
int cpu, pos;
int cpu;
enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
pos = sev->asid - 1;
__set_bit(pos, sev_reclaim_asid_bitmap);
__set_bit(sev->asid, sev_reclaim_asid_bitmap);
for_each_possible_cpu(cpu) {
sd = per_cpu(svm_data, cpu);
sd->sev_vmcbs[pos] = NULL;
sd->sev_vmcbs[sev->asid] = NULL;
}
mutex_unlock(&sev_bitmap_lock);
......@@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void)
min_sev_asid = edx;
sev_me_mask = 1UL << (ebx & 0x3f);
/* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
/*
* Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
* even though it's never used, so that the bitmap is indexed by the
* actual ASID.
*/
nr_asids = max_sev_asid + 1;
sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_asid_bitmap)
goto out;
sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap) {
bitmap_free(sev_asid_bitmap);
sev_asid_bitmap = NULL;
......@@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void)
return;
/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
sev_flush_asids(0, max_sev_asid);
sev_flush_asids(1, max_sev_asid);
bitmap_free(sev_asid_bitmap);
bitmap_free(sev_reclaim_asid_bitmap);
......@@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
if (!sev_enabled)
return 0;
sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
if (!sd->sev_vmcbs)
return -ENOMEM;
......
......@@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall,
__entry->outgpa)
);
TRACE_EVENT(kvm_hv_hypercall_done,
TP_PROTO(u64 result),
TP_ARGS(result),
TP_STRUCT__entry(
__field(__u64, result)
),
TP_fast_assign(
__entry->result = result;
),
TP_printk("result 0x%llx", __entry->result)
);
/*
* Tracepoint for Xen hypercall.
*/
......
......@@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
{
return kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
/*
* Do not cause an interrupt window exit if an exception
* is pending or an event needs reinjection; userspace
* might want to inject the interrupt manually using KVM_SET_REGS
* or KVM_SET_SREGS. For that to work, we must be at an
* instruction boundary and with no events half-injected.
*/
return (kvm_arch_interrupt_allowed(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu) &&
!kvm_event_needs_reinjection(vcpu) &&
!vcpu->arch.exception.pending);
}
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
......
......@@ -117,7 +117,7 @@
#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4)
#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
......@@ -182,4 +182,7 @@
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
/* hypercall options */
#define HV_HYPERCALL_FAST_BIT BIT(16)
#endif /* !SELFTEST_KVM_HYPERV_H */
......@@ -215,7 +215,7 @@ int main(void)
vcpu_set_hv_cpuid(vm, VCPU_ID);
tsc_page_gva = vm_vaddr_alloc_page(vm);
memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
......
......@@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val)
}
static int nr_gp;
static int nr_ud;
static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
vm_vaddr_t output_address)
......@@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs)
regs->rip = (uint64_t)&wrmsr_end;
}
static void guest_ud_handler(struct ex_regs *regs)
{
nr_ud++;
regs->rip += 3;
}
struct msr_data {
uint32_t idx;
bool available;
......@@ -90,6 +97,7 @@ struct msr_data {
struct hcall_data {
uint64_t control;
uint64_t expect;
bool ud_expected;
};
static void guest_msr(struct msr_data *msr)
......@@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr)
static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
{
int i = 0;
u64 res, input, output;
wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
while (hcall->control) {
GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
pgs_gpa + 4096) == hcall->expect);
nr_ud = 0;
if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
input = pgs_gpa;
output = pgs_gpa + 4096;
} else {
input = output = 0;
}
res = hypercall(hcall->control, input, output);
if (hcall->ud_expected)
GUEST_ASSERT(nr_ud == 1);
else
GUEST_ASSERT(res == hcall->expect);
GUEST_SYNC(i++);
}
......@@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
recomm.ebx = 0xfff;
hcall->expect = HV_STATUS_SUCCESS;
break;
case 17:
/* XMM fast hypercall */
hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
hcall->ud_expected = true;
break;
case 18:
feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
hcall->ud_expected = false;
hcall->expect = HV_STATUS_SUCCESS;
break;
case 19:
/* END */
hcall->control = 0;
break;
......@@ -625,6 +656,10 @@ int main(void)
/* Test hypercalls */
vm = vm_create_default(VCPU_ID, 0, guest_hcall);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
......
......@@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
{
static DEFINE_MUTEX(kvm_debugfs_lock);
struct dentry *dent;
char dir_name[ITOA_MAX_LEN * 2];
struct kvm_stat_data *stat_data;
const struct _kvm_stats_desc *pdesc;
......@@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
mutex_lock(&kvm_debugfs_lock);
dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
if (dent) {
pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name);
dput(dent);
mutex_unlock(&kvm_debugfs_lock);
return 0;
}
dent = debugfs_create_dir(dir_name, kvm_debugfs_dir);
mutex_unlock(&kvm_debugfs_lock);
if (IS_ERR(dent))
return 0;
kvm->debugfs_dentry = dent;
kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
sizeof(*kvm->debugfs_stat_data),
GFP_KERNEL_ACCOUNT);
......@@ -5201,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
}
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
if (kvm->debugfs_dentry) {
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
if (p) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册