Commit 7caf9571 authored by David Woodhouse, committed by Paolo Bonzini

KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_info

Currently, the fast path of kvm_xen_set_evtchn_fast() doesn't set the
index bits in the target vCPU's evtchn_pending_sel, because it only has
a userspace virtual address with which to do so. It just sets them in
the kernel, and kvm_xen_has_interrupt() then completes the delivery to
the actual vcpu_info structure when the vCPU runs.

Using a gfn_to_pfn_cache allows kvm_xen_set_evtchn_fast() to do the full
delivery in the common case.

Clean up the fallback case too, by moving the deferred delivery out into
a separate kvm_xen_inject_pending_events() function which isn't ever
called in atomic contexts as __kvm_xen_has_interrupt() is.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-6-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Parent 916d3608
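Every hunk below leans on the same access pattern: take the gfn_to_pfn_cache read lock, check that the cached mapping still covers the vcpu_info, and if not drop the lock, refresh the cache (which may sleep), and retry. What follows is a minimal sketch of that pattern only, not code from the patch: the helper name and its can_sleep parameter are invented for illustration, while the check/refresh calls, the locking, and the dirty-page marking are the ones the diff itself uses.

/*
 * Hypothetical sketch (not part of the patch): the check/refresh loop used
 * by kvm_xen_inject_pending_events() and, without the sleeping refresh, by
 * kvm_xen_set_evtchn_fast(). Assumes the KVM x86/Xen context of this series.
 */
static bool set_upcall_pending(struct kvm_vcpu *v, bool can_sleep)
{
    struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
    unsigned long flags;

    read_lock_irqsave(&gpc->lock, flags);
    while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
                                       sizeof(struct vcpu_info))) {
        read_unlock_irqrestore(&gpc->lock, flags);

        /* The atomic fast path must not block; let the vCPU finish it. */
        if (!can_sleep)
            return false;

        /* Refresh may sleep to fault the page in and remap it. */
        if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
                                         sizeof(struct vcpu_info)))
            return false;

        read_lock_irqsave(&gpc->lock, flags);
    }

    /* gpc->khva now maps the guest's vcpu_info page in the kernel. */
    WRITE_ONCE(((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending, 1);

    read_unlock_irqrestore(&gpc->lock, flags);
    mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
    return true;
}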
@@ -606,9 +606,8 @@ struct kvm_vcpu_hv {
 struct kvm_vcpu_xen {
     u64 hypercall_rip;
     u32 current_runstate;
-    bool vcpu_info_set;
     bool vcpu_time_info_set;
-    struct gfn_to_hva_cache vcpu_info_cache;
+    struct gfn_to_pfn_cache vcpu_info_cache;
     struct gfn_to_hva_cache vcpu_time_info_cache;
     struct gfn_to_pfn_cache runstate_cache;
     u64 last_steal;
...
@@ -3158,9 +3158,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)

     if (vcpu->pv_time.active)
         kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
-    if (vcpu->xen.vcpu_info_set)
-        kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
+    if (vcpu->xen.vcpu_info_cache.active)
+        kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
                                offsetof(struct compat_vcpu_info, time));
     if (vcpu->xen.vcpu_time_info_set)
         kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
     if (!v->vcpu_idx)
@@ -10424,6 +10424,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
             break;

         kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);

+        if (kvm_xen_has_pending_events(vcpu))
+            kvm_xen_inject_pending_events(vcpu);
+
         if (kvm_cpu_has_pending_timer(vcpu))
             kvm_inject_pending_timer_irqs(vcpu);
@@ -12236,6 +12239,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
         kvm_x86_ops.nested_ops->hv_timer_pending(vcpu))
         return true;

+    if (kvm_xen_has_pending_events(vcpu))
+        return true;
+
     return false;
 }
...
@@ -8,6 +8,7 @@

 #include "x86.h"
 #include "xen.h"
+#include "lapic.h"
 #include "hyperv.h"

 #include <linux/kvm_host.h>
@@ -246,23 +247,79 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
     mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
 }

-int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+/*
+ * On event channel delivery, the vcpu_info may not have been accessible.
+ * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which
+ * need to be marked into the vcpu_info (and evtchn_upcall_pending set).
+ * Do so now that we can sleep in the context of the vCPU to bring the
+ * page in, and refresh the pfn cache for it.
+ */
+void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 {
     unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
-    bool atomic = in_atomic() || !task_is_running(current);
-    int err;
+    struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
+    unsigned long flags;
+
+    if (!evtchn_pending_sel)
+        return;
+
+    /*
+     * Yes, this is an open-coded loop. But that's just what put_user()
+     * does anyway. Page it in and retry the instruction. We're just a
+     * little more honest about it.
+     */
+    read_lock_irqsave(&gpc->lock, flags);
+    while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+                                       sizeof(struct vcpu_info))) {
+        read_unlock_irqrestore(&gpc->lock, flags);
+
+        if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+                                         sizeof(struct vcpu_info)))
+            return;
+
+        read_lock_irqsave(&gpc->lock, flags);
+    }
+
+    /* Now gpc->khva is a valid kernel address for the vcpu_info */
+    if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
+        struct vcpu_info *vi = gpc->khva;
+
+        asm volatile(LOCK_PREFIX "orq %0, %1\n"
+                     "notq %0\n"
+                     LOCK_PREFIX "andq %0, %2\n"
+                     : "=r" (evtchn_pending_sel),
+                       "+m" (vi->evtchn_pending_sel),
+                       "+m" (v->arch.xen.evtchn_pending_sel)
+                     : "0" (evtchn_pending_sel));
+        WRITE_ONCE(vi->evtchn_upcall_pending, 1);
+    } else {
+        u32 evtchn_pending_sel32 = evtchn_pending_sel;
+        struct compat_vcpu_info *vi = gpc->khva;
+
+        asm volatile(LOCK_PREFIX "orl %0, %1\n"
+                     "notl %0\n"
+                     LOCK_PREFIX "andl %0, %2\n"
+                     : "=r" (evtchn_pending_sel32),
+                       "+m" (vi->evtchn_pending_sel),
+                       "+m" (v->arch.xen.evtchn_pending_sel)
+                     : "0" (evtchn_pending_sel32));
+        WRITE_ONCE(vi->evtchn_upcall_pending, 1);
+    }
+
+    read_unlock_irqrestore(&gpc->lock, flags);
+    mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+}
+
+int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+{
+    struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
+    unsigned long flags;
     u8 rc = 0;

     /*
      * If the global upcall vector (HVMIRQ_callback_vector) is set and
      * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
      */
-    struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
-    struct kvm_memslots *slots = kvm_memslots(v->kvm);
-    bool ghc_valid = slots->generation == ghc->generation &&
-        !kvm_is_error_hva(ghc->hva) && ghc->memslot;
-    unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
-
     /* No need for compat handling here */
     BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
@@ -272,101 +329,35 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
     BUILD_BUG_ON(sizeof(rc) !=
                  sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));

-    /*
-     * For efficiency, this mirrors the checks for using the valid
-     * cache in kvm_read_guest_offset_cached(), but just uses
-     * __get_user() instead. And falls back to the slow path.
-     */
-    if (!evtchn_pending_sel && ghc_valid) {
-        /* Fast path */
-        pagefault_disable();
-        err = __get_user(rc, (u8 __user *)ghc->hva + offset);
-        pagefault_enable();
-        if (!err)
-            return rc;
-    }
-
-    /* Slow path */
+    read_lock_irqsave(&gpc->lock, flags);
+    while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+                                       sizeof(struct vcpu_info))) {
+        read_unlock_irqrestore(&gpc->lock, flags);

         /*
          * This function gets called from kvm_vcpu_block() after setting the
          * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
          * from a HLT. So we really mustn't sleep. If the page ended up absent
          * at that point, just return 1 in order to trigger an immediate wake,
          * and we'll end up getting called again from a context where we *can*
          * fault in the page and wait for it.
          */
-    if (atomic)
+        if (in_atomic() || !task_is_running(current))
             return 1;

-    if (!ghc_valid) {
-        err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
-        if (err || !ghc->memslot) {
+        if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+                                         sizeof(struct vcpu_info))) {
             /*
              * If this failed, userspace has screwed up the
              * vcpu_info mapping. No interrupts for you.
              */
             return 0;
         }
+
+        read_lock_irqsave(&gpc->lock, flags);
     }

-    /*
-     * Now we have a valid (protected by srcu) userspace HVA in
-     * ghc->hva which points to the struct vcpu_info. If there
-     * are any bits in the in-kernel evtchn_pending_sel then
-     * we need to write those to the guest vcpu_info and set
-     * its evtchn_upcall_pending flag. If there aren't any bits
-     * to add, we only want to *check* evtchn_upcall_pending.
-     */
-    if (evtchn_pending_sel) {
-        bool long_mode = v->kvm->arch.xen.long_mode;
-
-        if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
-            return 0;
-
-        if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
-            struct vcpu_info __user *vi = (void __user *)ghc->hva;
-
-            /* Attempt to set the evtchn_pending_sel bits in the
-             * guest, and if that succeeds then clear the same
-             * bits in the in-kernel version. */
-            asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
-                         "\tnotq %0\n"
-                         "\t" LOCK_PREFIX "andq %0, %2\n"
-                         "2:\n"
-                         _ASM_EXTABLE_UA(1b, 2b)
-                         : "=r" (evtchn_pending_sel),
-                           "+m" (vi->evtchn_pending_sel),
-                           "+m" (v->arch.xen.evtchn_pending_sel)
-                         : "0" (evtchn_pending_sel));
-        } else {
-            struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
-            u32 evtchn_pending_sel32 = evtchn_pending_sel;
-
-            /* Attempt to set the evtchn_pending_sel bits in the
-             * guest, and if that succeeds then clear the same
-             * bits in the in-kernel version. */
-            asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
-                         "\tnotl %0\n"
-                         "\t" LOCK_PREFIX "andl %0, %2\n"
-                         "2:\n"
-                         _ASM_EXTABLE_UA(1b, 2b)
-                         : "=r" (evtchn_pending_sel32),
-                           "+m" (vi->evtchn_pending_sel),
-                           "+m" (v->arch.xen.evtchn_pending_sel)
-                         : "0" (evtchn_pending_sel32));
-        }
-        rc = 1;
-        unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
-
- err:
-        user_access_end();
-
-        mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
-    } else {
-        __get_user(rc, (u8 __user *)ghc->hva + offset);
-    }
+    rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
+    read_unlock_irqrestore(&gpc->lock, flags);

     return rc;
 }
@@ -456,25 +447,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                              offsetof(struct compat_vcpu_info, time));

         if (data->u.gpa == GPA_INVALID) {
-            vcpu->arch.xen.vcpu_info_set = false;
+            kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
             r = 0;
             break;
         }

-        /* It must fit within a single page */
-        if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
-            r = -EINVAL;
-            break;
-        }
-
-        r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+        r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
                                       &vcpu->arch.xen.vcpu_info_cache,
-                                      data->u.gpa,
+                                      NULL, KVM_HOST_USES_PFN, data->u.gpa,
                                       sizeof(struct vcpu_info));
-        if (!r) {
-            vcpu->arch.xen.vcpu_info_set = true;
+        if (!r)
             kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-        }
+
         break;

     case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
@@ -630,7 +614,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)

     switch (data->type) {
     case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
-        if (vcpu->arch.xen.vcpu_info_set)
+        if (vcpu->arch.xen.vcpu_info_cache.active)
             data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
         else
             data->u.gpa = GPA_INVALID;
@@ -903,16 +887,17 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
     if (!vcpu)
         return -1;

-    if (!vcpu->arch.xen.vcpu_info_set)
+    if (!vcpu->arch.xen.vcpu_info_cache.active)
         return -1;

     if (e->xen_evtchn.port >= max_evtchn_port(kvm))
         return -1;

     rc = -EWOULDBLOCK;
-    read_lock_irqsave(&gpc->lock, flags);

     idx = srcu_read_lock(&kvm->srcu);
+    read_lock_irqsave(&gpc->lock, flags);
+
     if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
         goto out_rcu;
@@ -940,17 +925,44 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
     } else if (test_bit(e->xen_evtchn.port, mask_bits)) {
         rc = -1; /* Masked */
     } else {
-        rc = 1; /* Delivered. But was the vCPU waking already? */
-        if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
-            kick_vcpu = true;
+        rc = 1; /* Delivered to the bitmap in shared_info. */
+        /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
+        read_unlock_irqrestore(&gpc->lock, flags);
+        gpc = &vcpu->arch.xen.vcpu_info_cache;
+
+        read_lock_irqsave(&gpc->lock, flags);
+        if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+            /*
+             * Could not access the vcpu_info. Set the bit in-kernel
+             * and prod the vCPU to deliver it for itself.
+             */
+            if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
+                kick_vcpu = true;
+            goto out_rcu;
+        }
+
+        if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+            struct vcpu_info *vcpu_info = gpc->khva;
+            if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
+                WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+                kick_vcpu = true;
+            }
+        } else {
+            struct compat_vcpu_info *vcpu_info = gpc->khva;
+            if (!test_and_set_bit(port_word_bit,
+                                  (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
+                WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+                kick_vcpu = true;
+            }
+        }
     }

  out_rcu:
-    srcu_read_unlock(&kvm->srcu, idx);
     read_unlock_irqrestore(&gpc->lock, flags);
+    srcu_read_unlock(&kvm->srcu, idx);

     if (kick_vcpu) {
-        kvm_make_request(KVM_REQ_EVENT, vcpu);
+        kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
         kvm_vcpu_kick(vcpu);
     }
@@ -1052,4 +1064,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 {
     kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
                                  &vcpu->arch.xen.runstate_cache);
+    kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+                                 &vcpu->arch.xen.vcpu_info_cache);
 }
@@ -15,6 +15,7 @@
 extern struct static_key_false_deferred kvm_xen_enabled;

 int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
+void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu);
 int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -46,11 +47,19 @@ static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 {
     if (static_branch_unlikely(&kvm_xen_enabled.key) &&
-        vcpu->arch.xen.vcpu_info_set && vcpu->kvm->arch.xen.upcall_vector)
+        vcpu->arch.xen.vcpu_info_cache.active &&
+        vcpu->kvm->arch.xen.upcall_vector)
         return __kvm_xen_has_interrupt(vcpu);

     return 0;
 }

+static inline bool kvm_xen_has_pending_events(struct kvm_vcpu *vcpu)
+{
+    return static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.evtchn_pending_sel;
+}
+
 #else
 static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
 {
@@ -83,6 +92,15 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 {
     return 0;
 }
+
+static inline void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline bool kvm_xen_has_pending_events(struct kvm_vcpu *vcpu)
+{
+    return false;
+}
+
 #endif

 int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
...