提交 42057e18 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Mixed bugfixes. Perhaps the most interesting one is a latent bug that
  was finally triggered by PCID support"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm/x86: Handle async PF in RCU read-side critical sections
  KVM: nVMX: Fix nested #PF intends to break L1's vmlauch/vmresume
  KVM: VMX: use cmpxchg64
  KVM: VMX: simplify and fix vmx_vcpu_pi_load
  KVM: VMX: avoid double list add with VT-d posted interrupts
  KVM: VMX: extract __pi_post_block
  KVM: PPC: Book3S HV: Check for updated HDSISR on P9 HDSI exception
  KVM: nVMX: fix HOST_CR3/HOST_CR4 cache
...@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ...@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0 mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Move canary into DSISR to check for later */
BEGIN_FTR_SECTION
li r0, 0x7fff
mtspr SPRN_HDSISR, r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r0, VCPU_GPR(R0)(r4) ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4) ld r4, VCPU_GPR(R4)(r4)
...@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ...@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
kvmppc_hdsi: kvmppc_hdsi:
ld r3, VCPU_KVM(r9) ld r3, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r3) lbz r0, KVM_RADIX(r3)
cmpwi r0, 0
mfspr r4, SPRN_HDAR mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR mfspr r6, SPRN_HDSISR
BEGIN_FTR_SECTION
/* Look for DSISR canary. If we find it, retry instruction */
cmpdi r6, 0x7fff
beq 6f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpwi r0, 0
bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */ /* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
......
...@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token) ...@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token; n.token = token;
n.cpu = smp_processor_id(); n.cpu = smp_processor_id();
n.halted = is_idle_task(current) || preempt_count() > 1; n.halted = is_idle_task(current) || preempt_count() > 1 ||
rcu_preempt_depth();
init_swait_queue_head(&n.wq); init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list); hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock); raw_spin_unlock(&b->lock);
......
...@@ -200,6 +200,8 @@ struct loaded_vmcs { ...@@ -200,6 +200,8 @@ struct loaded_vmcs {
int cpu; int cpu;
bool launched; bool launched;
bool nmi_known_unmasked; bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
struct list_head loaded_vmcss_on_cpu_link; struct list_head loaded_vmcss_on_cpu_link;
}; };
...@@ -600,8 +602,6 @@ struct vcpu_vmx { ...@@ -600,8 +602,6 @@ struct vcpu_vmx {
int gs_ldt_reload_needed; int gs_ldt_reload_needed;
int fs_reload_needed; int fs_reload_needed;
u64 msr_host_bndcfgs; u64 msr_host_bndcfgs;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state; } host_state;
struct { struct {
int vm86_active; int vm86_active;
...@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
struct pi_desc old, new; struct pi_desc old, new;
unsigned int dest; unsigned int dest;
if (!kvm_arch_has_assigned_device(vcpu->kvm) || /*
!irq_remapping_cap(IRQ_POSTING_CAP) || * In case of hot-plug or hot-unplug, we may have to undo
!kvm_vcpu_apicv_active(vcpu)) * vmx_vcpu_pi_put even if there is no assigned device. And we
* always keep PI.NDST up to date for simplicity: it makes the
* code easier, and CPU migration is not a fast path.
*/
if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
return; return;
/*
* First handle the simple case where no cmpxchg is necessary; just
* allow posting non-urgent interrupts.
*
* If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
* PI.NDST: pi_post_block will do it for us and the wakeup_handler
* expects the VCPU to be on the blocked_vcpu_list that matches
* PI.NDST.
*/
if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
vcpu->cpu == cpu) {
pi_clear_sn(pi_desc);
return;
}
/* The full case. */
do { do {
old.control = new.control = pi_desc->control; old.control = new.control = pi_desc->control;
/* dest = cpu_physical_id(cpu);
* If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
* are two possible cases:
* 1. After running 'pre_block', context switch
* happened. For this case, 'sn' was set in
* vmx_vcpu_put(), so we need to clear it here.
* 2. After running 'pre_block', we were blocked,
* and woken up by some other guy. For this case,
* we don't need to do anything, 'pi_post_block'
* will do everything for us. However, we cannot
* check whether it is case #1 or case #2 here
* (maybe, not needed), so we also clear sn here,
* I think it is not a big deal.
*/
if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
if (vcpu->cpu != cpu) {
dest = cpu_physical_id(cpu);
if (x2apic_enabled())
new.ndst = dest;
else
new.ndst = (dest << 8) & 0xFF00;
}
/* set 'NV' to 'notification vector' */ if (x2apic_enabled())
new.nv = POSTED_INTR_VECTOR; new.ndst = dest;
} else
new.ndst = (dest << 8) & 0xFF00;
/* Allow posting non-urgent interrupts */
new.sn = 0; new.sn = 0;
} while (cmpxchg(&pi_desc->control, old.control, } while (cmpxchg64(&pi_desc->control, old.control,
new.control) != old.control); new.control) != old.control);
} }
static void decache_tsc_multiplier(struct vcpu_vmx *vmx) static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
...@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) ...@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
*/ */
cr3 = __read_cr3(); cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
vmx->host_state.vmcs_host_cr3 = cr3; vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
/* Save the most likely value for this task's CR4 in the VMCS. */ /* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow(); cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
vmx->host_state.vmcs_host_cr4 = cr4; vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast(); cr3 = __get_current_cr3_fast();
if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
vmcs_writel(HOST_CR3, cr3); vmcs_writel(HOST_CR3, cr3);
vmx->host_state.vmcs_host_cr3 = cr3; vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
} }
cr4 = cr4_read_shadow(); cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4); vmcs_writel(HOST_CR4, cr4);
vmx->host_state.vmcs_host_cr4 = cr4; vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
} }
/* When single-stepping over STI and MOV SS, we must clear the /* When single-stepping over STI and MOV SS, we must clear the
...@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) ...@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
/*
* Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
* or POSTED_INTR_WAKEUP_VECTOR.
*/
vmx->pi_desc.nv = POSTED_INTR_VECTOR;
vmx->pi_desc.sn = 1;
return &vmx->vcpu; return &vmx->vcpu;
free_vmcs: free_vmcs:
...@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, ...@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
WARN_ON(!is_guest_mode(vcpu)); WARN_ON(!is_guest_mode(vcpu));
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
!to_vmx(vcpu)->nested.nested_run_pending) {
vmcs12->vm_exit_intr_error_code = fault->error_code; vmcs12->vm_exit_intr_error_code = fault->error_code;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
...@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, ...@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
} }
static void __pi_post_block(struct kvm_vcpu *vcpu)
{
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
struct pi_desc old, new;
unsigned int dest;
do {
old.control = new.control = pi_desc->control;
WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
"Wakeup handler not enabled while the VCPU is blocked\n");
dest = cpu_physical_id(vcpu->cpu);
if (x2apic_enabled())
new.ndst = dest;
else
new.ndst = (dest << 8) & 0xFF00;
/* set 'NV' to 'notification vector' */
new.nv = POSTED_INTR_VECTOR;
} while (cmpxchg64(&pi_desc->control, old.control,
new.control) != old.control);
if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
list_del(&vcpu->blocked_vcpu_list);
spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
vcpu->pre_pcpu = -1;
}
}
/* /*
* This routine does the following things for vCPU which is going * This routine does the following things for vCPU which is going
* to be blocked if VT-d PI is enabled. * to be blocked if VT-d PI is enabled.
...@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, ...@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
*/ */
static int pi_pre_block(struct kvm_vcpu *vcpu) static int pi_pre_block(struct kvm_vcpu *vcpu)
{ {
unsigned long flags;
unsigned int dest; unsigned int dest;
struct pi_desc old, new; struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
...@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) ...@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
!kvm_vcpu_apicv_active(vcpu)) !kvm_vcpu_apicv_active(vcpu))
return 0; return 0;
vcpu->pre_pcpu = vcpu->cpu; WARN_ON(irqs_disabled());
spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, local_irq_disable();
vcpu->pre_pcpu), flags); if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
list_add_tail(&vcpu->blocked_vcpu_list, vcpu->pre_pcpu = vcpu->cpu;
&per_cpu(blocked_vcpu_on_cpu, spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
vcpu->pre_pcpu)); list_add_tail(&vcpu->blocked_vcpu_list,
spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, &per_cpu(blocked_vcpu_on_cpu,
vcpu->pre_pcpu), flags); vcpu->pre_pcpu));
spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
}
do { do {
old.control = new.control = pi_desc->control; old.control = new.control = pi_desc->control;
/*
* We should not block the vCPU if
* an interrupt is posted for it.
*/
if (pi_test_on(pi_desc) == 1) {
spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
vcpu->pre_pcpu), flags);
list_del(&vcpu->blocked_vcpu_list);
spin_unlock_irqrestore(
&per_cpu(blocked_vcpu_on_cpu_lock,
vcpu->pre_pcpu), flags);
vcpu->pre_pcpu = -1;
return 1;
}
WARN((pi_desc->sn == 1), WARN((pi_desc->sn == 1),
"Warning: SN field of posted-interrupts " "Warning: SN field of posted-interrupts "
"is set before blocking\n"); "is set before blocking\n");
...@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu) ...@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */ /* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR; new.nv = POSTED_INTR_WAKEUP_VECTOR;
} while (cmpxchg(&pi_desc->control, old.control, } while (cmpxchg64(&pi_desc->control, old.control,
new.control) != old.control); new.control) != old.control);
return 0; /* We should not block the vCPU if an interrupt is posted for it. */
if (pi_test_on(pi_desc) == 1)
__pi_post_block(vcpu);
local_irq_enable();
return (vcpu->pre_pcpu == -1);
} }
static int vmx_pre_block(struct kvm_vcpu *vcpu) static int vmx_pre_block(struct kvm_vcpu *vcpu)
...@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu) ...@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
static void pi_post_block(struct kvm_vcpu *vcpu) static void pi_post_block(struct kvm_vcpu *vcpu)
{ {
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); if (vcpu->pre_pcpu == -1)
struct pi_desc old, new;
unsigned int dest;
unsigned long flags;
if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
!kvm_vcpu_apicv_active(vcpu))
return; return;
do { WARN_ON(irqs_disabled());
old.control = new.control = pi_desc->control; local_irq_disable();
__pi_post_block(vcpu);
dest = cpu_physical_id(vcpu->cpu); local_irq_enable();
if (x2apic_enabled())
new.ndst = dest;
else
new.ndst = (dest << 8) & 0xFF00;
/* Allow posting non-urgent interrupts */
new.sn = 0;
/* set 'NV' to 'notification vector' */
new.nv = POSTED_INTR_VECTOR;
} while (cmpxchg(&pi_desc->control, old.control,
new.control) != old.control);
if(vcpu->pre_pcpu != -1) {
spin_lock_irqsave(
&per_cpu(blocked_vcpu_on_cpu_lock,
vcpu->pre_pcpu), flags);
list_del(&vcpu->blocked_vcpu_list);
spin_unlock_irqrestore(
&per_cpu(blocked_vcpu_on_cpu_lock,
vcpu->pre_pcpu), flags);
vcpu->pre_pcpu = -1;
}
} }
static void vmx_post_block(struct kvm_vcpu *vcpu) static void vmx_post_block(struct kvm_vcpu *vcpu)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册