提交 4e67483a 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "PPC:
   - user-triggerable use-after-free in HPT resizing
   - stale TLB entries in the guest
   - trap-and-emulate (PR) KVM guests failing to start under pHyp

  x86:
   - Another "Spectre" fix.
   - async pagefault fix
   - Revert an old fix for x86 nested virtualization, which turned out
     to do more harm than good
   - Check shrinker registration return code, to avoid warnings from
     upcoming 4.16 -mm patches"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Add memory barrier on vmcs field lookup
  KVM: x86: emulate #UD while in guest mode
  x86: kvm: propagate register_shrinker return code
  KVM MMU: check pending exception before injecting APF
  KVM: PPC: Book3S HV: Always flush TLB in kvmppc_alloc_reset_hpt()
  KVM: PPC: Book3S PR: Fix WIMG handling under pHyp
  KVM: PPC: Book3S HV: Fix use after free in case of multiple resize requests
  KVM: PPC: Book3S HV: Drop prepare_done from struct kvm_resize_hpt
......@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_read = true;
gpte->may_write = true;
gpte->page_size = MMU_PAGE_4K;
gpte->wimg = HPTE_R_M;
return 0;
}
......
......@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
u32 order;
/* These fields protected by kvm->lock */
/* Possible values and their usage:
* <0 an error occurred during allocation,
* -EBUSY allocation is in the progress,
* 0 allocation made successfuly.
*/
int error;
bool prepare_done;
/* Private to the work thread, until prepare_done is true,
* then protected by kvm->resize_hpt_sem */
/* Private to the work thread, until error != -EBUSY,
* then protected by kvm->lock.
*/
struct kvm_hpt_info hpt;
};
......@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
* Reset all the reverse-mapping chains for all memslots
*/
kvmppc_rmap_reset(kvm);
/* Ensure that each vcpu will flush its TLB on next entry. */
cpumask_setall(&kvm->arch.need_tlb_flush);
err = 0;
goto out;
}
......@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
kvmppc_set_hpt(kvm, &info);
out:
if (err == 0)
/* Ensure that each vcpu will flush its TLB on next entry. */
cpumask_setall(&kvm->arch.need_tlb_flush);
mutex_unlock(&kvm->lock);
return err;
}
......@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
{
BUG_ON(kvm->arch.resize_hpt != resize);
if (WARN_ON(!mutex_is_locked(&kvm->lock)))
return;
if (!resize)
return;
if (resize->hpt.virt)
kvmppc_free_hpt(&resize->hpt);
if (resize->error != -EBUSY) {
if (resize->hpt.virt)
kvmppc_free_hpt(&resize->hpt);
kfree(resize);
}
kvm->arch.resize_hpt = NULL;
kfree(resize);
if (kvm->arch.resize_hpt == resize)
kvm->arch.resize_hpt = NULL;
}
static void resize_hpt_prepare_work(struct work_struct *work)
......@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
struct kvm_resize_hpt,
work);
struct kvm *kvm = resize->kvm;
int err;
int err = 0;
resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
resize->order);
err = resize_hpt_allocate(resize);
if (WARN_ON(resize->error != -EBUSY))
return;
mutex_lock(&kvm->lock);
/* Request is still current? */
if (kvm->arch.resize_hpt == resize) {
/* We may request large allocations here:
* do not sleep with kvm->lock held for a while.
*/
mutex_unlock(&kvm->lock);
resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
resize->order);
err = resize_hpt_allocate(resize);
/* We have strict assumption about -EBUSY
* when preparing for HPT resize.
*/
if (WARN_ON(err == -EBUSY))
err = -EINPROGRESS;
mutex_lock(&kvm->lock);
/* It is possible that kvm->arch.resize_hpt != resize
* after we grab kvm->lock again.
*/
}
resize->error = err;
resize->prepare_done = true;
if (kvm->arch.resize_hpt != resize)
resize_hpt_release(kvm, resize);
mutex_unlock(&kvm->lock);
}
......@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
if (resize) {
if (resize->order == shift) {
/* Suitable resize in progress */
if (resize->prepare_done) {
ret = resize->error;
if (ret != 0)
resize_hpt_release(kvm, resize);
} else {
/* Suitable resize in progress? */
ret = resize->error;
if (ret == -EBUSY)
ret = 100; /* estimated time in ms */
}
else if (ret)
resize_hpt_release(kvm, resize);
goto out;
}
......@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
ret = -ENOMEM;
goto out;
}
resize->error = -EBUSY;
resize->order = shift;
resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work);
......@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
if (!resize || (resize->order != shift))
goto out;
ret = -EBUSY;
if (!resize->prepare_done)
goto out;
ret = resize->error;
if (ret != 0)
if (ret)
goto out;
ret = resize_hpt_rehash(resize);
if (ret != 0)
if (ret)
goto out;
resize_hpt_pivot(resize);
......
......@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
#define HPTE_R_M _PAGE_COHERENT
#endif
static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
......@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte.eaddr = eaddr;
pte.vpage = eaddr >> 12;
pte.page_size = MMU_PAGE_64K;
pte.wimg = HPTE_R_M;
}
switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
......
......@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
kvm_event_needs_reinjection(vcpu)))
kvm_event_needs_reinjection(vcpu) ||
vcpu->arch.exception.pending))
return false;
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
......@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
kvm_mmu_clear_all_pte_masks();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);
if (!pte_list_desc_cache)
goto nomem;
goto out;
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
sizeof(struct kvm_mmu_page),
0, SLAB_ACCOUNT, NULL);
if (!mmu_page_header_cache)
goto nomem;
goto out;
if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
goto nomem;
goto out;
register_shrinker(&mmu_shrinker);
ret = register_shrinker(&mmu_shrinker);
if (ret)
goto out;
return 0;
nomem:
out:
mmu_destroy_caches();
return -ENOMEM;
return ret;
}
/*
......
......@@ -361,7 +361,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
......@@ -372,14 +371,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
/* No need to intercept #UD if L1 doesn't intercept it */
h_intercept_exceptions =
h->intercept_exceptions & ~(1U << UD_VECTOR);
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
c->intercept_exceptions =
h_intercept_exceptions | g->intercept_exceptions;
c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
......@@ -2202,7 +2196,6 @@ static int ud_interception(struct vcpu_svm *svm)
{
int er;
WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
......
......@@ -899,8 +899,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
{
BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
vmcs_field_to_offset_table[field] == 0)
if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
return -ENOENT;
/*
* FIXME: Mitigation for CVE-2017-5753. To be replaced with a
* generic mechanism.
*/
asm("lfence");
if (vmcs_field_to_offset_table[field] == 0)
return -ENOENT;
return vmcs_field_to_offset_table[field];
......@@ -1887,7 +1895,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
......@@ -1905,8 +1913,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
else
eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
......@@ -5917,7 +5923,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册