提交 cdb06e9d 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "PPC:

   - Fix guest time accounting in the host

   - Fix large-page backing for radix guests on POWER9

   - Fix HPT guests on POWER9 backed by 2M or 1G pages

   - Compile fixes for some configs and gcc versions

  s390:

   - Fix random memory corruption when running as guest2 (e.g. KVM in
     LPAR) and starting guest3 (e.g. nested KVM) with many CPUs

   - Export forgotten io interrupt delivery statistics counter"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: s390: fix memory overwrites when not using SCA entries
  KVM: PPC: Book3S HV: Fix guest time accounting with VIRT_CPU_ACCOUNTING_GEN
  KVM: PPC: Book3S HV: Fix VRMA initialization with 2MB or 1GB memory backing
  KVM: PPC: Book3S HV: Fix handling of large pages in radix page fault handler
  KVM: s390: provide io interrupt kvm_stat
  KVM: PPC: Book3S: Fix compile error that occurs with some gcc versions
  KVM: PPC: Fix compile error that occurs when CONFIG_ALTIVEC=n
...@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep) ...@@ -195,6 +195,12 @@ static void kvmppc_pte_free(pte_t *ptep)
kmem_cache_free(kvm_pte_cache, ptep); kmem_cache_free(kvm_pte_cache, ptep);
} }
/*
 * Test whether a PMD entry has the _PAGE_PTE bit set.
 *
 * Serves the same purpose as pmd_huge() and pmd_large(), but gives an
 * answer regardless of kernel config options.
 *
 * Returns 1 when the bit is set in the entry's value, 0 otherwise.
 */
static inline int pmd_is_leaf(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_PTE) != 0;
}
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
unsigned int level, unsigned long mmu_seq) unsigned int level, unsigned long mmu_seq)
{ {
...@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, ...@@ -219,7 +225,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
else else
new_pmd = pmd_alloc_one(kvm->mm, gpa); new_pmd = pmd_alloc_one(kvm->mm, gpa);
if (level == 0 && !(pmd && pmd_present(*pmd))) if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
new_ptep = kvmppc_pte_alloc(); new_ptep = kvmppc_pte_alloc();
/* Check if we might have been invalidated; let the guest retry if so */ /* Check if we might have been invalidated; let the guest retry if so */
...@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, ...@@ -244,12 +250,30 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
new_pmd = NULL; new_pmd = NULL;
} }
pmd = pmd_offset(pud, gpa); pmd = pmd_offset(pud, gpa);
if (pmd_large(*pmd)) { if (pmd_is_leaf(*pmd)) {
/* Someone else has instantiated a large page here; retry */ unsigned long lgpa = gpa & PMD_MASK;
/*
* If we raced with another CPU which has just put
* a 2MB pte in after we saw a pte page, try again.
*/
if (level == 0 && !new_ptep) {
ret = -EAGAIN; ret = -EAGAIN;
goto out_unlock; goto out_unlock;
} }
if (level == 1 && !pmd_none(*pmd)) { /* Valid 2MB page here already, remove it */
old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
~0UL, 0, lgpa, PMD_SHIFT);
kvmppc_radix_tlbie_page(kvm, lgpa, PMD_SHIFT);
if (old & _PAGE_DIRTY) {
unsigned long gfn = lgpa >> PAGE_SHIFT;
struct kvm_memory_slot *memslot;
memslot = gfn_to_memslot(kvm, gfn);
if (memslot && memslot->dirty_bitmap)
kvmppc_update_dirty_map(memslot,
gfn, PMD_SIZE);
}
} else if (level == 1 && !pmd_none(*pmd)) {
/* /*
* There's a page table page here, but we wanted * There's a page table page here, but we wanted
* to install a large page. Tell the caller and let * to install a large page. Tell the caller and let
...@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -412,28 +436,24 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
} else { } else {
page = pages[0]; page = pages[0];
pfn = page_to_pfn(page); pfn = page_to_pfn(page);
if (PageHuge(page)) { if (PageCompound(page)) {
page = compound_head(page); pte_size <<= compound_order(compound_head(page));
pte_size <<= compound_order(page);
/* See if we can insert a 2MB large-page PTE here */ /* See if we can insert a 2MB large-page PTE here */
if (pte_size >= PMD_SIZE && if (pte_size >= PMD_SIZE &&
(gpa & PMD_MASK & PAGE_MASK) == (gpa & (PMD_SIZE - PAGE_SIZE)) ==
(hva & PMD_MASK & PAGE_MASK)) { (hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1; level = 1;
pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
} }
} }
/* See if we can provide write access */ /* See if we can provide write access */
if (writing) { if (writing) {
/*
* We assume gup_fast has set dirty on the host PTE.
*/
pgflags |= _PAGE_WRITE; pgflags |= _PAGE_WRITE;
} else { } else {
local_irq_save(flags); local_irq_save(flags);
ptep = find_current_mm_pte(current->mm->pgd, ptep = find_current_mm_pte(current->mm->pgd,
hva, NULL, NULL); hva, NULL, NULL);
if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) if (ptep && pte_write(*ptep))
pgflags |= _PAGE_WRITE; pgflags |= _PAGE_WRITE;
local_irq_restore(flags); local_irq_restore(flags);
} }
...@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, ...@@ -459,18 +479,15 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte = pfn_pte(pfn, __pgprot(pgflags)); pte = pfn_pte(pfn, __pgprot(pgflags));
ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
} }
if (ret == 0 || ret == -EAGAIN)
ret = RESUME_GUEST;
if (page) { if (page) {
/* if (!ret && (pgflags & _PAGE_WRITE))
* We drop pages[0] here, not page because page might set_page_dirty_lock(page);
* have been set to the head page of a compound, but put_page(page);
* we have to drop the reference on the correct tail
* page to match the get inside gup()
*/
put_page(pages[0]);
} }
if (ret == 0 || ret == -EAGAIN)
ret = RESUME_GUEST;
return ret; return ret;
} }
...@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm) ...@@ -644,7 +661,7 @@ void kvmppc_free_radix(struct kvm *kvm)
continue; continue;
pmd = pmd_offset(pud, 0); pmd = pmd_offset(pud, 0);
for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) { for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
if (pmd_huge(*pmd)) { if (pmd_is_leaf(*pmd)) {
pmd_clear(pmd); pmd_clear(pmd);
continue; continue;
} }
......
...@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -2885,7 +2885,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/ */
trace_hardirqs_on(); trace_hardirqs_on();
guest_enter(); guest_enter_irqoff();
srcu_idx = srcu_read_lock(&vc->kvm->srcu); srcu_idx = srcu_read_lock(&vc->kvm->srcu);
...@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -2893,8 +2893,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
srcu_read_unlock(&vc->kvm->srcu, srcu_idx); srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
guest_exit();
trace_hardirqs_off(); trace_hardirqs_off();
set_irq_happened(trap); set_irq_happened(trap);
...@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -2937,6 +2935,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_set_host_core(pcpu); kvmppc_set_host_core(pcpu);
local_irq_enable(); local_irq_enable();
guest_exit();
/* Let secondaries go back to the offline loop */ /* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) { for (i = 0; i < controlled_threads; ++i) {
...@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) ...@@ -3656,15 +3655,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto up_out; goto up_out;
psize = vma_kernel_pagesize(vma); psize = vma_kernel_pagesize(vma);
porder = __ilog2(psize);
up_read(&current->mm->mmap_sem); up_read(&current->mm->mmap_sem);
/* We can handle 4k, 64k or 16M pages in the VRMA */ /* We can handle 4k, 64k or 16M pages in the VRMA */
err = -EINVAL; if (psize >= 0x1000000)
if (!(psize == 0x1000 || psize == 0x10000 || psize = 0x1000000;
psize == 0x1000000)) else if (psize >= 0x10000)
goto out_srcu; psize = 0x10000;
else
psize = 0x1000;
porder = __ilog2(psize);
senc = slb_pgsize_encoding(psize); senc = slb_pgsize_encoding(psize);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
......
...@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, ...@@ -1345,7 +1345,7 @@ static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_handle_load128_by2x64(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, int is_default_endian) unsigned int rt, int is_default_endian)
{ {
enum emulation_result emulated; enum emulation_result emulated = EMULATE_DONE;
while (vcpu->arch.mmio_vmx_copy_nums) { while (vcpu->arch.mmio_vmx_copy_nums) {
emulated = __kvmppc_handle_load(run, vcpu, rt, 8, emulated = __kvmppc_handle_load(run, vcpu, rt, 8,
...@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ...@@ -1608,7 +1608,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu); kvm_sigset_deactivate(vcpu);
#ifdef CONFIG_ALTIVEC
out: out:
#endif
vcpu_put(vcpu); vcpu_put(vcpu);
return r; return r;
} }
......
...@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { ...@@ -86,6 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) }, { "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ "instruction_epsw", VCPU_STAT(instruction_epsw) }, { "instruction_epsw", VCPU_STAT(instruction_epsw) },
{ "instruction_gs", VCPU_STAT(instruction_gs) }, { "instruction_gs", VCPU_STAT(instruction_gs) },
...@@ -2146,6 +2147,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu) ...@@ -2146,6 +2147,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
/* we still need the basic sca for the ipte control */ /* we still need the basic sca for the ipte control */
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
return;
} }
read_lock(&vcpu->kvm->arch.sca_lock); read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) { if (vcpu->kvm->arch.use_esca) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册