diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 13a688fc8cd0c90177493aee7f3b2f1806843dd3..2fdc865ca3741e89b0e356724467c7846fc42a80 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start, return hash__vmemmap_remove_mapping(start, page_size); } #endif -struct page *realmode_pfn_to_page(unsigned long pfn); static inline pte_t pmd_pte(pmd_t pmd) { diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index ab3a4fba38e397ca7240eadacd98fa1e8beee73f..3d4b88cb859924cf2fe438a49c7be2e6febbaa12 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction); -extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, - unsigned long *hpa, enum dma_data_direction *direction); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b2f89b621b159148c6d836e3c9c3d47a89aafe47..b694d6af115080765cad72b749bea690dbb6fb79 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa); +extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index af7a20dc6e0934e0945c95e21c7b1eb07e11fdd1..19b4c628f3beced4211da41d48c528085254fee9 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); -#ifdef CONFIG_PPC_BOOK3S_64 -long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret; - - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); - - if (!ret && ((*direction == DMA_FROM_DEVICE) || - (*direction == DMA_BIDIRECTIONAL))) { - struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); - - if (likely(pg)) { - SetPageDirty(pg); - } else { - tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); - ret = -EFAULT; - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); -#endif - int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index fd6e8c13685f4c0223749647ad04c34a71ad4589..933c574e1cf795d65855b60d763c62edf4d1996a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long mmu_seq, pte_size; - unsigned long gpa, gfn, hva, pfn; + unsigned long mmu_seq; + unsigned long gpa, gfn, hva; struct kvm_memory_slot *memslot; struct page *page = NULL; long ret; @@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ hva = gfn_to_hva_memslot(memslot, gfn); if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { - pfn = page_to_pfn(page); upgrade_write = true; } else { + unsigned long pfn; + /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, writing, upgrade_p); @@ -639,63 +640,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - /* See if we can insert a 1GB or 2MB large PTE here */ - level = 0; - if (page && PageCompound(page)) { - pte_size = PAGE_SIZE << compound_order(compound_head(page)); - if (pte_size >= PUD_SIZE && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1); - } else if (pte_size >= PMD_SIZE && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); - } - } - /* - * Compute the PTE value that we need to insert. + * Read the PTE from the process' radix tree and use that + * so we get the shift and attribute bits. */ - if (page) { - pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE | - _PAGE_ACCESSED; - if (writing || upgrade_write) - pgflags |= _PAGE_WRITE | _PAGE_DIRTY; - pte = pfn_pte(pfn, __pgprot(pgflags)); + local_irq_disable(); + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + pte = *ptep; + local_irq_enable(); + + /* Get pte level from shift/size */ + if (shift == PUD_SHIFT && + (gpa & (PUD_SIZE - PAGE_SIZE)) == + (hva & (PUD_SIZE - PAGE_SIZE))) { + level = 2; + } else if (shift == PMD_SHIFT && + (gpa & (PMD_SIZE - PAGE_SIZE)) == + (hva & (PMD_SIZE - PAGE_SIZE))) { + level = 1; } else { - /* - * Read the PTE from the process' radix tree and use that - * so we get the attribute bits. - */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); - pte = *ptep; - local_irq_enable(); - if (shift == PUD_SHIFT && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - } else if (shift == PMD_SHIFT && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - } else if (shift && shift != PAGE_SHIFT) { - /* Adjust PFN */ - unsigned long mask = (1ul << shift) - PAGE_SIZE; - pte = __pte(pte_val(pte) | (hva & mask)); - } - pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); - if (writing || upgrade_write) { - if (pte_val(pte) & _PAGE_WRITE) - pte = __pte(pte_val(pte) | _PAGE_DIRTY); - } else { - pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + level = 0; + if (shift > PAGE_SHIFT) { + /* + * If the pte maps more than one page, bring over + * bits from the virtual address to get the real + * address of the specific single page we want. + */ + unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; + pte = __pte(pte_val(pte) | (hva & rpnmask)); } } + pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); + if (writing || upgrade_write) { + if (pte_val(pte) & _PAGE_WRITE) + pte = __pte(pte_val(pte) | _PAGE_DIRTY); + } else { + pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + } + /* Allocate space in the tree and write the PTE */ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 506a4d40045883026a9046919d23e695f35bc219..6821ead4b4ebc128a9a772a712feefb213b1bad0 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) +static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) +{ + long ret; + + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) { + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); + /* + * kvmppc_rm_tce_iommu_do_map() updates the UA cache after + * calling this so we still get here a valid UA. + */ + if (pua && *pua) + mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua)); + } + + return ret; +} + +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry) { unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) + if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir)) /* * real mode xchg can fail if struct page crosses * a page boundary @@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret) - iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_CLOSED; - ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); if (ret) { mm_iommu_mapped_dec(mem); /* @@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } kvmppc_tce_put(stt, entry, tce); @@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, goto unlock_exit; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } kvmppc_tce_put(stt, entry + i, tce); @@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 51ce091914f9760b85853d7f197bceb5ef73e02a..7a9886f98b0c12e8df43fe0e8a897a7d4cbeac9d 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr, { } -/* - * We do not have access to the sparsemem vmemmap, so we fallback to - * walking the list of sparsemem blocks which we already maintain for - * the sake of crashdump. In the long run, we might want to maintain - * a tree if performance of that linear walk becomes a problem. - * - * realmode_pfn_to_page functions can fail due to: - * 1) As real sparsemem blocks do not lay in RAM continously (they - * are in virtual address space which is not available in the real mode), - * the requested page struct can be split between blocks so get_page/put_page - * may fail. - * 2) When huge pages are used, the get_page/put_page API will fail - * in real mode as the linked addresses in the page struct are virtual - * too. - */ -struct page *realmode_pfn_to_page(unsigned long pfn) -{ - struct vmemmap_backing *vmem_back; - struct page *page; - unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; - unsigned long pg_va = (unsigned long) pfn_to_page(pfn); - - for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) { - if (pg_va < vmem_back->virt_addr) - continue; - - /* After vmemmap_list entry free is possible, need check all */ - if ((pg_va + sizeof(struct page)) <= - (vmem_back->virt_addr + page_size)) { - page = (struct page *) (vmem_back->phys + pg_va - - vmem_back->virt_addr); - return page; - } - } - - /* Probably that page struct is split between real pages */ - return NULL; -} -EXPORT_SYMBOL_GPL(realmode_pfn_to_page); - -#else - -struct page *realmode_pfn_to_page(unsigned long pfn) -{ - struct page *page = pfn_to_page(pfn); - return page; -} -EXPORT_SYMBOL_GPL(realmode_pfn_to_page); - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index c9ee9e23845f20d3f3b4b523e40a2a453d30ed7c..56c2234cc6ae70b8c7c27c64897bb3560ad86219 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -18,11 +18,15 @@ #include #include #include +#include #include #include static DEFINE_MUTEX(mem_list_mutex); +#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1 +#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1) + struct mm_iommu_table_group_mem_t { struct list_head next; struct rcu_head rcu; @@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) if (!page) continue; + if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) + SetPageDirty(page); + put_page(page); mem->hpas[i] = 0; } @@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, return ret; } -EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm); struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries) @@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (pageshift > mem->pageshift) return -EFAULT; - *hpa = *va | (ua & ~PAGE_MASK); + *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; } @@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (!pa) return -EFAULT; - *hpa = *pa | (ua & ~PAGE_MASK); + *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; } -EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm); + +extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua) +{ + struct mm_iommu_table_group_mem_t *mem; + long entry; + void *va; + unsigned long *pa; + + mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE); + if (!mem) + return; + + entry = (ua - mem->ua) >> PAGE_SHIFT; + va = &mem->hpas[entry]; + + pa = (void *) vmalloc_to_phys(va); + if (!pa) + return; + + *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY; +} long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) {