Commit b66081e2 authored by Michael Ellerman

Merge branch 'kvm-ppc-fixes' of paulus/powerpc into topic/ppc-kvm

Some commits we'd like to share between the powerpc and kvm-ppc trees for
next have dependencies on commits that went into 4.19 via the
kvm-ppc-fixes branch and were not merged before 4.19-rc3, which is our
base commit.

So merge the kvm-ppc-fixes branch into topic/ppc-kvm.
@@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 	return hash__vmemmap_remove_mapping(start, page_size);
 }
 #endif
-struct page *realmode_pfn_to_page(unsigned long pfn);
 
 static inline pte_t pmd_pte(pmd_t pmd)
 {
...
@@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
 extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 		unsigned long *hpa, enum dma_data_direction *direction);
-extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
 					int pci_domain_number,
...
@@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
 extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 		unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
 extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
 #endif
...
@@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
-		unsigned long *hpa, enum dma_data_direction *direction)
-{
-	long ret;
-
-	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-
-	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
-			(*direction == DMA_BIDIRECTIONAL))) {
-		struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
-
-		if (likely(pg)) {
-			SetPageDirty(pg);
-		} else {
-			tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
-			ret = -EFAULT;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
-#endif
-
 int iommu_take_ownership(struct iommu_table *tbl)
 {
 	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
...
@@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 				   unsigned long ea, unsigned long dsisr)
 {
 	struct kvm *kvm = vcpu->kvm;
-	unsigned long mmu_seq, pte_size;
-	unsigned long gpa, gfn, hva, pfn;
+	unsigned long mmu_seq;
+	unsigned long gpa, gfn, hva;
 	struct kvm_memory_slot *memslot;
 	struct page *page = NULL;
 	long ret;
@@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	hva = gfn_to_hva_memslot(memslot, gfn);
 	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
-		pfn = page_to_pfn(page);
 		upgrade_write = true;
 	} else {
+		unsigned long pfn;
+
 		/* Call KVM generic code to do the slow-path check */
 		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
 					   writing, upgrade_p);
@@ -639,63 +640,45 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
-	/* See if we can insert a 1GB or 2MB large PTE here */
-	level = 0;
-	if (page && PageCompound(page)) {
-		pte_size = PAGE_SIZE << compound_order(compound_head(page));
-		if (pte_size >= PUD_SIZE &&
-		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-		    (hva & (PUD_SIZE - PAGE_SIZE))) {
-			level = 2;
-			pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
-		} else if (pte_size >= PMD_SIZE &&
-			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			   (hva & (PMD_SIZE - PAGE_SIZE))) {
-			level = 1;
-			pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-		}
-	}
-
-	/*
-	 * Compute the PTE value that we need to insert.
-	 */
-	if (page) {
-		pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
-			_PAGE_ACCESSED;
-		if (writing || upgrade_write)
-			pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
-		pte = pfn_pte(pfn, __pgprot(pgflags));
-	} else {
-		/*
-		 * Read the PTE from the process' radix tree and use that
-		 * so we get the attribute bits.
-		 */
-		local_irq_disable();
-		ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
-		pte = *ptep;
-		local_irq_enable();
-		if (shift == PUD_SHIFT &&
-		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
-		    (hva & (PUD_SIZE - PAGE_SIZE))) {
-			level = 2;
-		} else if (shift == PMD_SHIFT &&
-			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			   (hva & (PMD_SIZE - PAGE_SIZE))) {
-			level = 1;
-		} else if (shift && shift != PAGE_SHIFT) {
-			/* Adjust PFN */
-			unsigned long mask = (1ul << shift) - PAGE_SIZE;
-			pte = __pte(pte_val(pte) | (hva & mask));
-		}
-		pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
-		if (writing || upgrade_write) {
-			if (pte_val(pte) & _PAGE_WRITE)
-				pte = __pte(pte_val(pte) | _PAGE_DIRTY);
-		} else {
-			pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
-		}
-	}
+	/*
+	 * Read the PTE from the process' radix tree and use that
+	 * so we get the shift and attribute bits.
+	 */
+	local_irq_disable();
+	ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+	pte = *ptep;
+	local_irq_enable();
+
+	/* Get pte level from shift/size */
+	if (shift == PUD_SHIFT &&
+	    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+	    (hva & (PUD_SIZE - PAGE_SIZE))) {
+		level = 2;
+	} else if (shift == PMD_SHIFT &&
+		   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+		   (hva & (PMD_SIZE - PAGE_SIZE))) {
+		level = 1;
+	} else {
+		level = 0;
+		if (shift > PAGE_SHIFT) {
+			/*
+			 * If the pte maps more than one page, bring over
+			 * bits from the virtual address to get the real
+			 * address of the specific single page we want.
+			 */
+			unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & rpnmask));
+		}
+	}
+
+	pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+	if (writing || upgrade_write) {
+		if (pte_val(pte) & _PAGE_WRITE)
+			pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+	} else {
+		pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+	}
 
 	/* Allocate space in the tree and write the PTE */
 	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
...
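A note on the radix fault hunks above: the patch stops guessing the host mapping size from compound pages and instead derives it from the shift returned by __find_linux_pte(), falling back to a small page whose real address is completed from the hva. The standalone C sketch below mirrors only that level/offset decision; it is illustrative, not kernel code, and the PAGE/PMD/PUD constants plus the pick_level() and main() helpers are assumptions chosen to resemble a 64K-base-page radix layout.

	/*
	 * Minimal sketch of the shift-based level selection (not kernel code).
	 * Constants assume 64K base pages with 2M and 1G upper levels.
	 */
	#include <stdio.h>

	#define PAGE_SHIFT	16
	#define PAGE_SIZE	(1ul << PAGE_SHIFT)
	#define PMD_SHIFT	21			/* assumed: 2M mappings */
	#define PMD_SIZE	(1ul << PMD_SHIFT)
	#define PUD_SHIFT	30			/* assumed: 1G mappings */
	#define PUD_SIZE	(1ul << PUD_SHIFT)

	/*
	 * Pick the guest mapping level from the host PTE's shift.  A large
	 * level is usable only if gpa and hva are congruent modulo the
	 * large-page size; otherwise use a small page and carry the
	 * within-large-page bits over from hva (the rpnmask case above).
	 */
	static int pick_level(unsigned int shift, unsigned long gpa,
			      unsigned long hva, unsigned long *rpn_bits)
	{
		*rpn_bits = 0;
		if (shift == PUD_SHIFT &&
		    (gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE)))
			return 2;
		if (shift == PMD_SHIFT &&
		    (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE)))
			return 1;
		if (shift > PAGE_SHIFT)
			*rpn_bits = hva & ((1ul << shift) - PAGE_SIZE);
		return 0;
	}

	int main(void)
	{
		unsigned long rpn_bits;

		/* 2M host mapping, gpa and hva congruent: a 2M guest PTE fits */
		printf("level=%d\n",
		       pick_level(PMD_SHIFT, 0x40200000ul, 0x7fff40200000ul, &rpn_bits));

		/* 2M host mapping, addresses not congruent: small page plus offset bits */
		printf("level=%d rpn_bits=0x%lx\n",
		       pick_level(PMD_SHIFT, 0x40030000ul, 0x7fff40240000ul, &rpn_bits),
		       rpn_bits);
		return 0;
	}

Built with any C compiler, the second call reports level 0 with the intra-2M bits taken from the hva, which corresponds to the rpnmask adjustment in the new code.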
@@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
 EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
+		unsigned long entry, unsigned long *hpa,
+		enum dma_data_direction *direction)
+{
+	long ret;
+
+	ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+			(*direction == DMA_BIDIRECTIONAL))) {
+		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry);
+		/*
+		 * kvmppc_rm_tce_iommu_do_map() updates the UA cache after
+		 * calling this so we still get here a valid UA.
+		 */
+		if (pua && *pua)
+			mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua));
+	}
+
+	return ret;
+}
+
+static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
+		unsigned long entry)
 {
 	unsigned long hpa = 0;
 	enum dma_data_direction dir = DMA_NONE;
 
-	iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+	iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 }
 
 static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 	unsigned long hpa = 0;
 	long ret;
 
-	if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+	if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir))
 		/*
 		 * real mode xchg can fail if struct page crosses
 		 * a page boundary
@@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm,
 	ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
 	if (ret)
-		iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+		iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 
 	return ret;
 }
@@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
 	if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
 		return H_CLOSED;
 
-	ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+	ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir);
 	if (ret) {
 		mm_iommu_mapped_dec(mem);
 		/*
@@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			return ret;
 
 		WARN_ON_ONCE_RM(1);
-		kvmppc_rm_clear_tce(stit->tbl, entry);
+		kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 	}
 
 	kvmppc_tce_put(stt, entry, tce);
@@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 				goto unlock_exit;
 
 			WARN_ON_ONCE_RM(1);
-			kvmppc_rm_clear_tce(stit->tbl, entry);
+			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 		}
 
 		kvmppc_tce_put(stt, entry + i, tce);
@@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 				return ret;
 
 			WARN_ON_ONCE_RM(1);
-			kvmppc_rm_clear_tce(stit->tbl, entry);
+			kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
 		}
 	}
...
@@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 {
 }
 
-/*
- * We do not have access to the sparsemem vmemmap, so we fallback to
- * walking the list of sparsemem blocks which we already maintain for
- * the sake of crashdump. In the long run, we might want to maintain
- * a tree if performance of that linear walk becomes a problem.
- *
- * realmode_pfn_to_page functions can fail due to:
- * 1) As real sparsemem blocks do not lay in RAM continously (they
- * are in virtual address space which is not available in the real mode),
- * the requested page struct can be split between blocks so get_page/put_page
- * may fail.
- * 2) When huge pages are used, the get_page/put_page API will fail
- * in real mode as the linked addresses in the page struct are virtual
- * too.
- */
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
-	struct vmemmap_backing *vmem_back;
-	struct page *page;
-	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
-	unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
-
-	for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
-		if (pg_va < vmem_back->virt_addr)
-			continue;
-
-		/* After vmemmap_list entry free is possible, need check all */
-		if ((pg_va + sizeof(struct page)) <=
-				(vmem_back->virt_addr + page_size)) {
-			page = (struct page *) (vmem_back->phys + pg_va -
-				vmem_back->virt_addr);
-			return page;
-		}
-	}
-
-	/* Probably that page struct is split between real pages */
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
-#else
-
-struct page *realmode_pfn_to_page(unsigned long pfn)
-{
-	struct page *page = pfn_to_page(pfn);
-
-	return page;
-}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 #ifdef CONFIG_PPC_BOOK3S_64
...
@@ -18,11 +18,15 @@
 #include <linux/migrate.h>
 #include <linux/hugetlb.h>
 #include <linux/swap.h>
+#include <linux/sizes.h>
 #include <asm/mmu_context.h>
 #include <asm/pte-walk.h>
 
 static DEFINE_MUTEX(mem_list_mutex);
 
+#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
+#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)
+
 struct mm_iommu_table_group_mem_t {
 	struct list_head next;
 	struct rcu_head rcu;
@@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
 		if (!page)
 			continue;
 
+		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
+			SetPageDirty(page);
+
 		put_page(page);
 		mem->hpas[i] = 0;
 	}
@@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
 	return ret;
 }
-EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
 
 struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
 		unsigned long ua, unsigned long entries)
@@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
 	if (pageshift > mem->pageshift)
 		return -EFAULT;
 
-	*hpa = *va | (ua & ~PAGE_MASK);
+	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
 
 	return 0;
 }
@@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
 	if (!pa)
 		return -EFAULT;
 
-	*hpa = *pa | (ua & ~PAGE_MASK);
+	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
+
+extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+	long entry;
+	void *va;
+	unsigned long *pa;
+
+	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
+	if (!mem)
+		return;
+
+	entry = (ua - mem->ua) >> PAGE_SHIFT;
+	va = &mem->hpas[entry];
+
+	pa = (void *) vmalloc_to_phys(va);
+	if (!pa)
+		return;
+
+	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
+}
 
 long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
 {
...
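A note on the mmu_context_iommu.c hunks above: because each cached host physical address is at least 4K aligned, the patch reuses the low bit of the cache entry as a dirty flag (MM_IOMMU_TABLE_GROUP_PAGE_DIRTY), masks it off in the ua_to_hpa lookups, and turns it into SetPageDirty() when the memory is unpinned in virtual mode. The short C sketch below models just that flag-in-low-bits idea; it is illustrative only, and hpa_cache, mark_dirty(), lookup_hpa() and unpin() are made-up stand-ins, not the kernel functions.

	/*
	 * Minimal sketch of the flag-in-low-bits scheme (not the kernel code).
	 * The cached addresses are page aligned, so bit 0 is free to record
	 * "this page was mapped writable for DMA".
	 */
	#include <stdio.h>

	#define PAGE_DIRTY_FLAG	0x1ul
	#define PAGE_ADDR_MASK	(~(4096ul - 1))		/* mirrors ~(SZ_4K - 1) */

	/* Hypothetical cache: one host-physical-address word per pinned page. */
	static unsigned long hpa_cache[16];

	static void mark_dirty(unsigned long idx)
	{
		/* A single OR, which is all real mode needs to do. */
		hpa_cache[idx] |= PAGE_DIRTY_FLAG;
	}

	static unsigned long lookup_hpa(unsigned long idx, unsigned long offset)
	{
		/* Mask the flag off before handing the address out,
		 * as the ua_to_hpa paths now do. */
		return (hpa_cache[idx] & PAGE_ADDR_MASK) | (offset & ~PAGE_ADDR_MASK);
	}

	static void unpin(unsigned long idx)
	{
		/* In virtual mode the flag becomes a struct page update. */
		if (hpa_cache[idx] & PAGE_DIRTY_FLAG)
			printf("entry %lu: SetPageDirty before put_page\n", idx);
		hpa_cache[idx] = 0;
	}

	int main(void)
	{
		hpa_cache[3] = 0x200000000ul;		/* pinned page at 8G */
		mark_dirty(3);
		printf("hpa = 0x%lx\n", lookup_hpa(3, 0x123));
		unpin(3);
		return 0;
	}

The design point visible in the diff is that real-mode code only sets a bit in the cache entry, deferring the struct page dirtying to the virtual-mode unpin path.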