提交 2e5bbb54 编写于 作者: B Balbir Singh 提交者: Michael Ellerman

KVM: PPC: Book3S HV: Migrate pinned pages out of CMA

When PCI device pass-through is enabled via VFIO, KVM-PPC will
pin pages using get_user_pages_fast(). One of the downsides of
the pinning is that the page could be in a CMA region. The CMA
region is used for other allocations like the hash page table.
Ideally we want the pinned pages to be from a non-CMA region.

This patch (currently only for KVM PPC with VFIO) forcefully
migrates the pages out (huge pages are omitted for the moment).
There are more efficient ways of doing this, but that might
be elaborate and might impact a larger audience beyond just
the kvm ppc implementation.

The magic is in new_iommu_non_cma_page() which allocates the
new page from a non CMA region.

I've tested the patches lightly at my end. The full solution
requires migration of THP pages in the CMA region. That work
will be done incrementally on top of this.
Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[mpe: Merged via powerpc tree as that's where the changes are]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
上级 360aebd8
...@@ -18,6 +18,7 @@ extern void destroy_context(struct mm_struct *mm); ...@@ -18,6 +18,7 @@ extern void destroy_context(struct mm_struct *mm);
#ifdef CONFIG_SPAPR_TCE_IOMMU #ifdef CONFIG_SPAPR_TCE_IOMMU
struct mm_iommu_table_group_mem_t; struct mm_iommu_table_group_mem_t;
extern int isolate_lru_page(struct page *page); /* from internal.h */
extern bool mm_iommu_preregistered(void); extern bool mm_iommu_preregistered(void);
extern long mm_iommu_get(unsigned long ua, unsigned long entries, extern long mm_iommu_get(unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem); struct mm_iommu_table_group_mem_t **pmem);
......
...@@ -15,6 +15,9 @@ ...@@ -15,6 +15,9 @@
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
static DEFINE_MUTEX(mem_list_mutex); static DEFINE_MUTEX(mem_list_mutex);
...@@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void) ...@@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void)
} }
EXPORT_SYMBOL_GPL(mm_iommu_preregistered); EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
/*
 * Allocation callback handed to migrate_pages(): supplies the destination
 * page for a page that is being moved out of CMA.
 *
 * Modelled on alloc_migrate_target, with changes to remove CMA allocations.
 *
 * @page:    the page that is about to be migrated
 * @private: unused
 * @resultp: unused
 *
 * Returns the newly allocated page, or NULL when @page is a huge/compound
 * page (not handled for now) or when the allocation fails.
 */
struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
					int **resultp)
{
	gfp_t gfp_mask;

	/* Huge, transparent huge and compound pages are not migrated here. */
	if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
		return NULL;

	/*
	 * __GFP_NORETRY | __GFP_NOWARN: we don't want the allocation to
	 * force an OOM if possible, so give up quietly instead.
	 */
	gfp_mask = GFP_USER | __GFP_NORETRY | __GFP_NOWARN;
	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

	return alloc_page(gfp_mask);
}
/*
 * Try to migrate a single gup-pinned page out of CMA.
 *
 * Returns -EBUSY for huge/compound pages and the isolate_lru_page() error
 * if the page cannot be isolated; in both cases the caller's gup reference
 * is left untouched.
 *
 * Returns 0 once the page has been isolated, whether or not the migration
 * itself succeeded. By then the gup reference has been dropped, so the
 * caller must pin the address again before using the page.
 */
static int mm_iommu_move_page_from_cma(struct page *page)
{
	LIST_HEAD(cma_migrate_pages);
	int rc;

	/* Ignore huge pages for now */
	if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
		return -EBUSY;

	/* NOTE(review): presumably flushes pending LRU additions so the
	 * page can be isolated -- confirm against mm/swap.c. */
	lru_add_drain();
	rc = isolate_lru_page(page);
	if (rc)
		return rc;

	list_add(&page->lru, &cma_migrate_pages);

	/* Drop the gup reference */
	put_page(page);

	rc = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
			   NULL, 0, MIGRATE_SYNC, MR_CMA);

	/* On failure, hand back whatever is still sitting on our list. */
	if (rc && !list_empty(&cma_migrate_pages))
		putback_movable_pages(&cma_migrate_pages);

	return 0;
}
long mm_iommu_get(unsigned long ua, unsigned long entries, long mm_iommu_get(unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem) struct mm_iommu_table_group_mem_t **pmem)
{ {
...@@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, ...@@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
for (i = 0; i < entries; ++i) { for (i = 0; i < entries; ++i) {
if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
1/* pages */, 1/* iswrite */, &page)) { 1/* pages */, 1/* iswrite */, &page)) {
ret = -EFAULT;
for (j = 0; j < i; ++j) for (j = 0; j < i; ++j)
put_page(pfn_to_page( put_page(pfn_to_page(mem->hpas[j] >>
mem->hpas[j] >> PAGE_SHIFT)); PAGE_SHIFT));
vfree(mem->hpas); vfree(mem->hpas);
kfree(mem); kfree(mem);
ret = -EFAULT;
goto unlock_exit; goto unlock_exit;
} }
/*
* If we get a page from the CMA zone, since we are going to
* be pinning these entries, we might as well move them out
* of the CMA zone if possible. NOTE: faulting in + migration
* can be expensive. Batching can be considered later
*/
if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
if (mm_iommu_move_page_from_cma(page))
goto populate;
if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
1/* pages */, 1/* iswrite */,
&page)) {
ret = -EFAULT;
for (j = 0; j < i; ++j)
put_page(pfn_to_page(mem->hpas[j] >>
PAGE_SHIFT));
vfree(mem->hpas);
kfree(mem);
goto unlock_exit;
}
}
populate:
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册