Commit c18bb367 authored by Wang Wensheng, committed by Zheng Zengkai

share_pool: Implement sp_walk_page_range()

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4NDAW
CVE: NA

-------------------

This is a simple wrapper around walk_page_range() to get all the pages of a
spa. It does not support holes.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 05a944e0
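For context: the callbacks added below only use the sp_walk_data walk context, they do not define it. A rough sketch of the fields it must carry, reconstructed from this hunk alone (the authoritative definition lives in the share_pool header and may contain more members):

```c
/* Sketch only: fields reconstructed from how the walk callbacks below use
 * the context; the real struct sp_walk_data in the share_pool header may
 * differ.
 */
struct sp_walk_data {
	struct page **pages;		/* array filled with one referenced page per entry */
	unsigned int page_count;	/* number of valid entries in @pages */
	unsigned long uva_aligned;	/* walk start: uva aligned down to @page_size */
	unsigned long page_size;	/* PAGE_SIZE, or PMD_SIZE for hugetlb VMAs */
	bool is_hugepage;		/* set when the VMA is a hugetlb mapping */
	pmd_t *pmd;			/* current pmd, stashed for migration_entry_wait() */
};
```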
......@@ -51,6 +51,7 @@
#include <linux/mmzone.h>
#include <linux/timekeeping.h>
#include <linux/time64.h>
#include <linux/pagewalk.h>
/* access control mode macros */
#define AC_NONE 0
......@@ -494,6 +495,12 @@ static struct file *spa_file(struct sp_area *spa)
return spa->spg->file;
}
static inline void check_interrupt_context(void)
{
if (unlikely(in_interrupt()))
panic("function can't be used in interrupt context\n");
}
static struct sp_group *create_spg(int spg_id)
{
return NULL;
......@@ -664,6 +671,201 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
}
EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u);
static int sp_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
struct sp_walk_data *sp_walk_data = walk->private;
sp_walk_data->pmd = pmd;
return 0;
}
static int sp_pte_entry(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
struct page *page;
struct sp_walk_data *sp_walk_data = walk->private;
pmd_t *pmd = sp_walk_data->pmd;
retry:
if (unlikely(!pte_present(*pte))) {
swp_entry_t entry;
if (pte_none(*pte))
goto no_page;
entry = pte_to_swp_entry(*pte);
if (!is_migration_entry(entry))
goto no_page;
migration_entry_wait(walk->mm, pmd, addr);
goto retry;
}
page = pte_page(*pte);
get_page(page);
sp_walk_data->pages[sp_walk_data->page_count++] = page;
return 0;
no_page:
pr_debug("the page of addr %lx unexpectedly not in RAM\n",
(unsigned long)addr);
return -EFAULT;
}
static int sp_test_walk(unsigned long addr, unsigned long next,
struct mm_walk *walk)
{
/*
* FIXME: The devmm driver uses remap_pfn_range() but actually there
* are associated struct pages, so they should use vm_map_pages() or
* similar APIs. Before the driver has been converted to correct APIs
* we use this test_walk() callback so we can treat VM_PFNMAP VMAs as
* normal VMAs.
*/
return 0;
}
static int sp_pte_hole(unsigned long start, unsigned long end,
int depth, struct mm_walk *walk)
{
pr_debug("hole [%lx, %lx) appeared unexpectedly\n", (unsigned long)start, (unsigned long)end);
return -EFAULT;
}
static int sp_hugetlb_entry(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long next,
struct mm_walk *walk)
{
pte_t pte = huge_ptep_get(ptep);
struct page *page = pte_page(pte);
struct sp_walk_data *sp_walk_data;
if (unlikely(!pte_present(pte))) {
pr_debug("the page of addr %lx unexpectedly not in RAM\n", (unsigned long)addr);
return -EFAULT;
}
sp_walk_data = walk->private;
get_page(page);
sp_walk_data->pages[sp_walk_data->page_count++] = page;
return 0;
}
/*
* __sp_walk_page_range() - Walk page table with caller specific callbacks.
* @uva: the start VA of user memory.
* @size: the size of user memory.
* @mm: mm struct of the target task.
* @sp_walk_data: a structure of a page pointer array.
*
* the caller must hold mm->mmap_lock
*
* Notes for parameter alignment:
* When size == 0, let it be page_size, so that at least one page is walked.
*
* When size > 0, the parameters uva and size are usually not page
* aligned, for the caller's convenience. There are four different
* alignment scenarios and we must handle all of them correctly.
*
* The basic idea is to align down uva and align up size so all the pages
* in range [uva, uva + size) are walked. However, there are special cases.
*
* Consider a 2M-hugepage address scenario. Suppose the caller wants to
* traverse the range [1001M, 1004.5M), so uva and size are 1001M and 3.5M
* respectively. The aligned-down uva is 1000M and the aligned-up size is
* 4M, so the traversed range would be [1000M, 1004M). Obviously, the final
* page covering [1004M, 1004.5M) is not walked.
*
* To fix this problem, we need to walk one additional page: the aligned
* size should be ALIGN(uva + size, page_size) - uva_aligned.
*/
static int __sp_walk_page_range(unsigned long uva, unsigned long size,
struct mm_struct *mm, struct sp_walk_data *sp_walk_data)
{
int ret = 0;
struct vm_area_struct *vma;
unsigned long page_nr;
struct page **pages = NULL;
bool is_hugepage = false;
unsigned long uva_aligned;
unsigned long size_aligned;
unsigned int page_size = PAGE_SIZE;
struct mm_walk_ops sp_walk = {};
/*
* Here we also support non-share-pool memory in this interface,
* because the caller can't distinguish whether a uva comes from the
* share pool or not. This is not ideal, but it currently simplifies
* the overall design.
*
* In this situation, the correctness of the parameters is mainly
* guaranteed by the caller.
*/
vma = find_vma(mm, uva);
if (!vma) {
pr_debug("u2k input uva %lx is invalid\n", (unsigned long)uva);
return -EINVAL;
}
if (is_vm_hugetlb_page(vma))
is_hugepage = true;
sp_walk.pte_hole = sp_pte_hole;
sp_walk.test_walk = sp_test_walk;
if (is_hugepage) {
sp_walk_data->is_hugepage = true;
sp_walk.hugetlb_entry = sp_hugetlb_entry;
page_size = PMD_SIZE;
} else {
sp_walk_data->is_hugepage = false;
sp_walk.pte_entry = sp_pte_entry;
sp_walk.pmd_entry = sp_pmd_entry;
}
sp_walk_data->page_size = page_size;
uva_aligned = ALIGN_DOWN(uva, page_size);
sp_walk_data->uva_aligned = uva_aligned;
if (size == 0)
size_aligned = page_size;
else
/* special alignment handling */
size_aligned = ALIGN(uva + size, page_size) - uva_aligned;
if (uva_aligned + size_aligned < uva_aligned) {
pr_err_ratelimited("overflow happened in walk page range\n");
return -EINVAL;
}
page_nr = size_aligned / page_size;
pages = kvmalloc(page_nr * sizeof(struct page *), GFP_KERNEL);
if (!pages) {
pr_err_ratelimited("alloc page array failed in walk page range\n");
return -ENOMEM;
}
sp_walk_data->pages = pages;
ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned,
&sp_walk, sp_walk_data);
if (ret)
kvfree(pages);
return ret;
}
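To make the alignment rule described in the comment above concrete, here is a small standalone sketch (userspace-style C with hypothetical values, not part of this patch) that reproduces the 2M-hugepage example; ALIGN/ALIGN_DOWN mirror the kernel macros for power-of-two sizes:

```c
#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long page_size = 2UL << 20;			/* 2M hugepage */
	unsigned long uva = 1001UL << 20;			/* 1001M */
	unsigned long size = (3UL << 20) + (1UL << 19);		/* 3.5M */

	unsigned long uva_aligned = ALIGN_DOWN(uva, page_size);	/* 1000M */
	unsigned long naive = ALIGN(size, page_size);			/* 4M */
	unsigned long size_aligned = ALIGN(uva + size, page_size) - uva_aligned; /* 6M */

	/* naive end stops at 1004M and misses [1004M, 1004.5M) */
	printf("naive end  : %luM\n", (uva_aligned + naive) >> 20);		/* 1004 */
	/* corrected end reaches 1006M and covers the whole request */
	printf("correct end: %luM\n", (uva_aligned + size_aligned) >> 20);	/* 1006 */
	return 0;
}
```

Walking up to 1006M rather than 1004M is what guarantees the last partially covered hugepage is included.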
static void __sp_walk_page_free(struct sp_walk_data *data)
{
int i = 0;
struct page *page;
while (i < data->page_count) {
page = data->pages[i++];
put_page(page);
}
kvfree(data->pages);
/* prevent repeated release */
data->page_count = 0;
data->pages = NULL;
}
/**
* sp_make_share_u2k() - Share user memory of a specified process to kernel.
* @uva: the VA of shared user memory
......@@ -723,7 +925,39 @@ EXPORT_SYMBOL_GPL(mg_sp_unshare);
int sp_walk_page_range(unsigned long uva, unsigned long size,
struct task_struct *tsk, struct sp_walk_data *sp_walk_data)
{
return 0;
struct mm_struct *mm;
int ret = 0;
check_interrupt_context();
if (unlikely(!sp_walk_data)) {
pr_err_ratelimited("null pointer when walk page range\n");
return -EINVAL;
}
if (!tsk || (tsk->flags & PF_EXITING))
return -ESRCH;
get_task_struct(tsk);
mm = get_task_mm(tsk);
if (!mm) {
put_task_struct(tsk);
return -ESRCH;
}
sp_walk_data->page_count = 0;
down_write(&mm->mmap_lock);
if (likely(!mm->core_state))
ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
else {
pr_err("walk page range: encoutered coredump\n");
ret = -ESRCH;
}
up_write(&mm->mmap_lock);
mmput(mm);
put_task_struct(tsk);
return ret;
}
EXPORT_SYMBOL_GPL(sp_walk_page_range);
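A minimal sketch of how a kernel-side caller might pair the two exported interfaces (hypothetical caller and function name, error handling trimmed; it assumes struct sp_walk_data and the declarations come from the share_pool header):

```c
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/share_pool.h>	/* sp_walk_page_range(), sp_walk_page_free() */

/* Hypothetical caller: collect the pages backing a user buffer of the
 * current task, inspect them, then drop the references taken by the walk.
 */
static int example_touch_user_range(unsigned long uva, unsigned long size)
{
	struct sp_walk_data walk_data = { 0 };
	unsigned int i;
	int ret;

	ret = sp_walk_page_range(uva, size, current, &walk_data);
	if (ret)
		return ret;	/* -EINVAL, -ESRCH, -ENOMEM or -EFAULT */

	for (i = 0; i < walk_data.page_count; i++) {
		/* every entry holds an extra page reference taken by the walk */
		pr_debug("walked pfn %lu\n", page_to_pfn(walk_data.pages[i]));
	}

	/* puts each page and frees the pages[] array */
	sp_walk_page_free(&walk_data);
	return 0;
}
```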
......@@ -740,7 +974,12 @@ EXPORT_SYMBOL_GPL(mg_sp_walk_page_range);
*/
void sp_walk_page_free(struct sp_walk_data *sp_walk_data)
{
return;
check_interrupt_context();
if (!sp_walk_data)
return;
__sp_walk_page_free(sp_walk_data);
}
EXPORT_SYMBOL_GPL(sp_walk_page_free);
......