Commit 1291ad3f authored by Ding Tianhong, committed by Yang Yingliang

ascend: vmalloc: export new function for share pool

ascend inclusion
category: feature
bugzilla: NA
CVE: NA

-------------------------------------------------

This is a preparation patch for the share pool feature. It exports
new functions to vmalloc huge pages and to vmap the huge pages into
virtually contiguous space.

The new header file share_pool.h is mainly used by the share pool
feature. It exposes the sp_xxx interfaces when the ascend_share_pool
config is enabled, and compiles to no-op stubs by default.
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Li Ming <limingming.li@huawei.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent 16313919
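For review context, a minimal usage sketch of the exported interface (illustrative only, not part of this patch; the example_* name is an assumption): allocate a hugepage-backed, virtually contiguous buffer and free it again.

#include <linux/string.h>
#include <linux/vmalloc.h>

static int example_hugepage_buffer(void)
{
        /* The request is rounded up to PMD_SIZE (2M with 4K base pages). */
        void *va = vmalloc_hugepage(3UL << 20);

        if (!va)
                return -ENOMEM;

        memset(va, 0, 3UL << 20);       /* virtually contiguous, normal access */
        vfree(va);
        return 0;
}

include/linux/share_pool.h (new file)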
#ifndef LINUX_SHARE_POOL_H
#define LINUX_SHARE_POOL_H
#include <linux/mman.h>
#include <linux/mm_types.h>
#include <linux/notifier.h>
#include <linux/vmalloc.h>
#define SP_HUGEPAGE (1 << 0)
#define SP_HUGEPAGE_ONLY (1 << 1)
#define SP_DVPP (1 << 2)
#define SPG_ID_NONE -1 /* not associated with sp_group, only for specified thread */
#define SPG_ID_DEFAULT 0 /* use the spg id of current thread */
#define SPG_ID_MIN 1 /* valid id should be >= 1 */
#define SPG_ID_MAX 99999
#define SPG_ID_AUTO_MIN 100000
#define SPG_ID_AUTO_MAX 199999
#define SPG_ID_AUTO 200000 /* generate group id automatically */
#define SPG_ID_DVPP_PASS_THROUGH_MIN 800000
#define SPG_ID_DVPP_PASS_THROUGH_MAX 899999
#define SPG_ID_DVPP_PASS_THROUGH 900000
#define MAX_DEVID 1 /* the maximum number of Da-vinci devices */
#define VM_HUGE_PAGES 0x00001000 /* used for huge pages */
/* to align the pointer to the (next) PMD boundary */
#define PMD_ALIGN(addr) ALIGN(addr, PMD_SIZE)
/* test whether an address (unsigned long or pointer) is aligned to PMD_SIZE */
#define PMD_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PMD_SIZE)
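/*
 * Example (illustrative, assuming 4K base pages, so PMD_SIZE == 2M):
 *   PMD_ALIGN(0x201000)   == 0x400000
 *   PMD_ALIGNED(0x400000) == true, PMD_ALIGNED(0x201000) == false
 */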
extern int sysctl_share_pool_hugepage_enable;
extern int sysctl_ac_mode;
extern int enable_ascend_share_pool;
/* Processes in the same sp_group can share memory.
* Memory layout for share pool:
*
* |-------------------- 8T -------------------|---|------ 8T ------------|
* | Device 0 | Device 1 |...| |
* |----------------------------------------------------------------------|
* |- 16G -|- 16G -|- 16G -|- 16G -| | | | |
* | DVPP GROUP0 | DVPP GROUP1 | ... | ... |...| sp normal memory |
* | svm | sp | svm | sp | | | | |
* |----------------------------------------------------------------------|
*
* The host SVM feature reserves 8T of virtual memory by mmap, and due to a
* DVPP restriction, when SVM and the share pool both allocate memory for
* DVPP, that memory has to fall within the same 32G range.
*
* The share pool reserves 16T of memory: 8T for normal use and 8T for DVPP.
* Within this 8T DVPP memory, SVM will call sp_config_dvpp_range() to
* tell us which 16G memory range is reserved for the share pool.
*
* In some scenarios where there is no host SVM feature, share pool uses
* the default memory setting for DVPP.
*/
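/*
 * Illustrative example (not part of this patch), assuming the host SVM
 * driver reserves a 16G DVPP window of device 0 for the share pool on
 * behalf of process @pid:
 *
 *	sp_config_dvpp_range(dvpp_va_start, MMAP_SHARE_POOL_16G_SIZE, 0, pid);
 */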
struct sp_group {
int id;
struct file *file;
struct file *file_hugetlb;
/* list head of processes */
struct list_head procs;
/* list of sp_area */
struct list_head spa_list;
/* number of sp_area */
atomic_t spa_num;
/* total size of all sp_area from sp_alloc and k2u(spg) */
atomic_t size;
/* record the number of hugepage allocation failures */
int hugepage_failures;
/* is_alive == false means it's being destroyed */
bool is_alive;
/* we define the creator process of a sp_group as owner */
struct task_struct *owner;
/* dvpp_multi_spaces == true means multiple dvpp 16G spaces are set */
bool dvpp_multi_spaces;
unsigned long dvpp_va_start;
unsigned long dvpp_size;
atomic_t use_count;
};
struct sp_walk_data {
struct page **pages;
unsigned int page_count;
unsigned long uva_aligned;
unsigned long page_size;
bool is_hugepage;
};
#ifdef CONFIG_ASCEND_SHARE_POOL
#define MAP_SHARE_POOL 0x100000
#define MMAP_TOP_4G_SIZE 0x100000000UL
/* 8T size */
#define MMAP_SHARE_POOL_NORMAL_SIZE 0x80000000000UL
/* 8T size */
#define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL
/* 16G size */
#define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL
#define MMAP_SHARE_POOL_SIZE (MMAP_SHARE_POOL_NORMAL_SIZE + MMAP_SHARE_POOL_DVPP_SIZE)
/* aligned to the 2M hugepage size; MMAP_SHARE_POOL_16G_START should be aligned to 16G */
#define MMAP_SHARE_POOL_END ((TASK_SIZE - MMAP_SHARE_POOL_DVPP_SIZE) & ~((1 << 21) - 1))
#define MMAP_SHARE_POOL_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_SIZE)
#define MMAP_SHARE_POOL_16G_START (MMAP_SHARE_POOL_END - MMAP_SHARE_POOL_DVPP_SIZE)
static inline void sp_init_mm(struct mm_struct *mm)
{
mm->sp_group = NULL;
INIT_LIST_HEAD(&mm->sp_node);
mm->sp_stat_id = 0;
}
extern int sp_group_add_task(int pid, int spg_id);
extern void sp_group_exit(struct mm_struct *mm);
extern void sp_group_post_exit(struct mm_struct *mm);
extern int sp_group_id_by_pid(int pid);
extern int sp_group_walk(int spg_id, void *data, int (*func)(struct mm_struct *mm, void *));
extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
extern void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id);
extern int sp_free(unsigned long addr);
extern void *sp_make_share_k2u(unsigned long kva, unsigned long size,
unsigned long sp_flags, int pid, int spg_id);
extern void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid);
extern int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id);
extern void sp_area_drop(struct vm_area_struct *vma);
extern int sp_walk_page_range(unsigned long uva, unsigned long size,
struct task_struct *tsk, struct sp_walk_data *sp_walk_data);
extern void sp_walk_page_free(struct sp_walk_data *sp_walk_data);
extern int sp_register_notifier(struct notifier_block *nb);
extern int sp_unregister_notifier(struct notifier_block *nb);
extern bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid);
extern bool is_sharepool_addr(unsigned long addr);
extern void proc_sharepool_init(void);
static inline struct task_struct *sp_get_task(struct mm_struct *mm)
{
if (enable_ascend_share_pool)
return mm->owner;
else
return current;
}
static inline bool sp_check_hugepage(struct page *p)
{
if (enable_ascend_share_pool && PageHuge(p))
return true;
return false;
}
static inline bool sp_is_enabled(void)
{
return enable_ascend_share_pool ? true : false;
}
static inline bool sp_check_vm_huge_page(unsigned long flags)
{
if (enable_ascend_share_pool && (flags & VM_HUGE_PAGES))
return true;
return false;
}
static inline void sp_area_work_around(struct vm_unmapped_area_info *info)
{
if (enable_ascend_share_pool)
info->high_limit = min(info->high_limit, MMAP_SHARE_POOL_START);
}
extern struct page *sp_alloc_pages(struct vm_struct *area, gfp_t mask,
unsigned int page_order, int node);
static inline void sp_free_pages(struct page *page, struct vm_struct *area)
{
if (PageHuge(page))
put_page(page);
else
__free_pages(page, area->page_order);
}
static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
{
if (enable_ascend_share_pool && (vm_flags & VM_SHARE_POOL))
return true;
return false;
}
static inline bool is_vm_huge_special(struct vm_area_struct *vma)
{
return !!(enable_ascend_share_pool && (vma->vm_flags & VM_HUGE_SPECIAL));
}
static inline bool sp_mmap_check(unsigned long flags)
{
if (enable_ascend_share_pool && (flags & MAP_SHARE_POOL))
return true;
return false;
}
#else
static inline int sp_group_add_task(int pid, int spg_id)
{
return -EPERM;
}
static inline void sp_group_exit(struct mm_struct *mm)
{
}
static inline void sp_group_post_exit(struct mm_struct *mm)
{
}
static inline int sp_group_id_by_pid(int pid)
{
return -EPERM;
}
static inline int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
return -EPERM;
}
static inline void *sp_alloc(unsigned long size, unsigned long sp_flags, int sp_id)
{
return NULL;
}
static inline int sp_free(unsigned long addr)
{
return -EPERM;
}
static inline void *sp_make_share_k2u(unsigned long kva, unsigned long size,
unsigned long sp_flags, int pid, int spg_id)
{
return NULL;
}
static inline void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid)
{
return NULL;
}
static inline int sp_unshare(unsigned long va, unsigned long size, int pid, int spg_id)
{
return -EPERM;
}
static inline void sp_init_mm(struct mm_struct *mm)
{
}
static inline void sp_area_drop(struct vm_area_struct *vma)
{
}
static inline int sp_walk_page_range(unsigned long uva, unsigned long size,
struct task_struct *tsk, struct sp_walk_data *sp_walk_data)
{
return 0;
}
static inline void sp_walk_page_free(struct sp_walk_data *sp_walk_data)
{
}
static inline int sp_register_notifier(struct notifier_block *nb)
{
return -EPERM;
}
static inline int sp_unregister_notifier(struct notifier_block *nb)
{
return -EPERM;
}
static inline bool sp_config_dvpp_range(size_t start, size_t size, int device_id, int pid)
{
return false;
}
static inline bool is_sharepool_addr(unsigned long addr)
{
return false;
}
static inline void proc_sharepool_init(void)
{
}
static inline struct task_struct *sp_get_task(struct mm_struct *mm)
{
return current;
}
static inline bool sp_check_hugepage(struct page *p)
{
return false;
}
static inline bool sp_is_enabled(void)
{
return false;
}
static inline bool sp_check_vm_huge_page(unsigned long flags)
{
return false;
}
static inline void sp_area_work_around(struct vm_unmapped_area_info *info)
{
}
static inline struct page *sp_alloc_pages(void *area, gfp_t mask,
unsigned int page_order, int node)
{
return NULL;
}
static inline void sp_free_pages(struct page *page, struct vm_struct *area)
{
}
static inline bool sp_check_vm_share_pool(unsigned long vm_flags)
{
return false;
}
static inline bool is_vm_huge_special(struct vm_area_struct *vma)
{
return false;
}
static inline bool sp_mmap_check(unsigned long flags)
{
return false;
}
#endif
#endif /* LINUX_SHARE_POOL_H */
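The paired definitions above let call sites use the sp_* interfaces unconditionally: with CONFIG_ASCEND_SHARE_POOL disabled they compile into no-op stubs or -EPERM returns. A minimal sketch of such a call site (illustrative only; example_join_group and its error handling are assumptions):

static int example_join_group(int pid)
{
        int ret;

        /* The stub returns -EPERM when the feature is compiled out. */
        ret = sp_group_add_task(pid, SPG_ID_AUTO);
        if (ret < 0)
                return ret;

        return 0;
}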
include/linux/vmalloc.h
@@ -95,6 +95,8 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller);
extern void *vmalloc_hugepage(unsigned long size);
extern void *vmalloc_hugepage_user(unsigned long size);
#ifndef CONFIG_MMU
extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
@@ -123,6 +125,13 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
void vmalloc_sync_mappings(void);
void vmalloc_sync_unmappings(void);
extern void *vmap_hugepage(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot);
extern int remap_vmalloc_hugepage_range_partial(struct vm_area_struct *vma,
unsigned long uaddr, void *kaddr,
unsigned long size);
extern int remap_vmalloc_hugepage_range(struct vm_area_struct *vma,
void *addr, unsigned long pgoff);
/*
* Lowlevel-APIs (not for driver use!)
*/
mm/vmalloc.c
@@ -33,6 +33,7 @@
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>
#include <linux/share_pool.h>
#include <linux/uaccess.h>
#include <asm/tlbflush.h>
@@ -478,6 +479,37 @@ static int vmap_pages_range(unsigned long addr, unsigned long end,
return err;
}
static int vmap_hugepages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
unsigned int i, nr = (end - addr) >> page_shift;
for (i = 0; i < nr; i++) {
int err;
err = vmap_range_noflush(addr, addr + (1UL << page_shift),
__pa(page_address(pages[i])), prot,
page_shift);
if (err)
return err;
addr += 1UL << page_shift;
}
return 0;
}
static int vmap_hugepages_range(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages,
unsigned int page_shift)
{
int err;
err = vmap_hugepages_range_noflush(addr, end, prot, pages, page_shift);
flush_cache_vmap(addr, end);
return err;
}
/**
* map_kernel_range_noflush - map kernel VM area with the specified pages
* @addr: start of the VM area to map
@@ -589,6 +621,22 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
}
EXPORT_SYMBOL(vmalloc_to_page);
/*
* Walk a hugepage vmap address to the struct page it maps and
* return the head page that corresponds to the base page address.
*/
struct page *vmalloc_to_hugepage(const void *vmalloc_addr)
{
struct page *huge;
huge = vmalloc_to_page(vmalloc_addr);
if (huge && PageHuge(huge))
return huge;
else
return NULL;
}
EXPORT_SYMBOL(vmalloc_to_hugepage);
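/*
 * Illustrative sketch (not part of this patch): walk a hugepage-backed
 * vmalloc area in PMD_SIZE steps and collect the head pages. The caller
 * and the pages[] array management are assumptions.
 */
static int example_collect_huge_pages(void *va, unsigned long size,
                                      struct page **pages)
{
        unsigned long off;
        int i = 0;

        for (off = 0; off < size; off += PMD_SIZE) {
                struct page *page = vmalloc_to_hugepage(va + off);

                if (!page)      /* not backed by a hugetlb page */
                        return -EINVAL;
                pages[i++] = page;
        }

        return i;
}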
/*
* Map a vmalloc()-space virtual address to the physical page frame number.
*/
@@ -2243,7 +2291,12 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
const void *caller)
{
- return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+ unsigned long align = 1;
+
+ if (sp_check_vm_huge_page(flags))
+        align = PMD_SIZE;
+
+ return __get_vm_area_node(size, align, flags, VMALLOC_START, VMALLOC_END,
NUMA_NO_NODE, GFP_KERNEL, caller);
}
@@ -2327,7 +2380,10 @@ static void __vunmap(const void *addr, int deallocate_pages)
struct page *page = area->pages[i];
BUG_ON(!page);
- __free_pages(page, area->page_order);
+ if (sp_is_enabled())
+        sp_free_pages(page, area);
+ else
+        __free_pages(page, area->page_order);
}
kvfree(area->pages);
@@ -2452,6 +2508,43 @@ void *vmap(struct page **pages, unsigned int count,
}
EXPORT_SYMBOL(vmap);
/**
* vmap_hugepage - map an array of huge pages into virtually contiguous space
* @pages: array of huge page pointers
* @count: number of huge pages to map
* @flags: vm_area->flags
* @prot: page protection for the mapping
*
* Maps @count huge pages from @pages into contiguous kernel virtual
* space.
*/
void *vmap_hugepage(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot)
{
struct vm_struct *area;
unsigned long size; /* In bytes */
might_sleep();
if (count > totalram_pages)
return NULL;
size = (unsigned long)count << PMD_SHIFT;
area = get_vm_area_caller(size, flags, __builtin_return_address(0));
if (!area)
return NULL;
if (vmap_hugepages_range((unsigned long)area->addr,
(unsigned long)area->addr + size, prot,
pages, PMD_SHIFT) < 0) {
vunmap(area->addr);
return NULL;
}
return area->addr;
}
EXPORT_SYMBOL(vmap_hugepage);
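/*
 * Illustrative sketch (not part of this patch): map an array of 2M hugetlb
 * pages already owned by the caller (e.g. handed out by the share pool)
 * into one contiguous kernel virtual range, and tear the mapping down
 * again without freeing the pages.
 */
static void *example_map_huge_pages(struct page **huge_pages, unsigned int count)
{
        return vmap_hugepage(huge_pages, count, VM_MAP, PAGE_KERNEL);
}

static void example_unmap_huge_pages(void *va)
{
        vunmap(va);     /* the huge pages themselves stay with the caller */
}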
static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
int node, const void *caller);
@@ -2494,7 +2587,12 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page *page;
int p;
- page = alloc_pages_node(node, alloc_mask|highmem_mask, page_order);
+ if (sp_is_enabled())
+        page = sp_alloc_pages(area, alloc_mask|highmem_mask,
+                              page_order, node);
+ else
+        page = alloc_pages_node(node, alloc_mask|highmem_mask,
+                                page_order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
@@ -2562,7 +2660,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
*/
size_per_node = size;
- if (node == NUMA_NO_NODE)
+ if (node == NUMA_NO_NODE && !sp_is_enabled())
size_per_node /= num_online_nodes();
if (size_per_node >= PMD_SIZE) {
shift = PMD_SHIFT;
@@ -2825,6 +2923,55 @@ void *vmalloc_32_user(unsigned long size)
}
EXPORT_SYMBOL(vmalloc_32_user);
/**
* vmalloc_hugepage - allocate virtually contiguous hugetlb memory
* @size: allocation size
*
* Allocate enough huge pages to cover @size and map them into
* contiguous kernel virtual space.
*
* The allocation size is aligned to PMD_SIZE automatically
*/
void *vmalloc_hugepage(unsigned long size)
{
/* PMD hugepage aligned */
size = PMD_ALIGN(size);
return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
NUMA_NO_NODE, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_hugepage);
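/*
 * Illustrative example (not part of this patch): a 5M request is rounded
 * up by PMD_ALIGN() to 6M, i.e. three 2M huge pages backing one virtually
 * contiguous area, which is later released with vfree().
 */
static void *example_vmalloc_hugepage_rounding(void)
{
        return vmalloc_hugepage(5UL << 20);     /* backed by three 2M pages */
}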
/**
* vmalloc_hugepage_user - allocate virtually contiguous hugetlb memory
* for userspace
* @size: allocation size
*
* Allocate enough huge pages to cover @size and map them into
* contiguous kernel virtual space. The resulting memory area
* is zeroed so it can be mapped to userspace without leaking data.
*
* The allocation size is aligned to PMD_SIZE automatically
*/
void *vmalloc_hugepage_user(unsigned long size)
{
struct vm_struct *area;
void *ret;
/* 2M hugepage aligned */
size = PMD_ALIGN(size);
ret = __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
NUMA_NO_NODE, __builtin_return_address(0));
if (ret) {
area = find_vm_area(ret);
area->flags |= VM_USERMAP;
}
return ret;
}
EXPORT_SYMBOL(vmalloc_hugepage_user);
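/*
 * Illustrative sketch (not part of this patch): allocate a zeroed,
 * hugepage-backed buffer at probe time that is meant to be mapped to
 * userspace later; the 32M size and the example_* names are assumptions.
 */
static void *example_user_buf;

static int example_probe_alloc(void)
{
        example_user_buf = vmalloc_hugepage_user(32UL << 20);
        if (!example_user_buf)
                return -ENOMEM;

        /* find_vm_area(example_user_buf)->flags now carries VM_USERMAP. */
        return 0;
}

static void example_remove_free(void)
{
        vfree(example_user_buf);
        example_user_buf = NULL;
}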
/*
* small helper routine, copy contents to buf from addr.
* If the page is not present, fill zero.
@@ -3150,6 +3297,85 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
/**
* remap_vmalloc_hugepage_range_partial - map vmalloc hugepages
* to userspace
* @vma: vma to cover
* @uaddr: target user address to start at
* @kaddr: virtual address of vmalloc hugepage kernel memory
* @size: size of map area
*
* Returns: 0 for success, -Exxx on failure
*
* This function checks that @kaddr is a valid vmalloc'ed area,
* and that it is big enough to cover the range starting at
* @uaddr in @vma. Will return failure if those criteria aren't
* met.
*
* Similar to remap_pfn_range() (see mm/memory.c)
*/
int remap_vmalloc_hugepage_range_partial(struct vm_area_struct *vma,
unsigned long uaddr, void *kaddr, unsigned long size)
{
struct vm_struct *area;
size = PMD_ALIGN(size);
if (!PMD_ALIGNED(uaddr) || !PMD_ALIGNED(kaddr))
return -EINVAL;
area = find_vm_area(kaddr);
if (!area)
return -EINVAL;
if (!(area->flags & VM_USERMAP))
return -EINVAL;
if (kaddr + size > area->addr + get_vm_area_size(area))
return -EINVAL;
do {
struct page *page = vmalloc_to_hugepage(kaddr);
int ret;
ret = vm_insert_page(vma, uaddr, page);
if (ret)
return ret;
uaddr += PMD_SIZE;
kaddr += PMD_SIZE;
size -= PMD_SIZE;
} while (size > 0);
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
return 0;
}
EXPORT_SYMBOL(remap_vmalloc_hugepage_range_partial);
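/*
 * Illustrative sketch (not part of this patch): map part of a hugepage-backed
 * kernel buffer into a user vma starting at a PMD-aligned offset; the kbuf
 * argument and the offset handling are assumptions.
 */
static int example_mmap_partial(struct vm_area_struct *vma, void *kbuf)
{
        unsigned long off = vma->vm_pgoff << PAGE_SHIFT;

        if (!PMD_ALIGNED(off))
                return -EINVAL;

        return remap_vmalloc_hugepage_range_partial(vma, vma->vm_start,
                                                    kbuf + off,
                                                    vma->vm_end - vma->vm_start);
}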
/**
* remap_vmalloc_hugepage_range - map vmalloc hugepages to userspace
* @vma: vma to cover (map full range of vma)
* @addr: vmalloc memory
* @pgoff: number of hugepages into addr before first page to map
*
* Returns: 0 for success, -Exxx on failure
*
* This function checks that addr is a valid vmalloc'ed area, and
* that it is big enough to cover the vma. Will return failure if
* those criteria aren't met.
*
* Similar to remap_pfn_range() (see mm/memory.c)
*/
int remap_vmalloc_hugepage_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff)
{
return remap_vmalloc_hugepage_range_partial(vma, vma->vm_start,
addr + (pgoff << PMD_SHIFT),
vma->vm_end - vma->vm_start);
}
EXPORT_SYMBOL(remap_vmalloc_hugepage_range);
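/*
 * Illustrative sketch (not part of this patch): a file_operations->mmap
 * handler that exposes a whole buffer previously allocated with
 * vmalloc_hugepage_user(); example_buf is an assumption.
 */
static void *example_buf;       /* assumed: set up with vmalloc_hugepage_user() */

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
        return remap_vmalloc_hugepage_range(vma, example_buf, 0);
}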
/*
* Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose
* not to have one.
@@ -3611,6 +3837,9 @@ static int s_show(struct seq_file *m, void *p)
if (is_vmalloc_addr(v->pages))
seq_puts(m, " vpages");
if (sp_is_enabled())
seq_printf(m, " order=%d", v->page_order);
show_numa_info(m, v);
seq_putc(m, '\n');
return 0;