Commit 5c580732 authored by Wang Wensheng, committed by Zheng Zengkai

share_pool: Implement sp_make_share_k2u()

ascend inclusion
category: Feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4NDAW
CVE: NA

-------------------

This shares a kernel memory range to userspace.
Introduce the vm_struct flag VM_SHAREPOOL to indicate that a vm_struct is
shared to userspace, so we must not vfree such a vm_area.
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent e04d6654
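
For context, a minimal usage sketch of the new interface (not part of this patch): the helper name and buffer size are hypothetical, while sp_make_share_k2u(), SPG_ID_NONE and the ERR_PTR return convention come from the code below; passing sp_flags of 0 relies on sp_k2u_prepare() accepting anything other than SP_DVPP being unset.

#include <linux/err.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/share_pool.h>

/* Share a freshly vmalloc'd buffer with the calling task only (no sp_group). */
static void *share_buf_with_current(unsigned long size)
{
	void *kva = vmalloc(size);
	void *uva;

	if (!kva)
		return ERR_PTR(-ENOMEM);

	uva = sp_make_share_k2u((unsigned long)kva, size, 0 /* sp_flags */,
				current->tgid, SPG_ID_NONE);
	if (IS_ERR(uva))
		vfree(kva);	/* safe: sp_k2u_finish() clears VM_SHAREPOOL on failure */

	return uva;	/* userspace address on success, ERR_PTR on failure */
}
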
@@ -288,6 +288,14 @@ static inline void sp_dump_stack(void)
dump_stack();
}
static inline bool is_vmalloc_sharepool(unsigned long vm_flags)
{
if (sp_is_enabled() && (vm_flags & VM_SHAREPOOL))
return true;
return false;
}
#else /* CONFIG_ASCEND_SHARE_POOL */
static inline int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id)
......
@@ -28,6 +28,11 @@ struct notifier_block; /* in notifier.h */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
#define VM_NO_HUGE_VMAP 0x00000400 /* force PAGE_SIZE pte mapping */
#define VM_HUGE_PAGES 0x00001000 /* used for vmalloc hugepages */
#ifdef CONFIG_ASCEND_SHARE_POOL
#define VM_SHAREPOOL 0x00002000 /* remapped to sharepool */
#else
#define VM_SHAREPOOL 0
#endif
/*
 * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC.
......
@@ -3287,6 +3287,25 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero_ul,
.extra2 = &sysctl_sp_compact_interval_max,
},
{
/* 0: map_unlock, 1: map_lock */
.procname = "share_pool_map_lock_enable",
.data = &sysctl_share_pool_map_lock_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{
.procname = "sharepool_perf_k2u",
.data = &sysctl_sp_perf_k2u,
.maxlen = sizeof(sysctl_sp_perf_k2u),
.mode = 0600,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ten_thousand,
},
#endif
{ }
};
......
......@@ -31,6 +31,7 @@
#include <linux/llist.h>
#include <linux/cma.h>
#include <linux/mman.h>
#include <linux/share_pool.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -4110,6 +4111,13 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
pte = huge_ptep_get_and_clear(mm, address, ptep);
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
/* sharepool k2u mapped pages are marked special */
if (sp_check_vm_share_pool(vma->vm_flags) && pte_special(pte)) {
spin_unlock(ptl);
continue;
}
if (huge_pte_dirty(pte))
set_page_dirty(page);
......
@@ -87,6 +87,10 @@ static int __read_mostly enable_share_k2u_spg = 1;
/* debug mode */
int sysctl_sp_debug_mode;
int sysctl_share_pool_map_lock_enable;
int sysctl_sp_perf_k2u;
static int share_pool_group_mode = SINGLE_GROUP_MODE;
static int system_group_count;
@@ -632,6 +636,13 @@ static inline void check_interrupt_context(void)
panic("function can't be used in interrupt context\n");
}
static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
struct sp_area *spa, unsigned long *populate,
unsigned long prot);
static void sp_munmap(struct mm_struct *mm, unsigned long addr, unsigned long size);
static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
struct mm_struct *mm, unsigned long prot);
static void free_sp_group_id(int spg_id)
{
/* ida operation is protected by an internal spin_lock */
@@ -1206,6 +1217,19 @@ static struct sp_area *__find_sp_area(unsigned long addr)
return n;
}
static bool vmalloc_area_clr_flag(unsigned long kva, unsigned long flags)
{
struct vm_struct *area;
area = find_vm_area((void *)kva);
if (area) {
area->flags &= ~flags;
return true;
}
return false;
}
/*
* Free the VA region starting from addr to the share pool
*/
@@ -1235,6 +1259,9 @@ static void sp_free_area(struct sp_area *spa)
}
}
if (spa->kva && !vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL))
pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
spa_dec_usage(spa);
if (spa->spg != spg_none)
list_del(&spa->link);
@@ -1542,6 +1569,37 @@ int mg_sp_free(unsigned long addr)
}
EXPORT_SYMBOL_GPL(mg_sp_free);
/* wrapper of __do_mmap() and the caller must hold down_write(&mm->mmap_lock). */
static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
struct sp_area *spa, unsigned long *populate,
unsigned long prot)
{
unsigned long addr = spa->va_start;
unsigned long size = spa_size(spa);
unsigned long flags = MAP_FIXED | MAP_SHARED | MAP_POPULATE |
MAP_SHARE_POOL;
unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY;
unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT;
/* Mark the mapped region to be locked. When MAP_LOCKED is enabled,
 * multiple tasks contend for the resources, which can cause performance loss.
 */
if (sysctl_share_pool_map_lock_enable)
flags |= MAP_LOCKED;
atomic_inc(&spa->use_count);
addr = __do_mmap_mm(mm, file, addr, size, prot, flags, vm_flags, pgoff,
populate, NULL);
if (IS_ERR_VALUE(addr)) {
atomic_dec(&spa->use_count);
pr_err("do_mmap fails %ld\n", addr);
} else {
BUG_ON(addr != spa->va_start);
}
return addr;
}
/**
* sp_alloc() - Allocate shared memory for all the processes in a sp_group.
* @size: the size of memory to allocate.
@@ -1596,6 +1654,314 @@ static int is_vmap_hugepage(unsigned long addr)
return 0;
}
static unsigned long __sp_remap_get_pfn(unsigned long kva)
{
unsigned long pfn;
if (is_vmalloc_addr((void *)kva))
pfn = vmalloc_to_pfn((void *)kva);
else
pfn = virt_to_pfn(kva);
return pfn;
}
/* when called by k2u to group, always make sure rw_lock of spg is down */
static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
struct mm_struct *mm, unsigned long prot)
{
struct vm_area_struct *vma;
unsigned long ret_addr;
unsigned long populate = 0;
int ret = 0;
unsigned long addr, buf, offset;
down_write(&mm->mmap_lock);
if (unlikely(mm->core_state)) {
pr_err("k2u mmap: encountered coredump, abort\n");
ret_addr = -EBUSY;
goto put_mm;
}
ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot);
if (IS_ERR_VALUE(ret_addr)) {
pr_debug("k2u mmap failed %lx\n", ret_addr);
goto put_mm;
}
BUG_ON(ret_addr != spa->va_start);
vma = find_vma(mm, ret_addr);
BUG_ON(vma == NULL);
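/* for writable mappings, clear arm64 PTE_RDONLY and pre-set PTE_DIRTY in the page protection */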
if (prot & PROT_WRITE)
vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY);
if (is_vm_hugetlb_page(vma)) {
ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0);
if (ret) {
do_munmap(mm, ret_addr, spa_size(spa), NULL);
pr_debug("remap vmalloc hugepage failed, ret %d, kva is %lx\n",
ret, (unsigned long)kva);
ret_addr = ret;
goto put_mm;
}
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
} else {
buf = ret_addr;
addr = kva;
offset = 0;
do {
ret = remap_pfn_range(vma, buf, __sp_remap_get_pfn(addr), PAGE_SIZE,
__pgprot(vma->vm_page_prot.pgprot));
if (ret) {
do_munmap(mm, ret_addr, spa_size(spa), NULL);
pr_err("remap_pfn_range failed %d\n", ret);
ret_addr = ret;
goto put_mm;
}
offset += PAGE_SIZE;
buf += PAGE_SIZE;
addr += PAGE_SIZE;
} while (offset < spa_size(spa));
}
put_mm:
up_write(&mm->mmap_lock);
return ret_addr;
}
/**
* sp_make_share_kva_to_task() - Share kernel memory to current task.
* @kva: the VA of shared kernel memory
* @size: the size of area to share, should be aligned properly
 * @sp_flags: the flags for the operation
*
 * Return:
 * * on success, the shared user address where the mapping starts.
 * * on failure, an ERR_PTR-encoded -errno.
*/
static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags)
{
void *uva;
struct sp_area *spa;
struct spg_proc_stat *stat;
unsigned long prot = PROT_READ | PROT_WRITE;
down_write(&sp_group_sem);
stat = sp_init_process_stat(current, current->mm, spg_none);
up_write(&sp_group_sem);
if (IS_ERR(stat)) {
pr_err_ratelimited("k2u_task init process stat failed %lx\n",
PTR_ERR(stat));
return stat;
}
spa = sp_alloc_area(size, sp_flags, spg_none, SPA_TYPE_K2TASK, current->tgid);
if (IS_ERR(spa)) {
pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n",
PTR_ERR(spa));
return spa;
}
spa->kva = kva;
uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot);
__sp_area_drop(spa);
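/* spa stays valid on the success path: sp_mmap() took an extra use_count reference */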
if (IS_ERR(uva))
pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva));
else {
update_spg_proc_stat(size, true, stat, SPA_TYPE_K2TASK);
spa->mm = current->mm;
}
return uva;
}
/**
 * Share kernel memory to an spg; the current process must be in that group.
 * @kva: the VA of shared kernel memory
 * @size: the size of area to share, should be aligned properly
 * @sp_flags: the flags for the operation
* @spg: the sp group to be shared with
*
* Return: the shared user address to start at
*/
static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size,
unsigned long sp_flags, struct sp_group *spg)
{
struct sp_area *spa;
struct mm_struct *mm;
struct sp_group_node *spg_node;
void *uva = ERR_PTR(-ENODEV);
down_read(&spg->rw_lock);
spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2SPG, current->tgid);
if (IS_ERR(spa)) {
up_read(&spg->rw_lock);
pr_err_ratelimited("alloc spa failed in k2u_spg (potential no enough virtual memory when -75): %ld\n",
PTR_ERR(spa));
return spa;
}
spa->kva = kva;
list_for_each_entry(spg_node, &spg->procs, proc_node) {
mm = spg_node->master->mm;
uva = (void *)sp_remap_kva_to_vma(kva, spa, mm, spg_node->prot);
if (IS_ERR(uva)) {
pr_err("remap k2u to spg failed %ld\n", PTR_ERR(uva));
__sp_free(spg, spa->va_start, spa_size(spa), mm);
goto out;
}
}
out:
up_read(&spg->rw_lock);
__sp_area_drop(spa);
if (!IS_ERR(uva))
sp_update_process_stat(current, true, spa);
return uva;
}
static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags)
{
struct vm_struct *area;
area = find_vm_area((void *)kva);
if (area) {
area->flags |= flags;
return true;
}
return false;
}
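/*
 * Per-call state for sp_make_share_k2u(), threaded through the
 * prepare / target-check / map / finish stages below.
 */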
struct sp_k2u_context {
unsigned long kva;
unsigned long kva_aligned;
unsigned long size;
unsigned long size_aligned;
unsigned long sp_flags;
int spg_id;
bool to_task;
struct timespec64 start;
struct timespec64 end;
};
static void trace_sp_k2u_begin(struct sp_k2u_context *kc)
{
if (!sysctl_sp_perf_k2u)
return;
ktime_get_ts64(&kc->start);
}
static void trace_sp_k2u_finish(struct sp_k2u_context *kc, void *uva)
{
unsigned long cost;
if (!sysctl_sp_perf_k2u)
return;
ktime_get_ts64(&kc->end);
cost = SEC2US(kc->end.tv_sec - kc->start.tv_sec) +
NS2US(kc->end.tv_nsec - kc->start.tv_nsec);
if (cost >= (unsigned long)sysctl_sp_perf_k2u) {
pr_err("Task %s(%d/%d) sp_k2u returns 0x%lx consumes %luus, size is %luKB, size_aligned is %luKB, sp_flags is %lx, to_task is %d\n",
current->comm, current->tgid, current->pid,
(unsigned long)uva, cost, byte2kb(kc->size), byte2kb(kc->size_aligned),
kc->sp_flags, kc->to_task);
}
}
static int sp_k2u_prepare(unsigned long kva, unsigned long size,
unsigned long sp_flags, int spg_id, struct sp_k2u_context *kc)
{
int is_hugepage;
unsigned int page_size = PAGE_SIZE;
unsigned long kva_aligned, size_aligned;
trace_sp_k2u_begin(kc);
if (sp_flags & ~SP_DVPP) {
pr_err_ratelimited("k2u sp_flags %lx error\n", sp_flags);
return -EINVAL;
}
if (!current->mm) {
pr_err_ratelimited("k2u: kthread is not allowed\n");
return -EPERM;
}
is_hugepage = is_vmap_hugepage(kva);
if (is_hugepage > 0) {
sp_flags |= SP_HUGEPAGE;
page_size = PMD_SIZE;
} else if (is_hugepage == 0) {
/* do nothing */
} else {
pr_err_ratelimited("k2u kva is not vmalloc address\n");
return is_hugepage;
}
/* align kva down so that the caller can start from any valid kva */
kva_aligned = ALIGN_DOWN(kva, page_size);
size_aligned = ALIGN(kva + size, page_size) - kva_aligned;
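/*
 * Worked example (hypothetical values, 4K pages): kva = 0x10001200,
 * size = 0x2000 -> kva_aligned = 0x10001000 and size_aligned =
 * ALIGN(0x10003200, 0x1000) - 0x10001000 = 0x3000.
 */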
if (!vmalloc_area_set_flag(kva_aligned, VM_SHAREPOOL)) {
pr_debug("k2u_task kva %lx is not valid\n", kva_aligned);
return -EINVAL;
}
kc->kva = kva;
kc->kva_aligned = kva_aligned;
kc->size = size;
kc->size_aligned = size_aligned;
kc->sp_flags = sp_flags;
kc->spg_id = spg_id;
kc->to_task = false;
return 0;
}
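/* Decide whether this k2u call targets the current task or an sp_group. */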
static int sp_check_k2task(struct sp_k2u_context *kc)
{
int ret = 0;
int spg_id = kc->spg_id;
if (share_pool_group_mode == SINGLE_GROUP_MODE) {
struct sp_group *spg = get_first_group(current->mm);
if (!spg) {
if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT)
ret = -EINVAL;
else
kc->to_task = true;
} else {
if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id)
ret = -EINVAL;
sp_group_drop(spg);
}
} else {
if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
kc->to_task = true;
}
return ret;
}
static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc)
{
if (IS_ERR(uva))
vmalloc_area_clr_flag(kc->kva_aligned, VM_SHAREPOOL);
else
uva = uva + (kc->kva - kc->kva_aligned);
trace_sp_k2u_finish(kc, uva);
sp_dump_stack();
return uva;
}
/**
* sp_make_share_k2u() - Share kernel memory to current process or an sp_group.
* @kva: the VA of shared kernel memory.
@@ -1616,7 +1982,43 @@ static int is_vmap_hugepage(unsigned long addr)
void *sp_make_share_k2u(unsigned long kva, unsigned long size,
unsigned long sp_flags, int pid, int spg_id)
{
void *uva;
int ret;
struct sp_k2u_context kc;
check_interrupt_context();
ret = sp_k2u_prepare(kva, size, sp_flags, spg_id, &kc);
if (ret)
return ERR_PTR(ret);
ret = sp_check_k2task(&kc);
if (ret) {
uva = ERR_PTR(ret);
goto out;
}
if (kc.to_task)
uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags);
else {
struct sp_group *spg;
spg = __sp_find_spg(current->pid, kc.spg_id);
if (spg) {
ret = sp_check_caller_permission(spg, current->mm);
if (ret < 0) {
sp_group_drop(spg);
uva = ERR_PTR(ret);
goto out;
}
uva = sp_make_share_kva_to_spg(kc.kva_aligned, kc.size_aligned, kc.sp_flags, spg);
sp_group_drop(spg);
} else
uva = ERR_PTR(-ENODEV);
}
out:
return sp_k2u_finish(uva, &kc);
}
EXPORT_SYMBOL_GPL(sp_make_share_k2u);
......
@@ -37,6 +37,7 @@
#include <linux/pgtable.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <linux/share_pool.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -2622,6 +2623,12 @@ static void __vunmap(const void *addr, int deallocate_pages)
return;
}
/* unmapping a sharepool vm area would cause a memory leak! */
if (is_vmalloc_sharepool(area->flags)) {
WARN(1, "Memory leak due to vfree() of a sharepool vm area (%p)!\n", addr);
return;
}
debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
......