提交 6ef9f2b1 编写于 作者: F Fang Lijun 提交者: Zheng Zengkai

mm: Check numa node hugepages enough when mmap hugetlb

ascend inclusion
category: Bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4JMLR
CVE: NA

--------------

The system cannot use the CDM nodes' memory directly, but it can mmap huge
pages from all nodes; this causes a Bus error when the mmap succeeds even
though the node does not have enough huge pages to back it.

When cdmmask is set, users pass the NUMA node id through an mmap flag to
map huge pages from a specific NUMA node; if that node does not have
enough huge pages, return -ENOMEM.

Dvpp uses the MAP_CHECKNODE flag to enable the hugetlb node check.
A global variable holding the NUMA node would make mmap non-reentrant,
so the node id is carried in flag bits [26:31] directly.
v2: fix a compiling error on platforms such as mips
Signed-off-by: NFang Lijun <fanglijun3@huawei.com>
Reviewed-by: NWeilong Chen <chenweilong@huawei.com>
Signed-off-by: NZheng Zengkai <zhengzengkai@huawei.com>
上级 208a3120
......@@ -32,6 +32,7 @@
#define MAP_HUGETLB 0x100000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */
#define MAP_PA32BIT 0x400000 /* physical address is within 4G */
#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#define MS_ASYNC 1 /* sync memory asynchronously */
#define MS_SYNC 2 /* synchronous memory sync */
......
......@@ -50,6 +50,7 @@
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#define MAP_PA32BIT 0x400000 /* physical address is within 4G */
#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
/*
* Flags for msync
......
......@@ -27,6 +27,7 @@
#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#define MAP_UNINITIALIZED 0 /* uninitialized anonymous mmap */
#define MAP_PA32BIT 0x400000 /* physical address is within 4G */
#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#define MS_SYNC 1 /* synchronous memory sync */
#define MS_ASYNC 2 /* sync memory asynchronously */
......
......@@ -26,6 +26,7 @@
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */
#define MAP_PA32BIT 0x400000 /* physical address is within 4G */
#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
/* Override any generic PKEY permission defines */
#define PKEY_DISABLE_EXECUTE 0x4
......
......@@ -22,5 +22,6 @@
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */
#define MAP_PA32BIT 0x400000 /* physical address is within 4G */
#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#endif /* _UAPI__SPARC_MMAN_H__ */
......@@ -57,6 +57,7 @@
#define MAP_HUGETLB 0x80000 /* create a huge page mapping */
#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
#define MAP_PA32BIT 0x400000 /* physical address is within 4G */
#define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */
#define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be
* uninitialized */
......
......@@ -118,6 +118,45 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
/*
* Check current numa node has enough free huge pages to mmap hugetlb.
* resv_huge_pages_node: mmap hugepages but haven't used in current
* numa node.
*/
/*
 * Check whether the NUMA node recorded in @vma's vm_flags has enough free
 * huge pages to back an mmap of @nr huge pages.
 *
 * resv_huge_pages_node[nid] counts huge pages already promised to earlier
 * mmaps on that node but not yet faulted in; the request only succeeds when
 * that reservation plus @nr still fits within the node's free pool, in which
 * case @nr is added to the node's reservation.
 *
 * Returns 0 on success, -EINVAL for an out-of-range node id, or -ENOMEM when
 * the node cannot satisfy the request.
 */
static int hugetlb_checknode(struct vm_area_struct *vma, long nr)
{
	int nid;
	int ret = 0;
	struct hstate *h = &default_hstate;

	spin_lock(&hugetlb_lock);

	/* Node id was stashed in the high vm_flags bits by set_vm_checknode() */
	nid = vma->vm_flags >> CHECKNODE_BITS;

	if (nid >= MAX_NUMNODES)
		ret = -EINVAL;
	else if (h->free_huge_pages_node[nid] < nr)
		ret = -ENOMEM;
	else if (h->resv_huge_pages_node[nid] + nr >
		 h->free_huge_pages_node[nid])
		ret = -ENOMEM;
	else
		h->resv_huge_pages_node[nid] += nr;

	spin_unlock(&hugetlb_lock);
	return ret;
}
/*
* Mask used when checking the page offset value passed in via system
* calls. This value will be converted to a loff_t which is signed.
......@@ -175,6 +214,12 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
inode_lock(inode);
file_accessed(file);
if (is_set_cdmmask() && (vma->vm_flags & VM_CHECKNODE)) {
ret = hugetlb_checknode(vma, len >> huge_page_shift(h));
if (ret < 0)
goto out;
}
ret = -ENOMEM;
if (hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
......
......@@ -581,6 +581,7 @@ struct hstate {
unsigned int nr_huge_pages_node[MAX_NUMNODES];
unsigned int free_huge_pages_node[MAX_NUMNODES];
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
unsigned int resv_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
unsigned int nr_free_vmemmap_pages;
#endif
......
......@@ -97,6 +97,15 @@ extern const int mmap_rnd_compat_bits_max;
extern int mmap_rnd_compat_bits __read_mostly;
#endif
#ifdef CONFIG_COHERENT_DEVICE
/*
 * True when at least one coherent-device-memory (CDM) node is present in
 * cdmmask, i.e. the system was configured with CDM nodes.
 */
static inline bool is_set_cdmmask(void)
{
	return !nodes_empty(cdmmask);
}
#else
/*
 * No CDM support configured: use a static inline rather than a function-like
 * macro so callers get the same type checking on both configurations; the
 * constant-false body still compiles away entirely.
 */
static inline bool is_set_cdmmask(void)
{
	return false;
}
#endif
#include <asm/page.h>
#include <asm/processor.h>
......@@ -304,6 +313,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_CDM 0x100000000 /* Contains coherent device memory */
#endif
#define VM_CHECKNODE 0x200000000
#ifdef CONFIG_USERSWAP
/* bit[32:36] is the protection key of intel, so use a large value for VM_USWAP */
#define VM_USWAP 0x2000000000000000
......
......@@ -8,6 +8,21 @@
#include <linux/atomic.h>
#include <uapi/linux/mman.h>
#ifdef CONFIG_COHERENT_DEVICE
/* NUMA node id for MAP_CHECKNODE mappings lives in vm_flags bits [48..63] */
#define CHECKNODE_BITS 48
#define CHECKNODE_MASK (~((_AC(1, UL) << CHECKNODE_BITS) - 1))
/*
 * Copy the NUMA node id supplied in the mmap @flags into the high bits of
 * @vm_flags and mark the mapping with VM_CHECKNODE, so hugetlbfs can later
 * verify that node has enough free huge pages. No-op unless CDM nodes exist.
 */
static inline void set_vm_checknode(vm_flags_t *vm_flags, unsigned long flags)
{
	unsigned long node;

	if (!is_set_cdmmask())
		return;

	/* mmap flag bits [26:31] (the MAP_HUGE field) carry the node id */
	node = (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK;
	*vm_flags |= VM_CHECKNODE | ((node << CHECKNODE_BITS) & CHECKNODE_MASK);
}
#else
#define CHECKNODE_BITS (0)
/* CDM not configured: node checking is disabled, nothing to record. */
static inline void set_vm_checknode(vm_flags_t *vm_flags, unsigned long flags)
{
}
#endif
/*
* Arrange for legacy / undefined architecture specific flags to be
* ignored by mmap handling code.
......
......@@ -5,6 +5,7 @@
#include <asm-generic/mman-common.h>
#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
#define MAP_CHECKNODE 0x0400 /* hugetlb numa node check */
#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
#define MAP_LOCKED 0x2000 /* pages are locked */
......
......@@ -30,6 +30,7 @@
#include <linux/numa.h>
#include <linux/llist.h>
#include <linux/cma.h>
#include <linux/mman.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
......@@ -1164,6 +1165,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
SetHPageRestoreReserve(page);
h->resv_huge_pages--;
if (is_set_cdmmask() && (vma->vm_flags & VM_CHECKNODE))
h->resv_huge_pages_node[vma->vm_flags >> CHECKNODE_BITS]--;
}
mpol_cond_put(mpol);
......
......@@ -1581,6 +1581,12 @@ __do_mmap(struct file *file, unsigned long addr, unsigned long len,
vm_flags |= VM_NORESERVE;
}
/* set numa node id into vm_flags,
* hugetlbfs file mmap will use it to check node
*/
if (flags & MAP_CHECKNODE)
set_vm_checknode(&vm_flags, flags);
addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
if (!IS_ERR_VALUE(addr) &&
((vm_flags & VM_LOCKED) ||
......@@ -1825,12 +1831,23 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
} else if (flags & MAP_HUGETLB) {
struct user_struct *user = NULL;
struct hstate *hs;
int page_size_log;
hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
/*
* If config cdm node, flags bits [26:31] used for
* mmap hugetlb check node
*/
if (is_set_cdmmask())
page_size_log = 0;
else
page_size_log = (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK;
hs = hstate_sizelog(page_size_log);
if (!hs)
return -EINVAL;
len = ALIGN(len, huge_page_size(hs));
/*
* VM_NORESERVE is used because the reservations will be
* taken when vm_ops->mmap() is called
......@@ -1839,8 +1856,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
*/
file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
VM_NORESERVE,
&user, HUGETLB_ANONHUGE_INODE,
(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
&user, HUGETLB_ANONHUGE_INODE, page_size_log);
if (IS_ERR(file))
return PTR_ERR(file);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册