提交 8deff3a6 编写于 作者: C Chen Jun 提交者: Zhang Zekun

mm/sharepool: Add mg_sp_alloc_nodemask

hulk inclusion
category: feature
bugzilla: N/A

--------------------------------

Support allocating memory from a set of nodes.

mg_sp_alloc() only allows allocating memory from a single node.
If that node does not have enough memory, the caller has to
pick another node and retry, which adds a lot of overhead.

To improve performance, add a new interface, mg_sp_alloc_nodemask(),
that allocates memory from any node in a caller-supplied nodemask.
Signed-off-by: Chen Jun <chenjun102@huawei.com>
上级 598a1d66
...@@ -629,6 +629,9 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping, ...@@ -629,6 +629,9 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
const struct hstate *hugetlb_get_hstate(void); const struct hstate *hugetlb_get_hstate(void);
struct page *hugetlb_alloc_hugepage(int nid, int flag); struct page *hugetlb_alloc_hugepage(int nid, int flag);
struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
unsigned long address, int flag);
int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr, int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
pgprot_t prot, struct page *hpage); pgprot_t prot, struct page *hpage);
int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm, int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
...@@ -645,6 +648,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag) ...@@ -645,6 +648,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag)
return NULL; return NULL;
} }
/*
 * Stub variant of hugetlb_alloc_hugepage_vma(): performs no allocation and
 * always returns NULL, so callers see the request as failed.
 * NOTE(review): presumably selected when the real implementation is
 * configured out; the guarding preprocessor condition is outside this
 * view - confirm.
 */
static inline struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
unsigned long address, int flag)
{
return NULL;
}
static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm, static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
unsigned long addr, pgprot_t prot, struct page *hpage) unsigned long addr, pgprot_t prot, struct page *hpage)
{ {
...@@ -1091,6 +1100,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag) ...@@ -1091,6 +1100,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag)
return NULL; return NULL;
} }
/*
 * Stub variant of hugetlb_alloc_hugepage_vma(): performs no allocation and
 * always returns NULL, so callers see the request as failed.
 * NOTE(review): presumably selected when the real implementation is
 * configured out; the guarding preprocessor condition is outside this
 * view - confirm.
 */
static inline struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
unsigned long address, int flag)
{
return NULL;
}
static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm, static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
unsigned long addr, pgprot_t prot, struct page *hpage) unsigned long addr, pgprot_t prot, struct page *hpage)
{ {
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
#include <linux/jump_label.h> #include <linux/jump_label.h>
#include <linux/kabi.h> #include <linux/kabi.h>
#include <linux/share_pool_interface.h>
#define SP_HUGEPAGE (1 << 0) #define SP_HUGEPAGE (1 << 0)
#define SP_HUGEPAGE_ONLY (1 << 1) #define SP_HUGEPAGE_ONLY (1 << 1)
#define SP_DVPP (1 << 2) #define SP_DVPP (1 << 2)
...@@ -256,6 +258,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, ...@@ -256,6 +258,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task); struct pid *pid, struct task_struct *task);
extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id); extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
extern void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
nodemask_t nodemask);
extern int mg_sp_free(unsigned long addr, int id); extern int mg_sp_free(unsigned long addr, int id);
extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size, extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
...@@ -286,7 +290,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, ...@@ -286,7 +290,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
unsigned long address, pte_t *ptep, unsigned int flags); unsigned long address, pte_t *ptep, unsigned int flags);
extern bool sp_check_addr(unsigned long addr); extern bool sp_check_addr(unsigned long addr);
extern bool sp_check_mmap_addr(unsigned long addr, unsigned long flags); extern bool sp_check_mmap_addr(unsigned long addr, unsigned long flags);
extern int sp_node_id(struct vm_area_struct *vma);
static inline bool sp_is_enabled(void) static inline bool sp_is_enabled(void)
{ {
...@@ -452,11 +455,6 @@ static inline bool is_vmalloc_sharepool(unsigned long vm_flags) ...@@ -452,11 +455,6 @@ static inline bool is_vmalloc_sharepool(unsigned long vm_flags)
return NULL; return NULL;
} }
static inline int sp_node_id(struct vm_area_struct *vma)
{
return numa_node_id();
}
static inline bool sp_check_addr(unsigned long addr) static inline bool sp_check_addr(unsigned long addr)
{ {
return false; return false;
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_SHARE_POOL_INTERFACE_H
#define LINUX_SHARE_POOL_INTERFACE_H
#include <linux/mman.h>
#include <linux/mm_types.h>
#include <linux/numa.h>
#include <linux/kabi.h>
#ifdef CONFIG_ASCEND_SHARE_POOL
extern int sp_node_id(struct vm_area_struct *vma);
#else
/*
 * Fallback for builds without CONFIG_ASCEND_SHARE_POOL: there is no share
 * pool area to consult, so @vma is ignored and the result is simply
 * numa_node_id().
 */
static inline int sp_node_id(struct vm_area_struct *vma)
{
return numa_node_id();
}
#endif /* !CONFIG_ASCEND_SHARE_POOL */
#endif /* LINUX_SHARE_POOL_INTERFACE_H */
...@@ -6312,7 +6312,7 @@ static struct page *hugetlb_alloc_hugepage_normal(struct hstate *h, ...@@ -6312,7 +6312,7 @@ static struct page *hugetlb_alloc_hugepage_normal(struct hstate *h,
/* /*
* Allocate hugepage without reserve * Allocate hugepage without reserve
*/ */
struct page *hugetlb_alloc_hugepage(int nid, int flag) struct page *hugetlb_alloc_hugepage_nodemask(int nid, int flag, nodemask_t *nodemask)
{ {
struct hstate *h = &default_hstate; struct hstate *h = &default_hstate;
gfp_t gfp_mask = htlb_alloc_mask(h); gfp_t gfp_mask = htlb_alloc_mask(h);
...@@ -6327,7 +6327,6 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag) ...@@ -6327,7 +6327,6 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
if (flag & ~HUGETLB_ALLOC_MASK) if (flag & ~HUGETLB_ALLOC_MASK)
return NULL; return NULL;
gfp_mask |= __GFP_THISNODE;
if (enable_charge_mighp) if (enable_charge_mighp)
gfp_mask |= __GFP_ACCOUNT; gfp_mask |= __GFP_ACCOUNT;
...@@ -6337,12 +6336,22 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag) ...@@ -6337,12 +6336,22 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
if (flag & HUGETLB_ALLOC_NORMAL) if (flag & HUGETLB_ALLOC_NORMAL)
page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid); page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid);
else if (flag & HUGETLB_ALLOC_BUDDY) else if (flag & HUGETLB_ALLOC_BUDDY)
page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL); page = alloc_migrate_huge_page(h, gfp_mask, nid, nodemask);
else else
page = alloc_huge_page_nodemask(h, nid, NULL, gfp_mask); page = alloc_huge_page_nodemask(h, nid, nodemask, gfp_mask);
return page; return page;
} }
struct page *hugetlb_alloc_hugepage(int nid, int flag)
{
nodemask_t nodemask;
nodes_clear(nodemask);
node_set(nid, nodemask);
return hugetlb_alloc_hugepage_nodemask(nid, flag, &nodemask);
}
EXPORT_SYMBOL_GPL(hugetlb_alloc_hugepage); EXPORT_SYMBOL_GPL(hugetlb_alloc_hugepage);
static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr, static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
...@@ -6364,6 +6373,19 @@ static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr, ...@@ -6364,6 +6373,19 @@ static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
return ptep; return ptep;
} }
/*
 * hugetlb_alloc_hugepage_vma - allocate a hugepage following a VMA's
 * memory policy.
 * @vma:     mapping the page is being allocated for.
 * @address: faulting address inside @vma, used for the policy lookup.
 * @flag:    HUGETLB_ALLOC_* flags, forwarded unchanged.
 *
 * Asks huge_node() for the preferred node and optional nodemask of the
 * policy covering @address, then allocates through
 * hugetlb_alloc_hugepage_nodemask().
 *
 * Return: the allocated page, or NULL on failure.
 */
struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma, unsigned long address, int flag)
{
	struct hstate *h = hstate_vma(vma);
	gfp_t gfp_mask = htlb_alloc_mask(h);
	struct mempolicy *mpol;
	nodemask_t *nodemask;
	struct page *page;
	int nid;

	nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
	page = hugetlb_alloc_hugepage_nodemask(nid, flag, nodemask);

	/*
	 * huge_node() may return a referenced policy, and @nodemask can point
	 * into it, so drop the reference only after the allocation is done.
	 */
	mpol_cond_put(mpol);

	return page;
}
static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr, static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr,
pgprot_t prot, unsigned long pfn) pgprot_t prot, unsigned long pfn)
{ {
......
...@@ -103,6 +103,8 @@ ...@@ -103,6 +103,8 @@
#include <linux/printk.h> #include <linux/printk.h>
#include <linux/swapops.h> #include <linux/swapops.h>
#include <linux/share_pool_interface.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -2198,7 +2200,7 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, ...@@ -2198,7 +2200,7 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
nid = interleave_nid(*mpol, vma, addr, nid = interleave_nid(*mpol, vma, addr,
huge_page_shift(hstate_vma(vma))); huge_page_shift(hstate_vma(vma)));
} else { } else {
nid = policy_node(gfp_flags, *mpol, numa_node_id()); nid = policy_node(gfp_flags, *mpol, sp_node_id(vma));
if ((*mpol)->mode == MPOL_BIND || mode == MPOL_PREFERRED_MANY) if ((*mpol)->mode == MPOL_BIND || mode == MPOL_PREFERRED_MANY)
*nodemask = &(*mpol)->v.nodes; *nodemask = &(*mpol)->v.nodes;
} }
......
...@@ -700,7 +700,7 @@ struct sp_area { ...@@ -700,7 +700,7 @@ struct sp_area {
struct mm_struct *mm; /* owner of k2u(task) */ struct mm_struct *mm; /* owner of k2u(task) */
unsigned long kva; /* shared kva */ unsigned long kva; /* shared kva */
pid_t applier; /* the original applier process */ pid_t applier; /* the original applier process */
int node_id; /* memory node */ int preferred_node_id; /* memory node */
int device_id; int device_id;
}; };
static DEFINE_SPINLOCK(sp_area_lock); static DEFINE_SPINLOCK(sp_area_lock);
...@@ -1892,7 +1892,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags, ...@@ -1892,7 +1892,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
spa->mm = NULL; spa->mm = NULL;
spa->kva = 0; /* NULL pointer */ spa->kva = 0; /* NULL pointer */
spa->applier = applier; spa->applier = applier;
spa->node_id = node_id; spa->preferred_node_id = node_id;
spa->device_id = device_id; spa->device_id = device_id;
spa_inc_usage(spa); spa_inc_usage(spa);
...@@ -2191,7 +2191,9 @@ static int sp_free_get_spa(struct sp_free_context *fc) ...@@ -2191,7 +2191,9 @@ static int sp_free_get_spa(struct sp_free_context *fc)
} }
/** /**
* mg_sp_free() - Free the memory allocated by mg_sp_alloc(). * mg_sp_free() - Free the memory allocated by mg_sp_alloc() or
* mg_sp_alloc_nodemask().
*
* @addr: the starting VA of the memory. * @addr: the starting VA of the memory.
* @id: Address space identifier, which is used to distinguish the addr. * @id: Address space identifier, which is used to distinguish the addr.
* *
...@@ -2448,18 +2450,15 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, ...@@ -2448,18 +2450,15 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
} }
static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len, static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len,
unsigned long node) nodemask_t *nodemask)
{ {
nodemask_t nmask;
nodes_clear(nmask);
node_set(node, nmask);
return __do_mbind(start, len, MPOL_BIND, MPOL_F_STATIC_NODES, return __do_mbind(start, len, MPOL_BIND, MPOL_F_STATIC_NODES,
&nmask, MPOL_MF_STRICT, mm); nodemask, MPOL_MF_STRICT, mm);
} }
static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
struct sp_group_node *spg_node, struct sp_alloc_context *ac) struct sp_group_node *spg_node, struct sp_alloc_context *ac,
nodemask_t *nodemask)
{ {
int ret; int ret;
...@@ -2468,10 +2467,10 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ...@@ -2468,10 +2467,10 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
return ret; return ret;
if (!ac->have_mbind) { if (!ac->have_mbind) {
ret = sp_mbind(mm, spa->va_start, spa->real_size, spa->node_id); ret = sp_mbind(mm, spa->va_start, spa->real_size, nodemask);
if (ret < 0) { if (ret < 0) {
pr_err("cannot bind the memory range to specified node:%d, err:%d\n", pr_err("cannot bind the memory range to node[%*pbl], err:%d\n",
spa->node_id, ret); nodemask_pr_args(nodemask), ret);
return ret; return ret;
} }
ac->have_mbind = true; ac->have_mbind = true;
...@@ -2490,17 +2489,25 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, ...@@ -2490,17 +2489,25 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
} }
static int sp_alloc_mmap_populate(struct sp_area *spa, static int sp_alloc_mmap_populate(struct sp_area *spa,
struct sp_alloc_context *ac) struct sp_alloc_context *ac,
nodemask_t *nodemask)
{ {
int ret = -EINVAL; int ret = -EINVAL;
int mmap_ret = 0; int mmap_ret = 0;
struct mm_struct *mm, *end_mm = NULL; struct mm_struct *mm, *end_mm = NULL;
struct sp_group_node *spg_node; struct sp_group_node *spg_node;
nodemask_t __nodemask;
if (!nodemask) { /* mg_sp_alloc */
nodes_clear(__nodemask);
node_set(spa->preferred_node_id, __nodemask);
} else /* mg_sp_alloc_nodemask */
__nodemask = *nodemask;
/* create mapping for each process in the group */ /* create mapping for each process in the group */
list_for_each_entry(spg_node, &spa->spg->procs, proc_node) { list_for_each_entry(spg_node, &spa->spg->procs, proc_node) {
mm = spg_node->master->mm; mm = spg_node->master->mm;
mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac); mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac, &__nodemask);
if (mmap_ret) { if (mmap_ret) {
/* /*
...@@ -2563,19 +2570,8 @@ static void sp_alloc_finish(int result, struct sp_area *spa, ...@@ -2563,19 +2570,8 @@ static void sp_alloc_finish(int result, struct sp_area *spa,
sp_group_put(spg); sp_group_put(spg);
} }
/** void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
* mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group. nodemask_t *nodemask)
* @size: the size of memory to allocate.
* @sp_flags: how to allocate the memory.
* @spg_id: the share group that the memory is allocated to.
*
* Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
*
* Return:
* * if succeed, return the starting address of the shared memory.
* * if fail, return the pointer of -errno.
*/
void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
{ {
struct sp_area *spa = NULL; struct sp_area *spa = NULL;
int ret = 0; int ret = 0;
...@@ -2598,7 +2594,7 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) ...@@ -2598,7 +2594,7 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
goto out; goto out;
} }
ret = sp_alloc_mmap_populate(spa, &ac); ret = sp_alloc_mmap_populate(spa, &ac, nodemask);
if (ret && ac.state == ALLOC_RETRY) { if (ret && ac.state == ALLOC_RETRY) {
/* /*
* The mempolicy for shared memory is located at backend file, which varies * The mempolicy for shared memory is located at backend file, which varies
...@@ -2616,6 +2612,30 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id) ...@@ -2616,6 +2612,30 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
else else
return (void *)(spa->va_start); return (void *)(spa->va_start);
} }
/**
 * mg_sp_alloc_nodemask() - Allocate shared memory for all the processes in a
 * sp_group, restricted to a set of memory nodes.
 * @size: the size of memory to allocate.
 * @sp_flags: how to allocate the memory.
 * @spg_id: the share group that the memory is allocated to.
 * @nodemask: the nodes the memory range is bound to. It is taken by value;
 *            a pointer to this local copy is handed to
 *            __mg_sp_alloc_nodemask().
 *
 * Return: the value returned by __mg_sp_alloc_nodemask().
 * NOTE(review): presumably the same convention as mg_sp_alloc() (starting
 * address on success, pointer of -errno on failure) - confirm.
 */
void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
nodemask_t nodemask)
{
return __mg_sp_alloc_nodemask(size, sp_flags, spg_id, &nodemask);
}
EXPORT_SYMBOL_GPL(mg_sp_alloc_nodemask);
/**
 * mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group.
 * @size: the size of memory to allocate.
 * @sp_flags: how to allocate the memory.
 * @spg_id: the share group that the memory is allocated to.
 *
 * Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
 *
 * This is __mg_sp_alloc_nodemask() called with a NULL nodemask. In that case
 * sp_alloc_mmap_populate() builds a one-node mask from the area's
 * preferred_node_id, so the memory is bound to that single node.
 *
 * Return:
 * * if succeed, return the starting address of the shared memory.
 * * if fail, return the pointer of -errno.
 */
void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
{
return __mg_sp_alloc_nodemask(size, sp_flags, spg_id, NULL);
}
EXPORT_SYMBOL_GPL(mg_sp_alloc); EXPORT_SYMBOL_GPL(mg_sp_alloc);
/** /**
...@@ -3599,7 +3619,7 @@ int sp_node_id(struct vm_area_struct *vma) ...@@ -3599,7 +3619,7 @@ int sp_node_id(struct vm_area_struct *vma)
if (vma && (vma->vm_flags & VM_SHARE_POOL) && vma->vm_private_data) { if (vma && (vma->vm_flags & VM_SHARE_POOL) && vma->vm_private_data) {
spa = vma->vm_private_data; spa = vma->vm_private_data;
node_id = spa->node_id; node_id = spa->preferred_node_id;
} }
return node_id; return node_id;
...@@ -4028,7 +4048,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, ...@@ -4028,7 +4048,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
unsigned long haddr = address & huge_page_mask(h); unsigned long haddr = address & huge_page_mask(h);
bool new_page = false; bool new_page = false;
int err; int err;
int node_id;
struct sp_area *spa; struct sp_area *spa;
bool charge_hpage; bool charge_hpage;
...@@ -4037,7 +4056,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, ...@@ -4037,7 +4056,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
pr_err("share pool: vma is invalid, not from sp mmap\n"); pr_err("share pool: vma is invalid, not from sp mmap\n");
return ret; return ret;
} }
node_id = spa->node_id;
retry: retry:
page = find_lock_page(mapping, idx); page = find_lock_page(mapping, idx);
...@@ -4049,7 +4067,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, ...@@ -4049,7 +4067,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
charge_hpage = false; charge_hpage = false;
page = alloc_huge_page(vma, haddr, 0); page = alloc_huge_page(vma, haddr, 0);
if (IS_ERR(page)) { if (IS_ERR(page)) {
page = hugetlb_alloc_hugepage(node_id, page = hugetlb_alloc_hugepage_vma(vma, haddr,
HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM); HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM);
if (!page) if (!page)
page = ERR_PTR(-ENOMEM); page = ERR_PTR(-ENOMEM);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册