Commit 8deff3a6 authored by Chen Jun, committed by Zhang Zekun

mm/sharepool: Add mg_sp_alloc_nodemask

hulk inclusion
category: feature
bugzilla: N/A

--------------------------------

Support allocating memory from a set of nodes.

mg_sp_alloc only allows allocating memory from a single node.
If that node does not have enough memory, the caller has to
retry with the next node, which adds considerable overhead.

To improve performance, add a new interface that allocates
memory from a nodemask.
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Parent 598a1d66
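Illustrative only, not part of the patch: a minimal sketch of how a caller that has already joined share group spg_id might use the new interface, assuming the mg_sp_alloc_nodemask() prototype added below. The include path, node numbers, and the SP_HUGEPAGE flag are example choices, not taken from this commit.

#include <linux/err.h>
#include <linux/nodemask.h>
#include <linux/share_pool.h>

/*
 * Allocate share-pool memory that may come from NUMA node 0 or node 1
 * in a single call, instead of retrying mg_sp_alloc() node by node.
 */
static void *example_alloc_from_nodes(unsigned long size, int spg_id)
{
	nodemask_t nodes;
	void *addr;

	nodes_clear(nodes);
	node_set(0, nodes);	/* candidate node 0 */
	node_set(1, nodes);	/* candidate node 1 */

	/* The nodemask is passed by value, matching the new prototype. */
	addr = mg_sp_alloc_nodemask(size, SP_HUGEPAGE, spg_id, nodes);
	if (IS_ERR(addr))
		return NULL;	/* mg_sp_* returns ERR_PTR(-errno) on failure */

	return addr;
}

Compared with looping over mg_sp_alloc() once per node, the fallback between candidate nodes now happens inside a single allocation call.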
@@ -629,6 +629,9 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
const struct hstate *hugetlb_get_hstate(void);
struct page *hugetlb_alloc_hugepage(int nid, int flag);
struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
unsigned long address, int flag);
int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
pgprot_t prot, struct page *hpage);
int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
@@ -645,6 +648,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag)
return NULL;
}
static inline struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
unsigned long address, int flag)
{
return NULL;
}
static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
unsigned long addr, pgprot_t prot, struct page *hpage)
{
@@ -1091,6 +1100,12 @@ static inline struct page *hugetlb_alloc_hugepage(int nid, int flag)
return NULL;
}
static inline struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma,
unsigned long address, int flag)
{
return NULL;
}
static inline int hugetlb_insert_hugepage_pte(struct mm_struct *mm,
unsigned long addr, pgprot_t prot, struct page *hpage)
{
......
@@ -12,6 +12,8 @@
#include <linux/jump_label.h>
#include <linux/kabi.h>
#include <linux/share_pool_interface.h>
#define SP_HUGEPAGE (1 << 0)
#define SP_HUGEPAGE_ONLY (1 << 1)
#define SP_DVPP (1 << 2)
@@ -256,6 +258,8 @@ extern int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
extern void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id);
extern void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
nodemask_t nodemask);
extern int mg_sp_free(unsigned long addr, int id);
extern void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
@@ -286,7 +290,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
unsigned long address, pte_t *ptep, unsigned int flags);
extern bool sp_check_addr(unsigned long addr);
extern bool sp_check_mmap_addr(unsigned long addr, unsigned long flags);
extern int sp_node_id(struct vm_area_struct *vma);
static inline bool sp_is_enabled(void)
{
@@ -452,11 +455,6 @@ static inline bool is_vmalloc_sharepool(unsigned long vm_flags)
return NULL;
}
static inline int sp_node_id(struct vm_area_struct *vma)
{
return numa_node_id();
}
static inline bool sp_check_addr(unsigned long addr)
{
return false;
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_SHARE_POOL_INTERFACE_H
#define LINUX_SHARE_POOL_INTERFACE_H
#include <linux/mman.h>
#include <linux/mm_types.h>
#include <linux/numa.h>
#include <linux/kabi.h>
#ifdef CONFIG_ASCEND_SHARE_POOL
extern int sp_node_id(struct vm_area_struct *vma);
#else
static inline int sp_node_id(struct vm_area_struct *vma)
{
return numa_node_id();
}
#endif /* !CONFIG_ASCEND_SHARE_POOL */
#endif /* LINUX_SHARE_POOL_INTERFACE_H */
@@ -6312,7 +6312,7 @@ static struct page *hugetlb_alloc_hugepage_normal(struct hstate *h,
/*
* Allocate hugepage without reserve
*/
struct page *hugetlb_alloc_hugepage(int nid, int flag)
struct page *hugetlb_alloc_hugepage_nodemask(int nid, int flag, nodemask_t *nodemask)
{
struct hstate *h = &default_hstate;
gfp_t gfp_mask = htlb_alloc_mask(h);
@@ -6327,7 +6327,6 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
if (flag & ~HUGETLB_ALLOC_MASK)
return NULL;
gfp_mask |= __GFP_THISNODE;
if (enable_charge_mighp)
gfp_mask |= __GFP_ACCOUNT;
@@ -6337,12 +6336,22 @@ struct page *hugetlb_alloc_hugepage(int nid, int flag)
if (flag & HUGETLB_ALLOC_NORMAL)
page = hugetlb_alloc_hugepage_normal(h, gfp_mask, nid);
else if (flag & HUGETLB_ALLOC_BUDDY)
page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
page = alloc_migrate_huge_page(h, gfp_mask, nid, nodemask);
else
page = alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);
page = alloc_huge_page_nodemask(h, nid, nodemask, gfp_mask);
return page;
}
struct page *hugetlb_alloc_hugepage(int nid, int flag)
{
nodemask_t nodemask;
nodes_clear(nodemask);
node_set(nid, nodemask);
return hugetlb_alloc_hugepage_nodemask(nid, flag, &nodemask);
}
EXPORT_SYMBOL_GPL(hugetlb_alloc_hugepage);
static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
@@ -6364,6 +6373,19 @@ static pte_t *hugetlb_huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
return ptep;
}
struct page *hugetlb_alloc_hugepage_vma(struct vm_area_struct *vma, unsigned long address, int flag)
{
int nid;
struct hstate *h = hstate_vma(vma);
struct mempolicy *mpol;
nodemask_t *nodemask;
gfp_t gfp_mask;
gfp_mask = htlb_alloc_mask(h);
nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
return hugetlb_alloc_hugepage_nodemask(nid, flag, nodemask);
}
static int __hugetlb_insert_hugepage(struct mm_struct *mm, unsigned long addr,
pgprot_t prot, unsigned long pfn)
{
......
@@ -103,6 +103,8 @@
#include <linux/printk.h>
#include <linux/swapops.h>
#include <linux/share_pool_interface.h>
#include <asm/tlbflush.h>
#include <linux/uaccess.h>
@@ -2198,7 +2200,7 @@ int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags,
nid = interleave_nid(*mpol, vma, addr,
huge_page_shift(hstate_vma(vma)));
} else {
nid = policy_node(gfp_flags, *mpol, numa_node_id());
nid = policy_node(gfp_flags, *mpol, sp_node_id(vma));
if ((*mpol)->mode == MPOL_BIND || mode == MPOL_PREFERRED_MANY)
*nodemask = &(*mpol)->v.nodes;
}
......
@@ -700,7 +700,7 @@ struct sp_area {
struct mm_struct *mm; /* owner of k2u(task) */
unsigned long kva; /* shared kva */
pid_t applier; /* the original applier process */
int node_id; /* memory node */
int preferred_node_id; /* memory node */
int device_id;
};
static DEFINE_SPINLOCK(sp_area_lock);
@@ -1892,7 +1892,7 @@ static struct sp_area *sp_alloc_area(unsigned long size, unsigned long flags,
spa->mm = NULL;
spa->kva = 0; /* NULL pointer */
spa->applier = applier;
spa->node_id = node_id;
spa->preferred_node_id = node_id;
spa->device_id = device_id;
spa_inc_usage(spa);
@@ -2191,7 +2191,9 @@ static int sp_free_get_spa(struct sp_free_context *fc)
}
/**
* mg_sp_free() - Free the memory allocated by mg_sp_alloc().
* mg_sp_free() - Free the memory allocated by mg_sp_alloc() or
* mg_sp_alloc_nodemask().
*
* @addr: the starting VA of the memory.
* @id: Address space identifier, which is used to distinguish the addr.
*
@@ -2448,18 +2450,15 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
}
static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len,
unsigned long node)
nodemask_t *nodemask)
{
nodemask_t nmask;
nodes_clear(nmask);
node_set(node, nmask);
return __do_mbind(start, len, MPOL_BIND, MPOL_F_STATIC_NODES,
&nmask, MPOL_MF_STRICT, mm);
nodemask, MPOL_MF_STRICT, mm);
}
static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
struct sp_group_node *spg_node, struct sp_alloc_context *ac)
struct sp_group_node *spg_node, struct sp_alloc_context *ac,
nodemask_t *nodemask)
{
int ret;
@@ -2468,10 +2467,10 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
return ret;
if (!ac->have_mbind) {
ret = sp_mbind(mm, spa->va_start, spa->real_size, spa->node_id);
ret = sp_mbind(mm, spa->va_start, spa->real_size, nodemask);
if (ret < 0) {
pr_err("cannot bind the memory range to specified node:%d, err:%d\n",
spa->node_id, ret);
pr_err("cannot bind the memory range to node[%*pbl], err:%d\n",
nodemask_pr_args(nodemask), ret);
return ret;
}
ac->have_mbind = true;
@@ -2490,17 +2489,25 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
}
static int sp_alloc_mmap_populate(struct sp_area *spa,
struct sp_alloc_context *ac)
struct sp_alloc_context *ac,
nodemask_t *nodemask)
{
int ret = -EINVAL;
int mmap_ret = 0;
struct mm_struct *mm, *end_mm = NULL;
struct sp_group_node *spg_node;
nodemask_t __nodemask;
if (!nodemask) { /* mg_sp_alloc */
nodes_clear(__nodemask);
node_set(spa->preferred_node_id, __nodemask);
} else /* mg_sp_alloc_nodemask */
__nodemask = *nodemask;
/* create mapping for each process in the group */
list_for_each_entry(spg_node, &spa->spg->procs, proc_node) {
mm = spg_node->master->mm;
mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac);
mmap_ret = __sp_alloc_mmap_populate(mm, spa, spg_node, ac, &__nodemask);
if (mmap_ret) {
/*
@@ -2563,19 +2570,8 @@ static void sp_alloc_finish(int result, struct sp_area *spa,
sp_group_put(spg);
}
/**
* mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group.
* @size: the size of memory to allocate.
* @sp_flags: how to allocate the memory.
* @spg_id: the share group that the memory is allocated to.
*
* Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
*
* Return:
* * if succeed, return the starting address of the shared memory.
* * if fail, return the pointer of -errno.
*/
void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
nodemask_t *nodemask)
{
struct sp_area *spa = NULL;
int ret = 0;
@@ -2598,7 +2594,7 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
goto out;
}
ret = sp_alloc_mmap_populate(spa, &ac);
ret = sp_alloc_mmap_populate(spa, &ac, nodemask);
if (ret && ac.state == ALLOC_RETRY) {
/*
* The mempolicy for shared memory is located at backend file, which varies
@@ -2616,6 +2612,30 @@ void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
else
return (void *)(spa->va_start);
}
void *mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, int spg_id,
nodemask_t nodemask)
{
return __mg_sp_alloc_nodemask(size, sp_flags, spg_id, &nodemask);
}
EXPORT_SYMBOL_GPL(mg_sp_alloc_nodemask);
/**
* mg_sp_alloc() - Allocate shared memory for all the processes in a sp_group.
* @size: the size of memory to allocate.
* @sp_flags: how to allocate the memory.
* @spg_id: the share group that the memory is allocated to.
*
* Use pass through allocation if spg_id == SPG_ID_DEFAULT in multi-group mode.
*
* Return:
* * if succeed, return the starting address of the shared memory.
* * if fail, return the pointer of -errno.
*/
void *mg_sp_alloc(unsigned long size, unsigned long sp_flags, int spg_id)
{
return __mg_sp_alloc_nodemask(size, sp_flags, spg_id, NULL);
}
EXPORT_SYMBOL_GPL(mg_sp_alloc);
/**
@@ -3599,7 +3619,7 @@ int sp_node_id(struct vm_area_struct *vma)
if (vma && (vma->vm_flags & VM_SHARE_POOL) && vma->vm_private_data) {
spa = vma->vm_private_data;
node_id = spa->node_id;
node_id = spa->preferred_node_id;
}
return node_id;
@@ -4028,7 +4048,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
unsigned long haddr = address & huge_page_mask(h);
bool new_page = false;
int err;
int node_id;
struct sp_area *spa;
bool charge_hpage;
@@ -4037,7 +4056,6 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
pr_err("share pool: vma is invalid, not from sp mmap\n");
return ret;
}
node_id = spa->node_id;
retry:
page = find_lock_page(mapping, idx);
@@ -4049,7 +4067,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
charge_hpage = false;
page = alloc_huge_page(vma, haddr, 0);
if (IS_ERR(page)) {
page = hugetlb_alloc_hugepage(node_id,
page = hugetlb_alloc_hugepage_vma(vma, haddr,
HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM);
if (!page)
page = ERR_PTR(-ENOMEM);
......