Commit 94e5cb7e authored by Feng Tang, committed by Ma Wupeng

mm/hugetlb: add dedicated func to get 'allowed' nodemask for current process

mainline inclusion
from mainline-v6.1-rc1
commit d2226ebd
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6I1Z2
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2226ebd5484afcf9f9b71b394ec1567a7730eb1

--------------------------------

Muchun Song found [1] that after the MPOL_PREFERRED_MANY policy was introduced in
commit b27abacc ("mm/mempolicy: add MPOL_PREFERRED_MANY for multiple
preferred nodes"), policy_nodemask_current()'s semantics for this new
policy changed: it returns 'preferred' nodes instead of 'allowed' nodes.

With the changed semantics of policy_nodemask_current(), a task with
MPOL_PREFERRED_MANY policy could fail to get its reservation even though
it can fall back to other nodes (either defined by cpusets or all online
nodes) for that reservation, failing mmap calls unnecessarily early.
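
To make the failure mode concrete, here is a minimal userspace sketch; it is
not part of the patch and only illustrates the report. It assumes a 2MB
default hugepage size, that only node 0 is in the preferred set while the
hugetlb pool sits on other allowed nodes, and it defines MPOL_PREFERRED_MANY
(uapi value 5) as a fallback for older headers; build with -lnuma.

/*
 * Hypothetical repro sketch (not from the patch).  Prefer node 0 via
 * MPOL_PREFERRED_MANY, then ask for a hugetlb mapping.  On an affected
 * kernel the reservation only counted the 'preferred' nodes, so mmap()
 * could fail with ENOMEM even though free hugepages on other allowed
 * nodes could have served the request.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <numaif.h>		/* set_mempolicy(), link with -lnuma */

#ifndef MPOL_PREFERRED_MANY
#define MPOL_PREFERRED_MANY 5	/* uapi value since v5.15 */
#endif

int main(void)
{
	unsigned long nodemask = 1UL << 0;	/* preferred set = { node 0 } */
	size_t len = 2UL << 20;			/* one hugepage, assuming 2MB default size */

	if (set_mempolicy(MPOL_PREFERRED_MANY, &nodemask, 8 * sizeof(nodemask)))
		perror("set_mempolicy");

	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED)
		perror("mmap(MAP_HUGETLB)");	/* ENOMEM here = reservation denied too early */
	else
		printf("hugetlb mapping reserved at %p\n", p);
	return 0;
}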

The fix is to not consider MPOL_PREFERRED_MANY for reservations at all,
because it, unlike MPOL_BIND, does not pose any actual hard constraint.

Michal suggested that policy_nodemask_current() is only used by hugetlb and
could be moved into hugetlb code with a more explicit name, to enforce the
'allowed' semantics for which only the MPOL_BIND policy matters.

apply_policy_zone() is made extern so it can be called from hugetlb code,
and its return type is changed to bool.

[1]. https://lore.kernel.org/lkml/20220801084207.39086-1-songmuchun@bytedance.com/t/

Link: https://lkml.kernel.org/r/20220805005903.95563-1-feng.tang@intel.com
Fixes: b27abacc ("mm/mempolicy: add MPOL_PREFERRED_MANY for multiple preferred nodes")
Signed-off-by: Feng Tang <feng.tang@intel.com>
Reported-by: Muchun Song <songmuchun@bytedance.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Ben Widawsky <bwidawsk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Conflicts:
	include/linux/mempolicy.h
	mm/hugetlb.c
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Parent 1e3451e0
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -155,13 +155,6 @@ extern bool mempolicy_nodemask_intersects(struct task_struct *tsk,
 				const nodemask_t *mask);
 extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
 
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
-	struct mempolicy *mpol = get_task_policy(current);
-
-	return policy_nodemask(gfp, mpol);
-}
-
 extern unsigned int mempolicy_slab_node(void);
 
 extern enum zone_type policy_zone;
@@ -210,6 +203,9 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 {
 	return (pol->mode == MPOL_PREFERRED_MANY);
 }
+
+extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
+
 #else
 
 struct mempolicy {};
@@ -321,11 +317,6 @@ static inline void mpol_put_task_policy(struct task_struct *task)
 {
 }
 
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
-	return NULL;
-}
-
 static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 {
 	return false;
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3853,19 +3853,35 @@ static int __init default_hugepagesz_setup(char *s)
 }
 __setup("default_hugepagesz=", default_hugepagesz_setup);
 
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+	struct mempolicy *mpol = get_task_policy(current);
+
+	/*
+	 * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+	 * (from policy_nodemask) specifically for hugetlb case
+	 */
+	if (mpol->mode == MPOL_BIND &&
+	    (apply_policy_zone(mpol, gfp_zone(gfp)) &&
+	     cpuset_nodemask_valid_mems_allowed(&mpol->v.nodes)))
+		return &mpol->v.nodes;
+#endif
+	return NULL;
+}
+
 static unsigned int allowed_mems_nr(struct hstate *h)
 {
 	int node;
 	unsigned int nr = 0;
-	nodemask_t *mpol_allowed;
+	nodemask_t *mbind_nodemask;
 	unsigned int *array = h->free_huge_pages_node;
 	gfp_t gfp_mask = htlb_alloc_mask(h);
 
-	mpol_allowed = policy_nodemask_current(gfp_mask);
-
+	mbind_nodemask = policy_mbind_nodemask(gfp_mask);
 	for_each_node_mask(node, cpuset_current_mems_allowed) {
-		if (!mpol_allowed ||
-		    (mpol_allowed && node_isset(node, *mpol_allowed)))
+		if (!mbind_nodemask ||
+		    (mbind_nodemask && node_isset(node, *mbind_nodemask)))
 			nr += array[node];
 	}
 
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1971,7 +1971,7 @@ bool vma_policy_mof(struct vm_area_struct *vma)
 	return pol->flags & MPOL_F_MOF;
 }
 
-static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
+bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
 {
 	enum zone_type dynamic_policy_zone = policy_zone;
 