提交 467b171a 编写于 作者: A Aneesh Kumar K.V 提交者: Andrew Morton

mm/demotion: update node_is_toptier to work with memory tiers

With memory tier support we can have memory only NUMA nodes in the top
tier from which we want to avoid promotion tracking NUMA faults.  Update
node_is_toptier to work with memory tiers.  All NUMA nodes are by default
top tier nodes.  With lower (slower) memory tiers added, we consider all
memory tiers above a memory tier having CPU NUMA nodes as a top memory
tier.

[sj@kernel.org: include missed header file, memory-tiers.h]
  Link: https://lkml.kernel.org/r/20220820190720.248704-1-sj@kernel.org
[akpm@linux-foundation.org: mm/memory.c needs linux/memory-tiers.h]
[aneesh.kumar@linux.ibm.com: make toptier_distance inclusive upper bound of toptiers]
  Link: https://lkml.kernel.org/r/20220830081457.118960-1-aneesh.kumar@linux.ibm.com
Link: https://lkml.kernel.org/r/20220818131042.113280-10-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Wei Xu <weixugc@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hesham Almatary <hesham.almatary@huawei.com>
Cc: Jagdish Gediya <jvgediya.oss@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
上级 32008027
...@@ -40,6 +40,7 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype); ...@@ -40,6 +40,7 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype);
#ifdef CONFIG_MIGRATION #ifdef CONFIG_MIGRATION
int next_demotion_node(int node); int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
bool node_is_toptier(int node);
#else #else
static inline int next_demotion_node(int node) static inline int next_demotion_node(int node)
{ {
...@@ -50,6 +51,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target ...@@ -50,6 +51,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
{ {
*targets = NODE_MASK_NONE; *targets = NODE_MASK_NONE;
} }
/*
 * Fallback used on the #else side of CONFIG_MIGRATION: no tier lookup is
 * performed and every node is unconditionally reported as top tier.
 *
 * @node: NUMA node id (ignored by this stub).
 *
 * Returns: always true.
 */
static inline bool node_is_toptier(int node)
{
	return true;
}
#endif #endif
#else #else
...@@ -87,5 +93,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target ...@@ -87,5 +93,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
{ {
*targets = NODE_MASK_NONE; *targets = NODE_MASK_NONE;
} }
/*
 * Fallback for builds without CONFIG_NUMA: there is no tier information to
 * consult, so every node is unconditionally reported as top tier.
 *
 * @node: NUMA node id (ignored by this stub).
 *
 * Returns: always true.
 */
static inline bool node_is_toptier(int node)
{
	return true;
}
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */ #endif /* _LINUX_MEMORY_TIERS_H */
...@@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, ...@@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
#define to_node(device) container_of(device, struct node, dev) #define to_node(device) container_of(device, struct node, dev)
/*
 * Report whether @node counts as a top tier node.
 *
 * The answer is whatever node_state() yields for the N_CPU state of @node.
 * NOTE(review): presumably N_CPU marks nodes that have CPUs - confirm
 * against the nodemask definitions.
 */
static inline bool node_is_toptier(int node)
{
	const bool in_cpu_state = node_state(node, N_CPU);

	return in_cpu_state;
}
#endif /* _LINUX_NODE_H_ */ #endif /* _LINUX_NODE_H_ */
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include <linux/cpuidle.h> #include <linux/cpuidle.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/memory-tiers.h>
#include <linux/mempolicy.h> #include <linux/mempolicy.h>
#include <linux/mutex_api.h> #include <linux/mutex_api.h>
#include <linux/profile.h> #include <linux/profile.h>
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/numa.h> #include <linux/numa.h>
#include <linux/page_owner.h> #include <linux/page_owner.h>
#include <linux/sched/sysctl.h> #include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
#include <asm/tlb.h> #include <asm/tlb.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
......
...@@ -37,6 +37,7 @@ static LIST_HEAD(memory_tiers); ...@@ -37,6 +37,7 @@ static LIST_HEAD(memory_tiers);
static struct node_memory_type_map node_memory_types[MAX_NUMNODES]; static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
static struct memory_dev_type *default_dram_type; static struct memory_dev_type *default_dram_type;
#ifdef CONFIG_MIGRATION #ifdef CONFIG_MIGRATION
static int top_tier_adistance;
/* /*
* node_demotion[] examples: * node_demotion[] examples:
* *
...@@ -162,6 +163,31 @@ static struct memory_tier *__node_get_memory_tier(int node) ...@@ -162,6 +163,31 @@ static struct memory_tier *__node_get_memory_tier(int node)
} }
#ifdef CONFIG_MIGRATION #ifdef CONFIG_MIGRATION
/*
 * node_is_toptier - tell whether a NUMA node belongs to a top memory tier
 * @node: NUMA node id to look up.
 *
 * Returns false when NODE_DATA() has no pgdat for @node.  Otherwise the
 * node's memory tier pointer is read under the RCU read lock:
 *   - a node without an assigned memory tier is reported as top tier;
 *   - a node with a tier is top tier when the tier's adistance_start does
 *     not exceed top_tier_adistance (the bound is inclusive).
 */
bool node_is_toptier(int node)
{
	pg_data_t *node_data = NODE_DATA(node);
	struct memory_tier *tier;
	bool is_top;

	/* Bail out before entering the RCU read-side section. */
	if (!node_data)
		return false;

	rcu_read_lock();
	tier = rcu_dereference(node_data->memtier);
	/* Short-circuit keeps the NULL tier from being dereferenced. */
	is_top = !tier || tier->adistance_start <= top_tier_adistance;
	rcu_read_unlock();

	return is_top;
}
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{ {
struct memory_tier *memtier; struct memory_tier *memtier;
...@@ -319,6 +345,27 @@ static void establish_demotion_targets(void) ...@@ -319,6 +345,27 @@ static void establish_demotion_targets(void)
} }
} while (1); } while (1);
} }
/*
* Promotion is allowed from a memory tier to higher
* memory tier only if the memory tier doesn't include
* compute. We want to skip promotion from a memory tier,
* if any node that is part of the memory tier has CPUs.
* Once we detect such a memory tier, we consider that tier
* as top tier from which promotion is not allowed.
*/
list_for_each_entry_reverse(memtier, &memory_tiers, list) {
tier_nodes = get_memtier_nodemask(memtier);
nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
if (!nodes_empty(tier_nodes)) {
/*
* abstract distance below the max value of this memtier
* is considered toptier.
*/
top_tier_adistance = memtier->adistance_start +
MEMTIER_CHUNK_SIZE - 1;
break;
}
}
/* /*
* Now build the lower_tier mask for each node collecting node mask from * Now build the lower_tier mask for each node collecting node mask from
* all memory tier below it. This allows us to fallback demotion page * all memory tier below it. This allows us to fallback demotion page
......
...@@ -66,6 +66,7 @@ ...@@ -66,6 +66,7 @@
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/migrate.h> #include <linux/migrate.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/userfaultfd_k.h> #include <linux/userfaultfd_k.h>
#include <linux/dax.h> #include <linux/dax.h>
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/sched/sysctl.h> #include <linux/sched/sysctl.h>
#include <linux/memory-tiers.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/pgtable.h> #include <linux/pgtable.h>
#include <linux/sched/sysctl.h> #include <linux/sched/sysctl.h>
#include <linux/userfaultfd_k.h> #include <linux/userfaultfd_k.h>
#include <linux/memory-tiers.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册