From 4886e9054208b2251182d2fc697489c962bc776a Mon Sep 17 00:00:00 2001
From: Anshuman Khandual
Date: Mon, 13 May 2019 22:39:34 +0800
Subject: [PATCH] mm: Define coherent device memory (CDM) node

euler inclusion
category: feature
bugzilla: 11082
CVE: NA

-------------------

There are certain devices like specialized accelerators, GPU cards,
network cards, FPGA cards etc. which might contain onboard memory which
is coherent along with the existing system RAM while being accessed
either from the CPU or from the device. They share some similar
properties with that of normal system RAM but at the same time can also
be different with respect to system RAM.

User applications might be interested in using this kind of coherent
device memory explicitly or implicitly alongside the system RAM,
utilizing all possible core memory functions like anon mapping (LRU),
file mapping (LRU), page cache (LRU), driver managed (non LRU), HW
poisoning, NUMA migrations etc. To achieve this kind of tight
integration with the core memory subsystem, the device onboard coherent
memory must be represented as a memory-only NUMA node. At the same time
the arch must export some kind of function to identify this node as
coherent device memory rather than any other regular CPU-less
memory-only NUMA node.

After achieving the integration with the core memory subsystem,
coherent device memory might still need some special consideration
inside the kernel. There can be a variety of coherent memory nodes with
different expectations from the core kernel memory. But right now only
one kind of special treatment is considered, which requires certain
isolation.

Now consider the case of a coherent device memory node type which
requires isolation. This kind of coherent memory is onboard an external
device attached to the system through a link where there is always a
chance of a link failure taking down the entire memory node with it.
Moreover, the memory might also have a higher chance of ECC failure as
compared to the system RAM.
Hence allocation into this kind of coherent memory node should be
regulated. Kernel allocations must not come here. Normal user space
allocations too should not come here implicitly (without the user
application knowing about it). This summarizes the isolation
requirement of a certain kind of coherent device memory node as an
example. There can be different kinds of isolation requirements also.

Some coherent memory devices might not require isolation altogether
after all. Then there might be other coherent memory devices which
might require some other special treatment after being part of the core
memory representation. For now, we will only look into isolation-seeking
coherent device memory nodes, not the other ones.

To implement the integration as well as isolation, the coherent memory
node must be present in N_MEMORY and a new N_COHERENT_DEVICE node mask
inside the node_states[] array. During memory hotplug operations, the
new nodemask N_COHERENT_DEVICE is updated along with N_MEMORY for these
coherent device memory nodes. This also creates the following new
sysfs-based interface to list all the coherent memory nodes of the
system.

	/sys/devices/system/node/is_cdm_node

Architectures must export the function arch_check_node_cdm(), which
identifies any coherent device memory node, in case they enable
CONFIG_COHERENT_DEVICE.

Signed-off-by: Anshuman Khandual
Signed-off-by: zhong jiang
[Backported to 4.19
-remove set or clear node state for memory_hotplug
-separate CONFIG_COHERENT and CPUSET]
Signed-off-by: Lijun Fang
Reviewed-by: zhong jiang
Signed-off-by: Yang Yingliang
---
 arch/arm64/mm/numa.c     | 11 ++++++
 drivers/base/node.c      |  6 ++++
 include/linux/nodemask.h | 75 +++++++++++++++++++++++++++++++++++++++-
 mm/Kconfig               |  9 +++++
 mm/page_alloc.c          |  8 +++--
 5 files changed, 106 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 29ba94efcd92..2156f71a4745 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -37,6 +37,17 @@ static int numa_distance_cnt;
 static u8 *numa_distance;
 bool numa_off;
 
+#ifdef CONFIG_COHERENT_DEVICE
+/*
+ * Arch hook identifying coherent device memory (CDM) nodes.  This stub
+ * always returns 0, i.e. no node is reported as CDM.
+ */
+int arch_check_node_cdm(int nid)
+{
+	return 0;
+}
+#endif
+
 static __init int numa_parse_early_param(char *opt)
 {
 	if (!opt)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c3968e2d0a98..e6b0060cc273 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -634,6 +634,9 @@ static struct node_attr node_state_attr[] = {
 #endif
 	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
 	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
+#ifdef CONFIG_COHERENT_DEVICE
+	[N_COHERENT_DEVICE] = _NODE_ATTR(is_cdm_node, N_COHERENT_DEVICE),
+#endif
 };
 
 static struct attribute *node_state_attrs[] = {
@@ -645,6 +648,9 @@ static struct attribute *node_state_attrs[] = {
 #endif
 	&node_state_attr[N_MEMORY].attr.attr,
 	&node_state_attr[N_CPU].attr.attr,
+#ifdef CONFIG_COHERENT_DEVICE
+	&node_state_attr[N_COHERENT_DEVICE].attr.attr,
+#endif
 	NULL
 };
 
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 5a30ad594ccc..41fb047bdba8 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -397,8 +397,11 @@ enum node_states {
 #else
 	N_HIGH_MEMORY = N_NORMAL_MEMORY,
 #endif
-	N_MEMORY,		/* The node has memory(regular, high, movable) */
+	N_MEMORY,		/* The node has memory(regular, high, movable, cdm) */
 	N_CPU,		/* The node has one or more cpus */
+#ifdef CONFIG_COHERENT_DEVICE
+	N_COHERENT_DEVICE,	/* The node has CDM memory */
+#endif
 	NR_NODE_STATES
 };
 
@@ -502,6 +505,76 @@ static inline int node_random(const nodemask_t *mask)
 }
 #endif
 
+#ifdef CONFIG_COHERENT_DEVICE
+/* Arch hook: returns non-zero when @nid is a coherent device memory node. */
+extern int arch_check_node_cdm(int nid);
+
+/* Nodes that have memory, minus the coherent device memory (CDM) nodes. */
+static inline nodemask_t system_mem_nodemask(void)
+{
+	nodemask_t system_mem;
+
+	/* nodes_andnot() fully initialises system_mem. */
+	nodes_andnot(system_mem, node_states[N_MEMORY],
+		     node_states[N_COHERENT_DEVICE]);
+	return system_mem;
+}
+
+/* True if @node is marked in the N_COHERENT_DEVICE node state. */
+static inline bool is_cdm_node(int node)
+{
+	return node_isset(node, node_states[N_COHERENT_DEVICE]);
+}
+
+/* True if @mask contains at least one coherent device memory node. */
+static inline bool nodemask_has_cdm(nodemask_t mask)
+{
+	return nodes_intersects(mask, node_states[N_COHERENT_DEVICE]);
+}
+
+/* Mark @node as N_COHERENT_DEVICE if the arch identifies it as CDM. */
+static inline void node_set_state_cdm(int node)
+{
+	if (arch_check_node_cdm(node))
+		node_set_state(node, N_COHERENT_DEVICE);
+}
+
+/* Clear N_COHERENT_DEVICE for @node if the arch identifies it as CDM. */
+static inline void node_clear_state_cdm(int node)
+{
+	if (arch_check_node_cdm(node))
+		node_clear_state(node, N_COHERENT_DEVICE);
+}
+
+#else
+
+/* Without CONFIG_COHERENT_DEVICE there are no CDM nodes: no-op stubs. */
+static inline int arch_check_node_cdm(int nid) { return 0; }
+
+static inline nodemask_t system_mem_nodemask(void)
+{
+	return node_states[N_MEMORY];
+}
+
+static inline bool is_cdm_node(int node)
+{
+	return false;
+}
+
+static inline bool nodemask_has_cdm(nodemask_t mask)
+{
+	return false;
+}
+
+static inline void node_set_state_cdm(int node)
+{
+}
+
+static inline void node_clear_state_cdm(int node)
+{
+}
+#endif /* CONFIG_COHERENT_DEVICE */
+
 #define node_online_map 	node_states[N_ONLINE]
 #define node_possible_map 	node_states[N_POSSIBLE]
 
diff --git a/mm/Kconfig b/mm/Kconfig
index aba6e953d397..114d78e74ed3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -142,6 +142,15 @@ config HAVE_GENERIC_GUP
 config ARCH_DISCARD_MEMBLOCK
 	bool
 
+config COHERENT_DEVICE
+	bool "coherent device memory"
+	default n
+	depends on CPUSETS && ARM64
+	help
+	  Enable coherent device memory (CDM) support.  CDM nodes are
+	  memory-only NUMA nodes tracked in the N_COHERENT_DEVICE node
+	  state and listed in /sys/devices/system/node/is_cdm_node.
+
 config NO_BOOTMEM
 	bool
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7cae100adb5f..0b27d786982d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6600,8 +6600,10 @@ static unsigned long __init early_calculate_totalpages(void)
 		unsigned long pages = end_pfn - start_pfn;
 
 		totalpages += pages;
-		if (pages)
+		if (pages) {
+			node_set_state_cdm(nid);
 			node_set_state(nid, N_MEMORY);
+		}
 	}
 	return totalpages;
 }
@@ -6922,8 +6924,10 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 				find_min_pfn_for_node(nid), NULL);
 
 		/* Any memory on that node */
-		if (pgdat->node_present_pages)
+		if (pgdat->node_present_pages) {
+			node_set_state_cdm(nid);
 			node_set_state(nid, N_MEMORY);
+		}
 		check_for_memory(pgdat, nid);
 	}
 }
-- 
GitLab