diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2df4b310eb23acce9e39f35c21015a30fd7cf88c..44f1bf1a5b082ba18aa1ccb9238fe33afb5bb807 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2000,6 +2000,15 @@ config ASCEND_DVPP_MMAP
 	  special memory for DvPP processor, the new flag is only valid for
 	  Ascend platform.
 
+config ASCEND_CLEAN_CDM
+	bool "Move the management structures for HBM to DDR"
+	default n
+	depends on COHERENT_DEVICE
+	help
+	  CDM nodes are more prone to ECC errors, which may crash the kernel
+	  if the essential management structures are corrupted. So move the
+	  management structures for HBM nodes to the DDR nodes of the same
+	  partition to reduce the probability of kernel crashes.
 endif
 
 endmenu
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 9a2e29a3a597a581aef085e9290364c0ef8c49b6..a167b74272b2449d92b64d181ee098e7909488de 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -33,6 +33,70 @@ inline int arch_check_node_cdm(int nid)
 	return node_isset(nid, cdmmask);
 }
 
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+/**
+ * cdm_node_to_ddr_node - Convert a CDM node to the DDR node of the
+ * same partition.
+ * @nid: input node ID
+ *
+ * Here is a typical memory topology in use:
+ * there are some DDR and HBM nodes in each partition. The DDR nodes come
+ * first, then all the HBM nodes of the first partition, then the HBM nodes
+ * of the second partition, and so on.
+ *
+ * -------------------------
+ * |    P0     |    P1     |
+ * ----------- | -----------
+ * |node0 DDR| | |node1 DDR|
+ * |---------- | ----------|
+ * |node2 HBM| | |node4 HBM|
+ * |---------- | ----------|
+ * |node3 HBM| | |node5 HBM|
+ * |---------- | ----------|
+ * |   ...   | | |   ...   |
+ * |---------- | ----------|
+ *
+ * Return:
+ * This function returns a DDR node in the same partition as the input node
+ * if the input node is an HBM node.
+ * The input nid is returned if it is a DDR node or if the memory topology
+ * of the system doesn't match the above model.
+ */
+int __init cdm_node_to_ddr_node(int nid)
+{
+	nodemask_t ddr_mask;
+	int nr_ddr, cdm_per_part, fake_nid;
+	int nr_cdm = nodes_weight(cdmmask);
+	/*
+	 * Number of HBM nodes per partition whose management structures are
+	 * moved. The number 2 is a magic value here and should be made
+	 * configurable for future extension.
+	 */
+	int hbm_per_part = 2;
+
+	if (!nr_cdm || nodes_empty(numa_nodes_parsed))
+		return nid;
+
+	if (!node_isset(nid, cdmmask))
+		return nid;
+
+	nodes_xor(ddr_mask, cdmmask, numa_nodes_parsed);
+	nr_ddr = nodes_weight(ddr_mask);
+	cdm_per_part = nr_cdm / nr_ddr;
+
+	if (cdm_per_part == 0 || nid < nr_ddr ||
+	    nid >= (hbm_per_part + 1) * nr_ddr)
+		/* Our assumption is broken, just return the original nid. */
+		return nid;
+
+	fake_nid = (nid - nr_ddr) / hbm_per_part;
+	fake_nid = !node_isset(fake_nid, cdmmask) ? fake_nid : nid;
+
+	pr_info("nid: %d, fake_nid: %d\n", nid, fake_nid);
+
+	return fake_nid;
+}
+#endif
+
 static int __init cdm_nodes_setup(char *s)
 {
 	int nid;
@@ -252,11 +316,12 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 	u64 nd_pa;
 	void *nd;
 	int tnid;
+	int fake_nid = cdm_node_to_ddr_node(nid);
 
 	if (start_pfn >= end_pfn)
 		pr_info("Initmem setup node %d []\n", nid);
 
-	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, fake_nid);
 	if (!nd_pa)
 		panic("Cannot allocate %zu bytes for node %d data\n",
 		      nd_size, nid);
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 90ea204cc0590eecad289d2f4b753f8eef041d26..2636f08d685c7dfe15ebdb089f55eef67c52cd3f 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -509,6 +509,12 @@ static inline int node_random(const nodemask_t *mask)
 #ifdef CONFIG_COHERENT_DEVICE
 extern int arch_check_node_cdm(int nid);
 
+#ifdef CONFIG_ASCEND_CLEAN_CDM
+extern int cdm_node_to_ddr_node(int nid);
+#else
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
+#endif
+
 static inline nodemask_t system_mem_nodemask(void)
 {
 	nodemask_t system_mem;
@@ -552,6 +558,7 @@ static inline void node_clear_state_cdm(int node)
 
 #else
 static inline int arch_check_node_cdm(int nid) { return 0; }
+static inline int cdm_node_to_ddr_node(int nid) { return nid; }
 
 static inline nodemask_t system_mem_nodemask(void)
 {
diff --git a/mm/sparse.c b/mm/sparse.c
index 0e645ff9cf0fe0e80a7d264492ddebf7f147d04f..5a48ea3e9968fe099aaee7f9a229f87828447e0c 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -528,14 +528,15 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
 	struct mem_section_usage *usage;
 	unsigned long pnum;
 	struct page *map;
+	int fake_nid = cdm_node_to_ddr_node(nid);
 
-	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
+	usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(fake_nid),
 			mem_section_usage_size() * map_count);
 	if (!usage) {
 		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
 		goto failed;
 	}
-	sparse_buffer_init(map_count * section_map_size(), nid);
+	sparse_buffer_init(map_count * section_map_size(), fake_nid);
 	for_each_present_section_nr(pnum_begin, pnum) {
 		unsigned long pfn = section_nr_to_pfn(pnum);
 
@@ -543,7 +544,7 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
 			break;
 
 		map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
-				nid, NULL);
+				fake_nid, NULL);
 		if (!map) {
 			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
 				__func__, nid);
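
Note: below is a minimal user-space sketch of the nid-to-fake_nid arithmetic introduced by cdm_node_to_ddr_node() above, handy for sanity-checking the mapping against the example topology in the kernel-doc comment (2 DDR nodes, 2 HBM nodes per partition). The helper name map_hbm_to_ddr() and the hard-coded parameters are illustrative only; the kernel version derives nr_ddr from cdmmask and numa_nodes_parsed at boot and additionally verifies the computed node against cdmmask.

#include <stdio.h>

/*
 * Illustrative re-implementation of the nid -> fake_nid arithmetic from
 * cdm_node_to_ddr_node(). map_hbm_to_ddr() is a hypothetical name; the
 * kernel takes nr_ddr and the CDM membership from nodemasks rather than
 * as parameters.
 */
static int map_hbm_to_ddr(int nid, int nr_ddr, int hbm_per_part)
{
	/* DDR nodes and nodes outside the assumed layout map to themselves. */
	if (nid < nr_ddr || nid >= (hbm_per_part + 1) * nr_ddr)
		return nid;

	/* HBM nodes of a partition collapse onto that partition's DDR node. */
	return (nid - nr_ddr) / hbm_per_part;
}

int main(void)
{
	int nid;

	/* Topology from the kernel-doc diagram: 2 partitions, 2 HBM nodes each. */
	for (nid = 0; nid < 6; nid++)
		printf("nid %d -> ddr nid %d\n", nid, map_hbm_to_ddr(nid, 2, 2));

	return 0;
}

For the diagrammed topology this prints nids 0 and 1 unchanged (DDR nodes), nids 2 and 3 mapped to node 0 (the P0 DDR node), and nids 4 and 5 mapped to node 1 (the P1 DDR node), matching what the patch allocates: per-node management structures for HBM nodes land on the DDR node of the same partition.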