diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 446a5b9a1d5f3515777f60d9172e6df468f73d07..12bd1fdd206b041831275edb7a81120214fc5a67 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -187,7 +187,6 @@ struct bootnode; extern int acpi_numa; extern int x86_acpi_numa_init(void); extern int acpi_scan_nodes(void); -#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) #ifdef CONFIG_NUMA_EMU extern void acpi_fake_nodes(const struct bootnode *fake_nodes, diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 26fc6e2dd0fb4642474568111e0859bb2b829c00..3d4dab43c99469b6d917f0ba4f4bebd7cdc25bb6 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -5,6 +5,9 @@ #include #ifdef CONFIG_NUMA + +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) + /* * __apicid_to_node[] stores the raw mapping between physical apicid and * node and is used to initialize cpu_to_node mapping. diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index d3a45147d09ba124e0bc56c6ebde6dca81900cce..3306a2b99ece06855368ce9c905323ff37674405 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -32,6 +32,8 @@ extern nodemask_t mem_nodes_parsed __initdata; extern struct bootnode numa_nodes[MAX_NUMNODES] __initdata; extern int __cpuinit numa_cpu_node(int cpu); +extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); +extern int __init numa_register_memblks(void); #ifdef CONFIG_NUMA_EMU #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 82ee3083b0949889df4b2a630b43b6c61dc216e1..a1d702d2584c08712a6f1b14247b553fd4e0d07d 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -33,6 +33,10 @@ struct memnode memnode; static unsigned long __initdata nodemap_addr; static unsigned long __initdata nodemap_size; +static int num_node_memblks __initdata; +static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; +static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; + struct bootnode numa_nodes[MAX_NUMNODES] __initdata; /* @@ -184,6 +188,43 @@ static void * __init early_node_mem(int nodeid, unsigned long start, return NULL; } +static __init int conflicting_memblks(unsigned long start, unsigned long end) +{ + int i; + for (i = 0; i < num_node_memblks; i++) { + struct bootnode *nd = &node_memblk_range[i]; + if (nd->start == nd->end) + continue; + if (nd->end > start && nd->start < end) + return memblk_nodeid[i]; + if (nd->end == end && nd->start == start) + return memblk_nodeid[i]; + } + return -1; +} + +int __init numa_add_memblk(int nid, u64 start, u64 end) +{ + int i; + + i = conflicting_memblks(start, end); + if (i == nid) { + printk(KERN_WARNING "NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", + nid, start, end, numa_nodes[i].start, numa_nodes[i].end); + } else if (i >= 0) { + printk(KERN_ERR "NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", + nid, start, end, i, + numa_nodes[i].start, numa_nodes[i].end); + return -EINVAL; + } + + node_memblk_range[num_node_memblks].start = start; + node_memblk_range[num_node_memblks].end = end; + memblk_nodeid[num_node_memblks] = nid; + num_node_memblks++; + return 0; +} + static __init void cutoff_node(int i, unsigned long start, unsigned long end) { struct bootnode *nd = &numa_nodes[i]; @@ -246,6 +287,71 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) node_set_online(nodeid); } +int __init numa_register_memblks(void) +{ + int i; + + /* + * Join together blocks on the same node, holes between + * which don't overlap with memory on other nodes. + */ + for (i = 0; i < num_node_memblks; ++i) { + int j, k; + + for (j = i + 1; j < num_node_memblks; ++j) { + unsigned long start, end; + + if (memblk_nodeid[i] != memblk_nodeid[j]) + continue; + start = min(node_memblk_range[i].end, + node_memblk_range[j].end); + end = max(node_memblk_range[i].start, + node_memblk_range[j].start); + for (k = 0; k < num_node_memblks; ++k) { + if (memblk_nodeid[i] == memblk_nodeid[k]) + continue; + if (start < node_memblk_range[k].end && + end > node_memblk_range[k].start) + break; + } + if (k < num_node_memblks) + continue; + start = min(node_memblk_range[i].start, + node_memblk_range[j].start); + end = max(node_memblk_range[i].end, + node_memblk_range[j].end); + printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", + memblk_nodeid[i], + node_memblk_range[i].start, + node_memblk_range[i].end, + node_memblk_range[j].start, + node_memblk_range[j].end, + start, end); + node_memblk_range[i].start = start; + node_memblk_range[i].end = end; + k = --num_node_memblks - j; + memmove(memblk_nodeid + j, memblk_nodeid + j+1, + k * sizeof(*memblk_nodeid)); + memmove(node_memblk_range + j, node_memblk_range + j+1, + k * sizeof(*node_memblk_range)); + --j; + } + } + + memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, + memblk_nodeid); + if (memnode_shift < 0) { + printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); + return -EINVAL; + } + + for (i = 0; i < num_node_memblks; i++) + memblock_x86_register_active_regions(memblk_nodeid[i], + node_memblk_range[i].start >> PAGE_SHIFT, + node_memblk_range[i].end >> PAGE_SHIFT); + return 0; +} + #ifdef CONFIG_NUMA_EMU /* Numa emulation */ static struct bootnode nodes[MAX_NUMNODES] __initdata; @@ -653,6 +759,9 @@ void __init initmem_init(void) nodes_clear(mem_nodes_parsed); nodes_clear(node_possible_map); nodes_clear(node_online_map); + num_node_memblks = 0; + memset(node_memblk_range, 0, sizeof(node_memblk_range)); + memset(memblk_nodeid, 0, sizeof(memblk_nodeid)); memset(numa_nodes, 0, sizeof(numa_nodes)); if (numa_init[i]() < 0) diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 82b1087963a26d5fd98a28dce529cc9c5dc9372f..341b37193c76bfa1557d399f5789f6a3c6523e71 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -30,30 +30,11 @@ static struct acpi_table_slit *acpi_slit; static struct bootnode nodes_add[MAX_NUMNODES]; -static int num_node_memblks __initdata; -static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; -static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; - static __init int setup_node(int pxm) { return acpi_map_pxm_to_node(pxm); } -static __init int conflicting_memblks(unsigned long start, unsigned long end) -{ - int i; - for (i = 0; i < num_node_memblks; i++) { - struct bootnode *nd = &node_memblk_range[i]; - if (nd->start == nd->end) - continue; - if (nd->end > start && nd->start < end) - return memblk_nodeid[i]; - if (nd->end == end && nd->start == start) - return memblk_nodeid[i]; - } - return -1; -} - static __init void bad_srat(void) { int i; @@ -233,7 +214,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) struct bootnode *nd; unsigned long start, end; int node, pxm; - int i; if (srat_disabled()) return; @@ -255,16 +235,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) bad_srat(); return; } - i = conflicting_memblks(start, end); - if (i == node) { - printk(KERN_WARNING - "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", - pxm, start, end, numa_nodes[i].start, numa_nodes[i].end); - } else if (i >= 0) { - printk(KERN_ERR - "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n", - pxm, start, end, node_to_pxm(i), - numa_nodes[i].start, numa_nodes[i].end); + + if (numa_add_memblk(node, start, end) < 0) { bad_srat(); return; } @@ -285,11 +257,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) } } else update_nodes_add(node, start, end); - - node_memblk_range[num_node_memblks].start = start; - node_memblk_range[num_node_memblks].end = end; - memblk_nodeid[num_node_memblks] = node; - num_node_memblks++; } /* Sanity check to catch more bad SRATs (they are amazingly common). @@ -341,68 +308,11 @@ int __init acpi_scan_nodes(void) if (acpi_numa <= 0) return -1; - /* - * Join together blocks on the same node, holes between - * which don't overlap with memory on other nodes. - */ - for (i = 0; i < num_node_memblks; ++i) { - int j, k; - - for (j = i + 1; j < num_node_memblks; ++j) { - unsigned long start, end; - - if (memblk_nodeid[i] != memblk_nodeid[j]) - continue; - start = min(node_memblk_range[i].end, - node_memblk_range[j].end); - end = max(node_memblk_range[i].start, - node_memblk_range[j].start); - for (k = 0; k < num_node_memblks; ++k) { - if (memblk_nodeid[i] == memblk_nodeid[k]) - continue; - if (start < node_memblk_range[k].end && - end > node_memblk_range[k].start) - break; - } - if (k < num_node_memblks) - continue; - start = min(node_memblk_range[i].start, - node_memblk_range[j].start); - end = max(node_memblk_range[i].end, - node_memblk_range[j].end); - printk(KERN_INFO "SRAT: Node %d " - "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", - memblk_nodeid[i], - node_memblk_range[i].start, - node_memblk_range[i].end, - node_memblk_range[j].start, - node_memblk_range[j].end, - start, end); - node_memblk_range[i].start = start; - node_memblk_range[i].end = end; - k = --num_node_memblks - j; - memmove(memblk_nodeid + j, memblk_nodeid + j+1, - k * sizeof(*memblk_nodeid)); - memmove(node_memblk_range + j, node_memblk_range + j+1, - k * sizeof(*node_memblk_range)); - --j; - } - } - - memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, - memblk_nodeid); - if (memnode_shift < 0) { - printk(KERN_ERR - "SRAT: No NUMA node hash function found. Contact maintainer\n"); + if (numa_register_memblks() < 0) { bad_srat(); return -1; } - for (i = 0; i < num_node_memblks; i++) - memblock_x86_register_active_regions(memblk_nodeid[i], - node_memblk_range[i].start >> PAGE_SHIFT, - node_memblk_range[i].end >> PAGE_SHIFT); - /* for out of order entries in SRAT */ sort_node_map(); if (!nodes_cover_memory(numa_nodes)) {