srat.c 6.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */

#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/mmzone.h>
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
18
#include <linux/bootmem.h>
19
#include <linux/memblock.h>
20
#include <linux/mm.h>
L
Linus Torvalds 已提交
21 22
#include <asm/proto.h>
#include <asm/numa.h>
23
#include <asm/e820.h>
I
Ingo Molnar 已提交
24
#include <asm/apic.h>
I
Ingo Molnar 已提交
25
#include <asm/uv/uv.h>
L
Linus Torvalds 已提交
26

A
Andi Kleen 已提交
27 28
/*
 * SRAT parsing state used by this file: set to -1 by bad_srat() when the
 * table is rejected, set to 1 once a CPU affinity entry has been recorded.
 * srat_disabled() treats any negative value as "do not use the SRAT".
 */
int acpi_numa __initdata;

L
Linus Torvalds 已提交
29 30
/*
 * Map an ACPI proximity domain to a node id by delegating to
 * acpi_map_pxm_to_node(). Callers in this file treat a negative return
 * value as "too many proximity domains".
 */
static __init int setup_node(int pxm)
{
	return acpi_map_pxm_to_node(pxm);
}

/*
 * Reject the SRAT: flag it as unusable (acpi_numa goes negative, which
 * srat_disabled() checks) and log that it will not be used.
 */
static __init void bad_srat(void)
{
	acpi_numa = -1;
	printk(KERN_ERR "SRAT: SRAT not used.\n");
}

static __init inline int srat_disabled(void)
{
42
	return acpi_numa < 0;
L
Linus Torvalds 已提交
43 44 45 46 47
}

/*
 * Callback for SLIT parsing.
 *
 * Walks every (i, j) pair of localities and records the distance between
 * the corresponding nodes. slit->entry is indexed as a flattened square
 * matrix: row i, column j lives at locality_count * i + j.
 */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
	int i, j;

	for (i = 0; i < slit->locality_count; i++) {
		for (j = 0; j < slit->locality_count; j++) {
			numa_set_distance(pxm_to_node(i), pxm_to_node(j),
				slit->entry[slit->locality_count * i + j]);
		}
	}
}

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
/*
 * Callback for Proximity Domain -> x2APIC mapping.
 *
 * Records the node of one enabled x2APIC affinity entry. An entry that is
 * shorter than the structure, or whose proximity domain cannot be mapped
 * to a node, rejects the whole SRAT via bad_srat(). An entry whose APIC id
 * is reported invalid by the APIC driver, or is not below MAX_LOCAL_APIC,
 * is only skipped.
 */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
	int pxm, node;
	int apic_id;

	if (srat_disabled())
		return;
	if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
		bad_srat();
		return;
	}
	/* Disabled entries carry no usable mapping. */
	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
		return;
	pxm = pa->proximity_domain;
	apic_id = pa->apic_id;
	if (!apic->apic_id_valid(apic_id)) {
		printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
			 pxm, apic_id);
		return;
	}
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
		bad_srat();
		return;
	}

	if (apic_id >= MAX_LOCAL_APIC) {
		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
		return;
	}
	set_apicid_to_node(apic_id, node);
	node_set(node, numa_nodes_parsed);
	/* At least one CPU affinity entry was accepted. */
	acpi_numa = 1;
	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
	       pxm, apic_id, node);
}

L
Linus Torvalds 已提交
96 97
/* Callback for Proximity Domain -> LAPIC mapping */
void __init
98
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
L
Linus Torvalds 已提交
99 100
{
	int pxm, node;
101 102
	int apic_id;

103 104
	if (srat_disabled())
		return;
105
	if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
106
		bad_srat();
107 108
		return;
	}
109
	if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
L
Linus Torvalds 已提交
110
		return;
111
	pxm = pa->proximity_domain_lo;
112 113
	if (acpi_srat_revision >= 2)
		pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
L
Linus Torvalds 已提交
114 115 116 117 118 119
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
		bad_srat();
		return;
	}
120

121
	if (get_uv_system_type() >= UV_X2APIC)
J
Jack Steiner 已提交
122 123 124
		apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
	else
		apic_id = pa->apic_id;
125 126 127 128 129 130

	if (apic_id >= MAX_LOCAL_APIC) {
		printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
		return;
	}

131
	set_apicid_to_node(apic_id, node);
132
	node_set(node, numa_nodes_parsed);
L
Linus Torvalds 已提交
133
	acpi_numa = 1;
134
	printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
135
	       pxm, apic_id, node);
L
Linus Torvalds 已提交
136 137
}

138
/*
 * Return 1 when the kernel is built with CONFIG_MEMORY_HOTPLUG and 0
 * otherwise. The memory affinity callback uses this to decide whether
 * hot-pluggable ranges are recorded or skipped.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	return 1;
#else
	return 0;
#endif
}
143

144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
 * Feed one SRAT memory range [start, end) of @node into movablemem_map.
 *
 * For movablecore_map=nn[KMG]@ss[KMG]:
 *
 * SRAT:		|_____| |_____| |_________| |_________| ......
 * node id:		   0       1         1           2
 * user specified:	          |__|                 |___|
 * movablemem_map:		  |___| |_________|    |______| ......
 *
 * Having these ranges in movablemem_map lets us keep memblock from
 * allocating memory on ZONE_MOVABLE at boot time.
 */
static void __init handle_movablemem(int node, u64 start, u64 end)
{
	unsigned long first_pfn = PFN_DOWN(start);
	unsigned long last_pfn = PFN_UP(end);
	int idx = movablemem_map_overlap(first_pfn, last_pfn);

	if (idx < 0) {
		/*
		 * No overlap with a movable range: only insert this range
		 * when the node already has movable ranges recorded.
		 */
		if (node_isset(node, movablemem_map.numa_nodes_hotplug))
			insert_movablemem_map(first_pfn, last_pfn);
		return;
	}

	/*
	 * Part of this range is in movablemem_map. Everything from the
	 * lowest specified address up to the end of the node becomes
	 * ZONE_MOVABLE, so insert the range starting no earlier than the
	 * overlapping entry.
	 */
	first_pfn = max(first_pfn, movablemem_map.map[idx].start_pfn);
	insert_movablemem_map(first_pfn, last_pfn);

	/*
	 * Remember the node, so that later ranges of the same node are
	 * also added when the node's address range is not contiguous.
	 */
	node_set(node, movablemem_map.numa_nodes_hotplug);
}
#else		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
/* Without CONFIG_HAVE_MEMBLOCK_NODE_MAP there is nothing to record. */
static inline void handle_movablemem(int node, u64 start, u64 end)
{
}
#endif		/* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

L
Linus Torvalds 已提交
197
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
198
int __init
199
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
L
Linus Torvalds 已提交
200
{
T
Tejun Heo 已提交
201
	u64 start, end;
202
	u32 hotpluggable;
L
Linus Torvalds 已提交
203 204
	int node, pxm;

205
	if (srat_disabled())
206 207 208
		goto out_err;
	if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
		goto out_err_bad_srat;
209
	if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
210
		goto out_err;
211 212
	hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
	if (hotpluggable && !save_add_info())
213 214
		goto out_err;

215 216
	start = ma->base_address;
	end = start + ma->length;
L
Linus Torvalds 已提交
217
	pxm = ma->proximity_domain;
218 219
	if (acpi_srat_revision <= 1)
		pxm &= 0xff;
220

L
Linus Torvalds 已提交
221 222 223
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
224
		goto out_err_bad_srat;
L
Linus Torvalds 已提交
225
	}
226

227 228
	if (numa_add_memblk(node, start, end) < 0)
		goto out_err_bad_srat;
229

230 231
	node_set(node, numa_nodes_parsed);

232
	printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
233
	       node, pxm,
234 235 236 237
	       (unsigned long long) start, (unsigned long long) end - 1,
	       hotpluggable ? "Hot Pluggable": "");

	handle_movablemem(node, start, end);
238

239
	return 0;
240 241 242 243
out_err_bad_srat:
	bad_srat();
out_err:
	return -1;
L
Linus Torvalds 已提交
244 245 246 247
}

/* Intentionally empty: this file has no work to do in this hook. */
void __init acpi_numa_arch_fixup(void)
{
}

248 249 250 251 252 253 254 255 256
/*
 * Run acpi_numa_init() and report whether the SRAT ended up usable.
 *
 * Returns the negative value from acpi_numa_init() if it failed, -EINVAL
 * if parsing completed but the SRAT was rejected, and 0 otherwise.
 */
int __init x86_acpi_numa_init(void)
{
	int err = acpi_numa_init();

	if (err < 0)
		return err;
	if (srat_disabled())
		return -EINVAL;
	return 0;
}