提交 30c05350 编写于 作者: N Nathan Fontenot 提交者: Benjamin Herrenschmidt

powerpc/pseries: Use stop machine to update cpu maps

The new PRRN firmware feature allows CPU and memory resources to be
transparently reassigned across NUMA boundaries. When this happens, the
kernel must update the node maps to reflect the new affinity information.

Although the NUMA maps can be protected by locking primitives during the
update itself, this is insufficient to prevent concurrent accesses to these
structures. Since cpumask_of_node() hands out a pointer to these
structures, they can still be modified outside of the lock. Furthermore,
tracking down each usage of these pointers and adding locks would be quite
invasive and difficult to maintain.

The approach used is to make a list of affected cpus and call stop_machine
to have the update routine run on each of the affected cpus allowing them
to update themselves. Each cpu finds itself in the list of cpus and makes
the appropriate updates. We need to have each cpu do this for themselves to
handle calls to vdso_getcpu_init() added in a subsequent patch.

Situations like these are best handled using stop_machine(). Since the NUMA
affinity updates are exceptionally rare events, this approach has the
benefit of not adding any overhead while accessing the NUMA maps during
normal operation.
Signed-off-by: NNathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: NBenjamin Herrenschmidt <benh@kernel.crashing.org>
上级 5d88aa85
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <linux/pfn.h> #include <linux/pfn.h>
#include <linux/cpuset.h> #include <linux/cpuset.h>
#include <linux/node.h> #include <linux/node.h>
#include <linux/stop_machine.h>
#include <asm/sparsemem.h> #include <asm/sparsemem.h>
#include <asm/prom.h> #include <asm/prom.h>
#include <asm/smp.h> #include <asm/smp.h>
...@@ -1254,6 +1255,13 @@ u64 memory_hotplug_max(void) ...@@ -1254,6 +1255,13 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */ /* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR #ifdef CONFIG_PPC_SPLPAR
struct topology_update_data {
struct topology_update_data *next;
unsigned int cpu;
int old_nid;
int new_nid;
};
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS]; static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask; static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled; static int vphn_enabled;
...@@ -1404,42 +1412,80 @@ static long vphn_get_associativity(unsigned long cpu, ...@@ -1404,42 +1412,80 @@ static long vphn_get_associativity(unsigned long cpu,
return rc; return rc;
} }
/*
* Update the CPU maps and sysfs entries for a single CPU when its NUMA
* characteristics change. This function doesn't perform any locking and is
* only safe to call from stop_machine().
*/
static int update_cpu_topology(void *data)
{
struct topology_update_data *update;
unsigned long cpu;
if (!data)
return -EINVAL;
cpu = get_cpu();
for (update = data; update; update = update->next) {
if (cpu != update->cpu)
continue;
unregister_cpu_under_node(update->cpu, update->old_nid);
unmap_cpu_from_node(update->cpu);
map_cpu_to_node(update->cpu, update->new_nid);
register_cpu_under_node(update->cpu, update->new_nid);
}
return 0;
}
/* /*
* Update the node maps and sysfs entries for each cpu whose home node * Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise. * has changed. Returns 1 when the topology has changed, and 0 otherwise.
*/ */
int arch_update_cpu_topology(void) int arch_update_cpu_topology(void)
{ {
int cpu, nid, old_nid, changed = 0; unsigned int cpu, changed = 0;
struct topology_update_data *updates, *ud;
unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
struct device *dev; struct device *dev;
int weight, i = 0;
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
return 0;
updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
if (!updates)
return 0;
for_each_cpu(cpu, &cpu_associativity_changes_mask) { for_each_cpu(cpu, &cpu_associativity_changes_mask) {
ud = &updates[i++];
ud->cpu = cpu;
vphn_get_associativity(cpu, associativity); vphn_get_associativity(cpu, associativity);
nid = associativity_to_nid(associativity); ud->new_nid = associativity_to_nid(associativity);
if (nid < 0 || !node_online(nid)) if (ud->new_nid < 0 || !node_online(ud->new_nid))
nid = first_online_node; ud->new_nid = first_online_node;
old_nid = numa_cpu_lookup_table[cpu]; ud->old_nid = numa_cpu_lookup_table[cpu];
/* Disable hotplug while we update the cpu if (i < weight)
* masks and sysfs. ud->next = &updates[i];
*/ }
get_online_cpus();
unregister_cpu_under_node(cpu, old_nid); stop_machine(update_cpu_topology, &updates[0], cpu_online_mask);
unmap_cpu_from_node(cpu);
map_cpu_to_node(cpu, nid); for (ud = &updates[0]; ud; ud = ud->next) {
register_cpu_under_node(cpu, nid); dev = get_cpu_device(ud->cpu);
put_online_cpus();
dev = get_cpu_device(cpu);
if (dev) if (dev)
kobject_uevent(&dev->kobj, KOBJ_CHANGE); kobject_uevent(&dev->kobj, KOBJ_CHANGE);
cpumask_clear_cpu(cpu, &cpu_associativity_changes_mask); cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
changed = 1; changed = 1;
} }
kfree(updates);
return changed; return changed;
} }
...@@ -1488,10 +1534,10 @@ static int dt_update_callback(struct notifier_block *nb, ...@@ -1488,10 +1534,10 @@ static int dt_update_callback(struct notifier_block *nb,
int rc = NOTIFY_DONE; int rc = NOTIFY_DONE;
switch (action) { switch (action) {
case OF_RECONFIG_ADD_PROPERTY:
case OF_RECONFIG_UPDATE_PROPERTY: case OF_RECONFIG_UPDATE_PROPERTY:
update = (struct of_prop_reconfig *)data; update = (struct of_prop_reconfig *)data;
if (!of_prop_cmp(update->dn->type, "cpu")) { if (!of_prop_cmp(update->dn->type, "cpu") &&
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id; u32 core_id;
of_property_read_u32(update->dn, "reg", &core_id); of_property_read_u32(update->dn, "reg", &core_id);
stage_topology_update(core_id); stage_topology_update(core_id);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册