提交 8ddc6448 编写于 作者: A Aneesh Kumar K.V 提交者: Michael Ellerman

powerpc/pseries: Consolidate different NUMA distance update code paths

The associativity details of the newly added resources are collected from
the hypervisor via the "ibm,configure-connector" RTAS call. Update the NUMA
distance details of the newly added NUMA node after the above call.

Instead of updating the NUMA distance every time we look up a node id
from the associativity property, add helpers that can be used
during boot and that do this only once. Also remove the distance
update from the node id lookup helpers.

Currently, we duplicate parsing code for ibm,associativity and
ibm,associativity-lookup-arrays in the kernel. The associativity arrays provided
by these device tree properties are very similar and hence can use
a common helper to parse the node id and NUMA distance details.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210812132223.225214-4-aneesh.kumar@linux.ibm.com
上级 0eacd06b
...@@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu) ...@@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu)
} }
int of_drconf_to_nid_single(struct drmem_lmb *lmb); int of_drconf_to_nid_single(struct drmem_lmb *lmb);
void update_numa_distance(struct device_node *node);
#else #else
...@@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) ...@@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
return first_online_node; return first_online_node;
} }
/* Empty stub for the !CONFIG_NUMA case: there is no distance data to update. */
static inline void update_numa_distance(struct device_node *node) {}
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
......
...@@ -208,50 +208,35 @@ int __node_distance(int a, int b) ...@@ -208,50 +208,35 @@ int __node_distance(int a, int b)
} }
EXPORT_SYMBOL(__node_distance); EXPORT_SYMBOL(__node_distance);
static void initialize_distance_lookup_table(int nid, static int __associativity_to_nid(const __be32 *associativity,
const __be32 *associativity) int max_array_sz)
{ {
int i; int nid;
/*
* primary_domain_index is 1 based array index.
*/
int index = primary_domain_index - 1;
if (affinity_form != FORM1_AFFINITY) if (!numa_enabled || index >= max_array_sz)
return; return NUMA_NO_NODE;
for (i = 0; i < distance_ref_points_depth; i++) { nid = of_read_number(&associativity[index], 1);
const __be32 *entry;
entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1]; /* POWER4 LPAR uses 0xffff as invalid node */
distance_lookup_table[nid][i] = of_read_number(entry, 1); if (nid == 0xffff || nid >= nr_node_ids)
} nid = NUMA_NO_NODE;
return nid;
} }
/* /*
* Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
* info is found. * info is found.
*/ */
static int associativity_to_nid(const __be32 *associativity) static int associativity_to_nid(const __be32 *associativity)
{ {
int nid = NUMA_NO_NODE; int array_sz = of_read_number(associativity, 1);
if (!numa_enabled)
goto out;
if (of_read_number(associativity, 1) >= primary_domain_index)
nid = of_read_number(&associativity[primary_domain_index], 1);
/* POWER4 LPAR uses 0xffff as invalid node */
if (nid == 0xffff || nid >= nr_node_ids)
nid = NUMA_NO_NODE;
if (nid > 0 &&
of_read_number(associativity, 1) >= distance_ref_points_depth) {
/*
* Skip the length field and send start of associativity array
*/
initialize_distance_lookup_table(nid, associativity + 1);
}
out: /* Skip the first element in the associativity array */
return nid; return __associativity_to_nid((associativity + 1), array_sz);
} }
/* Returns the nid associated with the given device tree node, /* Returns the nid associated with the given device tree node,
...@@ -287,6 +272,60 @@ int of_node_to_nid(struct device_node *device) ...@@ -287,6 +272,60 @@ int of_node_to_nid(struct device_node *device)
} }
EXPORT_SYMBOL(of_node_to_nid); EXPORT_SYMBOL(of_node_to_nid);
/*
 * Fill in the distance_lookup_table row of the node described by
 * @associativity.
 *
 * @associativity: array of big-endian cells, indexed from 0 here. The callers
 *                 visible in this change pass a pointer that does not start
 *                 with a length cell.
 * @max_array_sz:  number of cells the caller considers valid in the array.
 *
 * Nothing is written unless affinity_form is FORM1_AFFINITY and
 * __associativity_to_nid() returns something other than NUMA_NO_NODE.
 * If a reference point lies at or beyond @max_array_sz, a warning is raised
 * and the function returns, leaving the remaining entries of the row as
 * they were.
 */
static void __initialize_form1_numa_distance(const __be32 *associativity,
int max_array_sz)
{
int i, nid;
if (affinity_form != FORM1_AFFINITY)
return;
nid = __associativity_to_nid(associativity, max_array_sz);
if (nid != NUMA_NO_NODE) {
for (i = 0; i < distance_ref_points_depth; i++) {
const __be32 *entry;
/*
 * Reference points are stored big-endian; one is subtracted to turn
 * the value into an index into @associativity.
 * NOTE(review): a reference point of 0 would give index -1, which the
 * check below does not reject -- confirm it cannot occur.
 */
int index = be32_to_cpu(distance_ref_points[i]) - 1;
/*
* broken hierarchy, return with broken distance table
*/
if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
return;
entry = &associativity[index];
distance_lookup_table[nid][i] = of_read_number(entry, 1);
}
}
}
/*
 * Wrapper around __initialize_form1_numa_distance() for an associativity
 * array whose first cell is read as the array size. That cell is skipped
 * before the rest of the array is handed to the helper.
 */
static void initialize_form1_numa_distance(const __be32 *associativity)
{
int array_sz;
/* The first cell is taken as the number of cells that follow. */
array_sz = of_read_number(associativity, 1);
/* Skip the first element in the associativity array */
__initialize_form1_numa_distance(associativity + 1, array_sz);
}
/*
 * Used to update distance information w.r.t newly added node.
 *
 * @node: device tree node whose associativity is looked up with
 *        of_get_associativity().
 *
 * Returns without doing anything when affinity_form is FORM0_AFFINITY or
 * when no associativity is found for @node. For FORM1_AFFINITY the
 * associativity is passed to initialize_form1_numa_distance(). Any other
 * affinity_form value falls through with no action.
 */
void update_numa_distance(struct device_node *node)
{
if (affinity_form == FORM0_AFFINITY)
return;
else if (affinity_form == FORM1_AFFINITY) {
const __be32 *associativity;
associativity = of_get_associativity(node);
if (!associativity)
return;
initialize_form1_numa_distance(associativity);
return;
}
}
static int __init find_primary_domain_index(void) static int __init find_primary_domain_index(void)
{ {
int index; int index;
...@@ -433,6 +472,38 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) ...@@ -433,6 +472,38 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
return 0; return 0;
} }
/*
 * Look up the node id for @lmb in the associativity lookup arrays and, for
 * FORM1_AFFINITY, also fill in the distance information for that node.
 *
 * Returns NUMA_NO_NODE when primary_domain_index is negative, when
 * numa_enabled is not set, when of_get_assoc_arrays() fails, or when the
 * LMB does not select a usable array (DRCONF_MEM_AI_INVALID set, aa_index
 * out of range, or arrays shorter than primary_domain_index). Otherwise
 * returns whatever __associativity_to_nid() yields for the selected array.
 */
static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
{
struct assoc_arrays aa = { .arrays = NULL };
int default_nid = NUMA_NO_NODE;
int nid = default_nid;
int rc, index;
if ((primary_domain_index < 0) || !numa_enabled)
return default_nid;
rc = of_get_assoc_arrays(&aa);
if (rc)
return default_nid;
if (primary_domain_index <= aa.array_sz &&
!(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
const __be32 *associativity;
/* Each array holds aa.array_sz cells; step to the one this LMB uses. */
index = lmb->aa_index * aa.array_sz;
associativity = &aa.arrays[index];
nid = __associativity_to_nid(associativity, aa.array_sz);
/*
 * NOTE(review): the distance update is skipped for nid 0 as well as
 * for NUMA_NO_NODE -- confirm that leaving out node 0 is intended.
 */
if (nid > 0 && affinity_form == FORM1_AFFINITY) {
/*
* lookup array associativity entries have
* no length of the array as the first element.
*/
__initialize_form1_numa_distance(associativity, aa.array_sz);
}
}
return nid;
}
/* /*
* This is like of_node_to_nid_single() for memory represented in the * This is like of_node_to_nid_single() for memory represented in the
* ibm,dynamic-reconfiguration-memory node. * ibm,dynamic-reconfiguration-memory node.
...@@ -453,26 +524,19 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb) ...@@ -453,26 +524,19 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
if (primary_domain_index <= aa.array_sz && if (primary_domain_index <= aa.array_sz &&
!(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
index = lmb->aa_index * aa.array_sz + primary_domain_index - 1; const __be32 *associativity;
nid = of_read_number(&aa.arrays[index], 1);
if (nid == 0xffff || nid >= nr_node_ids)
nid = default_nid;
if (nid > 0) { index = lmb->aa_index * aa.array_sz;
index = lmb->aa_index * aa.array_sz; associativity = &aa.arrays[index];
initialize_distance_lookup_table(nid, nid = __associativity_to_nid(associativity, aa.array_sz);
&aa.arrays[index]);
}
} }
return nid; return nid;
} }
#ifdef CONFIG_PPC_SPLPAR #ifdef CONFIG_PPC_SPLPAR
static int vphn_get_nid(long lcpu)
static int __vphn_get_associativity(long lcpu, __be32 *associativity)
{ {
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
long rc, hwid; long rc, hwid;
/* /*
...@@ -492,12 +556,30 @@ static int vphn_get_nid(long lcpu) ...@@ -492,12 +556,30 @@ static int vphn_get_nid(long lcpu)
rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity); rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
if (rc == H_SUCCESS) if (rc == H_SUCCESS)
return associativity_to_nid(associativity); return 0;
} }
return -1;
}
static int vphn_get_nid(long lcpu)
{
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
if (!__vphn_get_associativity(lcpu, associativity))
return associativity_to_nid(associativity);
return NUMA_NO_NODE; return NUMA_NO_NODE;
} }
#else #else
/*
 * Stub for builds without CONFIG_PPC_SPLPAR: always returns -1 and leaves
 * @associativity untouched.
 */
static int __vphn_get_associativity(long lcpu, __be32 *associativity)
{
return -1;
}
static int vphn_get_nid(long unused) static int vphn_get_nid(long unused)
{ {
return NUMA_NO_NODE; return NUMA_NO_NODE;
...@@ -692,7 +774,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb, ...@@ -692,7 +774,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
size = read_n_cells(n_mem_size_cells, usm); size = read_n_cells(n_mem_size_cells, usm);
} }
nid = of_drconf_to_nid_single(lmb); nid = get_nid_and_numa_distance(lmb);
fake_numa_create_new_node(((base + size) >> PAGE_SHIFT), fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
&nid); &nid);
node_set_online(nid); node_set_online(nid);
...@@ -709,6 +791,7 @@ static int __init parse_numa_properties(void) ...@@ -709,6 +791,7 @@ static int __init parse_numa_properties(void)
struct device_node *memory; struct device_node *memory;
int default_nid = 0; int default_nid = 0;
unsigned long i; unsigned long i;
const __be32 *associativity;
if (numa_enabled == 0) { if (numa_enabled == 0) {
printk(KERN_WARNING "NUMA disabled by user\n"); printk(KERN_WARNING "NUMA disabled by user\n");
...@@ -734,18 +817,30 @@ static int __init parse_numa_properties(void) ...@@ -734,18 +817,30 @@ static int __init parse_numa_properties(void)
* each node to be onlined must have NODE_DATA etc backing it. * each node to be onlined must have NODE_DATA etc backing it.
*/ */
for_each_present_cpu(i) { for_each_present_cpu(i) {
__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
struct device_node *cpu; struct device_node *cpu;
int nid = vphn_get_nid(i); int nid = NUMA_NO_NODE;
/* memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
* Don't fall back to default_nid yet -- we will plug
* cpus into nodes once the memory scan has discovered if (__vphn_get_associativity(i, vphn_assoc) == 0) {
* the topology. nid = associativity_to_nid(vphn_assoc);
*/ initialize_form1_numa_distance(vphn_assoc);
if (nid == NUMA_NO_NODE) { } else {
/*
* Don't fall back to default_nid yet -- we will plug
* cpus into nodes once the memory scan has discovered
* the topology.
*/
cpu = of_get_cpu_node(i, NULL); cpu = of_get_cpu_node(i, NULL);
BUG_ON(!cpu); BUG_ON(!cpu);
nid = of_node_to_nid_single(cpu);
associativity = of_get_associativity(cpu);
if (associativity) {
nid = associativity_to_nid(associativity);
initialize_form1_numa_distance(associativity);
}
of_node_put(cpu); of_node_put(cpu);
} }
...@@ -781,8 +876,11 @@ static int __init parse_numa_properties(void) ...@@ -781,8 +876,11 @@ static int __init parse_numa_properties(void)
* have associativity properties. If none, then * have associativity properties. If none, then
* everything goes to default_nid. * everything goes to default_nid.
*/ */
nid = of_node_to_nid_single(memory); associativity = of_get_associativity(memory);
if (nid < 0) if (associativity) {
nid = associativity_to_nid(associativity);
initialize_form1_numa_distance(associativity);
} else
nid = default_nid; nid = default_nid;
fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
......
...@@ -580,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index) ...@@ -580,6 +580,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
return saved_rc; return saved_rc;
} }
update_numa_distance(dn);
rc = dlpar_online_cpu(dn); rc = dlpar_online_cpu(dn);
if (rc) { if (rc) {
saved_rc = rc; saved_rc = rc;
......
...@@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb) ...@@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
return -ENODEV; return -ENODEV;
} }
update_numa_distance(lmb_node);
dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (!dr_node) { if (!dr_node) {
dlpar_free_cc_nodes(lmb_node); dlpar_free_cc_nodes(lmb_node);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册