提交 80533ca2 编写于 作者: P Peter Krempa

nodeinfo: Fix gathering of nodeinfo data structure

This patch changes the way data to fill the nodeinfo structure are
gathered. We've gathere the test data by iterating processors an sockets
separately from nodes. The reported data was based solely on information
about core id. Problems arise when eg cores in mulit-processor machines
don't have same id's on both processors or maybe one physical processor
contains more NUMA nodes.

This patch changes the approach how we detect processors and nodes. Now
we start at enumerating nodes and for each node processors, sockets and
threads are enumerated separately. This approach provides acurate data
that comply to docs about the nodeinfo structure. This also enables to
get rid of hacks: see commits 10d9038b,
ac9dd4a6. (Those changes in nodeinfo.c
are efectively reverted by this patch).

This patch also changes output of one of the tests, as the processor
topology is now acquired more precisely.
上级 6dcf98c8
...@@ -188,38 +188,139 @@ virNodeParseSocket(const char *dir, unsigned int cpu) ...@@ -188,38 +188,139 @@ virNodeParseSocket(const char *dir, unsigned int cpu)
return ret; return ret;
} }
/* parses a node entry, returning number of processors in the node and
* filling arguments */
static int static int
virNodeParseNode(const char *sysfs_dir) virNodeParseNode(const char *node, int *sockets, int *cores, int *threads)
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4)
{ {
char *file = NULL;
char *possible = NULL;
char *tmp;
int ret = -1; int ret = -1;
int processors = 0;
DIR *cpudir = NULL;
struct dirent *cpudirent = NULL;
int sock_max = 0;
cpu_set_t sock_map;
int sock;
cpu_set_t *core_maps = NULL;
int core;
int i;
int siblings;
unsigned int cpu;
int online;
if (virAsprintf(&file, "%s/node/possible", sysfs_dir) < 0) { *threads = 0;
virReportOOMError(); *cores = 0;
*sockets = 0;
if (!(cpudir = opendir(node))) {
virReportSystemError(errno, _("cannot opendir %s"), node);
goto cleanup; goto cleanup;
} }
/* Assume that a missing node/possible file implies no NUMA
* support, and hence all cpus belong to the same node. */ /* enumerate sockets in the node */
if (!virFileExists(file)) { CPU_ZERO(&sock_map);
ret = 1; errno = 0;
while ((cpudirent = readdir(cpudir))) {
if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
continue;
/* Parse socket */
sock = virNodeParseSocket(node, cpu);
CPU_SET(sock, &sock_map);
if (sock > sock_max)
sock_max = sock;
errno = 0;
}
if (errno) {
virReportSystemError(errno, _("problem reading %s"), node);
goto cleanup; goto cleanup;
} }
if (virFileReadAll(file, 1024, &possible) < 0)
sock_max++;
/* allocate cpu maps for each socket */
if (VIR_ALLOC_N(core_maps, sock_max) < 0) {
virReportOOMError();
goto cleanup; goto cleanup;
if (virStrToLong_i(possible, &tmp, 10, &ret) < 0 || }
(*tmp == '-' && virStrToLong_i(tmp+1, &tmp, 10, &ret) < 0) ||
*tmp != '\n') { for (i = 0; i < sock_max; i++)
nodeReportError(VIR_ERR_INTERNAL_ERROR, CPU_ZERO(&core_maps[i]);
_("failed to parse possible nodes '%s'"), possible);
/* iterate over all CPU's in the node */
rewinddir(cpudir);
errno = 0;
while ((cpudirent = readdir(cpudir))) {
if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
continue;
if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0)
goto cleanup;
if (!online)
continue;
processors++;
/* Parse socket */
sock = virNodeParseSocket(node, cpu);
if (!CPU_ISSET(sock, &sock_map)) {
nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("CPU socket topology has changed"));
goto cleanup;
}
/* Parse core */
# if defined(__s390__) || \
defined(__s390x__)
/* logical cpu is equivalent to a core on s390 */
core = cpu;
# else
core = virNodeGetCpuValue(node, cpu, "topology/core_id", false);
# endif
CPU_SET(core, &core_maps[sock]);
if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
goto cleanup;
if (siblings > *threads)
*threads = siblings;
errno = 0;
}
if (errno) {
virReportSystemError(errno, _("problem reading %s"), node);
goto cleanup; goto cleanup;
} }
ret++;
/* finalize the returned data */
*sockets = CPU_COUNT(&sock_map);
for (i = 0; i < sock_max; i++) {
if (!CPU_ISSET(i, &sock_map))
continue;
core = CPU_COUNT(&core_maps[i]);
if (core > *cores)
*cores = core;
}
ret = processors;
cleanup: cleanup:
VIR_FREE(file); /* don't shadow a more serious error */
VIR_FREE(possible); if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
virReportSystemError(errno, _("problem closing %s"), node);
ret = -1;
}
VIR_FREE(core_maps);
return ret; return ret;
} }
...@@ -228,23 +329,20 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, ...@@ -228,23 +329,20 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
virNodeInfoPtr nodeinfo) virNodeInfoPtr nodeinfo)
{ {
char line[1024]; char line[1024];
DIR *cpudir = NULL; DIR *nodedir = NULL;
struct dirent *cpudirent = NULL; struct dirent *nodedirent = NULL;
unsigned int cpu; int cpus, cores, socks, threads;
unsigned long core, sock, cur_threads; unsigned int node;
cpu_set_t core_mask;
cpu_set_t socket_mask;
int online;
int ret = -1; int ret = -1;
char *sysfs_nodedir = NULL;
char *sysfs_cpudir = NULL; char *sysfs_cpudir = NULL;
unsigned int cpu_cores = 0;
nodeinfo->cpus = 0; nodeinfo->cpus = 0;
nodeinfo->mhz = 0; nodeinfo->mhz = 0;
nodeinfo->cores = 0; nodeinfo->cores = 0;
nodeinfo->nodes = 0;
/* Start with parsing /proc/cpuinfo; although it tends to have /* Start with parsing CPU clock speed from /proc/cpuinfo */
* fewer details. Hyperthreads are ignored at this stage. */
while (fgets(line, sizeof(line), cpuinfo) != NULL) { while (fgets(line, sizeof(line), cpuinfo) != NULL) {
# if defined(__x86_64__) || \ # if defined(__x86_64__) || \
defined(__amd64__) || \ defined(__amd64__) || \
...@@ -254,40 +352,22 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, ...@@ -254,40 +352,22 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
char *p; char *p;
unsigned int ui; unsigned int ui;
buf += 9; buf += 7;
while (*buf && c_isspace(*buf)) while (*buf && c_isspace(*buf))
buf++; buf++;
if (*buf != ':' || !buf[1]) { if (*buf != ':' || !buf[1]) {
nodeReportError(VIR_ERR_INTERNAL_ERROR, nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
"%s", _("parsing cpu MHz from cpuinfo")); _("parsing cpu MHz from cpuinfo"));
goto cleanup; goto cleanup;
} }
if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
/* Accept trailing fractional part. */ /* Accept trailing fractional part. */
&& (*p == '\0' || *p == '.' || c_isspace(*p))) (*p == '\0' || *p == '.' || c_isspace(*p)))
nodeinfo->mhz = ui; nodeinfo->mhz = ui;
} }
if (STRPREFIX(buf, "cpu cores")) {
char *p;
unsigned int ui;
buf += 9;
while (*buf && c_isspace(*buf))
buf++;
if (*buf != ':' || !buf[1]) {
nodeReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("parsing cpu cores from cpuinfo"));
return -1;
}
if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
&& (*p == '\0' || c_isspace(*p)))
cpu_cores = ui;
}
# elif defined(__powerpc__) || \ # elif defined(__powerpc__) || \
defined(__powerpc64__) defined(__powerpc64__)
char *buf = line; char *buf = line;
...@@ -300,14 +380,14 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, ...@@ -300,14 +380,14 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
buf++; buf++;
if (*buf != ':' || !buf[1]) { if (*buf != ':' || !buf[1]) {
nodeReportError(VIR_ERR_INTERNAL_ERROR, nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s",
"%s", _("parsing cpu MHz from cpuinfo")); _("parsing cpu MHz from cpuinfo"));
goto cleanup; goto cleanup;
} }
if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
/* Accept trailing fractional part. */ /* Accept trailing fractional part. */
&& (*p == '\0' || *p == '.' || c_isspace(*p))) (*p == '\0' || *p == '.' || c_isspace(*p)))
nodeinfo->mhz = ui; nodeinfo->mhz = ui;
/* No other interesting infos are available in /proc/cpuinfo. /* No other interesting infos are available in /proc/cpuinfo.
* However, there is a line identifying processor's version, * However, there is a line identifying processor's version,
...@@ -328,115 +408,101 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, ...@@ -328,115 +408,101 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
/* OK, we've parsed clock speed out of /proc/cpuinfo. Get the /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
* core, node, socket, thread and topology information from /sys * core, node, socket, thread and topology information from /sys
*/ */
if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) { if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0) {
virReportOOMError(); virReportOOMError();
goto cleanup; goto cleanup;
} }
cpudir = opendir(sysfs_cpudir);
if (cpudir == NULL) {
virReportSystemError(errno, _("cannot opendir %s"), sysfs_cpudir);
goto cleanup;
}
CPU_ZERO(&core_mask); if (!(nodedir = opendir(sysfs_nodedir))) {
CPU_ZERO(&socket_mask); /* the host isn't probably running a NUMA architecture */
goto fallback;
}
while ((cpudirent = readdir(cpudir))) { errno = 0;
if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1) while ((nodedirent = readdir(nodedir))) {
if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
continue; continue;
online = virNodeGetCpuValue(sysfs_cpudir, cpu, "online", true); nodeinfo->nodes++;
if (online < 0) {
closedir(cpudir); if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
sysfs_dir, nodedirent->d_name) < 0) {
virReportOOMError();
goto cleanup; goto cleanup;
} }
if (!online)
continue;
nodeinfo->cpus++;
/* Parse core */ if ((cpus = virNodeParseNode(sysfs_cpudir, &socks,
# if defined(__s390__) || \ &cores, &threads)) < 0)
defined(__s390x__) goto cleanup;
/* logical cpu is equivalent to a core on s390 */
core = cpu;
# else
core = virNodeGetCpuValue(sysfs_cpudir, cpu, "topology/core_id", false);
# endif
if (!CPU_ISSET(core, &core_mask)) {
CPU_SET(core, &core_mask);
nodeinfo->cores++;
}
/* Parse socket */ VIR_FREE(sysfs_cpudir);
sock = virNodeParseSocket(sysfs_cpudir, cpu);
if (!CPU_ISSET(sock, &socket_mask)) {
CPU_SET(sock, &socket_mask);
nodeinfo->sockets++;
}
cur_threads = virNodeCountThreadSiblings(sysfs_cpudir, cpu); nodeinfo->cpus += cpus;
if (cur_threads == 0) {
closedir(cpudir); if (socks > nodeinfo->sockets)
goto cleanup; nodeinfo->sockets = socks;
}
if (cur_threads > nodeinfo->threads) if (cores > nodeinfo->cores)
nodeinfo->threads = cur_threads; nodeinfo->cores = cores;
if (threads > nodeinfo->threads)
nodeinfo->threads = threads;
errno = 0;
} }
if (errno) { if (errno) {
virReportSystemError(errno, virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir);
_("problem reading %s"), sysfs_cpudir);
closedir(cpudir);
goto cleanup; goto cleanup;
} }
if (closedir(cpudir) < 0) {
virReportSystemError(errno, if (nodeinfo->cpus && nodeinfo->nodes)
_("problem closing %s"), sysfs_cpudir); goto done;
fallback:
VIR_FREE(sysfs_cpudir);
if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
virReportOOMError();
goto cleanup; goto cleanup;
} }
if ((nodeinfo->nodes = virNodeParseNode(sysfs_dir)) <= 0) if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores, &threads)) < 0)
goto cleanup; goto cleanup;
nodeinfo->nodes = 1;
nodeinfo->cpus = cpus;
nodeinfo->sockets = socks;
nodeinfo->cores = cores;
nodeinfo->threads = threads;
done:
/* There should always be at least one cpu, socket, node, and thread. */ /* There should always be at least one cpu, socket, node, and thread. */
if (nodeinfo->cpus == 0) { if (nodeinfo->cpus == 0) {
nodeReportError(VIR_ERR_INTERNAL_ERROR, nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
"%s", _("no CPUs found"));
goto cleanup; goto cleanup;
} }
if (nodeinfo->sockets == 0) { if (nodeinfo->sockets == 0) {
nodeReportError(VIR_ERR_INTERNAL_ERROR, nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
"%s", _("no sockets found"));
goto cleanup; goto cleanup;
} }
if (nodeinfo->threads == 0) { if (nodeinfo->threads == 0) {
nodeReportError(VIR_ERR_INTERNAL_ERROR, nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
"%s", _("no threads found"));
goto cleanup; goto cleanup;
} }
/* Platform like AMD Magny Cours has two NUMA nodes each package, and
* the two nodes share the same core ID set, it results in the cores
* number calculated from sysfs is not the actual cores number. Use
* "cpu cores" in /proc/cpuinfo as the cores number instead in this case.
* More details about the problem:
* https://www.redhat.com/archives/libvir-list/2012-May/msg00607.html
*/
if (cpu_cores && (cpu_cores > nodeinfo->cores))
nodeinfo->cores = cpu_cores;
/* nodeinfo->sockets is supposed to be a number of sockets per NUMA node,
* however if NUMA nodes are not composed of whole sockets, we just lie
* about the number of NUMA nodes and force apps to check capabilities XML
* for the actual NUMA topology.
*/
if (nodeinfo->sockets % nodeinfo->nodes == 0)
nodeinfo->sockets /= nodeinfo->nodes;
else
nodeinfo->nodes = 1;
ret = 0; ret = 0;
cleanup: cleanup:
/* don't shadow a more serious error */
if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
ret = -1;
}
VIR_FREE(sysfs_nodedir);
VIR_FREE(sysfs_cpudir); VIR_FREE(sysfs_cpudir);
return ret; return ret;
} }
......
CPUs: 48/48, MHz: 2100, Nodes: 1, Sockets: 4, Cores: 12, Threads: 1 CPUs: 48/48, MHz: 2100, Nodes: 8, Sockets: 1, Cores: 6, Threads: 1
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册