/*
* virnuma.c: helper APIs for managing numa
*
* Copyright (C) 2011-2013 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see
* .
*
*/
#include
#define NUMA_MAX_N_CPUS 4096
#if WITH_NUMACTL
# define NUMA_VERSION1_COMPATIBILITY 1
# include
# if LIBNUMA_API_VERSION > 1
# undef NUMA_MAX_N_CPUS
# define NUMA_MAX_N_CPUS (numa_all_cpus_ptr->size)
# endif
#endif /* WITH_NUMACTL */
#include "virnuma.h"
#include "vircommand.h"
#include "virerror.h"
#include "virlog.h"
#include "viralloc.h"
#include "virbitmap.h"
#define VIR_FROM_THIS VIR_FROM_NONE
VIR_LOG_INIT("util.numa");
VIR_ENUM_IMPL(virDomainNumatuneMemMode,
VIR_DOMAIN_NUMATUNE_MEM_LAST,
"strict",
"preferred",
"interleave");
VIR_ENUM_IMPL(virNumaTuneMemPlacementMode,
VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_LAST,
"default",
"static",
"auto");
#if HAVE_NUMAD
char *
virNumaGetAutoPlacementAdvice(unsigned short vcpus,
unsigned long long balloon)
{
virCommandPtr cmd = NULL;
char *output = NULL;
cmd = virCommandNewArgList(NUMAD, "-w", NULL);
virCommandAddArgFormat(cmd, "%d:%llu", vcpus,
VIR_DIV_UP(balloon, 1024));
virCommandSetOutputBuffer(cmd, &output);
if (virCommandRun(cmd, NULL) < 0)
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Failed to query numad for the "
"advisory nodeset"));
virCommandFree(cmd);
return output;
}
#else
char *
virNumaGetAutoPlacementAdvice(unsigned short vcpus ATTRIBUTE_UNUSED,
unsigned long long balloon ATTRIBUTE_UNUSED)
{
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("numad is not available on this host"));
return NULL;
}
#endif
#if WITH_NUMACTL
int
virNumaSetupMemoryPolicy(virNumaTuneDef numatune,
virBitmapPtr nodemask)
{
nodemask_t mask;
int mode = -1;
int node = -1;
int ret = -1;
int bit = 0;
size_t i;
int maxnode = 0;
virBitmapPtr tmp_nodemask = NULL;
if (numatune.memory.placement_mode ==
VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
if (!numatune.memory.nodemask)
return 0;
VIR_DEBUG("Set NUMA memory policy with specified nodeset");
tmp_nodemask = numatune.memory.nodemask;
} else if (numatune.memory.placement_mode ==
VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) {
VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad");
tmp_nodemask = nodemask;
} else {
return 0;
}
if (numa_available() < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("Host kernel is not aware of NUMA."));
return -1;
}
maxnode = numa_max_node();
maxnode = maxnode < NUMA_NUM_NODES ? maxnode : NUMA_NUM_NODES;
/* Convert nodemask to NUMA bitmask. */
nodemask_zero(&mask);
bit = -1;
while ((bit = virBitmapNextSetBit(tmp_nodemask, bit)) >= 0) {
if (bit > maxnode) {
virReportError(VIR_ERR_INTERNAL_ERROR,
_("NUMA node %d is out of range"), bit);
return -1;
}
nodemask_set(&mask, bit);
}
mode = numatune.memory.mode;
if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
numa_set_bind_policy(1);
numa_set_membind(&mask);
numa_set_bind_policy(0);
} else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
int nnodes = 0;
for (i = 0; i < NUMA_NUM_NODES; i++) {
if (nodemask_isset(&mask, i)) {
node = i;
nnodes++;
}
}
if (nnodes != 1) {
virReportError(VIR_ERR_INTERNAL_ERROR,
"%s", _("NUMA memory tuning in 'preferred' mode "
"only supports single node"));
goto cleanup;
}
numa_set_bind_policy(0);
numa_set_preferred(node);
} else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
numa_set_interleave_mask(&mask);
} else {
/* XXX: Shouldn't go here, as we already do checking when
* parsing domain XML.
*/
virReportError(VIR_ERR_XML_ERROR,
"%s", _("Invalid mode for memory NUMA tuning."));
goto cleanup;
}
ret = 0;
cleanup:
return ret;
}
bool
virNumaIsAvailable(void)
{
return numa_available() != -1;
}
/**
* virNumaGetMaxNode:
* Get the highest node number available on the current system.
* (See the node numbers in /sys/devices/system/node/ ).
*
* Returns the highest NUMA node id on success, -1 on error.
*/
int
virNumaGetMaxNode(void)
{
int ret;
if (!virNumaIsAvailable()) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("NUMA isn't available on this host"));
return -1;
}
if ((ret = numa_max_node()) < 0) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("Failed to request maximum NUMA node id"));
return -1;
}
return ret;
}
/**
* virNumaGetNodeMemory:
* @node: identifier of the requested NUMA node
* @memsize: returns the total size of memory in the NUMA node
* @memfree: returns the total free memory in a NUMA node
*
* Returns the size of the memory in one NUMA node in bytes via the @size
* argument and free memory of a node in the @free argument. The caller has to
* guarantee that @node is in range (see virNumaGetMaxNode).
*
* Returns 0 on success, -1 on error. Does not report errors.
*/
int
virNumaGetNodeMemory(int node,
unsigned long long *memsize,
unsigned long long *memfree)
{
long long node_size;
long long node_free;
if (memsize)
*memsize = 0;
if (memfree)
*memfree = 0;
if ((node_size = numa_node_size64(node, &node_free)) < 0)
return -1;
if (memsize)
*memsize = node_size;
if (memfree)
*memfree = node_free;
return 0;
}
/**
* virNumaGetNodeCPUs:
* @node: identifier of the requested NUMA node
* @cpus: returns a bitmap of CPUs in @node
*
* Returns count of CPUs in the selected node and sets the map of the cpus to
* @cpus. On error if the @node doesn't exist in the system this function
* returns -2 and sets @cpus to NULL. On other errors -1 is returned, @cpus
* is set to NULL and an error is reported.
*/
# define n_bits(var) (8 * sizeof(var))
# define MASK_CPU_ISSET(mask, cpu) \
(((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)
int
virNumaGetNodeCPUs(int node,
virBitmapPtr *cpus)
{
unsigned long *mask = NULL;
unsigned long *allonesmask = NULL;
virBitmapPtr cpumap = NULL;
int ncpus = 0;
int max_n_cpus = virNumaGetMaxCPUs();
int mask_n_bytes = max_n_cpus / 8;
size_t i;
int ret = -1;
*cpus = NULL;
if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof(*mask)) < 0)
goto cleanup;
if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof(*mask)) < 0)
goto cleanup;
memset(allonesmask, 0xff, mask_n_bytes);
/* The first time this returns -1, ENOENT if node doesn't exist... */
if (numa_node_to_cpus(node, mask, mask_n_bytes) < 0) {
VIR_WARN("NUMA topology for cell %d is not available, ignoring", node);
ret = -2;
goto cleanup;
}
/* second, third... times it returns an all-1's mask */
if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
VIR_DEBUG("NUMA topology for cell %d is invalid, ignoring", node);
ret = -2;
goto cleanup;
}
if (!(cpumap = virBitmapNew(max_n_cpus)))
goto cleanup;
for (i = 0; i < max_n_cpus; i++) {
if (MASK_CPU_ISSET(mask, i)) {
ignore_value(virBitmapSetBit(cpumap, i));
ncpus++;
}
}
*cpus = cpumap;
cpumap = NULL;
ret = ncpus;
cleanup:
VIR_FREE(mask);
VIR_FREE(allonesmask);
VIR_FREE(cpumap);
return ret;
}
# undef MASK_CPU_ISSET
# undef n_bits
#else
int
virNumaSetupMemoryPolicy(virNumaTuneDef numatune,
virBitmapPtr nodemask ATTRIBUTE_UNUSED)
{
if (numatune.memory.nodemask) {
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("libvirt is compiled without NUMA tuning support"));
return -1;
}
return 0;
}
bool
virNumaIsAvailable(void)
{
return false;
}
int
virNumaGetMaxNode(void)
{
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("NUMA isn't available on this host"));
return -1;
}
int
virNumaGetNodeMemory(int node ATTRIBUTE_UNUSED,
unsigned long long *memsize,
unsigned long long *memfree)
{
if (memsize)
*memsize = 0;
if (memfree)
*memfree = 0;
VIR_DEBUG("NUMA isn't available on this host");
return -1;
}
int
virNumaGetNodeCPUs(int node ATTRIBUTE_UNUSED,
virBitmapPtr *cpus)
{
*cpus = NULL;
virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("NUMA isn't available on this host"));
return -1;
}
#endif
/**
* virNumaGetMaxCPUs:
*
* Get the maximum count of CPUs supportable in the host.
*
* Returns the count of CPUs supported.
*/
unsigned int
virNumaGetMaxCPUs(void)
{
return NUMA_MAX_N_CPUS;
}
#ifdef HAVE_NUMA_BITMASK_ISBITSET
/**
* virNumaNodeIsAvailable:
* @node: node to check
*
* On some hosts the set of NUMA nodes isn't continuous.
* Use this function to test if the @node is available.
*
* Returns: true if @node is available,
* false if @node doesn't exist
*/
bool
virNumaNodeIsAvailable(int node)
{
return numa_bitmask_isbitset(numa_nodes_ptr, node);
}
/**
* virNumaGetDistances:
* @node: identifier of the requested NUMA node
* @distances: array of distances to sibling nodes
* @ndistances: size of @distances
*
* Get array of distances to sibling nodes from @node. If a
* distances[x] equals to zero, the node x is not enabled or
* doesn't exist. As a special case, if @node itself refers to
* disabled or nonexistent NUMA node, then @distances and
* @ndistances are set to NULL and zero respectively.
*
* The distances are a bit of magic. For a local node the value
* is 10, for remote it's typically 20 meaning that time penalty
* for accessing a remote node is two time bigger than when
* accessing a local node.
*
* Returns 0 on success, -1 otherwise.
*/
int
virNumaGetDistances(int node,
int **distances,
int *ndistances)
{
int ret = -1;
int max_node;
size_t i;
if (!virNumaNodeIsAvailable(node)) {
VIR_DEBUG("Node %d does not exist", node);
*distances = NULL;
*ndistances = 0;
return 0;
}
if ((max_node = virNumaGetMaxNode()) < 0)
goto cleanup;
if (VIR_ALLOC_N(*distances, max_node) < 0)
goto cleanup;
*ndistances = max_node + 1;
for (i = 0; i<= max_node; i++) {
if (!virNumaNodeIsAvailable(node))
continue;
(*distances)[i] = numa_distance(node, i);
}
ret = 0;
cleanup:
return ret;
}
#else
bool
virNumaNodeIsAvailable(int node)
{
int max_node = virNumaGetMaxNode();
if (max_node < 0)
return false;
/* Do we have anything better? */
return (node >= 0) && (node < max_node);
}
int
virNumaGetDistances(int node ATTRIBUTE_UNUSED,
int **distances,
int *ndistances)
{
*distances = NULL;
*ndistances = 0;
VIR_DEBUG("NUMA distance information isn't availble on this host");
return 0;
}
#endif