/*
 * nodeinfo.c: Helper routines for OS specific node information
 *
 * Copyright (C) 2006-2008, 2010-2011 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

24
#include <config.h>
J
Jim Meyering 已提交
25

26 27 28
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
29
#include <stdint.h>
30
#include <errno.h>
31
#include <dirent.h>
E
Eric Blake 已提交
32
#include <sys/utsname.h>
33 34 35 36 37

#if HAVE_NUMACTL
# define NUMA_VERSION1_COMPATIBILITY 1
# include <numa.h>
#endif
38

39 40
#include "c-ctype.h"
#include "memory.h"
41
#include "nodeinfo.h"
42
#include "physmem.h"
43
#include "util.h"
44
#include "logging.h"
45
#include "virterror_internal.h"
46
#include "count-one-bits.h"
E
Eric Blake 已提交
47
#include "intprops.h"
48
#include "files.h"
49

50 51 52

/* Error domain used for every error raised from this file. */
#define VIR_FROM_THIS VIR_FROM_NONE

/* Raise an error with the given CODE in this file's error domain; the
 * remaining arguments are a printf-style format followed by its values.
 * The call site's file, function and line are recorded automatically. */
#define nodeReportError(code, ...)                                \
    virReportErrorHelper(VIR_FROM_THIS, code,                     \
                         __FILE__, __FUNCTION__, __LINE__,        \
                         __VA_ARGS__)
56

57
#ifdef __linux__
58 59
# define CPUINFO_PATH "/proc/cpuinfo"
# define CPU_SYS_PATH "/sys/devices/system/cpu"
60

61
/* NB, this is not static as we need to call it from the testsuite */
62
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
63 64
                             virNodeInfoPtr nodeinfo,
                             bool need_hyperthreads);
65

E
Eric Blake 已提交
66 67 68
/* Return the positive decimal contents of the given
 * CPU_SYS_PATH/cpu%u/FILE, or -1 on error.  If MISSING_OK and the
 * file could not be found, return 1 instead of an error; this is
69 70
 * because some machines cannot hot-unplug cpu0, or because
 * hot-unplugging is disabled.  */
E
Eric Blake 已提交
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
static int
get_cpu_value(unsigned int cpu, const char *file, bool missing_ok)
{
    char *path;
    FILE *pathfp;
    int value = -1;
    char value_str[INT_BUFSIZE_BOUND(value)];
    char *tmp;

    if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/%s", cpu, file) < 0) {
        virReportOOMError();
        return -1;
    }

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
        if (missing_ok && errno == ENOENT)
            value = 1;
        else
            virReportSystemError(errno, _("cannot open %s"), path);
        goto cleanup;
    }

    if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }
    if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
                        _("could not convert '%s' to an integer"),
                        value_str);
        goto cleanup;
    }

cleanup:
106
    VIR_FORCE_FCLOSE(pathfp);
E
Eric Blake 已提交
107 108 109 110 111 112 113 114 115 116
    VIR_FREE(path);

    return value;
}

/* Query CPU_SYS_PATH/cpu<CPU>/online.  Yields 1 for an online cpu, 0 for
 * an offline one and -1 on error.  The lookup is done with missing_ok
 * set, so a cpu that has no "online" file is reported as online.  */
static int
cpu_online(unsigned int cpu)
{
    const bool absent_file_ok = true;

    return get_cpu_value(cpu, "online", absent_file_ok);
}

C
Chris Lalancette 已提交
120
static unsigned long count_thread_siblings(unsigned int cpu)
121 122
{
    unsigned long ret = 0;
C
Chris Lalancette 已提交
123 124
    char *path;
    FILE *pathfp;
125 126 127
    char str[1024];
    int i;

C
Chris Lalancette 已提交
128
    if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/topology/thread_siblings",
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
                    cpu) < 0) {
        virReportOOMError();
        return 0;
    }

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
        virReportSystemError(errno, _("cannot open %s"), path);
        VIR_FREE(path);
        return 0;
    }

    if (fgets(str, sizeof(str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }

    i = 0;
    while (str[i] != '\0') {
C
Chris Lalancette 已提交
148
        if (c_isdigit(str[i]))
149
            ret += count_one_bits(str[i] - '0');
C
Chris Lalancette 已提交
150 151 152 153
        else if (str[i] >= 'A' && str[i] <= 'F')
            ret += count_one_bits(str[i] - 'A' + 10);
        else if (str[i] >= 'a' && str[i] <= 'f')
            ret += count_one_bits(str[i] - 'a' + 10);
154 155 156 157
        i++;
    }

cleanup:
158
    VIR_FORCE_FCLOSE(pathfp);
159 160 161 162 163
    VIR_FREE(path);

    return ret;
}

C
Chris Lalancette 已提交
164
/* Return the socket (physical package) id of CPU as published in
 * topology/physical_package_id, or a negative value on error.  On
 * powerpc builds any negative result is mapped to socket 0, because the
 * file holds -1 there.  */
static int parse_socket(unsigned int cpu)
{
    int package_id = get_cpu_value(cpu, "topology/physical_package_id",
                                   false);

# if defined(__powerpc__) || defined(__powerpc64__)
    /* ppc has -1 */
    return package_id < 0 ? 0 : package_id;
# else
    return package_id;
# endif
}

176
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
177 178
                             virNodeInfoPtr nodeinfo,
                             bool need_hyperthreads)
179
{
180
    char line[1024];
181 182
    DIR *cpudir = NULL;
    struct dirent *cpudirent = NULL;
C
Chris Lalancette 已提交
183
    unsigned int cpu;
184 185 186
    unsigned long cur_threads;
    int socket;
    unsigned long long socket_mask = 0;
E
Eric Blake 已提交
187 188
    unsigned int remaining;
    int online;
189 190 191

    nodeinfo->cpus = 0;
    nodeinfo->mhz = 0;
192
    nodeinfo->cores = 1;
193 194

    nodeinfo->nodes = 1;
J
Jiri Denemark 已提交
195
# if HAVE_NUMACTL
196
    if (numa_available() >= 0)
197
        nodeinfo->nodes = numa_max_node() + 1;
J
Jiri Denemark 已提交
198
# endif
199 200

    /* NB: It is impossible to fill our nodes, since cpuinfo
C
Chris Lalancette 已提交
201
     * has no knowledge of NUMA nodes */
202

203
    /* NOTE: hyperthreads are ignored here; they are parsed out of /sys */
204 205
    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        char *buf = line;
206
        if (STRPREFIX(buf, "processor")) { /* aka a single logical CPU */
207
            buf += 9;
208
            while (*buf && c_isspace(*buf))
209 210
                buf++;
            if (*buf != ':') {
211
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
212
                                "%s", _("parsing cpuinfo processor"));
213 214 215
                return -1;
            }
            nodeinfo->cpus++;
E
Eric Blake 已提交
216
# if defined(__x86_64__) || \
217 218
    defined(__amd64__)  || \
    defined(__i386__)
219
        } else if (STRPREFIX(buf, "cpu MHz")) {
220 221
            char *p;
            unsigned int ui;
222
            buf += 9;
223
            while (*buf && c_isspace(*buf))
224 225
                buf++;
            if (*buf != ':' || !buf[1]) {
226
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
227
                                "%s", _("parsing cpuinfo cpu MHz"));
228 229
                return -1;
            }
230
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
231
                /* Accept trailing fractional part.  */
232
                && (*p == '\0' || *p == '.' || c_isspace(*p)))
233
                nodeinfo->mhz = ui;
234
        } else if (STRPREFIX(buf, "cpu cores")) { /* aka cores */
235
            char *p;
236 237
            unsigned int id;
            buf += 9;
238
            while (*buf && c_isspace(*buf))
239 240
                buf++;
            if (*buf != ':' || !buf[1]) {
241
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
242
                                _("parsing cpuinfo cpu cores %c"), *buf);
243 244
                return -1;
            }
245
            if (virStrToLong_ui(buf+1, &p, 10, &id) == 0
246
                && (*p == '\0' || c_isspace(*p))
247
                && id > nodeinfo->cores)
248 249
                nodeinfo->cores = id;
        }
E
Eric Blake 已提交
250
# elif defined(__powerpc__) || \
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
      defined(__powerpc64__)
        } else if (STRPREFIX(buf, "clock")) {
            char *p;
            unsigned int ui;
            buf += 5;
            while (*buf && c_isspace(*buf))
                buf++;
            if (*buf != ':' || !buf[1]) {
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
                                "%s", _("parsing cpuinfo cpu MHz"));
                return -1;
            }
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
                /* Accept trailing fractional part.  */
                && (*p == '\0' || *p == '.' || c_isspace(*p)))
                nodeinfo->mhz = ui;
        }
E
Eric Blake 已提交
268 269 270
# else
#  warning Parser for /proc/cpuinfo needs to be adapted for your architecture
# endif
271 272 273
    }

    if (!nodeinfo->cpus) {
274
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
275
                        "%s", _("no cpus found"));
276 277 278
        return -1;
    }

279 280 281
    if (!need_hyperthreads)
        return 0;

282 283
    /* OK, we've parsed what we can out of /proc/cpuinfo.  Get the socket
     * and thread information from /sys
284
     */
E
Eric Blake 已提交
285
    remaining = nodeinfo->cpus;
286 287 288 289 290
    cpudir = opendir(CPU_SYS_PATH);
    if (cpudir == NULL) {
        virReportSystemError(errno, _("cannot opendir %s"), CPU_SYS_PATH);
        return -1;
    }
E
Eric Blake 已提交
291
    while ((errno = 0), remaining && (cpudirent = readdir(cpudir))) {
C
Chris Lalancette 已提交
292
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
293 294
            continue;

E
Eric Blake 已提交
295 296 297 298 299 300 301 302 303
        online = cpu_online(cpu);
        if (online < 0) {
            closedir(cpudir);
            return -1;
        }
        if (!online)
            continue;
        remaining--;

304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
        socket = parse_socket(cpu);
        if (socket < 0) {
            closedir(cpudir);
            return -1;
        }
        if (!(socket_mask & (1 << socket))) {
            socket_mask |= (1 << socket);
            nodeinfo->sockets++;
        }

        cur_threads = count_thread_siblings(cpu);
        if (cur_threads == 0) {
            closedir(cpudir);
            return -1;
        }
        if (cur_threads > nodeinfo->threads)
            nodeinfo->threads = cur_threads;
    }
E
Eric Blake 已提交
322 323 324 325 326 327
    if (errno) {
        virReportSystemError(errno,
                             _("problem reading %s"), CPU_SYS_PATH);
        closedir(cpudir);
        return -1;
    }
328 329

    closedir(cpudir);
330

C
Chris Lalancette 已提交
331 332
    /* there should always be at least one socket and one thread */
    if (nodeinfo->sockets == 0) {
333
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
C
Chris Lalancette 已提交
334 335 336 337
                        "%s", _("no sockets found"));
        return -1;
    }
    if (nodeinfo->threads == 0) {
338
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
C
Chris Lalancette 已提交
339 340 341 342
                        "%s", _("no threads found"));
        return -1;
    }

343 344 345 346 347 348 349 350 351 352
    /* nodeinfo->sockets is supposed to be a number of sockets per NUMA node,
     * however if NUMA nodes are not composed of whole sockets, we just lie
     * about the number of NUMA nodes and force apps to check capabilities XML
     * for the actual NUMA topology.
     */
    if (nodeinfo->sockets % nodeinfo->nodes == 0)
        nodeinfo->sockets /= nodeinfo->nodes;
    else
        nodeinfo->nodes = 1;

353 354 355 356 357
    return 0;
}

#endif

358
/*
 * Populate NODEINFO for the local host.  The structure is zeroed first,
 * then the model is copied from uname()'s machine field.  On Linux the
 * cpu topology is taken from CPUINFO_PATH (and sysfs) and the memory
 * size, in KB, from physmem_total().  Other platforms report
 * VIR_ERR_NO_SUPPORT.
 *
 * Returns 0 on success and -1 on failure.
 */
int nodeGetInfo(virConnectPtr conn ATTRIBUTE_UNUSED, virNodeInfoPtr nodeinfo)
{
    struct utsname host;
#ifdef __linux__
    FILE *fp;
    int rc;
#endif

    memset(nodeinfo, 0, sizeof(*nodeinfo));
    uname(&host);

    if (virStrcpyStatic(nodeinfo->model, host.machine) == NULL)
        return -1;

#ifdef __linux__
    fp = fopen(CPUINFO_PATH, "r");
    if (fp == NULL) {
        virReportSystemError(errno,
                             _("cannot open %s"), CPUINFO_PATH);
        return -1;
    }

    rc = linuxNodeInfoCPUPopulate(fp, nodeinfo, true);
    VIR_FORCE_FCLOSE(fp);
    if (rc < 0)
        return -1;

    /* Convert to KB. */
    nodeinfo->memory = physmem_total () / 1024;

    return rc;
#else
    /* XXX Solaris will need an impl later if they port QEMU driver */
    nodeReportError(VIR_ERR_NO_SUPPORT, "%s",
                    _("node info not implemented on this platform"));
    return -1;
#endif
}
393 394 395 396 397 398 399 400 401 402 403 404 405

#if HAVE_NUMACTL
/* Number of cpus the cpu masks used below are sized for: a fixed 4096
 * when building against libnuma API version 1 or older, otherwise the
 * size reported by libnuma's numa_all_cpus_ptr. */
# if LIBNUMA_API_VERSION <= 1
#  define NUMA_MAX_N_CPUS 4096
# else
#  define NUMA_MAX_N_CPUS (numa_all_cpus_ptr->size)
# endif

/* Number of bits in VAR, assuming 8-bit bytes. */
# define n_bits(var) (8 * sizeof(var))
/* Evaluate to 1 if bit number CPU is set in MASK, else 0.  MASK is
 * treated as an array of equally sized words: the word is selected by
 * cpu / bits-per-word and the bit within it by cpu % bits-per-word. */
# define MASK_CPU_ISSET(mask, cpu) \
  (((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)

int
406
nodeCapsInitNUMA(virCapsPtr caps)
407 408
{
    int n;
409
    unsigned long *mask = NULL;
410
    unsigned long *allonesmask = NULL;
411 412 413 414 415 416 417 418 419 420
    int *cpus = NULL;
    int ret = -1;
    int max_n_cpus = NUMA_MAX_N_CPUS;

    if (numa_available() < 0)
        return 0;

    int mask_n_bytes = max_n_cpus / 8;
    if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof *mask) < 0)
        goto cleanup;
421 422 423
    if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof *mask) < 0)
        goto cleanup;
    memset(allonesmask, 0xff, mask_n_bytes);
424 425 426 427

    for (n = 0 ; n <= numa_max_node() ; n++) {
        int i;
        int ncpus;
428
        /* The first time this returns -1, ENOENT if node doesn't exist... */
429 430
        if (numa_node_to_cpus(n, mask, mask_n_bytes) < 0) {
            VIR_WARN("NUMA topology for cell %d of %d not available, ignoring",
431 432 433 434 435 436 437
                     n, numa_max_node()+1);
            continue;
        }
        /* second, third... times it returns an all-1's mask */
        if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
            VIR_DEBUG("NUMA topology for cell %d of %d is all ones, ignoring",
                      n, numa_max_node()+1);
438 439
            continue;
        }
440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465

        for (ncpus = 0, i = 0 ; i < max_n_cpus ; i++)
            if (MASK_CPU_ISSET(mask, i))
                ncpus++;

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;

        for (ncpus = 0, i = 0 ; i < max_n_cpus ; i++)
            if (MASK_CPU_ISSET(mask, i))
                cpus[ncpus++] = i;

        if (virCapabilitiesAddHostNUMACell(caps,
                                           n,
                                           ncpus,
                                           cpus) < 0)
            goto cleanup;

        VIR_FREE(cpus);
    }

    ret = 0;

cleanup:
    VIR_FREE(cpus);
    VIR_FREE(mask);
466
    VIR_FREE(allonesmask);
467 468
    return ret;
}
469 470 471


int
472
nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
473 474 475 476 477 478 479 480 481
                       unsigned long long *freeMems,
                       int startCell,
                       int maxCells)
{
    int n, lastCell, numCells;
    int ret = -1;
    int maxCell;

    if (numa_available() < 0) {
482
        nodeReportError(VIR_ERR_NO_SUPPORT,
483 484 485 486 487
                        "%s", _("NUMA not supported on this host"));
        goto cleanup;
    }
    maxCell = numa_max_node();
    if (startCell > maxCell) {
488
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
489 490 491 492 493 494 495 496 497 498 499
                        _("start cell %d out of range (0-%d)"),
                        startCell, maxCell);
        goto cleanup;
    }
    lastCell = startCell + maxCells - 1;
    if (lastCell > maxCell)
        lastCell = maxCell;

    for (numCells = 0, n = startCell ; n <= lastCell ; n++) {
        long long mem;
        if (numa_node_size64(n, &mem) < 0) {
500
            nodeReportError(VIR_ERR_INTERNAL_ERROR,
501 502
                           _("Failed to query NUMA free memory for node: %d"),
                           n);
503 504 505 506 507 508 509 510 511 512 513
            goto cleanup;
        }
        freeMems[numCells++] = mem;
    }
    ret = numCells;

cleanup:
    return ret;
}

unsigned long long
514
nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
515 516 517 518 519
{
    unsigned long long freeMem = 0;
    int n;

    if (numa_available() < 0) {
520
        nodeReportError(VIR_ERR_NO_SUPPORT,
521 522 523 524 525 526 527
                        "%s", _("NUMA not supported on this host"));
        goto cleanup;
    }

    for (n = 0 ; n <= numa_max_node() ; n++) {
        long long mem;
        if (numa_node_size64(n, &mem) < 0) {
528
            nodeReportError(VIR_ERR_INTERNAL_ERROR,
529 530 531 532 533 534 535 536 537 538
                            "%s", _("Failed to query NUMA free memory"));
            goto cleanup;
        }
        freeMem += mem;
    }

cleanup:
    return freeMem;
}

539
#else
540 541 542 543
/* Build without libnuma: there is no topology to add, so CAPS is left
 * untouched and success is returned. */
int
nodeCapsInitNUMA(virCapsPtr caps ATTRIBUTE_UNUSED)
{
    return 0;
}

544
/* Build without libnuma: per-cell free memory cannot be queried, so
 * every call reports VIR_ERR_NO_SUPPORT and returns -1. */
int
nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
                       unsigned long long *freeMems ATTRIBUTE_UNUSED,
                       int startCell ATTRIBUTE_UNUSED,
                       int maxCells ATTRIBUTE_UNUSED)
{
    nodeReportError(VIR_ERR_NO_SUPPORT, "%s",
                    _("NUMA memory information not available on this platform"));
    return -1;
}

554
/* Build without libnuma: total free memory cannot be queried, so every
 * call reports VIR_ERR_NO_SUPPORT and returns 0. */
unsigned long long
nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
{
    nodeReportError(VIR_ERR_NO_SUPPORT, "%s",
                    _("NUMA memory information not available on this platform"));
    return 0;
}
560
#endif