nodeinfo.c 46.1 KB
Newer Older
1 2 3
/*
 * nodeinfo.c: Helper routines for OS specific node information
 *
 * Copyright (C) 2006-2008, 2010-2013 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

24
#include <config.h>
J
Jim Meyering 已提交
25

26 27 28
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
29
#include <stdint.h>
30
#include <errno.h>
31
#include <dirent.h>
E
Eric Blake 已提交
32
#include <sys/utsname.h>
33
#include <sched.h>
34
#include "conf/domain_conf.h"
35

36
#if defined(__FreeBSD__) || defined(__APPLE__)
R
Roman Bogorodskiy 已提交
37 38 39 40
# include <sys/types.h>
# include <sys/sysctl.h>
#endif

41
#include "c-ctype.h"
42
#include "viralloc.h"
43
#include "nodeinfo.h"
44
#include "physmem.h"
45
#include "virlog.h"
46
#include "virerror.h"
47
#include "count-one-bits.h"
E
Eric Blake 已提交
48
#include "intprops.h"
49
#include "virarch.h"
E
Eric Blake 已提交
50
#include "virfile.h"
51
#include "virtypedparam.h"
52
#include "virstring.h"
53
#include "virnuma.h"
54 55 56

#define VIR_FROM_THIS VIR_FROM_NONE

57
#if defined(__FreeBSD__) || defined(__APPLE__)
R
Roman Bogorodskiy 已提交
58
static int
59
appleFreebsdNodeGetCPUCount(void)
R
Roman Bogorodskiy 已提交
60 61 62 63 64 65 66 67 68 69 70 71
{
    int ncpu_mib[2] = { CTL_HW, HW_NCPU };
    unsigned long ncpu;
    size_t ncpu_len = sizeof(ncpu);

    if (sysctl(ncpu_mib, 2, &ncpu, &ncpu_len, NULL, 0) == -1) {
        virReportSystemError(errno, "%s", _("Cannot obtain CPU count"));
        return -1;
    }

    return ncpu;
}
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98

/* VIR_HW_PHYSMEM - the resulting value of HW_PHYSMEM of FreeBSD
 * is 64 bits while that of Mac OS X is still 32 bits.
 * Mac OS X provides HW_MEMSIZE for 64 bits version of HW_PHYSMEM
 * since 10.6.8 (Snow Leopard) at least.
 */
# ifdef HW_MEMSIZE
#  define VIR_HW_PHYSMEM HW_MEMSIZE
# else
#  define VIR_HW_PHYSMEM HW_PHYSMEM
# endif
/**
 * appleFreebsdNodeGetMemorySize:
 * @memory: on success, receives the CTL_HW / VIR_HW_PHYSMEM sysctl
 *          value divided by 1024
 *
 * Returns 0 on success, or -1 after reporting a system error if the
 * sysctl call fails (in which case @memory is left untouched).
 */
static int
appleFreebsdNodeGetMemorySize(unsigned long *memory)
{
    int name[] = { CTL_HW, VIR_HW_PHYSMEM };
    unsigned long total;
    size_t total_len = sizeof(total);

    if (sysctl(name, 2, &total, &total_len, NULL, 0) != -1) {
        *memory = (unsigned long)(total / 1024);
        return 0;
    }

    virReportSystemError(errno, "%s", _("cannot obtain memory size"));
    return -1;
}
R
Roman Bogorodskiy 已提交
99 100
#endif

101
#ifdef __linux__
102
# define CPUINFO_PATH "/proc/cpuinfo"
103
# define SYSFS_SYSTEM_PATH "/sys/devices/system"
104
# define SYSFS_CPU_PATH SYSFS_SYSTEM_PATH"/cpu"
105
# define PROCSTAT_PATH "/proc/stat"
106
# define MEMINFO_PATH "/proc/meminfo"
107
# define SYSFS_MEMORY_SHARED_PATH "/sys/kernel/mm/ksm"
108
# define SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX 1024
109 110

# define LINUX_NB_CPU_STATS 4
111 112
# define LINUX_NB_MEMORY_STATS_ALL 4
# define LINUX_NB_MEMORY_STATS_CELL 2
113

114
/* NB, this is not static as we need to call it from the testsuite */
115
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
116
                             const char *sysfs_dir,
117
                             virNodeInfoPtr nodeinfo);
118

E
Eric Blake 已提交
119
/* Return the positive decimal contents of the given
120 121 122 123 124
 * DIR/cpu%u/FILE, or -1 on error.  If DEFAULT_VALUE is non-negative
 * and the file could not be found, return that instead of an error;
 * this is useful for machines that cannot hot-unplug cpu0, or where
 * hot-unplugging is disabled, or where the kernel is too old
 * to support NUMA cells, etc.  */
E
Eric Blake 已提交
125
static int
E
Eric Blake 已提交
126
virNodeGetCpuValue(const char *dir, unsigned int cpu, const char *file,
127
                   int default_value)
E
Eric Blake 已提交
128 129 130 131 132 133 134
{
    char *path;
    FILE *pathfp;
    int value = -1;
    char value_str[INT_BUFSIZE_BOUND(value)];
    char *tmp;

135
    if (virAsprintf(&path, "%s/cpu%u/%s", dir, cpu, file) < 0)
E
Eric Blake 已提交
136 137 138 139
        return -1;

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
140 141
        if (default_value >= 0 && errno == ENOENT)
            value = default_value;
E
Eric Blake 已提交
142 143 144 145 146 147 148 149 150 151
        else
            virReportSystemError(errno, _("cannot open %s"), path);
        goto cleanup;
    }

    if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }
    if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
152 153 154
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("could not convert '%s' to an integer"),
                       value_str);
E
Eric Blake 已提交
155 156 157 158
        goto cleanup;
    }

cleanup:
159
    VIR_FORCE_FCLOSE(pathfp);
E
Eric Blake 已提交
160 161 162 163 164
    VIR_FREE(path);

    return value;
}

E
Eric Blake 已提交
165 166
/* Count the bits set in the hexadecimal mask stored in
 * DIR/cpu<CPU>/topology/thread_siblings.
 *
 * Returns the number of set bits, 1 if the file does not exist (the
 * CPU is then treated as its own only sibling), or 0 on error. */
static unsigned long
virNodeCountThreadSiblings(const char *dir, unsigned int cpu)
{
    unsigned long nsiblings = 0;
    char *maskfile;
    FILE *fp;
    char mask[1024];
    const char *p;

    if (virAsprintf(&maskfile, "%s/cpu%u/topology/thread_siblings",
                    dir, cpu) < 0)
        return 0;

    if (!(fp = fopen(maskfile, "r"))) {
        /* If file doesn't exist, then pretend our only
         * sibling is ourself */
        if (errno == ENOENT)
            nsiblings = 1;
        else
            virReportSystemError(errno, _("cannot open %s"), maskfile);
        goto cleanup;
    }

    if (!fgets(mask, sizeof(mask), fp)) {
        virReportSystemError(errno, _("cannot read from %s"), maskfile);
        goto cleanup;
    }

    /* Every hex digit contributes the population count of its nibble;
     * any other character (separators, newline) is skipped. */
    for (p = mask; *p != '\0'; p++) {
        unsigned int nibble;

        if (c_isdigit(*p))
            nibble = *p - '0';
        else if (*p >= 'A' && *p <= 'F')
            nibble = *p - 'A' + 10;
        else if (*p >= 'a' && *p <= 'f')
            nibble = *p - 'a' + 10;
        else
            continue;

        nsiblings += count_one_bits(nibble);
    }

cleanup:
    VIR_FORCE_FCLOSE(fp);
    VIR_FREE(maskfile);

    return nsiblings;
}

E
Eric Blake 已提交
214 215
/* Return the value of DIR/cpu<CPU>/topology/physical_package_id,
 * using 0 when that file does not exist.  On the architectures listed
 * below a negative value is mapped to 0 as well. */
static int
virNodeParseSocket(const char *dir, unsigned int cpu)
{
    int socket_id;

    socket_id = virNodeGetCpuValue(dir, cpu,
                                   "topology/physical_package_id", 0);

# if defined(__powerpc__) || defined(__powerpc64__) || \
     defined(__s390__) || defined(__s390x__) || \
     defined(__aarch64__)
    /* ppc and s390(x) has -1 */
    if (socket_id < 0)
        return 0;
# endif

    return socket_id;
}

231 232 233 234
# ifndef CPU_COUNT
static int
CPU_COUNT(cpu_set_t *set)
{
235
    size_t i, count = 0;
236 237 238 239 240 241 242 243

    for (i = 0; i < CPU_SETSIZE; i++)
        if (CPU_ISSET(i, set))
            count++;
    return count;
}
# endif /* !CPU_COUNT */

244 245
/* parses a node entry, returning number of processors in the node and
 * filling arguments */
246
static int
247 248
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4)
249 250 251 252 253 254
ATTRIBUTE_NONNULL(5)
virNodeParseNode(const char *node,
                 int *sockets,
                 int *cores,
                 int *threads,
                 int *offline)
255 256
{
    int ret = -1;
257 258 259 260 261 262 263 264
    int processors = 0;
    DIR *cpudir = NULL;
    struct dirent *cpudirent = NULL;
    int sock_max = 0;
    cpu_set_t sock_map;
    int sock;
    cpu_set_t *core_maps = NULL;
    int core;
265
    size_t i;
266 267 268
    int siblings;
    unsigned int cpu;
    int online;
269

270 271 272 273 274 275
    *threads = 0;
    *cores = 0;
    *sockets = 0;

    if (!(cpudir = opendir(node))) {
        virReportSystemError(errno, _("cannot opendir %s"), node);
276 277
        goto cleanup;
    }
278 279 280 281 282 283 284 285

    /* enumerate sockets in the node */
    CPU_ZERO(&sock_map);
    errno = 0;
    while ((cpudirent = readdir(cpudir))) {
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
            continue;

286
        if ((online = virNodeGetCpuValue(node, cpu, "online", 1)) < 0)
287 288 289 290 291
            goto cleanup;

        if (!online)
            continue;

292
        /* Parse socket */
293 294
        if ((sock = virNodeParseSocket(node, cpu)) < 0)
            goto cleanup;
295 296 297 298 299 300 301 302 303 304
        CPU_SET(sock, &sock_map);

        if (sock > sock_max)
            sock_max = sock;

        errno = 0;
    }

    if (errno) {
        virReportSystemError(errno, _("problem reading %s"), node);
305 306
        goto cleanup;
    }
307 308 309 310

    sock_max++;

    /* allocate cpu maps for each socket */
311
    if (VIR_ALLOC_N(core_maps, sock_max) < 0)
312
        goto cleanup;
313 314 315 316 317 318 319 320 321 322 323

    for (i = 0; i < sock_max; i++)
        CPU_ZERO(&core_maps[i]);

    /* iterate over all CPU's in the node */
    rewinddir(cpudir);
    errno = 0;
    while ((cpudirent = readdir(cpudir))) {
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
            continue;

324
        if ((online = virNodeGetCpuValue(node, cpu, "online", 1)) < 0)
325 326
            goto cleanup;

327 328
        if (!online) {
            (*offline)++;
329
            continue;
330
        }
331 332 333 334

        processors++;

        /* Parse socket */
335 336
        if ((sock = virNodeParseSocket(node, cpu)) < 0)
            goto cleanup;
337
        if (!CPU_ISSET(sock, &sock_map)) {
338 339
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("CPU socket topology has changed"));
340 341 342 343 344 345 346 347 348
            goto cleanup;
        }

        /* Parse core */
# if defined(__s390__) || \
    defined(__s390x__)
        /* logical cpu is equivalent to a core on s390 */
        core = cpu;
# else
349
        core = virNodeGetCpuValue(node, cpu, "topology/core_id", 0);
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
# endif

        CPU_SET(core, &core_maps[sock]);

        if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
            goto cleanup;

        if (siblings > *threads)
            *threads = siblings;

        errno = 0;
    }

    if (errno) {
        virReportSystemError(errno, _("problem reading %s"), node);
365 366
        goto cleanup;
    }
367 368 369 370 371 372 373 374 375 376 377 378 379 380

    /* finalize the returned data */
    *sockets = CPU_COUNT(&sock_map);

    for (i = 0; i < sock_max; i++) {
        if (!CPU_ISSET(i, &sock_map))
            continue;

        core = CPU_COUNT(&core_maps[i]);
        if (core > *cores)
            *cores = core;
    }

    ret = processors;
381 382

cleanup:
383 384 385 386 387 388 389
    /* don't shadow a more serious error */
    if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
        virReportSystemError(errno, _("problem closing %s"), node);
        ret = -1;
    }
    VIR_FREE(core_maps);

390 391 392
    return ret;
}

393
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
394
                             const char *sysfs_dir,
395
                             virNodeInfoPtr nodeinfo)
396
{
397
    char line[1024];
398 399
    DIR *nodedir = NULL;
    struct dirent *nodedirent = NULL;
400
    int cpus, cores, socks, threads, offline = 0;
401
    unsigned int node;
402
    int ret = -1;
403
    char *sysfs_nodedir = NULL;
404
    char *sysfs_cpudir = NULL;
405

406
    /* Start with parsing CPU clock speed from /proc/cpuinfo */
407
    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
E
Eric Blake 已提交
408
# if defined(__x86_64__) || \
409 410
    defined(__amd64__)  || \
    defined(__i386__)
E
Eric Blake 已提交
411
        char *buf = line;
412
        if (STRPREFIX(buf, "cpu MHz")) {
413 414
            char *p;
            unsigned int ui;
415

416
            buf += 7;
417
            while (*buf && c_isspace(*buf))
418
                buf++;
419

420
            if (*buf != ':' || !buf[1]) {
421 422
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("parsing cpu MHz from cpuinfo"));
423
                goto cleanup;
424
            }
425

426
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
427
                /* Accept trailing fractional part.  */
428
                (*p == '\0' || *p == '.' || c_isspace(*p)))
429
                nodeinfo->mhz = ui;
430
        }
431

E
Eric Blake 已提交
432
# elif defined(__powerpc__) || \
433
      defined(__powerpc64__)
E
Eric Blake 已提交
434
        char *buf = line;
435
        if (STRPREFIX(buf, "clock")) {
436 437
            char *p;
            unsigned int ui;
438

439 440 441
            buf += 5;
            while (*buf && c_isspace(*buf))
                buf++;
442

443
            if (*buf != ':' || !buf[1]) {
444 445
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("parsing cpu MHz from cpuinfo"));
446
                goto cleanup;
447
            }
448

449
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
450
                /* Accept trailing fractional part.  */
451
                (*p == '\0' || *p == '.' || c_isspace(*p)))
452
                nodeinfo->mhz = ui;
453 454 455 456 457
            /* No other interesting infos are available in /proc/cpuinfo.
             * However, there is a line identifying processor's version,
             * identification and machine, but we don't want it to be caught
             * and parsed in next iteration, because it is not in expected
             * format and thus lead to error. */
458
        }
459
# elif defined(__arm__) || defined(__aarch64__)
460 461 462 463 464 465 466 467 468 469
        char *buf = line;
        if (STRPREFIX(buf, "BogoMIPS")) {
            char *p;
            unsigned int ui;

            buf += 8;
            while (*buf && c_isspace(*buf))
                buf++;

            if (*buf != ':' || !buf[1]) {
470 471
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               "%s", _("parsing cpu MHz from cpuinfo"));
472 473 474 475 476 477 478 479
                goto cleanup;
            }

            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
                /* Accept trailing fractional part.  */
                && (*p == '\0' || *p == '.' || c_isspace(*p)))
                nodeinfo->mhz = ui;
        }
480 481 482 483 484
# elif defined(__s390__) || \
      defined(__s390x__)
        /* s390x has no realistic value for CPU speed,
         * assign a value of zero to signify this */
        nodeinfo->mhz = 0;
E
Eric Blake 已提交
485 486 487
# else
#  warning Parser for /proc/cpuinfo needs to be adapted for your architecture
# endif
488 489
    }

490 491
    /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
     * core, node, socket, thread and topology information from /sys
492
     */
493
    if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0)
494
        goto cleanup;
495

496 497 498 499
    if (!(nodedir = opendir(sysfs_nodedir))) {
        /* the host isn't probably running a NUMA architecture */
        goto fallback;
    }
500

501 502 503
    errno = 0;
    while ((nodedirent = readdir(nodedir))) {
        if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
504 505
            continue;

506 507 508
        nodeinfo->nodes++;

        if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
509
                        sysfs_dir, nodedirent->d_name) < 0)
510
            goto cleanup;
E
Eric Blake 已提交
511

512 513
        if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores,
                                     &threads, &offline)) < 0)
514
            goto cleanup;
515

516
        VIR_FREE(sysfs_cpudir);
517

518 519 520 521 522 523 524 525 526 527 528 529
        nodeinfo->cpus += cpus;

        if (socks > nodeinfo->sockets)
            nodeinfo->sockets = socks;

        if (cores > nodeinfo->cores)
            nodeinfo->cores = cores;

        if (threads > nodeinfo->threads)
            nodeinfo->threads = threads;

        errno = 0;
530
    }
531

E
Eric Blake 已提交
532
    if (errno) {
533
        virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir);
534 535
        goto cleanup;
    }
536 537 538 539 540 541 542

    if (nodeinfo->cpus && nodeinfo->nodes)
        goto done;

fallback:
    VIR_FREE(sysfs_cpudir);

543
    if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0)
544
        goto cleanup;
545

546 547
    if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores,
                                 &threads, &offline)) < 0)
548
        goto cleanup;
549

550 551 552 553 554 555 556
    nodeinfo->nodes = 1;
    nodeinfo->cpus = cpus;
    nodeinfo->sockets = socks;
    nodeinfo->cores = cores;
    nodeinfo->threads = threads;

done:
557
    /* There should always be at least one cpu, socket, node, and thread. */
558
    if (nodeinfo->cpus == 0) {
559
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
560
        goto cleanup;
561
    }
562

C
Chris Lalancette 已提交
563
    if (nodeinfo->sockets == 0) {
564
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
565
        goto cleanup;
C
Chris Lalancette 已提交
566
    }
567

C
Chris Lalancette 已提交
568
    if (nodeinfo->threads == 0) {
569
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
570
        goto cleanup;
C
Chris Lalancette 已提交
571 572
    }

573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
    /* Now check if the topology makes sense. There are machines that don't
     * expose their real number of nodes or for example the AMD Bulldozer
     * architecture that exposes their Clustered integer core modules as both
     * threads and cores. This approach throws off our detection. Unfortunately
     * the nodeinfo structure isn't designed to carry the full topology so
     * we're going to lie about the detected topology to notify the user
     * to check the host capabilities for the actual topology. */
    if ((nodeinfo->nodes *
         nodeinfo->sockets *
         nodeinfo->cores *
         nodeinfo->threads) != (nodeinfo->cpus + offline)) {
        nodeinfo->nodes = 1;
        nodeinfo->sockets = 1;
        nodeinfo->cores = nodeinfo->cpus + offline;
        nodeinfo->threads = 1;
    }

590 591 592
    ret = 0;

cleanup:
593 594 595 596 597 598 599
    /* don't shadow a more serious error */
    if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
        virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
        ret = -1;
    }

    VIR_FREE(sysfs_nodedir);
600 601
    VIR_FREE(sysfs_cpudir);
    return ret;
602 603
}

604 605
# define TICK_TO_NSEC (1000ull * 1000ull * 1000ull / sysconf(_SC_CLK_TCK))

606 607 608 609 610
/**
 * linuxNodeGetCPUStats:
 * @procstat: open stream with /proc/stat formatted content
 * @cpuNum: number of the CPU to report, or VIR_NODE_CPU_STATS_ALL_CPUS
 *          for the aggregate "cpu" line
 * @params: array of at least *@nparams entries to fill in
 * @nparams: in/out; when 0 on entry it is set to the number of
 *           supported statistics and nothing else is done, otherwise
 *           it must equal LINUX_NB_CPU_STATS
 *
 * Fills @params with the kernel, user, idle and iowait times of the
 * requested CPU, converted from clock ticks with TICK_TO_NSEC.
 *
 * Returns 0 on success, -1 on error (including when no line for
 * @cpuNum is found in @procstat).
 */
static int
linuxNodeGetCPUStats(FILE *procstat,
                     int cpuNum,
                     virNodeCPUStatsPtr params,
                     int *nparams)
{
    int ret = -1;
    char line[1024];
    /* Older kernels print fewer columns than sscanf() is asked for
     * below, so every counter starts out as zero rather than being
     * left uninitialized. */
    unsigned long long usr = 0, ni = 0, sys = 0, idle = 0, iowait = 0;
    unsigned long long irq = 0, softirq = 0, steal = 0;
    unsigned long long guest = 0, guest_nice = 0;
    /* "cpu" + number + ' ' (the NUL is part of INT_BUFSIZE_BOUND) */
    char cpu_header[4 + INT_BUFSIZE_BOUND(cpuNum)];

    if ((*nparams) == 0) {
        /* Current number of cpu stats supported by linux */
        *nparams = LINUX_NB_CPU_STATS;
        ret = 0;
        goto cleanup;
    }

    if ((*nparams) != LINUX_NB_CPU_STATS) {
        virReportInvalidArg(*nparams,
                            _("nparams in %s must be equal to %d"),
                            __FUNCTION__, LINUX_NB_CPU_STATS);
        goto cleanup;
    }

    /* The trailing space is part of the key: without it "cpu1" would
     * also be a prefix of "cpu10", and the wrong line would be used
     * whenever the line for cpu1 itself is missing. */
    if (cpuNum == VIR_NODE_CPU_STATS_ALL_CPUS) {
        strcpy(cpu_header, "cpu ");
    } else {
        snprintf(cpu_header, sizeof(cpu_header), "cpu%d ", cpuNum);
    }

    while (fgets(line, sizeof(line), procstat) != NULL) {
        char *buf = line;

        if (STRPREFIX(buf, cpu_header)) { /* aka logical CPU time */
            size_t i;

            if (sscanf(buf,
                       "%*s %llu %llu %llu %llu %llu" /* user ~ iowait */
                       "%llu %llu %llu %llu %llu",    /* irq  ~ guest_nice */
                       &usr, &ni, &sys, &idle, &iowait,
                       &irq, &softirq, &steal, &guest, &guest_nice) < 4) {
                continue;
            }

            for (i = 0; i < *nparams; i++) {
                virNodeCPUStatsPtr param = &params[i];

                switch (i) {
                case 0: /* fill kernel cpu time here */
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_KERNEL) == NULL) {
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
                        goto cleanup;
                    }
                    param->value = (sys + irq + softirq) * TICK_TO_NSEC;
                    break;

                case 1: /* fill user cpu time here */
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_USER) == NULL) {
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
                        goto cleanup;
                    }
                    param->value = (usr + ni) * TICK_TO_NSEC;
                    break;

                case 2: /* fill idle cpu time here */
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_IDLE) == NULL) {
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
                        goto cleanup;
                    }
                    param->value = idle * TICK_TO_NSEC;
                    break;

                case 3: /* fill iowait cpu time here */
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_IOWAIT) == NULL) {
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
                        goto cleanup;
                    }
                    param->value = iowait * TICK_TO_NSEC;
                    break;

                default:
                    break;
                    /* should not hit here */
                }
            }
            ret = 0;
            goto cleanup;
        }
    }

    /* no line for the requested CPU was found */
    virReportInvalidArg(cpuNum,
                        _("Invalid cpuNum in %s"),
                        __FUNCTION__);

cleanup:
    return ret;
}

710 711 712 713 714
/**
 * linuxNodeGetMemoryStats:
 * @meminfo: open stream with meminfo formatted content, either the
 *           system-wide file or a per-node one whose lines carry a
 *           "Node N " prefix
 * @cellNum: VIR_NODE_MEMORY_STATS_ALL_CELLS or a cell number; it only
 *           selects how many statistics are expected
 * @params: array of at least *@nparams entries to fill in
 * @nparams: in/out; when 0 on entry it is set to the number of
 *           supported statistics and nothing else is done, otherwise
 *           it must equal that number
 *
 * Copies the recognized fields (MemTotal, MemFree, Buffers, Cached)
 * into @params in the order in which they appear in @meminfo, stopping
 * once the expected number has been found.
 *
 * Returns 0 if at least one field was found, -1 on error.
 */
static int
linuxNodeGetMemoryStats(FILE *meminfo,
                        int cellNum,
                        virNodeMemoryStatsPtr params,
                        int *nparams)
{
    int ret = -1;
    size_t i = 0, j = 0, k = 0;
    int found = 0;
    int nr_param;
    char line[1024];
    /* The "%s" conversion below carries no field width, so the
     * destination has to be able to hold the longest token a line can
     * contain, i.e. it must be as large as the line buffer itself. */
    char meminfo_hdr[sizeof(line)];
    unsigned long val;
    struct field_conv {
        const char *meminfo_hdr;  /* meminfo header */
        const char *field;        /* MemoryStats field name */
    } field_conv[] = {
        {"MemTotal:", VIR_NODE_MEMORY_STATS_TOTAL},
        {"MemFree:",  VIR_NODE_MEMORY_STATS_FREE},
        {"Buffers:",  VIR_NODE_MEMORY_STATS_BUFFERS},
        {"Cached:",   VIR_NODE_MEMORY_STATS_CACHED},
        {NULL,        NULL}
    };

    if (cellNum == VIR_NODE_MEMORY_STATS_ALL_CELLS) {
        nr_param = LINUX_NB_MEMORY_STATS_ALL;
    } else {
        nr_param = LINUX_NB_MEMORY_STATS_CELL;
    }

    if ((*nparams) == 0) {
        /* Current number of memory stats supported by linux */
        *nparams = nr_param;
        ret = 0;
        goto cleanup;
    }

    if ((*nparams) != nr_param) {
        virReportInvalidArg(nparams,
                            _("nparams in %s must be %d"),
                            __FUNCTION__, nr_param);
        goto cleanup;
    }

    while (fgets(line, sizeof(line), meminfo) != NULL) {
        char *buf = line;

        if (STRPREFIX(buf, "Node ")) {
            /*
             * /sys/devices/system/node/nodeX/meminfo format is below.
             * So, skip prefix "Node XX ".
             *
             * Node 0 MemTotal:        8386980 kB
             * Node 0 MemFree:         5300920 kB
             *         :
             */
            char *p;

            p = buf;
            for (i = 0; i < 2; i++) {
                p = strchr(p, ' ');
                if (p == NULL) {
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   "%s", _("no prefix found"));
                    goto cleanup;
                }
                p++;
            }
            buf = p;
        }

        if (sscanf(buf, "%s %lu kB", meminfo_hdr, &val) < 2)
            continue;

        for (j = 0; field_conv[j].meminfo_hdr != NULL; j++) {
            struct field_conv *convp = &field_conv[j];

            if (STREQ(meminfo_hdr, convp->meminfo_hdr)) {
                /* k never exceeds nr_param: the outer loop stops as
                 * soon as that many fields have been stored */
                virNodeMemoryStatsPtr param = &params[k++];

                if (virStrcpyStatic(param->field, convp->field) == NULL) {
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   "%s", _("Field kernel memory too long for destination"));
                    goto cleanup;
                }
                param->value = val;
                found++;
                break;
            }
        }
        if (found >= nr_param)
            break;
    }

    if (found == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("no available memory line found"));
        goto cleanup;
    }

    ret = 0;

cleanup:
    return ret;
}
815

816 817 818 819 820 821 822 823 824

/* Determine the maximum cpu id from a Linux sysfs cpu/present file. */
static int
linuxParseCPUmax(const char *path)
{
    char *str = NULL;
    char *tmp;
    int ret = -1;

E
Eric Blake 已提交
825
    if (virFileReadAll(path, 5 * VIR_DOMAIN_CPUMASK_LEN, &str) < 0)
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
        goto cleanup;

    tmp = str;
    do {
        if (virStrToLong_i(tmp, &tmp, 10, &ret) < 0 ||
            !strchr(",-\n", *tmp)) {
            virReportError(VIR_ERR_NO_SUPPORT,
                           _("failed to parse %s"), path);
            ret = -1;
            goto cleanup;
        }
    } while (*tmp++ != '\n');
    ret++;

cleanup:
    VIR_FREE(str);
    return ret;
}

845
/*
846 847 848
 * Linux maintains cpu bit map under cpu/online. For example, if
 * cpuid=5's flag is not set and max cpu is 7, the map file shows
 * 0-4,6-7. This function parses it and returns cpumap.
849
 */
H
Hu Tao 已提交
850
static virBitmapPtr
851
linuxParseCPUmap(int max_cpuid, const char *path)
852
{
H
Hu Tao 已提交
853
    virBitmapPtr map = NULL;
854 855
    char *str = NULL;

856
    if (virFileReadAll(path, 5 * VIR_DOMAIN_CPUMASK_LEN, &str) < 0)
857 858
        goto error;

859
    if (virBitmapParse(str, 0, &map, max_cpuid) < 0)
860 861
        goto error;

H
Hu Tao 已提交
862
    VIR_FREE(str);
863 864 865 866
    return map;

error:
    VIR_FREE(str);
H
Hu Tao 已提交
867
    virBitmapFree(map);
868 869
    return NULL;
}
E
Eric Blake 已提交
870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893


/* Read DIR/cpu<CPU_ID>/topology/thread_siblings_list and parse it into
 * a bitmap sized by virNumaGetMaxCPUs().
 *
 * Returns whatever virBitmapParse() stored in the result, or NULL if
 * the path could not be built or the file could not be read. */
static virBitmapPtr
virNodeGetSiblingsList(const char *dir, int cpu_id)
{
    virBitmapPtr siblings = NULL;
    char *listfile = NULL;
    char *content = NULL;

    /* The steps depend on each other; the first one that fails ends
     * the chain and everything acquired so far is released below. */
    if (virAsprintf(&listfile, "%s/cpu%u/topology/thread_siblings_list",
                    dir, cpu_id) < 0 ||
        virFileReadAll(listfile, SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX,
                       &content) < 0 ||
        virBitmapParse(content, 0, &siblings, virNumaGetMaxCPUs()) < 0)
        goto cleanup;

cleanup:
    VIR_FREE(content);
    VIR_FREE(listfile);
    return siblings;
}
894 895
#endif

896
/**
 * nodeGetInfo:
 * @nodeinfo: structure to fill in; it is zeroed first
 *
 * Stores the host architecture name as the model and then gathers CPU
 * topology, clock speed and memory size with the platform specific
 * code: /proc/cpuinfo plus sysfs on Linux, sysctl on FreeBSD and
 * Mac OS X.  Other platforms are reported as unsupported.
 *
 * Returns 0 on success, -1 on error.
 */
int nodeGetInfo(virNodeInfoPtr nodeinfo)
{
    virArch hostarch = virArchFromHost();

    memset(nodeinfo, 0, sizeof(*nodeinfo));

    if (!virStrcpyStatic(nodeinfo->model, virArchToString(hostarch)))
        return -1;

#ifdef __linux__
    {
    FILE *cpuinfo;
    int rc;

    if (!(cpuinfo = fopen(CPUINFO_PATH, "r"))) {
        virReportSystemError(errno,
                             _("cannot open %s"), CPUINFO_PATH);
        return -1;
    }

    rc = linuxNodeInfoCPUPopulate(cpuinfo, SYSFS_SYSTEM_PATH, nodeinfo);
    if (rc >= 0) {
        /* Convert to KB. */
        nodeinfo->memory = physmem_total() / 1024;
    }

    VIR_FORCE_FCLOSE(cpuinfo);
    return rc;
    }
#elif defined(__FreeBSD__) || defined(__APPLE__)
    {
    unsigned long freq;
    size_t freq_len = sizeof(freq);

    /* No topology is detected here: everything is reported as one
     * node with one socket whose cores are all the CPUs. */
    nodeinfo->nodes = 1;
    nodeinfo->sockets = 1;
    nodeinfo->threads = 1;

    nodeinfo->cpus = appleFreebsdNodeGetCPUCount();
    if (nodeinfo->cpus == -1)
        return -1;

    nodeinfo->cores = nodeinfo->cpus;

# ifdef __FreeBSD__
    if (sysctlbyname("dev.cpu.0.freq", &freq, &freq_len, NULL, 0) < 0) {
        virReportSystemError(errno, "%s", _("cannot obtain CPU freq"));
        return -1;
    }

    /* the value is stored as is */
    nodeinfo->mhz = freq;
# else
    if (sysctlbyname("hw.cpufrequency", &freq, &freq_len, NULL, 0) < 0) {
        virReportSystemError(errno, "%s", _("cannot obtain CPU freq"));
        return -1;
    }

    /* scaled down by 10^6 before being stored */
    nodeinfo->mhz = freq / 1000000;
# endif

    return appleFreebsdNodeGetMemorySize(&nodeinfo->memory) < 0 ? -1 : 0;
    }
#else
    /* XXX Solaris will need an impl later if they port QEMU driver */
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node info not implemented on this platform"));
    return -1;
#endif
}
969

970
/**
 * nodeGetCPUStats:
 * @cpuNum: CPU selector, forwarded unchanged to linuxNodeGetCPUStats()
 * @params: output array, forwarded unchanged to linuxNodeGetCPUStats()
 * @nparams: in/out element count, forwarded unchanged
 * @flags: checked with virCheckFlags(0, -1)
 *
 * On Linux, opens PROCSTAT_PATH and hands the stream to
 * linuxNodeGetCPUStats(); the stream is closed before returning.
 * On every other platform an unsupported error is reported.
 *
 * Returns whatever linuxNodeGetCPUStats() returns, or -1 if the file
 * cannot be opened or the platform is unsupported.
 */
int nodeGetCPUStats(int cpuNum ATTRIBUTE_UNUSED,
                    virNodeCPUStatsPtr params ATTRIBUTE_UNUSED,
                    int *nparams ATTRIBUTE_UNUSED,
                    unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
    {
        FILE *statfp;
        int rc;

        if (!(statfp = fopen(PROCSTAT_PATH, "r"))) {
            virReportSystemError(errno,
                                 _("cannot open %s"), PROCSTAT_PATH);
            return -1;
        }

        rc = linuxNodeGetCPUStats(statfp, cpuNum, params, nparams);
        VIR_FORCE_FCLOSE(statfp);

        return rc;
    }
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node CPU stats not implemented on this platform"));
    return -1;
#endif
}

998
/**
 * nodeGetMemoryStats:
 * @cellNum: VIR_NODE_MEMORY_STATS_ALL_CELLS, or a NUMA cell number
 * @params: output array, forwarded to linuxNodeGetMemoryStats()
 * @nparams: in/out element count, forwarded unchanged
 * @flags: checked with virCheckFlags(0, -1)
 *
 * On Linux, picks the meminfo file to read: MEMINFO_PATH for all
 * cells, otherwise "<SYSFS_SYSTEM_PATH>/node/node<cellNum>/meminfo"
 * after checking @cellNum against virNumaGetMaxNode(). The opened
 * stream is parsed by linuxNodeGetMemoryStats().
 *
 * Returns the parser's result, or -1 on any failure or on platforms
 * other than Linux.
 */
int nodeGetMemoryStats(int cellNum ATTRIBUTE_UNUSED,
                       virNodeMemoryStatsPtr params ATTRIBUTE_UNUSED,
                       int *nparams ATTRIBUTE_UNUSED,
                       unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
    {
        char *path = NULL;
        FILE *fp;
        int rc = -1;

        if (cellNum == VIR_NODE_MEMORY_STATS_ALL_CELLS) {
            if (VIR_STRDUP(path, MEMINFO_PATH) < 0)
                return -1;
        } else {
            int top = virNumaGetMaxNode();

            if (top < 0)
                return -1;

            if (cellNum > top) {
                virReportInvalidArg(cellNum,
                                    _("cellNum in %s must be less than or equal to %d"),
                                    __FUNCTION__, top);
                return -1;
            }

            if (virAsprintf(&path, "%s/node/node%d/meminfo",
                            SYSFS_SYSTEM_PATH, cellNum) < 0)
                return -1;
        }

        /* From here on the path string is owned and must be released. */
        if ((fp = fopen(path, "r"))) {
            rc = linuxNodeGetMemoryStats(fp, cellNum, params, nparams);
            VIR_FORCE_FCLOSE(fp);
        } else {
            virReportSystemError(errno,
                                 _("cannot open %s"), path);
        }

        VIR_FREE(path);
        return rc;
    }
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node memory stats not implemented on this platform"));
    return -1;
#endif
}

1051 1052 1053
int
nodeGetCPUCount(void)
{
R
Roman Bogorodskiy 已提交
1054
#if defined(__linux__)
1055 1056 1057 1058 1059 1060
    /* To support older kernels that lack cpu/present, such as 2.6.18
     * in RHEL5, we fall back to count cpu/cpuNN entries; this assumes
     * that such kernels also lack hotplug, and therefore cpu/cpuNN
     * will be consecutive.
     */
    char *cpupath = NULL;
1061
    int ncpu;
1062 1063

    if (virFileExists(SYSFS_SYSTEM_PATH "/cpu/present")) {
1064
        ncpu = linuxParseCPUmax(SYSFS_SYSTEM_PATH "/cpu/present");
1065
    } else if (virFileExists(SYSFS_SYSTEM_PATH "/cpu/cpu0")) {
1066
        ncpu = 0;
1067
        do {
1068
            ncpu++;
1069 1070
            VIR_FREE(cpupath);
            if (virAsprintf(&cpupath, "%s/cpu/cpu%d",
1071
                            SYSFS_SYSTEM_PATH, ncpu) < 0)
1072 1073 1074 1075 1076 1077 1078 1079 1080 1081
                return -1;
        } while (virFileExists(cpupath));
    } else {
        /* no cpu/cpu0: we give up */
        virReportError(VIR_ERR_NO_SUPPORT, "%s",
                       _("host cpu counting not supported on this node"));
        return -1;
    }

    VIR_FREE(cpupath);
1082
    return ncpu;
1083 1084
#elif defined(__FreeBSD__) || defined(__APPLE__)
    return appleFreebsdNodeGetCPUCount();
1085 1086 1087 1088 1089 1090 1091
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("host cpu counting not implemented on this platform"));
    return -1;
#endif
}

H
Hu Tao 已提交
1092
virBitmapPtr
1093
nodeGetCPUBitmap(int *max_id ATTRIBUTE_UNUSED)
1094 1095
{
#ifdef __linux__
H
Hu Tao 已提交
1096
    virBitmapPtr cpumap;
1097 1098
    int present;

E
Eric Blake 已提交
1099
    present = nodeGetCPUCount();
1100
    if (present < 0)
1101
        return NULL;
E
Eric Blake 已提交
1102 1103 1104 1105

    if (virFileExists(SYSFS_SYSTEM_PATH "/cpu/online")) {
        cpumap = linuxParseCPUmap(present, SYSFS_SYSTEM_PATH "/cpu/online");
    } else {
1106
        size_t i;
E
Eric Blake 已提交
1107 1108

        cpumap = virBitmapNew(present);
1109
        if (!cpumap)
E
Eric Blake 已提交
1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
            return NULL;
        for (i = 0; i < present; i++) {
            int online = virNodeGetCpuValue(SYSFS_SYSTEM_PATH, i, "online", 1);
            if (online < 0) {
                virBitmapFree(cpumap);
                return NULL;
            }
            if (online)
                ignore_value(virBitmapSetBit(cpumap, i));
        }
    }
1121 1122
    if (max_id && cpumap)
        *max_id = present;
1123 1124
    return cpumap;
#else
1125 1126
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node cpumap not implemented on this platform"));
1127 1128 1129 1130
    return NULL;
#endif
}

1131
#ifdef __linux__
1132
static int
1133
nodeSetMemoryParameterValue(virTypedParameterPtr param)
1134 1135 1136 1137 1138 1139
{
    char *path = NULL;
    char *strval = NULL;
    int ret = -1;
    int rc = -1;

1140
    char *field = strchr(param->field, '_');
1141
    sa_assert(field);
1142
    field++;
1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154
    if (virAsprintf(&path, "%s/%s",
                    SYSFS_MEMORY_SHARED_PATH, field) < 0) {
        ret = -2;
        goto cleanup;
    }

    if (virAsprintf(&strval, "%u", param->value.ui) == -1) {
        ret = -2;
        goto cleanup;
    }

    if ((rc = virFileWriteStr(path, strval, 0)) < 0) {
1155
        virReportSystemError(-rc, _("failed to set %s"), param->field);
1156 1157 1158 1159 1160 1161 1162 1163 1164
        goto cleanup;
    }

    ret = 0;
cleanup:
    VIR_FREE(path);
    VIR_FREE(strval);
    return ret;
}
1165 1166 1167 1168 1169 1170

/**
 * nodeMemoryParametersIsAllSupported:
 * @params: parameters to check
 * @nparams: number of entries in @params
 *
 * For each parameter, the text after the first underscore of its
 * field name is looked up as a file under SYSFS_MEMORY_SHARED_PATH.
 *
 * Returns true when every file exists; false (with an error reported
 * for a missing file) otherwise, or when a path cannot be built.
 */
static bool
nodeMemoryParametersIsAllSupported(virTypedParameterPtr params,
                                   int nparams)
{
    size_t idx;

    for (idx = 0; idx < nparams; idx++) {
        char *sysfsFile = NULL;
        char *leaf = strchr(params[idx].field, '_');
        bool present;

        sa_assert(leaf);
        leaf++;

        if (virAsprintf(&sysfsFile, "%s/%s",
                        SYSFS_MEMORY_SHARED_PATH, leaf) < 0)
            return false;

        present = virFileExists(sysfsFile);
        VIR_FREE(sysfsFile);

        if (!present) {
            virReportError(VIR_ERR_OPERATION_INVALID,
                           _("Parameter '%s' is not supported by "
                             "this kernel"), params[idx].field);
            return false;
        }
    }

    return true;
}
1196
#endif
1197 1198

int
1199
nodeSetMemoryParameters(virTypedParameterPtr params ATTRIBUTE_UNUSED,
1200
                        int nparams ATTRIBUTE_UNUSED,
1201 1202 1203 1204 1205
                        unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
1206
    size_t i;
1207
    int rc;
1208

1209 1210 1211 1212 1213 1214 1215 1216
    if (virTypedParamsValidate(params, nparams,
                               VIR_NODE_MEMORY_SHARED_PAGES_TO_SCAN,
                               VIR_TYPED_PARAM_UINT,
                               VIR_NODE_MEMORY_SHARED_SLEEP_MILLISECS,
                               VIR_TYPED_PARAM_UINT,
                               VIR_NODE_MEMORY_SHARED_MERGE_ACROSS_NODES,
                               VIR_TYPED_PARAM_UINT,
                               NULL) < 0)
1217 1218
        return -1;

1219 1220
    if (!nodeMemoryParametersIsAllSupported(params, nparams))
        return -1;
1221

1222 1223
    for (i = 0; i < nparams; i++) {
        rc = nodeSetMemoryParameterValue(&params[i]);
1224

1225 1226 1227
        /* Out of memory */
        if (rc == -2)
            return -1;
1228 1229
    }

1230
    return 0;
1231 1232 1233 1234 1235 1236 1237 1238
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node set memory parameters not implemented"
                     " on this platform"));
    return -1;
#endif
}

1239
#ifdef __linux__
1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250
static int
nodeGetMemoryParameterValue(const char *field,
                            void *value)
{
    char *path = NULL;
    char *buf = NULL;
    char *tmp = NULL;
    int ret = -1;
    int rc = -1;

    if (virAsprintf(&path, "%s/%s",
1251
                    SYSFS_MEMORY_SHARED_PATH, field) < 0)
1252 1253
        goto cleanup;

1254 1255 1256 1257 1258
    if (!virFileExists(path)) {
        ret = -2;
        goto cleanup;
    }

1259 1260 1261 1262 1263 1264
    if (virFileReadAll(path, 1024, &buf) < 0)
        goto cleanup;

    if ((tmp = strchr(buf, '\n')))
        *tmp = '\0';

1265 1266 1267
    if (STREQ(field, "pages_to_scan")   ||
        STREQ(field, "sleep_millisecs") ||
        STREQ(field, "merge_across_nodes"))
1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
        rc = virStrToLong_ui(buf, NULL, 10, (unsigned int *)value);
    else if (STREQ(field, "pages_shared")    ||
             STREQ(field, "pages_sharing")   ||
             STREQ(field, "pages_unshared")  ||
             STREQ(field, "pages_volatile")  ||
             STREQ(field, "full_scans"))
        rc = virStrToLong_ull(buf, NULL, 10, (unsigned long long *)value);

    if (rc < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("failed to parse %s"), field);
        goto cleanup;
    }

    ret = 0;
cleanup:
    VIR_FREE(path);
    VIR_FREE(buf);
    return ret;
}
1288
#endif
1289

1290
#define NODE_MEMORY_PARAMETERS_NUM 8
1291
int
1292
nodeGetMemoryParameters(virTypedParameterPtr params ATTRIBUTE_UNUSED,
1293
                        int *nparams ATTRIBUTE_UNUSED,
1294 1295 1296 1297 1298 1299 1300
                        unsigned int flags)
{
    virCheckFlags(VIR_TYPED_PARAM_STRING_OKAY, -1);

#ifdef __linux__
    unsigned int pages_to_scan;
    unsigned int sleep_millisecs;
1301
    unsigned int merge_across_nodes;
1302 1303 1304 1305 1306
    unsigned long long pages_shared;
    unsigned long long pages_sharing;
    unsigned long long pages_unshared;
    unsigned long long pages_volatile;
    unsigned long long full_scans = 0;
1307
    size_t i;
1308
    int ret;
1309 1310 1311 1312 1313 1314 1315 1316 1317

    if ((*nparams) == 0) {
        *nparams = NODE_MEMORY_PARAMETERS_NUM;
        return 0;
    }

    for (i = 0; i < *nparams && i < NODE_MEMORY_PARAMETERS_NUM; i++) {
        virTypedParameterPtr param = &params[i];

1318
        switch (i) {
1319
        case 0:
1320 1321 1322 1323
            ret = nodeGetMemoryParameterValue("pages_to_scan", &pages_to_scan);
            if (ret == -2)
                continue;
            else if (ret == -1)
1324 1325 1326 1327 1328 1329 1330 1331 1332
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_TO_SCAN,
                                        VIR_TYPED_PARAM_UINT, pages_to_scan) < 0)
                return -1;

            break;

        case 1:
1333 1334 1335 1336
            ret = nodeGetMemoryParameterValue("sleep_millisecs", &sleep_millisecs);
            if (ret == -2)
                continue;
            else if (ret == -1)
1337 1338 1339 1340 1341 1342 1343 1344 1345
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_SLEEP_MILLISECS,
                                        VIR_TYPED_PARAM_UINT, sleep_millisecs) < 0)
                return -1;

            break;

        case 2:
1346 1347 1348 1349
            ret = nodeGetMemoryParameterValue("pages_shared", &pages_shared);
            if (ret == -2)
                continue;
            else if (ret == -1)
1350 1351 1352 1353 1354 1355 1356 1357 1358
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_SHARED,
                                        VIR_TYPED_PARAM_ULLONG, pages_shared) < 0)
                return -1;

            break;

        case 3:
1359 1360 1361 1362
            ret = nodeGetMemoryParameterValue("pages_sharing", &pages_sharing);
            if (ret == -2)
                continue;
            else if (ret == -1)
1363 1364 1365 1366 1367 1368 1369 1370 1371
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_SHARING,
                                        VIR_TYPED_PARAM_ULLONG, pages_sharing) < 0)
                return -1;

            break;

        case 4:
1372 1373 1374 1375
            ret = nodeGetMemoryParameterValue("pages_unshared", &pages_unshared);
            if (ret == -2)
                continue;
            else if (ret == -1)
1376 1377 1378 1379 1380 1381 1382 1383 1384
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_UNSHARED,
                                        VIR_TYPED_PARAM_ULLONG, pages_unshared) < 0)
                return -1;

            break;

        case 5:
1385 1386 1387 1388
            ret = nodeGetMemoryParameterValue("pages_volatile", &pages_volatile);
            if (ret == -2)
                continue;
            else if (ret == -1)
1389 1390 1391 1392 1393 1394 1395 1396 1397
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_VOLATILE,
                                        VIR_TYPED_PARAM_ULLONG, pages_volatile) < 0)
                return -1;

            break;

        case 6:
1398 1399 1400 1401
            ret = nodeGetMemoryParameterValue("full_scans", &full_scans);
            if (ret == -2)
                continue;
            else if (ret == -1)
1402 1403 1404 1405 1406 1407 1408 1409
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_FULL_SCANS,
                                        VIR_TYPED_PARAM_ULLONG, full_scans) < 0)
                return -1;

            break;

1410
        case 7:
1411 1412 1413 1414
            ret = nodeGetMemoryParameterValue("merge_across_nodes", &merge_across_nodes);
            if (ret == -2)
                continue;
            else if (ret == -1)
1415 1416 1417 1418 1419 1420 1421 1422
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_MERGE_ACROSS_NODES,
                                        VIR_TYPED_PARAM_UINT, merge_across_nodes) < 0)
                return -1;

            break;

1423
        /* coverity[dead_error_begin] */
1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437
        default:
            break;
        }
    }

    return 0;
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node get memory parameters not implemented"
                     " on this platform"));
    return -1;
#endif
}

1438
int
1439
nodeGetCPUMap(unsigned char **cpumap,
1440 1441
              unsigned int *online,
              unsigned int flags)
1442 1443 1444 1445 1446 1447 1448 1449
{
    virBitmapPtr cpus = NULL;
    int maxpresent;
    int ret = -1;
    int dummy;

    virCheckFlags(0, -1);

1450 1451 1452
    if (!cpumap && !online)
        return nodeGetCPUCount();

1453
    if (!(cpus = nodeGetCPUBitmap(&maxpresent)))
1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468
        goto cleanup;

    if (cpumap && virBitmapToData(cpus, cpumap, &dummy) < 0)
        goto cleanup;
    if (online)
        *online = virBitmapCountBits(cpus);

    ret = maxpresent;
cleanup:
    if (ret < 0 && cpumap)
        VIR_FREE(*cpumap);
    virBitmapFree(cpus);
    return ret;
}

1469 1470 1471 1472 1473 1474 1475 1476 1477
/**
 * nodeCapsInitNUMAFake:
 * @caps: capabilities object that receives the single cell
 *
 * Registers one NUMA cell (id 0) built from nodeGetInfo(): one CPU
 * entry per socket/core/thread combination, each listing only itself
 * as sibling, together with the node's total memory.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
nodeCapsInitNUMAFake(virCapsPtr caps ATTRIBUTE_UNUSED)
{
    virNodeInfo nodeinfo;
    virCapsHostNUMACellCPUPtr cpus;
    int ncpus;
    int s, c, t;
    int id;

    if (nodeGetInfo(&nodeinfo) < 0)
        return -1;

    ncpus = VIR_NODEINFO_MAXCPUS(nodeinfo);

    if (VIR_ALLOC_N(cpus, ncpus) < 0)
        return -1;

    id = 0;
    for (s = 0; s < nodeinfo.sockets; s++) {
        for (c = 0; c < nodeinfo.cores; c++) {
            for (t = 0; t < nodeinfo.threads; t++) {
                cpus[id].id = id;
                cpus[id].socket_id = s;
                cpus[id].core_id = c;
                if (!(cpus[id].siblings = virBitmapNew(ncpus)))
                    goto error;
                ignore_value(virBitmapSetBit(cpus[id].siblings, id));
                id++;
            }
        }
    }

    if (virCapabilitiesAddHostNUMACell(caps, 0,
                                       ncpus,
                                       nodeinfo.memory,
                                       cpus) < 0)
        goto error;

    return 0;

 error:
    /* Only entries [0, id) own a bitmap. Entry 'id' is either the one
     * whose allocation just failed (nothing to free) or, when adding
     * the cell failed after the loops completed, one past the last
     * filled entry -- possibly past the end of the array -- so it must
     * not be touched. */
    while (id-- > 0)
        virBitmapFree(cpus[id].siblings);
    VIR_FREE(cpus);
    return -1;
}

static int
1517
nodeGetCellsFreeMemoryFake(unsigned long long *freeMems,
1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541
                           int startCell,
                           int maxCells ATTRIBUTE_UNUSED)
{
    double avail = physmem_available();

    if (startCell != 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, 0);
        return -1;
    }

    freeMems[0] = (unsigned long long)avail;

    if (!freeMems[0]) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot determine free memory"));
        return -1;
    }

    return 1;
}

static unsigned long long
1542
nodeGetFreeMemoryFake(void)
1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555
{
    double avail = physmem_available();
    unsigned long long ret;

    if (!(ret = (unsigned long long)avail)) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot determine free memory"));
        return 0;
    }

    return ret;
}

1556 1557
/* returns 1 on success, 0 if the detection failed and -1 on hard error */
static int
E
Eric Blake 已提交
1558 1559
virNodeCapsFillCPUInfo(int cpu_id ATTRIBUTE_UNUSED,
                       virCapsHostNUMACellCPUPtr cpu ATTRIBUTE_UNUSED)
1560
{
E
Eric Blake 已提交
1561
#ifdef __linux__
1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
    int tmp;
    cpu->id = cpu_id;

    if ((tmp = virNodeGetCpuValue(SYSFS_CPU_PATH, cpu_id,
                                  "topology/physical_package_id", -1)) < 0)
        return 0;

    cpu->socket_id = tmp;

    if ((tmp = virNodeGetCpuValue(SYSFS_CPU_PATH, cpu_id,
                                  "topology/core_id", -1)) < 0)
        return 0;

    cpu->core_id = tmp;

    if (!(cpu->siblings = virNodeGetSiblingsList(SYSFS_CPU_PATH, cpu_id)))
        return -1;

    return 0;
E
Eric Blake 已提交
1581 1582 1583 1584 1585
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node cpu info not implemented on this platform"));
    return -1;
#endif
1586 1587
}

1588
int
1589
nodeCapsInitNUMA(virCapsPtr caps)
1590 1591
{
    int n;
1592
    unsigned long long memory;
1593
    virCapsHostNUMACellCPUPtr cpus = NULL;
1594
    virBitmapPtr cpumap = NULL;
1595
    int ret = -1;
1596
    int ncpus = 0;
1597
    int cpu;
1598
    bool topology_failed = false;
1599
    int max_node;
1600

1601
    if (!virNumaIsAvailable())
1602
        return nodeCapsInitNUMAFake(caps);
1603

1604 1605 1606 1607
    if ((max_node = virNumaGetMaxNode()) < 0)
        goto cleanup;

    for (n = 0; n <= max_node; n++) {
1608
        size_t i;
1609

1610 1611 1612
        if ((ncpus = virNumaGetNodeCPUs(n, &cpumap)) < 0) {
            if (ncpus == -2)
                continue;
1613

1614 1615
            goto cleanup;
        }
1616 1617 1618

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;
1619
        cpu = 0;
1620

1621 1622 1623 1624 1625 1626 1627
        for (i = 0; i < virBitmapSize(cpumap); i++) {
            bool cpustate;
            if (virBitmapGetBit(cpumap, i, &cpustate) < 0)
                continue;

            if (cpustate) {
                if (virNodeCapsFillCPUInfo(i, cpus + cpu++) < 0) {
1628 1629 1630 1631 1632
                    topology_failed = true;
                    virResetLastError();
                }
            }
        }
1633

1634 1635 1636 1637
        /* Detect the amount of memory in the numa cell in KiB */
        virNumaGetNodeMemory(n, &memory, NULL);
        memory >>= 10;

1638
        if (virCapabilitiesAddHostNUMACell(caps, n, ncpus, memory, cpus) < 0)
1639
            goto cleanup;
1640 1641

        cpus = NULL;
1642 1643 1644 1645 1646
    }

    ret = 0;

cleanup:
1647 1648 1649
    if (topology_failed || ret < 0)
        virCapabilitiesClearHostNUMACellCPUTopology(cpus, ncpus);

1650 1651 1652
    virBitmapFree(cpumap);
    VIR_FREE(cpus);

1653 1654 1655
    if (ret < 0)
        VIR_FREE(cpus);

1656 1657
    return ret;
}
1658 1659 1660


int
1661
nodeGetCellsFreeMemory(unsigned long long *freeMems,
1662 1663 1664
                       int startCell,
                       int maxCells)
{
1665
    unsigned long long mem;
1666 1667 1668 1669
    int n, lastCell, numCells;
    int ret = -1;
    int maxCell;

1670
    if (!virNumaIsAvailable())
1671
        return nodeGetCellsFreeMemoryFake(freeMems,
1672 1673
                                          startCell, maxCells);

1674 1675 1676
    if ((maxCell = virNumaGetMaxNode()) < 0)
        return 0;

1677
    if (startCell > maxCell) {
1678 1679 1680
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, maxCell);
1681 1682 1683 1684 1685 1686
        goto cleanup;
    }
    lastCell = startCell + maxCells - 1;
    if (lastCell > maxCell)
        lastCell = maxCell;

1687
    for (numCells = 0, n = startCell; n <= lastCell; n++) {
1688
        virNumaGetNodeMemory(n, NULL, &mem);
1689

1690 1691 1692 1693 1694 1695 1696 1697 1698
        freeMems[numCells++] = mem;
    }
    ret = numCells;

cleanup:
    return ret;
}

/**
 * nodeGetFreeMemory:
 *
 * Without NUMA support this defers to nodeGetFreeMemoryFake().
 * Otherwise it adds up the free-memory value that
 * virNumaGetNodeMemory() reports for every node from 0 to
 * virNumaGetMaxNode().
 *
 * Returns the sum, or 0 if virNumaGetMaxNode() fails.
 */
unsigned long long
nodeGetFreeMemory(void)
{
    unsigned long long nodeFree;
    unsigned long long total = 0;
    int highest;
    int node;

    if (!virNumaIsAvailable())
        return nodeGetFreeMemoryFake();

    highest = virNumaGetMaxNode();
    if (highest < 0)
        return 0;

    for (node = 0; node <= highest; node++) {
        virNumaGetNodeMemory(node, NULL, &nodeFree);
        total += nodeFree;
    }

    return total;
}