/*
 * nodeinfo.c: Helper routines for OS specific node information
 *
 * Copyright (C) 2006-2008, 2010-2015 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

24
#include <config.h>
J
Jim Meyering 已提交
25

26 27 28
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
29
#include <stdint.h>
30
#include <errno.h>
31
#include <dirent.h>
E
Eric Blake 已提交
32
#include <sys/utsname.h>
33
#include "conf/domain_conf.h"
34 35
#include <fcntl.h>
#include <sys/ioctl.h>
M
Michal Privoznik 已提交
36
#include <unistd.h>
37 38 39 40

#if HAVE_LINUX_KVM_H
# include <linux/kvm.h>
#endif
41

42
#if defined(__FreeBSD__) || defined(__APPLE__)
43
# include <sys/time.h>
R
Roman Bogorodskiy 已提交
44 45
# include <sys/types.h>
# include <sys/sysctl.h>
46
# include <sys/resource.h>
R
Roman Bogorodskiy 已提交
47 48
#endif

49
#include "c-ctype.h"
50
#include "viralloc.h"
51
#include "nodeinfopriv.h"
52
#include "nodeinfo.h"
53
#include "physmem.h"
54
#include "virerror.h"
55
#include "count-one-bits.h"
E
Eric Blake 已提交
56
#include "intprops.h"
57
#include "virarch.h"
E
Eric Blake 已提交
58
#include "virfile.h"
59
#include "virtypedparam.h"
60
#include "virstring.h"
61
#include "virnuma.h"
62
#include "virlog.h"
63 64 65

#define VIR_FROM_THIS VIR_FROM_NONE

66 67
VIR_LOG_INIT("nodeinfo");

68 69
#define SYSFS_SYSTEM_PATH "/sys/devices/system"

70
#if defined(__FreeBSD__) || defined(__APPLE__)
R
Roman Bogorodskiy 已提交
71
static int
72
appleFreebsdNodeGetCPUCount(void)
R
Roman Bogorodskiy 已提交
73 74 75 76 77 78 79 80 81 82 83 84
{
    int ncpu_mib[2] = { CTL_HW, HW_NCPU };
    unsigned long ncpu;
    size_t ncpu_len = sizeof(ncpu);

    if (sysctl(ncpu_mib, 2, &ncpu, &ncpu_len, NULL, 0) == -1) {
        virReportSystemError(errno, "%s", _("Cannot obtain CPU count"));
        return -1;
    }

    return ncpu;
}
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111

/* VIR_HW_PHYSMEM - the resulting value of HW_PHYSMEM of FreeBSD
 * is 64 bits while that of Mac OS X is still 32 bits.
 * Mac OS X provides HW_MEMSIZE for 64 bits version of HW_PHYSMEM
 * since 10.6.8 (Snow Leopard) at least.
 */
# ifdef HW_MEMSIZE
#  define VIR_HW_PHYSMEM HW_MEMSIZE
# else
#  define VIR_HW_PHYSMEM HW_PHYSMEM
# endif
/* Read the CTL_HW / VIR_HW_PHYSMEM sysctl and store the value,
 * divided by 1024, in @memory.
 *
 * Returns 0 on success; reports a system error and returns -1 if the
 * sysctl call fails, in which case @memory is left untouched. */
static int
appleFreebsdNodeGetMemorySize(unsigned long *memory)
{
    int name[2] = { CTL_HW, VIR_HW_PHYSMEM };
    unsigned long raw;
    size_t raw_len = sizeof(raw);

    if (sysctl(name, 2, &raw, &raw_len, NULL, 0) != -1) {
        *memory = raw / 1024;
        return 0;
    }

    virReportSystemError(errno, "%s", _("cannot obtain memory size"));
    return -1;
}
112 113 114
#endif /* defined(__FreeBSD__) || defined(__APPLE__) */

#ifdef __FreeBSD__
115
# define BSD_CPU_STATS_ALL 4
116 117
# define BSD_MEMORY_STATS_ALL 4

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
# define TICK_TO_NSEC (1000ull * 1000ull * 1000ull / (stathz ? stathz : hz))

/* Collect CPU time statistics from the kern.cp_time / kern.cp_times
 * sysctls.
 *
 * @cpuNum: VIR_NODE_CPU_STATS_ALL_CPUS for the aggregated counters, or
 *          the index of a single CPU
 * @params: array to fill in; one entry is written per statistic
 * @nparams: in/out; when it points to 0 it is set to BSD_CPU_STATS_ALL
 *           and nothing else is done, otherwise it must be equal to
 *           BSD_CPU_STATS_ALL
 *
 * Counters are converted from ticks to nanoseconds using the rates
 * reported by kern.clockrate (see TICK_TO_NSEC).
 *
 * Returns 0 on success, -1 (with an error reported) on failure. */
static int
freebsdNodeGetCPUStats(int cpuNum,
                       virNodeCPUStatsPtr params,
                       int *nparams)
{
    const char *sysctl_name;
    long *cpu_times = NULL;
    struct clockinfo clkinfo;
    size_t i, j, cpu_times_size, clkinfo_size;
    /* hz and stathz are referenced by the TICK_TO_NSEC macro */
    int cpu_times_num, offset, hz, stathz, ret = -1;
    /* Each statistic is the sum of one or more CPU states.  nidx tells
     * how many entries of idx are meaningful: the remaining ones are
     * zero-filled by the initializer, and since zero is itself a valid
     * state index they must not be summed. */
    struct field_cpu_map {
        const char *field;
        size_t nidx;
        int idx[CPUSTATES];
    } cpu_map[] = {
        {VIR_NODE_CPU_STATS_KERNEL, 1, {CP_SYS}},
        {VIR_NODE_CPU_STATS_USER, 2, {CP_USER, CP_NICE}},
        {VIR_NODE_CPU_STATS_IDLE, 1, {CP_IDLE}},
        {VIR_NODE_CPU_STATS_INTR, 1, {CP_INTR}},
        {NULL, 0, {0}}
    };

    if ((*nparams) == 0) {
        *nparams = BSD_CPU_STATS_ALL;
        return 0;
    }

    if ((*nparams) != BSD_CPU_STATS_ALL) {
        virReportInvalidArg(*nparams,
                            _("nparams in %s must be equal to %d"),
                            __FUNCTION__, BSD_CPU_STATS_ALL);
        return -1;
    }

    clkinfo_size = sizeof(clkinfo);
    if (sysctlbyname("kern.clockrate", &clkinfo, &clkinfo_size, NULL, 0) < 0) {
        virReportSystemError(errno,
                             _("sysctl failed for '%s'"),
                             "kern.clockrate");
        return -1;
    }

    stathz = clkinfo.stathz;
    hz = clkinfo.hz;

    if (cpuNum == VIR_NODE_CPU_STATS_ALL_CPUS) {
        sysctl_name = "kern.cp_time";
        cpu_times_num = 1;
        offset = 0;
    } else {
        sysctl_name = "kern.cp_times";

        /* The helper has already reported the error */
        if ((cpu_times_num = appleFreebsdNodeGetCPUCount()) < 0)
            return -1;

        if (cpuNum < 0 || cpuNum >= cpu_times_num) {
            virReportInvalidArg(cpuNum,
                                _("Invalid cpuNum in %s"),
                                __FUNCTION__);
            return -1;
        }

        /* The buffer holds CPUSTATES counters per CPU, one CPU after
         * the other: skip the counters of the CPUs preceding cpuNum */
        offset = cpuNum * CPUSTATES;
    }

    cpu_times_size = sizeof(long) * cpu_times_num * CPUSTATES;

    if (VIR_ALLOC_N(cpu_times, cpu_times_num * CPUSTATES) < 0)
        goto cleanup;

    if (sysctlbyname(sysctl_name, cpu_times, &cpu_times_size, NULL, 0) < 0) {
        virReportSystemError(errno,
                             _("sysctl failed for '%s'"),
                             sysctl_name);
        goto cleanup;
    }

    for (i = 0; cpu_map[i].field != NULL; i++) {
        virNodeCPUStatsPtr param = &params[i];

        if (virStrcpyStatic(param->field, cpu_map[i].field) == NULL) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Field '%s' too long for destination"),
                           cpu_map[i].field);
            goto cleanup;
        }

        param->value = 0;
        for (j = 0; j < cpu_map[i].nidx; j++)
            param->value += cpu_times[offset + cpu_map[i].idx[j]] * TICK_TO_NSEC;
    }

    ret = 0;

 cleanup:
    VIR_FREE(cpu_times);

    return ret;
}

217 218
/* Collect memory statistics from sysctl.
 *
 * @params: array to fill in; one entry is written per statistic
 * @nparams: in/out; when it points to 0 it is set to
 *           BSD_MEMORY_STATS_ALL and nothing else is done, otherwise
 *           it must be equal to BSD_MEMORY_STATS_ALL
 *
 * The vm.stats.vm.* counters are multiplied by the page size shifted
 * right by 10 bits; vfs.bufspace is shifted right by 10 bits.
 *
 * Returns 0 on success, -1 (with an error reported) on failure. */
static int
freebsdNodeGetMemoryStats(virNodeMemoryStatsPtr params,
                          int *nparams)
{
    const unsigned long page_kb = getpagesize() >> 10;
    const struct {
        const char *field;
        const char *sysctl_name;
    } page_counters[] = {
        {VIR_NODE_MEMORY_STATS_TOTAL, "vm.stats.vm.v_page_count"},
        {VIR_NODE_MEMORY_STATS_FREE, "vm.stats.vm.v_free_count"},
        {VIR_NODE_MEMORY_STATS_CACHED, "vm.stats.vm.v_cache_count"},
    };
    virNodeMemoryStatsPtr out = params;
    long bufspace;
    size_t bufspace_len = sizeof(bufspace);
    size_t k;

    if (*nparams == 0) {
        *nparams = BSD_MEMORY_STATS_ALL;
        return 0;
    }

    if (*nparams != BSD_MEMORY_STATS_ALL) {
        virReportInvalidArg(nparams,
                            _("nparams in %s must be %d"),
                            __FUNCTION__, BSD_MEMORY_STATS_ALL);
        return -1;
    }

    /* Statistics expressed as a number of pages */
    for (k = 0; k < ARRAY_CARDINALITY(page_counters); k++, out++) {
        u_int count;
        size_t count_len = sizeof(count);

        if (sysctlbyname(page_counters[k].sysctl_name, &count,
                         &count_len, NULL, 0) < 0) {
            virReportSystemError(errno,
                                 _("sysctl failed for '%s'"),
                                 page_counters[k].sysctl_name);
            return -1;
        }

        if (virStrcpyStatic(out->field, page_counters[k].field) == NULL) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Field '%s' too long for destination"),
                           page_counters[k].field);
            return -1;
        }
        out->value = (unsigned long long)count * page_kb;
    }

    /* The buffers statistic goes in the entry following the page
     * counters and comes from a sysctl that is not scaled by the
     * page size */
    if (sysctlbyname("vfs.bufspace", &bufspace, &bufspace_len, NULL, 0) < 0) {
        virReportSystemError(errno,
                             _("sysctl failed for '%s'"),
                             "vfs.bufspace");
        return -1;
    }
    if (virStrcpyStatic(out->field, VIR_NODE_MEMORY_STATS_BUFFERS) == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Field '%s' too long for destination"),
                       VIR_NODE_MEMORY_STATS_BUFFERS);
        return -1;
    }
    out->value = (unsigned long long)bufspace >> 10;

    return 0;
}
#endif /* __FreeBSD__ */
R
Roman Bogorodskiy 已提交
291

292
#ifdef __linux__
293
# define CPUINFO_PATH "/proc/cpuinfo"
294
# define PROCSTAT_PATH "/proc/stat"
295
# define MEMINFO_PATH "/proc/meminfo"
296
# define SYSFS_MEMORY_SHARED_PATH "/sys/kernel/mm/ksm"
297
# define SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX 8192
298 299

# define LINUX_NB_CPU_STATS 4
300 301
# define LINUX_NB_MEMORY_STATS_ALL 4
# define LINUX_NB_MEMORY_STATS_CELL 2
302

E
Eric Blake 已提交
303
/* Return the positive decimal contents of the given
304 305 306 307 308
 * DIR/cpu%u/FILE, or -1 on error.  If DEFAULT_VALUE is non-negative
 * and the file could not be found, return that instead of an error;
 * this is useful for machines that cannot hot-unplug cpu0, or where
 * hot-unplugging is disabled, or where the kernel is too old
 * to support NUMA cells, etc.  */
E
Eric Blake 已提交
309
static int
E
Eric Blake 已提交
310
virNodeGetCpuValue(const char *dir, unsigned int cpu, const char *file,
311
                   int default_value)
E
Eric Blake 已提交
312 313 314 315 316 317 318
{
    char *path;
    FILE *pathfp;
    int value = -1;
    char value_str[INT_BUFSIZE_BOUND(value)];
    char *tmp;

319
    if (virAsprintf(&path, "%s/cpu%u/%s", dir, cpu, file) < 0)
E
Eric Blake 已提交
320 321 322 323
        return -1;

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
324 325
        if (default_value >= 0 && errno == ENOENT)
            value = default_value;
E
Eric Blake 已提交
326 327 328 329 330 331 332 333 334 335
        else
            virReportSystemError(errno, _("cannot open %s"), path);
        goto cleanup;
    }

    if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }
    if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
336 337 338
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("could not convert '%s' to an integer"),
                       value_str);
E
Eric Blake 已提交
339 340 341
        goto cleanup;
    }

342
 cleanup:
343
    VIR_FORCE_FCLOSE(pathfp);
E
Eric Blake 已提交
344 345 346 347 348
    VIR_FREE(path);

    return value;
}

E
Eric Blake 已提交
349 350
static unsigned long
virNodeCountThreadSiblings(const char *dir, unsigned int cpu)
351 352
{
    unsigned long ret = 0;
C
Chris Lalancette 已提交
353
    char *path;
354
    char *str = NULL;
355
    size_t i;
356

357
    if (virAsprintf(&path, "%s/cpu%u/topology/thread_siblings",
358
                    dir, cpu) < 0)
359 360
        return 0;

361
    if (!virFileExists(path)) {
362 363
        /* If file doesn't exist, then pretend our only
         * sibling is ourself */
364
        ret = 1;
365
        goto cleanup;
366
    }
367

368
    if (virFileReadAll(path, SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX, &str) < 0)
369 370
        goto cleanup;

J
Ján Tomko 已提交
371 372 373
    for (i = 0; str[i] != '\0'; i++) {
        if (c_isxdigit(str[i]))
            ret += count_one_bits(virHexToBin(str[i]));
374 375
    }

376
 cleanup:
377
    VIR_FREE(str);
378 379 380 381
    VIR_FREE(path);
    return ret;
}

E
Eric Blake 已提交
382
static int
383 384 385
virNodeParseSocket(const char *dir,
                   virArch arch,
                   unsigned int cpu)
386
{
387 388
    int ret = virNodeGetCpuValue(dir, cpu, "topology/physical_package_id", 0);

389 390
    if (ARCH_IS_ARM(arch) || ARCH_IS_PPC(arch) || ARCH_IS_S390(arch)) {
        /* arm, ppc and s390(x) has -1 */
391 392 393 394
        if (ret < 0)
            ret = 0;
    }

395
    return ret;
396 397
}

398 399
/* parses a node entry, returning number of processors in the node and
 * filling arguments */
400
static int
401
ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(3)
402 403 404
ATTRIBUTE_NONNULL(4) ATTRIBUTE_NONNULL(6)
ATTRIBUTE_NONNULL(7) ATTRIBUTE_NONNULL(8)
ATTRIBUTE_NONNULL(9)
405
virNodeParseNode(const char *node,
406
                 virArch arch,
407 408
                 virBitmapPtr present_cpus_map,
                 virBitmapPtr online_cpus_map,
409
                 int threads_per_subcore,
410 411 412 413
                 int *sockets,
                 int *cores,
                 int *threads,
                 int *offline)
414
{
415 416 417
    /* Biggest value we can expect to be used as either socket id
     * or core id. Bitmaps will need to be sized accordingly */
    const int ID_MAX = 4095;
418
    int ret = -1;
419 420 421
    int processors = 0;
    DIR *cpudir = NULL;
    struct dirent *cpudirent = NULL;
422
    virBitmapPtr node_cpus_map = NULL;
423 424
    virBitmapPtr sockets_map = NULL;
    virBitmapPtr *cores_maps = NULL;
425
    int npresent_cpus = virBitmapSize(present_cpus_map);
426 427 428
    int sock_max = 0;
    int sock;
    int core;
429
    size_t i;
430 431
    int siblings;
    unsigned int cpu;
N
Natanael Copa 已提交
432
    int direrr;
433

434 435 436 437 438 439
    *threads = 0;
    *cores = 0;
    *sockets = 0;

    if (!(cpudir = opendir(node))) {
        virReportSystemError(errno, _("cannot opendir %s"), node);
440 441
        goto cleanup;
    }
442

443 444 445 446
    /* Keep track of the CPUs that belong to the current node */
    if (!(node_cpus_map = virBitmapNew(npresent_cpus)))
        goto cleanup;

447
    /* enumerate sockets in the node */
448 449 450
    if (!(sockets_map = virBitmapNew(ID_MAX + 1)))
        goto cleanup;

N
Natanael Copa 已提交
451
    while ((direrr = virDirRead(cpudir, &cpudirent, node)) > 0) {
452 453 454
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
            continue;

455
        if (!virBitmapIsBitSet(present_cpus_map, cpu))
456 457
            continue;

458 459 460 461
        /* Mark this CPU as part of the current node */
        if (virBitmapSetBit(node_cpus_map, cpu) < 0)
            goto cleanup;

462
        if (!virBitmapIsBitSet(online_cpus_map, cpu))
463 464
            continue;

465
        /* Parse socket */
466
        if ((sock = virNodeParseSocket(node, arch, cpu)) < 0)
467
            goto cleanup;
468 469 470 471 472 473 474 475 476
        if (sock > ID_MAX) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Socket %d can't be handled (max socket is %d)"),
                           sock, ID_MAX);
            goto cleanup;
        }

        if (virBitmapSetBit(sockets_map, sock) < 0)
            goto cleanup;
477 478 479 480 481

        if (sock > sock_max)
            sock_max = sock;
    }

N
Natanael Copa 已提交
482
    if (direrr < 0)
483
        goto cleanup;
484 485 486

    sock_max++;

487 488
    /* allocate cores maps for each socket */
    if (VIR_ALLOC_N(cores_maps, sock_max) < 0)
489
        goto cleanup;
490 491

    for (i = 0; i < sock_max; i++)
492 493
        if (!(cores_maps[i] = virBitmapNew(ID_MAX + 1)))
            goto cleanup;
494

495 496
    /* Iterate over all CPUs in the node, in ascending order */
    for (cpu = 0; cpu < npresent_cpus; cpu++) {
497

498 499
        /* Skip CPUs that are not part of the current node */
        if (!virBitmapIsBitSet(node_cpus_map, cpu))
500 501
            continue;

502
        if (!virBitmapIsBitSet(online_cpus_map, cpu)) {
503 504 505 506 507 508 509 510 511 512 513 514
            if (threads_per_subcore > 0 &&
                cpu % threads_per_subcore != 0 &&
                virBitmapIsBitSet(online_cpus_map,
                                  cpu - (cpu % threads_per_subcore))) {
                /* Secondary offline threads are counted as online when
                 * subcores are in use and the corresponding primary
                 * thread is online */
                processors++;
            } else {
                /* But they are counted as offline otherwise */
                (*offline)++;
            }
515
            continue;
516
        }
517 518 519 520

        processors++;

        /* Parse socket */
521
        if ((sock = virNodeParseSocket(node, arch, cpu)) < 0)
522
            goto cleanup;
523
        if (!virBitmapIsBitSet(sockets_map, sock)) {
524 525
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("CPU socket topology has changed"));
526 527 528 529
            goto cleanup;
        }

        /* Parse core */
530 531 532 533
        if (ARCH_IS_S390(arch)) {
            /* logical cpu is equivalent to a core on s390 */
            core = cpu;
        } else {
534 535 536
            if ((core = virNodeGetCpuValue(node, cpu,
                                           "topology/core_id", 0)) < 0)
                goto cleanup;
537
        }
538 539 540 541 542 543
        if (core > ID_MAX) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Core %d can't be handled (max core is %d)"),
                           core, ID_MAX);
            goto cleanup;
        }
544

545 546
        if (virBitmapSetBit(cores_maps[sock], core) < 0)
            goto cleanup;
547 548 549 550 551 552 553 554 555

        if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
            goto cleanup;

        if (siblings > *threads)
            *threads = siblings;
    }

    /* finalize the returned data */
556
    *sockets = virBitmapCountBits(sockets_map);
557 558

    for (i = 0; i < sock_max; i++) {
559
        if (!virBitmapIsBitSet(sockets_map, i))
560 561
            continue;

562
        core = virBitmapCountBits(cores_maps[i]);
563 564 565 566
        if (core > *cores)
            *cores = core;
    }

567 568 569 570 571 572
    if (threads_per_subcore > 0) {
        /* The thread count ignores offline threads, which means that only
         * only primary threads have been considered so far. If subcores
         * are in use, we need to also account for secondary threads */
        *threads *= threads_per_subcore;
    }
573
    ret = processors;
574

575
 cleanup:
576 577 578 579 580
    /* don't shadow a more serious error */
    if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
        virReportSystemError(errno, _("problem closing %s"), node);
        ret = -1;
    }
581 582 583 584 585
    if (cores_maps)
        for (i = 0; i < sock_max; i++)
            virBitmapFree(cores_maps[i]);
    VIR_FREE(cores_maps);
    virBitmapFree(sockets_map);
586
    virBitmapFree(node_cpus_map);
587

588 589 590
    return ret;
}

591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
/* Check whether the host subcore configuration is valid.
 *
 * A valid configuration is one where no secondary thread is online;
 * the primary thread in a subcore is always the first one, that is,
 * the one whose CPU number is a multiple of @threads_per_subcore.
 *
 * Returns false when subcores are not in use (@threads_per_subcore is
 * not positive), when the online CPUs bitmap cannot be obtained, or
 * when at least one secondary thread is online; true otherwise. */
static bool
nodeHasValidSubcoreConfiguration(const char *sysfs_prefix,
                                 int threads_per_subcore)
{
    virBitmapPtr online = NULL;
    bool valid = true;
    int pos;

    /* No point in checking if subcores are not in use */
    if (threads_per_subcore <= 0)
        return false;

    if (!(online = nodeGetOnlineCPUBitmap(sysfs_prefix)))
        return false;

    for (pos = virBitmapNextSetBit(online, -1);
         pos >= 0;
         pos = virBitmapNextSetBit(online, pos)) {

        /* A single online secondary thread is enough to
         * make the configuration invalid */
        if (pos % threads_per_subcore != 0) {
            valid = false;
            break;
        }
    }

    virBitmapFree(online);

    return valid;
}

A
Andrea Bolognani 已提交
626 627 628 629 630
/* Parse the CPU speed out of one /proc/cpuinfo line.
 *
 * If @line starts with @key, the key must be followed by optional
 * whitespace, a ':' and a value; the unsigned decimal integer at the
 * start of that value is stored in @nodeinfo->mhz, provided it is
 * followed by the end of the string, a '.' or whitespace.  A value
 * that does not parse that way is silently ignored.
 *
 * Returns 0 if the line does not start with @key or was processed,
 * -1 (with an error reported) if the ':' or the value is missing. */
static int
linuxNodeInfoParseCPUSpeed(const char *line,
                           const char *key,
                           virNodeInfoPtr nodeinfo)
{
    const char *buf = line;
    char *p;
    unsigned int ui;

    if (!STRPREFIX(buf, key))
        return 0;

    buf += strlen(key);
    while (*buf && c_isspace(*buf))
        buf++;

    if (*buf != ':' || !buf[1]) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("parsing cpu MHz from cpuinfo"));
        return -1;
    }

    if (virStrToLong_ui(buf + 1, &p, 10, &ui) == 0 &&
        /* Accept trailing fractional part.  */
        (*p == '\0' || *p == '.' || c_isspace(*p)))
        nodeinfo->mhz = ui;

    return 0;
}

/* Fill @nodeinfo with the CPU clock speed and topology of the host.
 *
 * @sysfs_prefix: directory to use in place of SYSFS_SYSTEM_PATH, or
 *                NULL to use the default
 * @cpuinfo: stream the clock speed is parsed from
 * @arch: host architecture
 * @nodeinfo: structure to fill in.  The nodes, cpus, sockets, cores
 *            and threads members are accumulated into while NUMA
 *            nodes are being parsed, so the caller presumably passes
 *            them zeroed -- TODO confirm against callers
 *
 * Returns 0 on success, -1 (with an error reported) on failure. */
int
linuxNodeInfoCPUPopulate(const char *sysfs_prefix,
                         FILE *cpuinfo,
                         virArch arch,
                         virNodeInfoPtr nodeinfo)
{
    const char *prefix = sysfs_prefix ? sysfs_prefix : SYSFS_SYSTEM_PATH;
    virBitmapPtr present_cpus_map = NULL;
    virBitmapPtr online_cpus_map = NULL;
    char line[1024];
    DIR *nodedir = NULL;
    struct dirent *nodedirent = NULL;
    int cpus, cores, socks, threads, offline = 0;
    int threads_per_subcore = 0;
    unsigned int node;
    int ret = -1;
    char *sysfs_nodedir = NULL;
    char *sysfs_cpudir = NULL;
    int direrr;

    /* Start with parsing CPU clock speed from /proc/cpuinfo */
    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        if (ARCH_IS_X86(arch)) {
            if (linuxNodeInfoParseCPUSpeed(line, "cpu MHz", nodeinfo) < 0)
                goto cleanup;
        } else if (ARCH_IS_PPC(arch)) {
            /* No other interesting infos are available in /proc/cpuinfo.
             * However, there is a line identifying processor's version,
             * identification and machine, but we don't want it to be caught
             * and parsed in next iteration, because it is not in expected
             * format and thus lead to error. */
            if (linuxNodeInfoParseCPUSpeed(line, "clock", nodeinfo) < 0)
                goto cleanup;
        } else if (ARCH_IS_ARM(arch)) {
            if (linuxNodeInfoParseCPUSpeed(line, "BogoMIPS", nodeinfo) < 0)
                goto cleanup;
        } else if (ARCH_IS_S390(arch)) {
            /* s390x has no realistic value for CPU speed,
             * assign a value of zero to signify this */
            nodeinfo->mhz = 0;
        } else {
            VIR_WARN("Parser for /proc/cpuinfo needs to be adapted for your architecture");
            break;
        }
    }

    /* Get information about what CPUs are present in the host and what
     * CPUs are online, so that we don't have to do so for each node */
    present_cpus_map = nodeGetPresentCPUBitmap(sysfs_prefix);
    if (!present_cpus_map)
        goto cleanup;
    online_cpus_map = nodeGetOnlineCPUBitmap(sysfs_prefix);
    if (!online_cpus_map)
        goto cleanup;

    /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
     * core, node, socket, thread and topology information from /sys
     */
    if (virAsprintf(&sysfs_nodedir, "%s/node", prefix) < 0)
        goto cleanup;

    if (!(nodedir = opendir(sysfs_nodedir))) {
        /* the host isn't probably running a NUMA architecture */
        goto fallback;
    }

    /* PPC-KVM needs the secondary threads of a core to be offline on the
     * host. The kvm scheduler brings the secondary threads online in the
     * guest context. Moreover, P8 processor has split-core capability
     * where, there can be 1,2 or 4 subcores per core. The primaries of the
     * subcores alone will be online on the host for a subcore in the
     * host. Even though the actual threads per core for P8 processor is 8,
     * depending on the subcores_per_core = 1, 2 or 4, the threads per
     * subcore will vary accordingly to 8, 4 and 2 respectively.
     * So, On host threads_per_core what is arrived at from sysfs in the
     * current logic is actually the subcores_per_core. Threads per subcore
     * can only be obtained from the kvm device. For example, on P8 with 1
     * core having 8 threads, sub_cores_percore=4, the threads 0,2,4 & 6
     * will be online. The sysfs reflects this and in the current logic
     * variable 'threads' will be 4 which is nothing but subcores_per_core.
     * If the user tampers the cpu online/offline states using chcpu or other
     * means, then it is an unsupported configuration for kvm.
     * The code below tries to keep in mind
     *  - when the libvirtd is run inside a KVM guest or Phyp based guest.
     *  - Or on the kvm host where user manually tampers the cpu states to
     *    offline/online randomly.
     * On hosts other than POWER this will be 0, in which case a simpler
     * thread-counting logic will be used  */
    if ((threads_per_subcore = nodeGetThreadsPerSubcore(arch)) < 0)
        goto cleanup;

    /* If the subcore configuration is not valid, just pretend subcores
     * are not in use and count threads one by one */
    if (!nodeHasValidSubcoreConfiguration(sysfs_prefix, threads_per_subcore))
        threads_per_subcore = 0;

    while ((direrr = virDirRead(nodedir, &nodedirent, sysfs_nodedir)) > 0) {
        if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
            continue;

        nodeinfo->nodes++;

        if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
                        prefix, nodedirent->d_name) < 0)
            goto cleanup;

        if ((cpus = virNodeParseNode(sysfs_cpudir, arch,
                                     present_cpus_map,
                                     online_cpus_map,
                                     threads_per_subcore,
                                     &socks, &cores,
                                     &threads, &offline)) < 0)
            goto cleanup;

        VIR_FREE(sysfs_cpudir);

        /* CPUs add up across nodes, while for the rest of the
         * topology the biggest value seen in any node is kept */
        nodeinfo->cpus += cpus;

        if (socks > nodeinfo->sockets)
            nodeinfo->sockets = socks;

        if (cores > nodeinfo->cores)
            nodeinfo->cores = cores;

        if (threads > nodeinfo->threads)
            nodeinfo->threads = threads;
    }

    if (direrr < 0)
        goto cleanup;

    if (nodeinfo->cpus && nodeinfo->nodes)
        goto done;

 fallback:
    VIR_FREE(sysfs_cpudir);

    if (virAsprintf(&sysfs_cpudir, "%s/cpu", prefix) < 0)
        goto cleanup;

    /* The fallback pass goes through all the CPUs again, so drop
     * whatever the per-node pass may have counted as offline rather
     * than counting those CPUs twice */
    offline = 0;

    if ((cpus = virNodeParseNode(sysfs_cpudir, arch,
                                 present_cpus_map,
                                 online_cpus_map,
                                 threads_per_subcore,
                                 &socks, &cores,
                                 &threads, &offline)) < 0)
        goto cleanup;

    nodeinfo->nodes = 1;
    nodeinfo->cpus = cpus;
    nodeinfo->sockets = socks;
    nodeinfo->cores = cores;
    nodeinfo->threads = threads;

 done:
    /* There should always be at least one cpu, socket, node, and thread. */
    if (nodeinfo->cpus == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
        goto cleanup;
    }

    if (nodeinfo->sockets == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
        goto cleanup;
    }

    if (nodeinfo->threads == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
        goto cleanup;
    }

    /* Now check if the topology makes sense. There are machines that don't
     * expose their real number of nodes or for example the AMD Bulldozer
     * architecture that exposes their Clustered integer core modules as both
     * threads and cores. This approach throws off our detection. Unfortunately
     * the nodeinfo structure isn't designed to carry the full topology so
     * we're going to lie about the detected topology to notify the user
     * to check the host capabilities for the actual topology. */
    if ((nodeinfo->nodes *
         nodeinfo->sockets *
         nodeinfo->cores *
         nodeinfo->threads) != (nodeinfo->cpus + offline)) {
        nodeinfo->nodes = 1;
        nodeinfo->sockets = 1;
        nodeinfo->cores = nodeinfo->cpus + offline;
        nodeinfo->threads = 1;
    }

    ret = 0;

 cleanup:
    /* don't shadow a more serious error */
    if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
        virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
        ret = -1;
    }

    virBitmapFree(present_cpus_map);
    virBitmapFree(online_cpus_map);
    VIR_FREE(sysfs_nodedir);
    VIR_FREE(sysfs_cpudir);
    return ret;
}

J
Ján Tomko 已提交
885 886 887 888 889 890 891
/*
 * virNodeCPUStatsAssign:
 * @param: statistic slot to fill in
 * @name: name to copy into param->field
 * @value: number to store in param->value
 *
 * Returns 0 once both members are set, or -1 (after reporting an
 * internal error) when virStrcpyStatic() refuses to copy @name.
 */
static int
virNodeCPUStatsAssign(virNodeCPUStatsPtr param,
                      const char *name,
                      unsigned long long value)
{
    if (virStrcpyStatic(param->field, name) != NULL) {
        param->value = value;
        return 0;
    }

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   "%s", _("kernel cpu time field is too long"
                           " for the destination"));
    return -1;
}

900 901
# define TICK_TO_NSEC (1000ull * 1000ull * 1000ull / sysconf(_SC_CLK_TCK))

J
Ján Tomko 已提交
902
int
903 904 905 906
linuxNodeGetCPUStats(FILE *procstat,
                     int cpuNum,
                     virNodeCPUStatsPtr params,
                     int *nparams)
907 908 909 910 911
{
    int ret = -1;
    char line[1024];
    unsigned long long usr, ni, sys, idle, iowait;
    unsigned long long irq, softirq, steal, guest, guest_nice;
912
    char cpu_header[4 + INT_BUFSIZE_BOUND(cpuNum)];
913 914 915 916 917 918 919 920 921

    if ((*nparams) == 0) {
        /* Current number of cpu stats supported by linux */
        *nparams = LINUX_NB_CPU_STATS;
        ret = 0;
        goto cleanup;
    }

    if ((*nparams) != LINUX_NB_CPU_STATS) {
922 923 924
        virReportInvalidArg(*nparams,
                            _("nparams in %s must be equal to %d"),
                            __FUNCTION__, LINUX_NB_CPU_STATS);
925 926 927
        goto cleanup;
    }

928
    if (cpuNum == VIR_NODE_CPU_STATS_ALL_CPUS) {
929
        strcpy(cpu_header, "cpu ");
930
    } else {
931
        snprintf(cpu_header, sizeof(cpu_header), "cpu%d ", cpuNum);
932 933 934 935 936 937 938 939 940 941 942 943 944 945
    }

    while (fgets(line, sizeof(line), procstat) != NULL) {
        char *buf = line;

        if (STRPREFIX(buf, cpu_header)) { /* aka logical CPU time */
            if (sscanf(buf,
                       "%*s %llu %llu %llu %llu %llu" // user ~ iowait
                       "%llu %llu %llu %llu %llu",    // irq  ~ guest_nice
                       &usr, &ni, &sys, &idle, &iowait,
                       &irq, &softirq, &steal, &guest, &guest_nice) < 4) {
                continue;
            }

J
Ján Tomko 已提交
946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
            if (virNodeCPUStatsAssign(&params[0], VIR_NODE_CPU_STATS_KERNEL,
                                      (sys + irq + softirq) * TICK_TO_NSEC) < 0)
                goto cleanup;

            if (virNodeCPUStatsAssign(&params[1], VIR_NODE_CPU_STATS_USER,
                                      (usr + ni) * TICK_TO_NSEC) < 0)
                goto cleanup;

            if (virNodeCPUStatsAssign(&params[2], VIR_NODE_CPU_STATS_IDLE,
                                      idle * TICK_TO_NSEC) < 0)
                goto cleanup;

            if (virNodeCPUStatsAssign(&params[3], VIR_NODE_CPU_STATS_IOWAIT,
                                      iowait * TICK_TO_NSEC) < 0)
                goto cleanup;

962 963 964 965 966
            ret = 0;
            goto cleanup;
        }
    }

967 968 969
    virReportInvalidArg(cpuNum,
                        _("Invalid cpuNum in %s"),
                        __FUNCTION__);
970

971
 cleanup:
972 973 974
    return ret;
}

975 976 977 978 979
/*
 * linuxNodeGetMemoryStats:
 * @meminfo: stream holding "Name: value kB" lines, optionally prefixed
 *           with "Node N " as in the per-node sysfs meminfo files
 * @cellNum: VIR_NODE_MEMORY_STATS_ALL_CELLS or a cell number; only used
 *           here to pick the expected number of statistics
 * @params: array receiving the statistics that were found
 * @nparams: in: number of entries in @params; when 0 it is set to the
 *           number of supported statistics and nothing else is done
 *
 * Returns 0 when at least one known statistic was stored, -1 with an
 * error reported otherwise.
 */
static int
linuxNodeGetMemoryStats(FILE *meminfo,
                        int cellNum,
                        virNodeMemoryStatsPtr params,
                        int *nparams)
{
    int ret = -1;
    size_t i = 0, j = 0, k = 0;
    int found = 0;
    int nr_param;
    char line[1024];
    /* Sized like the line buffer: the unbounded %s conversion below can
     * then never write past the end, whatever token the line holds. */
    char meminfo_hdr[sizeof(line)];
    unsigned long val;
    static const struct field_conv {
        const char *meminfo_hdr;  /* meminfo header */
        const char *field;        /* MemoryStats field name */
    } field_conv[] = {
        {"MemTotal:", VIR_NODE_MEMORY_STATS_TOTAL},
        {"MemFree:",  VIR_NODE_MEMORY_STATS_FREE},
        {"Buffers:",  VIR_NODE_MEMORY_STATS_BUFFERS},
        {"Cached:",   VIR_NODE_MEMORY_STATS_CACHED},
        {NULL,        NULL}
    };

    if (cellNum == VIR_NODE_MEMORY_STATS_ALL_CELLS) {
        nr_param = LINUX_NB_MEMORY_STATS_ALL;
    } else {
        nr_param = LINUX_NB_MEMORY_STATS_CELL;
    }

    if ((*nparams) == 0) {
        /* Current number of memory stats supported by linux */
        *nparams = nr_param;
        ret = 0;
        goto cleanup;
    }

    if ((*nparams) != nr_param) {
        virReportInvalidArg(nparams,
                            _("nparams in %s must be %d"),
                            __FUNCTION__, nr_param);
        goto cleanup;
    }

    while (fgets(line, sizeof(line), meminfo) != NULL) {
        char *buf = line;

        if (STRPREFIX(buf, "Node ")) {
            /*
             * /sys/devices/system/node/nodeX/meminfo format is below.
             * So, skip prefix "Node XX ".
             *
             * Node 0 MemTotal:        8386980 kB
             * Node 0 MemFree:         5300920 kB
             *         :
             */
            char *p;

            p = buf;
            for (i = 0; i < 2; i++) {
                p = strchr(p, ' ');
                if (p == NULL) {
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   "%s", _("no prefix found"));
                    goto cleanup;
                }
                p++;
            }
            buf = p;
        }

        /* Lines that are not "<name> <number> ..." are simply ignored. */
        if (sscanf(buf, "%s %lu kB", meminfo_hdr, &val) < 2)
            continue;

        for (j = 0; field_conv[j].meminfo_hdr != NULL; j++) {
            const struct field_conv *convp = &field_conv[j];

            if (STREQ(meminfo_hdr, convp->meminfo_hdr)) {
                virNodeMemoryStatsPtr param = &params[k++];

                if (virStrcpyStatic(param->field, convp->field) == NULL) {
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   "%s", _("Field kernel memory too long for destination"));
                    goto cleanup;
                }
                param->value = val;
                found++;
                break;
            }
        }
        /* Stop reading once every expected statistic has been stored. */
        if (found >= nr_param)
            break;
    }

    if (found == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("no available memory line found"));
        goto cleanup;
    }

    ret = 0;

 cleanup:
    return ret;
}
1080

1081
static char *
1082 1083
linuxGetCPUGlobalPath(const char *sysfs_prefix,
                      const char *file)
1084 1085 1086 1087
{
    const char *prefix = sysfs_prefix ? sysfs_prefix : SYSFS_SYSTEM_PATH;
    char *path = NULL;

1088
    if (virAsprintf(&path, "%s/cpu/%s", prefix, file) < 0)
1089
        return NULL;
1090

1091 1092
    return path;
}
1093

1094 1095 1096 1097 1098 1099
/* Returns the allocated path of the "present" file below the cpu
 * directory of @sysfs_prefix, or NULL on failure; see
 * linuxGetCPUGlobalPath(). */
static char *
linuxGetCPUPresentPath(const char *sysfs_prefix)
{
    char *present_file = linuxGetCPUGlobalPath(sysfs_prefix, "present");

    return present_file;
}

1100 1101 1102 1103 1104 1105
/* Returns the allocated path of the "online" file below the cpu
 * directory of @sysfs_prefix, or NULL on failure; see
 * linuxGetCPUGlobalPath(). */
static char *
linuxGetCPUOnlinePath(const char *sysfs_prefix)
{
    char *online_file = linuxGetCPUGlobalPath(sysfs_prefix, "online");

    return online_file;
}

1106 1107
/* Determine the number of CPUs (maximum CPU id + 1) from a file containing
 * a list of CPU ids, like the Linux sysfs cpu/present file */
1108
static int
1109
linuxParseCPUCount(const char *path)
1110 1111 1112 1113 1114
{
    char *str = NULL;
    char *tmp;
    int ret = -1;

E
Eric Blake 已提交
1115
    if (virFileReadAll(path, 5 * VIR_DOMAIN_CPUMASK_LEN, &str) < 0)
1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129
        goto cleanup;

    tmp = str;
    do {
        if (virStrToLong_i(tmp, &tmp, 10, &ret) < 0 ||
            !strchr(",-\n", *tmp)) {
            virReportError(VIR_ERR_NO_SUPPORT,
                           _("failed to parse %s"), path);
            ret = -1;
            goto cleanup;
        }
    } while (*tmp++ != '\n');
    ret++;

1130
 cleanup:
1131 1132 1133 1134
    VIR_FREE(str);
    return ret;
}

1135
/*
1136 1137 1138
 * Linux maintains cpu bit map under cpu/online. For example, if
 * cpuid=5's flag is not set and max cpu is 7, the map file shows
 * 0-4,6-7. This function parses it and returns cpumap.
1139
 */
H
Hu Tao 已提交
1140
static virBitmapPtr
1141
linuxParseCPUmap(int max_cpuid, const char *path)
1142
{
H
Hu Tao 已提交
1143
    virBitmapPtr map = NULL;
1144 1145
    char *str = NULL;

1146
    if (virFileReadAll(path, 5 * VIR_DOMAIN_CPUMASK_LEN, &str) < 0)
1147 1148
        goto error;

1149
    if (virBitmapParse(str, 0, &map, max_cpuid) < 0)
1150 1151
        goto error;

H
Hu Tao 已提交
1152
    VIR_FREE(str);
1153 1154
    return map;

1155
 error:
1156
    VIR_FREE(str);
H
Hu Tao 已提交
1157
    virBitmapFree(map);
1158 1159
    return NULL;
}
E
Eric Blake 已提交
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178


/*
 * virNodeGetSiblingsList:
 * @dir: directory containing the cpuN entries
 * @cpu_id: number of the CPU to query
 *
 * Reads "<dir>/cpu<N>/topology/thread_siblings_list" and parses it as
 * a bitmap sized by virNumaGetMaxCPUs().
 *
 * Returns the bitmap, or NULL when the path cannot be built or the
 * file cannot be read. After a failed parse the caller gets whatever
 * virBitmapParse() left in the result.
 */
static virBitmapPtr
virNodeGetSiblingsList(const char *dir, int cpu_id)
{
    virBitmapPtr siblings = NULL;
    char *list_file = NULL;
    char *contents = NULL;

    if (virAsprintf(&list_file, "%s/cpu%u/topology/thread_siblings_list",
                    dir, cpu_id) < 0)
        goto done;

    if (virFileReadAll(list_file, SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX,
                       &contents) >= 0) {
        /* Success and failure leave through the same exit below. */
        ignore_value(virBitmapParse(contents, 0, &siblings,
                                    virNumaGetMaxCPUs()));
    }

 done:
    VIR_FREE(contents);
    VIR_FREE(list_file);
    return siblings;
}
1184 1185
#endif

A
Andrea Bolognani 已提交
1186 1187 1188
/*
 * nodeGetInfo:
 * @sysfs_prefix: sysfs root handed to the Linux CPU probing code;
 *                not used on the other platforms
 * @nodeinfo: structure to fill in; it is zeroed first
 *
 * Stores the host architecture name as the model and then collects
 * the platform specific CPU topology, frequency and memory size.
 *
 * Returns 0 on success, -1 on failure or on unsupported platforms.
 */
int
nodeGetInfo(const char *sysfs_prefix ATTRIBUTE_UNUSED,
            virNodeInfoPtr nodeinfo)
{
    virArch hostarch = virArchFromHost();

    memset(nodeinfo, 0, sizeof(*nodeinfo));

    if (virStrcpyStatic(nodeinfo->model, virArchToString(hostarch)) == NULL)
        return -1;

#ifdef __linux__
    {
    int ret = -1;
    FILE *cpuinfo = fopen(CPUINFO_PATH, "r");

    if (!cpuinfo) {
        virReportSystemError(errno,
                             _("cannot open %s"), CPUINFO_PATH);
        return -1;
    }

    ret = linuxNodeInfoCPUPopulate(sysfs_prefix, cpuinfo,
                                   hostarch, nodeinfo);
    if (ret < 0)
        goto cleanup;

    /* Convert to KB. */
    nodeinfo->memory = physmem_total() / 1024;

 cleanup:
    VIR_FORCE_FCLOSE(cpuinfo);
    return ret;
    }
#elif defined(__FreeBSD__) || defined(__APPLE__)
    {
    /* Start from zero: sysctlbyname() may fill in fewer bytes than
     * sizeof(unsigned long), and the remaining bytes must not hold
     * stack garbage. */
    unsigned long cpu_freq = 0;
    size_t cpu_freq_len = sizeof(cpu_freq);

    /* No topology detection here: report one node/socket with one
     * single-threaded core per CPU. */
    nodeinfo->nodes = 1;
    nodeinfo->sockets = 1;
    nodeinfo->threads = 1;

    nodeinfo->cpus = appleFreebsdNodeGetCPUCount();
    if (nodeinfo->cpus == -1)
        return -1;

    nodeinfo->cores = nodeinfo->cpus;

# ifdef __FreeBSD__
    if (sysctlbyname("dev.cpu.0.freq", &cpu_freq, &cpu_freq_len, NULL, 0) < 0) {
        virReportSystemError(errno, "%s", _("cannot obtain CPU freq"));
        return -1;
    }

    nodeinfo->mhz = cpu_freq;
# else
    if (sysctlbyname("hw.cpufrequency", &cpu_freq, &cpu_freq_len, NULL, 0) < 0) {
        virReportSystemError(errno, "%s", _("cannot obtain CPU freq"));
        return -1;
    }

    /* This value is scaled down by 10^6 to get MHz. */
    nodeinfo->mhz = cpu_freq / 1000000;
# endif

    if (appleFreebsdNodeGetMemorySize(&nodeinfo->memory) < 0)
        return -1;

    return 0;
    }
#else
    /* XXX Solaris will need an impl later if they port QEMU driver */
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node info not implemented on this platform"));
    return -1;
#endif
}
1263

A
Andrea Bolognani 已提交
1264 1265 1266 1267 1268
/*
 * nodeGetCPUStats:
 * @cpuNum: CPU to query, passed through to the platform backend
 * @params: array receiving the statistics
 * @nparams: number of entries in @params, passed through to the backend
 * @flags: must be 0
 *
 * Dispatches to the Linux (PROCSTAT_PATH based) or FreeBSD
 * implementation.
 *
 * Returns the backend's result, or -1 with an error reported when the
 * statistics file cannot be opened or the platform is unsupported.
 */
int
nodeGetCPUStats(int cpuNum ATTRIBUTE_UNUSED,
                virNodeCPUStatsPtr params ATTRIBUTE_UNUSED,
                int *nparams ATTRIBUTE_UNUSED,
                unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
    {
        FILE *statfile;
        int rc;

        if (!(statfile = fopen(PROCSTAT_PATH, "r"))) {
            virReportSystemError(errno,
                                 _("cannot open %s"), PROCSTAT_PATH);
            return -1;
        }

        rc = linuxNodeGetCPUStats(statfile, cpuNum, params, nparams);
        VIR_FORCE_FCLOSE(statfile);

        return rc;
    }
#elif defined(__FreeBSD__)
    return freebsdNodeGetCPUStats(cpuNum, params, nparams);
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node CPU stats not implemented on this platform"));
    return -1;
#endif
}

A
Andrea Bolognani 已提交
1295 1296 1297 1298 1299 1300
/*
 * nodeGetMemoryStats:
 * @sysfs_prefix: sysfs root for per-cell lookups, or NULL for
 *                SYSFS_SYSTEM_PATH (Linux only)
 * @cellNum: VIR_NODE_MEMORY_STATS_ALL_CELLS or a cell number
 * @params: array receiving the statistics
 * @nparams: number of entries in @params, passed through to the backend
 * @flags: must be 0
 *
 * On Linux the statistics come from MEMINFO_PATH for all cells or from
 * "<prefix>/node/node<N>/meminfo" for a single cell.
 *
 * Returns the backend's result, or -1 on failure or on unsupported
 * platforms.
 */
int
nodeGetMemoryStats(const char *sysfs_prefix ATTRIBUTE_UNUSED,
                   int cellNum ATTRIBUTE_UNUSED,
                   virNodeMemoryStatsPtr params ATTRIBUTE_UNUSED,
                   int *nparams ATTRIBUTE_UNUSED,
                   unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
    {
        const char *sysfs_root = sysfs_prefix ? sysfs_prefix : SYSFS_SYSTEM_PATH;
        char *stat_path = NULL;
        FILE *fp;
        int rc = -1;

        if (cellNum != VIR_NODE_MEMORY_STATS_ALL_CELLS) {
            int highest = virNumaGetMaxNode();

            if (highest < 0)
                return -1;

            if (cellNum > highest) {
                virReportInvalidArg(cellNum,
                                    _("cellNum in %s must be less than or equal to %d"),
                                    __FUNCTION__, highest);
                return -1;
            }

            if (virAsprintf(&stat_path, "%s/node/node%d/meminfo",
                            sysfs_root, cellNum) < 0)
                return -1;
        } else if (VIR_STRDUP(stat_path, MEMINFO_PATH) < 0) {
            return -1;
        }

        if ((fp = fopen(stat_path, "r"))) {
            rc = linuxNodeGetMemoryStats(fp, cellNum, params, nparams);
            VIR_FORCE_FCLOSE(fp);
        } else {
            virReportSystemError(errno,
                                 _("cannot open %s"), stat_path);
        }

        VIR_FREE(stat_path);
        return rc;
    }
#elif defined(__FreeBSD__)
    return freebsdNodeGetMemoryStats(params, nparams);
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node memory stats not implemented on this platform"));
    return -1;
#endif
}

1353
int
1354
nodeGetCPUCount(const char *sysfs_prefix ATTRIBUTE_UNUSED)
1355
{
R
Roman Bogorodskiy 已提交
1356
#if defined(__linux__)
1357 1358 1359 1360 1361
    /* To support older kernels that lack cpu/present, such as 2.6.18
     * in RHEL5, we fall back to count cpu/cpuNN entries; this assumes
     * that such kernels also lack hotplug, and therefore cpu/cpuNN
     * will be consecutive.
     */
1362
    char *present_path = NULL;
1363
    const char *prefix = sysfs_prefix ? sysfs_prefix : SYSFS_SYSTEM_PATH;
1364
    char *cpupath = NULL;
1365 1366
    int ncpu = -1;

1367
    if (!(present_path = linuxGetCPUPresentPath(sysfs_prefix)))
1368
        return -1;
1369

1370
    if (virFileExists(present_path)) {
1371
        ncpu = linuxParseCPUCount(present_path);
1372 1373 1374 1375 1376 1377
        goto cleanup;
    }

    if (virAsprintf(&cpupath, "%s/cpu/cpu0", prefix) < 0)
        goto cleanup;
    if (virFileExists(cpupath)) {
1378
        ncpu = 0;
1379
        do {
1380
            ncpu++;
1381 1382
            VIR_FREE(cpupath);
            if (virAsprintf(&cpupath, "%s/cpu/cpu%d",
1383
                            prefix, ncpu) < 0) {
1384 1385 1386
                ncpu = -1;
                goto cleanup;
            }
1387 1388 1389 1390 1391 1392 1393
        } while (virFileExists(cpupath));
    } else {
        /* no cpu/cpu0: we give up */
        virReportError(VIR_ERR_NO_SUPPORT, "%s",
                       _("host cpu counting not supported on this node"));
    }

1394 1395
 cleanup:
    VIR_FREE(present_path);
1396
    VIR_FREE(cpupath);
1397
    return ncpu;
1398 1399
#elif defined(__FreeBSD__) || defined(__APPLE__)
    return appleFreebsdNodeGetCPUCount();
1400 1401 1402 1403 1404 1405 1406
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("host cpu counting not implemented on this platform"));
    return -1;
#endif
}

1407
virBitmapPtr
1408
nodeGetPresentCPUBitmap(const char *sysfs_prefix ATTRIBUTE_UNUSED)
1409
{
1410
#ifdef __linux__
1411
    virBitmapPtr present_cpus = NULL;
1412
    char *present_path = NULL;
1413
    int npresent_cpus;
1414

1415 1416
    if ((npresent_cpus = nodeGetCPUCount(sysfs_prefix)) < 0)
        goto cleanup;
1417

1418
    if (!(present_path = linuxGetCPUPresentPath(sysfs_prefix)))
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435
        goto cleanup;

    /* If the cpu/present file is available, parse it and exit */
    if (virFileExists(present_path)) {
        present_cpus = linuxParseCPUmap(npresent_cpus, present_path);
        goto cleanup;
    }

    /* If the file is not available, we can assume that the kernel is
     * too old to support non-consecutive CPU ids and just mark all
     * possible CPUs as present */
    if (!(present_cpus = virBitmapNew(npresent_cpus)))
        goto cleanup;

    virBitmapSetAll(present_cpus);

 cleanup:
1436
    VIR_FREE(present_path);
1437 1438

    return present_cpus;
1439 1440
#endif
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
1441
                   _("node present CPU map not implemented on this platform"));
1442 1443 1444
    return NULL;
}

H
Hu Tao 已提交
1445
virBitmapPtr
1446
nodeGetOnlineCPUBitmap(const char *sysfs_prefix ATTRIBUTE_UNUSED)
1447 1448
{
#ifdef __linux__
1449 1450
    const char *prefix = sysfs_prefix ? sysfs_prefix : SYSFS_SYSTEM_PATH;
    char *online_path = NULL;
1451
    char *cpudir = NULL;
H
Hu Tao 已提交
1452
    virBitmapPtr cpumap;
1453 1454
    int present;

1455
    present = nodeGetCPUCount(sysfs_prefix);
1456
    if (present < 0)
1457
        return NULL;
E
Eric Blake 已提交
1458

1459
    if (!(online_path = linuxGetCPUOnlinePath(sysfs_prefix)))
1460 1461 1462
        return NULL;
    if (virFileExists(online_path)) {
        cpumap = linuxParseCPUmap(present, online_path);
E
Eric Blake 已提交
1463
    } else {
1464
        size_t i;
E
Eric Blake 已提交
1465 1466

        cpumap = virBitmapNew(present);
1467
        if (!cpumap)
1468
            goto cleanup;
1469 1470 1471 1472

        if (virAsprintf(&cpudir, "%s/cpu", prefix) < 0)
            goto cleanup;

E
Eric Blake 已提交
1473
        for (i = 0; i < present; i++) {
1474
            int online = virNodeGetCpuValue(cpudir, i, "online", 1);
E
Eric Blake 已提交
1475 1476
            if (online < 0) {
                virBitmapFree(cpumap);
1477 1478
                cpumap = NULL;
                goto cleanup;
E
Eric Blake 已提交
1479 1480 1481 1482 1483
            }
            if (online)
                ignore_value(virBitmapSetBit(cpumap, i));
        }
    }
1484

1485 1486
 cleanup:
    VIR_FREE(online_path);
1487
    VIR_FREE(cpudir);
1488 1489
    return cpumap;
#else
1490
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
1491
                   _("node online CPU map not implemented on this platform"));
1492 1493 1494 1495
    return NULL;
#endif
}

1496
#ifdef __linux__
1497
static int
1498
nodeSetMemoryParameterValue(virTypedParameterPtr param)
1499 1500 1501 1502 1503 1504
{
    char *path = NULL;
    char *strval = NULL;
    int ret = -1;
    int rc = -1;

1505
    char *field = strchr(param->field, '_');
1506
    sa_assert(field);
1507
    field++;
1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519
    if (virAsprintf(&path, "%s/%s",
                    SYSFS_MEMORY_SHARED_PATH, field) < 0) {
        ret = -2;
        goto cleanup;
    }

    if (virAsprintf(&strval, "%u", param->value.ui) == -1) {
        ret = -2;
        goto cleanup;
    }

    if ((rc = virFileWriteStr(path, strval, 0)) < 0) {
1520
        virReportSystemError(-rc, _("failed to set %s"), param->field);
1521 1522 1523 1524
        goto cleanup;
    }

    ret = 0;
1525
 cleanup:
1526 1527 1528 1529
    VIR_FREE(path);
    VIR_FREE(strval);
    return ret;
}
1530 1531 1532 1533 1534 1535

/*
 * nodeMemoryParametersIsAllSupported:
 * @params: parameters to check
 * @nparams: number of entries in @params
 *
 * For every parameter, checks that the file named after the part of
 * its field following the first '_' exists below
 * SYSFS_MEMORY_SHARED_PATH.
 *
 * Returns true when all files exist; false when one is missing (an
 * error is reported) or when a path cannot be built.
 */
static bool
nodeMemoryParametersIsAllSupported(virTypedParameterPtr params,
                                   int nparams)
{
    size_t idx;

    for (idx = 0; idx < nparams; idx++) {
        virTypedParameterPtr cur = params + idx;
        char *sysfs_file = NULL;
        char *knob = strchr(cur->field, '_');
        bool exists;

        sa_assert(knob);
        knob++;

        if (virAsprintf(&sysfs_file, "%s/%s",
                        SYSFS_MEMORY_SHARED_PATH, knob) < 0)
            return false;

        exists = virFileExists(sysfs_file);
        VIR_FREE(sysfs_file);

        if (!exists) {
            virReportError(VIR_ERR_OPERATION_INVALID,
                           _("Parameter '%s' is not supported by "
                             "this kernel"), cur->field);
            return false;
        }
    }

    return true;
}
1561
#endif
1562 1563

int
1564
nodeSetMemoryParameters(virTypedParameterPtr params ATTRIBUTE_UNUSED,
1565
                        int nparams ATTRIBUTE_UNUSED,
1566 1567 1568 1569 1570
                        unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
1571
    size_t i;
1572
    int rc;
1573

1574 1575 1576 1577 1578 1579 1580 1581
    if (virTypedParamsValidate(params, nparams,
                               VIR_NODE_MEMORY_SHARED_PAGES_TO_SCAN,
                               VIR_TYPED_PARAM_UINT,
                               VIR_NODE_MEMORY_SHARED_SLEEP_MILLISECS,
                               VIR_TYPED_PARAM_UINT,
                               VIR_NODE_MEMORY_SHARED_MERGE_ACROSS_NODES,
                               VIR_TYPED_PARAM_UINT,
                               NULL) < 0)
1582 1583
        return -1;

1584 1585
    if (!nodeMemoryParametersIsAllSupported(params, nparams))
        return -1;
1586

1587 1588
    for (i = 0; i < nparams; i++) {
        rc = nodeSetMemoryParameterValue(&params[i]);
1589

1590
        if (rc < 0)
1591
            return -1;
1592 1593
    }

1594
    return 0;
1595 1596 1597 1598 1599 1600 1601 1602
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node set memory parameters not implemented"
                     " on this platform"));
    return -1;
#endif
}

1603
#ifdef __linux__
1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614
static int
nodeGetMemoryParameterValue(const char *field,
                            void *value)
{
    char *path = NULL;
    char *buf = NULL;
    char *tmp = NULL;
    int ret = -1;
    int rc = -1;

    if (virAsprintf(&path, "%s/%s",
1615
                    SYSFS_MEMORY_SHARED_PATH, field) < 0)
1616 1617
        goto cleanup;

1618 1619 1620 1621 1622
    if (!virFileExists(path)) {
        ret = -2;
        goto cleanup;
    }

1623 1624 1625 1626 1627 1628
    if (virFileReadAll(path, 1024, &buf) < 0)
        goto cleanup;

    if ((tmp = strchr(buf, '\n')))
        *tmp = '\0';

1629 1630 1631
    if (STREQ(field, "pages_to_scan")   ||
        STREQ(field, "sleep_millisecs") ||
        STREQ(field, "merge_across_nodes"))
1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646
        rc = virStrToLong_ui(buf, NULL, 10, (unsigned int *)value);
    else if (STREQ(field, "pages_shared")    ||
             STREQ(field, "pages_sharing")   ||
             STREQ(field, "pages_unshared")  ||
             STREQ(field, "pages_volatile")  ||
             STREQ(field, "full_scans"))
        rc = virStrToLong_ull(buf, NULL, 10, (unsigned long long *)value);

    if (rc < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("failed to parse %s"), field);
        goto cleanup;
    }

    ret = 0;
1647
 cleanup:
1648 1649 1650 1651
    VIR_FREE(path);
    VIR_FREE(buf);
    return ret;
}
1652
#endif
1653

1654
#define NODE_MEMORY_PARAMETERS_NUM 8
1655
int
1656
nodeGetMemoryParameters(virTypedParameterPtr params ATTRIBUTE_UNUSED,
1657
                        int *nparams ATTRIBUTE_UNUSED,
1658 1659 1660 1661 1662 1663 1664
                        unsigned int flags)
{
    virCheckFlags(VIR_TYPED_PARAM_STRING_OKAY, -1);

#ifdef __linux__
    unsigned int pages_to_scan;
    unsigned int sleep_millisecs;
1665
    unsigned int merge_across_nodes;
1666 1667 1668 1669 1670
    unsigned long long pages_shared;
    unsigned long long pages_sharing;
    unsigned long long pages_unshared;
    unsigned long long pages_volatile;
    unsigned long long full_scans = 0;
1671
    size_t i;
1672
    int ret;
1673 1674 1675 1676 1677 1678 1679 1680 1681

    if ((*nparams) == 0) {
        *nparams = NODE_MEMORY_PARAMETERS_NUM;
        return 0;
    }

    for (i = 0; i < *nparams && i < NODE_MEMORY_PARAMETERS_NUM; i++) {
        virTypedParameterPtr param = &params[i];

1682
        switch (i) {
1683
        case 0:
1684 1685 1686 1687
            ret = nodeGetMemoryParameterValue("pages_to_scan", &pages_to_scan);
            if (ret == -2)
                continue;
            else if (ret == -1)
1688 1689 1690 1691 1692 1693 1694 1695 1696
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_TO_SCAN,
                                        VIR_TYPED_PARAM_UINT, pages_to_scan) < 0)
                return -1;

            break;

        case 1:
1697 1698 1699 1700
            ret = nodeGetMemoryParameterValue("sleep_millisecs", &sleep_millisecs);
            if (ret == -2)
                continue;
            else if (ret == -1)
1701 1702 1703 1704 1705 1706 1707 1708 1709
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_SLEEP_MILLISECS,
                                        VIR_TYPED_PARAM_UINT, sleep_millisecs) < 0)
                return -1;

            break;

        case 2:
1710 1711 1712 1713
            ret = nodeGetMemoryParameterValue("pages_shared", &pages_shared);
            if (ret == -2)
                continue;
            else if (ret == -1)
1714 1715 1716 1717 1718 1719 1720 1721 1722
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_SHARED,
                                        VIR_TYPED_PARAM_ULLONG, pages_shared) < 0)
                return -1;

            break;

        case 3:
1723 1724 1725 1726
            ret = nodeGetMemoryParameterValue("pages_sharing", &pages_sharing);
            if (ret == -2)
                continue;
            else if (ret == -1)
1727 1728 1729 1730 1731 1732 1733 1734 1735
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_SHARING,
                                        VIR_TYPED_PARAM_ULLONG, pages_sharing) < 0)
                return -1;

            break;

        case 4:
1736 1737 1738 1739
            ret = nodeGetMemoryParameterValue("pages_unshared", &pages_unshared);
            if (ret == -2)
                continue;
            else if (ret == -1)
1740 1741 1742 1743 1744 1745 1746 1747 1748
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_UNSHARED,
                                        VIR_TYPED_PARAM_ULLONG, pages_unshared) < 0)
                return -1;

            break;

        case 5:
1749 1750 1751 1752
            ret = nodeGetMemoryParameterValue("pages_volatile", &pages_volatile);
            if (ret == -2)
                continue;
            else if (ret == -1)
1753 1754 1755 1756 1757 1758 1759 1760 1761
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_PAGES_VOLATILE,
                                        VIR_TYPED_PARAM_ULLONG, pages_volatile) < 0)
                return -1;

            break;

        case 6:
1762 1763 1764 1765
            ret = nodeGetMemoryParameterValue("full_scans", &full_scans);
            if (ret == -2)
                continue;
            else if (ret == -1)
1766 1767 1768 1769 1770 1771 1772 1773
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_FULL_SCANS,
                                        VIR_TYPED_PARAM_ULLONG, full_scans) < 0)
                return -1;

            break;

1774
        case 7:
1775 1776 1777 1778
            ret = nodeGetMemoryParameterValue("merge_across_nodes", &merge_across_nodes);
            if (ret == -2)
                continue;
            else if (ret == -1)
1779 1780 1781 1782 1783 1784 1785
                return -1;

            if (virTypedParameterAssign(param, VIR_NODE_MEMORY_SHARED_MERGE_ACROSS_NODES,
                                        VIR_TYPED_PARAM_UINT, merge_across_nodes) < 0)
                return -1;

            break;
1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797
        }
    }

    return 0;
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node get memory parameters not implemented"
                     " on this platform"));
    return -1;
#endif
}

1798
int
1799 1800
nodeGetCPUMap(const char *sysfs_prefix,
              unsigned char **cpumap,
1801 1802
              unsigned int *online,
              unsigned int flags)
1803 1804 1805 1806 1807 1808 1809
{
    virBitmapPtr cpus = NULL;
    int ret = -1;
    int dummy;

    virCheckFlags(0, -1);

1810
    if (!cpumap && !online)
1811
        return nodeGetCPUCount(sysfs_prefix);
1812

1813
    if (!(cpus = nodeGetOnlineCPUBitmap(sysfs_prefix)))
1814 1815 1816 1817 1818 1819 1820
        goto cleanup;

    if (cpumap && virBitmapToData(cpus, cpumap, &dummy) < 0)
        goto cleanup;
    if (online)
        *online = virBitmapCountBits(cpus);

1821 1822
    ret = virBitmapSize(cpus);

1823
 cleanup:
1824 1825 1826 1827 1828 1829
    if (ret < 0 && cpumap)
        VIR_FREE(*cpumap);
    virBitmapFree(cpus);
    return ret;
}

1830
static int
1831
nodeCapsInitNUMAFake(const char *sysfs_prefix,
1832 1833
                     const char *cpupath ATTRIBUTE_UNUSED,
                     virCapsPtr caps ATTRIBUTE_UNUSED)
1834 1835 1836 1837 1838
{
    virNodeInfo nodeinfo;
    virCapsHostNUMACellCPUPtr cpus;
    int ncpus;
    int s, c, t;
1839 1840
    int id, cid;
    int onlinecpus ATTRIBUTE_UNUSED;
1841

1842
    if (nodeGetInfo(sysfs_prefix, &nodeinfo) < 0)
1843 1844 1845
        return -1;

    ncpus = VIR_NODEINFO_MAXCPUS(nodeinfo);
1846
    onlinecpus = nodeinfo.cpus;
1847

1848
    if (VIR_ALLOC_N(cpus, ncpus) < 0)
1849 1850
        return -1;

1851
    id = cid = 0;
1852 1853 1854
    for (s = 0; s < nodeinfo.sockets; s++) {
        for (c = 0; c < nodeinfo.cores; c++) {
            for (t = 0; t < nodeinfo.threads; t++) {
1855
#ifdef __linux__
1856
                if (virNodeGetCpuValue(cpupath, id, "online", 1)) {
1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868
#endif
                    cpus[cid].id = id;
                    cpus[cid].socket_id = s;
                    cpus[cid].core_id = c;
                    if (!(cpus[cid].siblings = virBitmapNew(ncpus)))
                        goto error;
                    ignore_value(virBitmapSetBit(cpus[cid].siblings, id));
                    cid++;
#ifdef __linux__
                }
#endif

1869 1870 1871 1872 1873 1874 1875
                id++;
            }
        }
    }

    if (virCapabilitiesAddHostNUMACell(caps, 0,
                                       nodeinfo.memory,
1876 1877 1878
#ifdef __linux__
                                       onlinecpus, cpus,
#else
1879
                                       ncpus, cpus,
1880
#endif
M
Michal Privoznik 已提交
1881
                                       0, NULL,
1882
                                       0, NULL) < 0)
1883 1884 1885 1886 1887
        goto error;

    return 0;

 error:
1888
    for (; id >= 0; id--)
1889 1890 1891 1892 1893 1894
        virBitmapFree(cpus[id].siblings);
    VIR_FREE(cpus);
    return -1;
}

static int
1895
nodeGetCellsFreeMemoryFake(unsigned long long *freeMems,
1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
                           int startCell,
                           int maxCells ATTRIBUTE_UNUSED)
{
    double avail = physmem_available();

    if (startCell != 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, 0);
        return -1;
    }

    freeMems[0] = (unsigned long long)avail;

    if (!freeMems[0]) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot determine free memory"));
        return -1;
    }

    return 1;
}

1919 1920 1921
static int
nodeGetMemoryFake(unsigned long long *mem,
                  unsigned long long *freeMem)
1922
{
1923 1924
    int ret = -1;

W
Wojciech Macek 已提交
1925 1926 1927 1928 1929
#if defined(__FreeBSD__)
    unsigned long pagesize = getpagesize();
    u_int value;
    size_t value_size = sizeof(value);

1930 1931 1932 1933 1934 1935 1936 1937
    if (mem) {
        if (sysctlbyname("vm.stats.vm.v_page_count", &value,
                         &value_size, NULL, 0) < 0) {
            virReportSystemError(errno, "%s",
                                 _("sysctl failed for vm.stats.vm.v_page_count"));
            goto cleanup;
        }
        *mem = value * (unsigned long long)pagesize;
W
Wojciech Macek 已提交
1938 1939
    }

1940 1941 1942 1943 1944 1945 1946 1947 1948 1949
    if (freeMem) {
        if (sysctlbyname("vm.stats.vm.v_free_count", &value,
                         &value_size, NULL, 0) < 0) {
            virReportSystemError(errno, "%s",
                                 _("sysctl failed for vm.stats.vm.v_free_count"));
            goto cleanup;
        }

        *freeMem = value * (unsigned long long)pagesize;
    }
W
Wojciech Macek 已提交
1950 1951

#else
1952 1953 1954 1955 1956 1957 1958
    if (mem) {
        double total = physmem_total();
        if (!total) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Cannot determine free memory"));
            goto cleanup;
        }
1959

1960
        *mem = (unsigned long long) total;
1961 1962
    }

1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973
    if (freeMem) {
        double avail = physmem_available();

        if (!avail) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Cannot determine free memory"));
            goto cleanup;
        }

        *freeMem = (unsigned long long) avail;
    }
W
Wojciech Macek 已提交
1974
#endif
1975 1976 1977 1978

    ret = 0;
 cleanup:
    return ret;
1979 1980
}

1981 1982
/* returns 1 on success, 0 if the detection failed and -1 on hard error */
static int
1983 1984
virNodeCapsFillCPUInfo(const char *cpupath ATTRIBUTE_UNUSED,
                       int cpu_id ATTRIBUTE_UNUSED,
E
Eric Blake 已提交
1985
                       virCapsHostNUMACellCPUPtr cpu ATTRIBUTE_UNUSED)
1986
{
E
Eric Blake 已提交
1987
#ifdef __linux__
1988 1989 1990
    int tmp;
    cpu->id = cpu_id;

1991
    if ((tmp = virNodeGetCpuValue(cpupath, cpu_id,
1992 1993 1994 1995 1996
                                  "topology/physical_package_id", -1)) < 0)
        return 0;

    cpu->socket_id = tmp;

1997
    if ((tmp = virNodeGetCpuValue(cpupath, cpu_id,
1998 1999 2000 2001 2002
                                  "topology/core_id", -1)) < 0)
        return 0;

    cpu->core_id = tmp;

2003
    if (!(cpu->siblings = virNodeGetSiblingsList(cpupath, cpu_id)))
2004 2005 2006
        return -1;

    return 0;
E
Eric Blake 已提交
2007 2008 2009 2010 2011
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node cpu info not implemented on this platform"));
    return -1;
#endif
2012 2013
}

2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060
/*
 * virNodeCapsGetSiblingInfo:
 * @node: NUMA node whose distances are queried
 * @siblings: output array of (node, distance) pairs
 * @nsiblings: output number of entries in @siblings
 *
 * Converts the distance table returned by virNumaGetDistances() into a
 * compact list, dropping entries whose distance is zero. If no table is
 * returned at all, @siblings is set to NULL and @nsiblings to 0.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virNodeCapsGetSiblingInfo(int node,
                          virCapsHostNUMACellSiblingInfoPtr *siblings,
                          int *nsiblings)
{
    virCapsHostNUMACellSiblingInfoPtr list = NULL;
    int *distances = NULL;
    int ndistances = 0;
    int nlist = 0;
    int ret = -1;
    size_t idx;

    if (virNumaGetDistances(node, &distances, &ndistances) < 0)
        goto cleanup;

    if (distances == NULL) {
        *siblings = NULL;
        *nsiblings = 0;
        return 0;
    }

    /* Allocate for the worst case, shrink once the real count is known */
    if (VIR_ALLOC_N(list, ndistances) < 0)
        goto cleanup;

    for (idx = 0; idx < ndistances; idx++) {
        if (distances[idx] != 0) {
            list[nlist].node = idx;
            list[nlist].distance = distances[idx];
            nlist++;
        }
    }

    if (VIR_REALLOC_N(list, nlist) < 0)
        goto cleanup;

    /* Hand the list over to the caller; cleanup must not free it */
    *siblings = list;
    *nsiblings = nlist;
    list = NULL;
    ret = 0;

 cleanup:
    VIR_FREE(distances);
    VIR_FREE(list);
    return ret;
}

M
Michal Privoznik 已提交
2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089
/*
 * virNodeCapsGetPagesInfo:
 * @node: NUMA node to query
 * @pageinfo: output array with one entry per page size
 * @npageinfo: output number of entries in @pageinfo
 *
 * Fetches the page sizes and available page counts for @node through
 * virNumaGetPages() and copies them pairwise into a newly allocated
 * @pageinfo array.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virNodeCapsGetPagesInfo(int node,
                        virCapsHostNUMACellPageInfoPtr *pageinfo,
                        int *npageinfo)
{
    unsigned int *sizes = NULL;
    unsigned int *avail = NULL;
    size_t count;
    size_t idx;
    int ret = -1;

    if (virNumaGetPages(node, &sizes, &avail, NULL, &count) < 0)
        goto cleanup;

    if (VIR_ALLOC_N(*pageinfo, count) < 0)
        goto cleanup;
    *npageinfo = count;

    for (idx = 0; idx < count; idx++) {
        (*pageinfo)[idx].size = sizes[idx];
        (*pageinfo)[idx].avail = avail[idx];
    }

    ret = 0;

 cleanup:
    /* The temporary arrays are released on every path */
    VIR_FREE(avail);
    VIR_FREE(sizes);
    return ret;
}

2090
int
2091 2092
nodeCapsInitNUMA(const char *sysfs_prefix,
                 virCapsPtr caps)
2093
{
2094 2095
    const char *prefix = sysfs_prefix ? sysfs_prefix : SYSFS_SYSTEM_PATH;
    char *cpupath;
2096
    int n;
2097
    unsigned long long memory;
2098
    virCapsHostNUMACellCPUPtr cpus = NULL;
2099
    virBitmapPtr cpumap = NULL;
2100
    virCapsHostNUMACellSiblingInfoPtr siblings = NULL;
2101
    int nsiblings = 0;
M
Michal Privoznik 已提交
2102 2103
    virCapsHostNUMACellPageInfoPtr pageinfo = NULL;
    int npageinfo;
2104
    int ret = -1;
2105
    int ncpus = 0;
2106
    int cpu;
2107
    bool topology_failed = false;
2108
    int max_node;
2109

2110 2111 2112 2113
    if (virAsprintf(&cpupath, "%s/cpu", prefix) < 0)
        return -1;

    if (!virNumaIsAvailable()) {
2114
        ret = nodeCapsInitNUMAFake(sysfs_prefix, cpupath, caps);
2115 2116
        goto cleanup;
    }
2117

2118 2119 2120 2121
    if ((max_node = virNumaGetMaxNode()) < 0)
        goto cleanup;

    for (n = 0; n <= max_node; n++) {
2122
        size_t i;
2123

2124 2125 2126
        if ((ncpus = virNumaGetNodeCPUs(n, &cpumap)) < 0) {
            if (ncpus == -2)
                continue;
2127

2128 2129
            goto cleanup;
        }
2130 2131 2132

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;
2133
        cpu = 0;
2134

2135
        for (i = 0; i < virBitmapSize(cpumap); i++) {
J
Ján Tomko 已提交
2136
            if (virBitmapIsBitSet(cpumap, i)) {
2137
                if (virNodeCapsFillCPUInfo(cpupath, i, cpus + cpu++) < 0) {
2138 2139 2140 2141 2142
                    topology_failed = true;
                    virResetLastError();
                }
            }
        }
2143

2144 2145 2146
        if (virNodeCapsGetSiblingInfo(n, &siblings, &nsiblings) < 0)
            goto cleanup;

M
Michal Privoznik 已提交
2147 2148 2149
        if (virNodeCapsGetPagesInfo(n, &pageinfo, &npageinfo) < 0)
            goto cleanup;

2150 2151 2152 2153
        /* Detect the amount of memory in the numa cell in KiB */
        virNumaGetNodeMemory(n, &memory, NULL);
        memory >>= 10;

2154 2155
        if (virCapabilitiesAddHostNUMACell(caps, n, memory,
                                           ncpus, cpus,
M
Michal Privoznik 已提交
2156 2157
                                           nsiblings, siblings,
                                           npageinfo, pageinfo) < 0)
2158
            goto cleanup;
2159 2160

        cpus = NULL;
2161
        siblings = NULL;
M
Michal Privoznik 已提交
2162
        pageinfo = NULL;
2163 2164
        virBitmapFree(cpumap);
        cpumap = NULL;
2165 2166 2167 2168
    }

    ret = 0;

2169
 cleanup:
2170
    if ((topology_failed || ret < 0) && cpus)
2171 2172
        virCapabilitiesClearHostNUMACellCPUTopology(cpus, ncpus);

2173 2174
    virBitmapFree(cpumap);
    VIR_FREE(cpus);
2175
    VIR_FREE(siblings);
M
Michal Privoznik 已提交
2176
    VIR_FREE(pageinfo);
2177
    VIR_FREE(cpupath);
2178 2179
    return ret;
}
2180 2181 2182


int
2183
nodeGetCellsFreeMemory(unsigned long long *freeMems,
2184 2185 2186
                       int startCell,
                       int maxCells)
{
2187
    unsigned long long mem;
2188 2189 2190 2191
    int n, lastCell, numCells;
    int ret = -1;
    int maxCell;

2192
    if (!virNumaIsAvailable())
2193
        return nodeGetCellsFreeMemoryFake(freeMems,
2194 2195
                                          startCell, maxCells);

2196 2197 2198
    if ((maxCell = virNumaGetMaxNode()) < 0)
        return 0;

2199
    if (startCell > maxCell) {
2200 2201 2202
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, maxCell);
2203 2204 2205 2206 2207 2208
        goto cleanup;
    }
    lastCell = startCell + maxCells - 1;
    if (lastCell > maxCell)
        lastCell = maxCell;

2209
    for (numCells = 0, n = startCell; n <= lastCell; n++) {
2210
        virNumaGetNodeMemory(n, NULL, &mem);
2211

2212 2213 2214 2215
        freeMems[numCells++] = mem;
    }
    ret = numCells;

2216
 cleanup:
2217 2218 2219
    return ret;
}

2220 2221 2222
/*
 * nodeGetMemory:
 * @mem: if non-NULL, receives the total memory summed over all nodes
 * @freeMem: if non-NULL, receives the free memory summed over all nodes
 *
 * Both outputs are reset to 0 first. Without NUMA support the work is
 * delegated to nodeGetMemoryFake(); otherwise the per-node figures of
 * every available node are accumulated.
 *
 * Returns 0 on success, -1 on failure.
 */
int
nodeGetMemory(unsigned long long *mem,
              unsigned long long *freeMem)
{
    int highestNode;
    int node;

    if (mem)
        *mem = 0;

    if (freeMem)
        *freeMem = 0;

    if (!virNumaIsAvailable())
        return nodeGetMemoryFake(mem, freeMem);

    highestNode = virNumaGetMaxNode();
    if (highestNode < 0)
        return -1;

    for (node = 0; node <= highestNode; node++) {
        unsigned long long nodeTotal = 0;
        unsigned long long nodeFree = 0;

        /* Only available nodes contribute to the sums */
        if (virNumaNodeIsAvailable(node)) {
            if (virNumaGetNodeMemory(node, &nodeTotal, &nodeFree) < 0)
                return -1;

            if (mem)
                *mem += nodeTotal;

            if (freeMem)
                *freeMem += nodeFree;
        }
    }

    return 0;
}
2257 2258 2259 2260 2261 2262 2263 2264 2265

/*
 * nodeGetFreePages:
 * @npages: number of entries in @pages
 * @pages: page sizes to query
 * @startCell: first NUMA cell to query
 * @cellCount: maximum number of cells to query
 * @counts: output array, filled cell by cell with one value per page size
 *
 * For each cell from @startCell to the last requested cell (clamped to
 * the highest existing node) and each size in @pages, stores the free
 * page count reported by virNumaGetPageInfo() into @counts.
 *
 * Returns the number of entries written to @counts, 0 if the highest
 * node number cannot be determined, or -1 on error (including when
 * nothing was written).
 */
int
nodeGetFreePages(unsigned int npages,
                 unsigned int *pages,
                 int startCell,
                 unsigned int cellCount,
                 unsigned long long *counts)
{
    int maxNode;
    int lastCell;
    int cell;
    size_t pageIdx;
    size_t nfilled = 0;

    if ((maxNode = virNumaGetMaxNode()) < 0)
        return 0;

    if (startCell > maxNode) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, maxNode);
        return -1;
    }

    lastCell = MIN(maxNode, startCell + (int) cellCount - 1);

    for (cell = startCell; cell <= lastCell; cell++) {
        for (pageIdx = 0; pageIdx < npages; pageIdx++) {
            unsigned int nfree;

            if (virNumaGetPageInfo(cell, pages[pageIdx], 0, NULL, &nfree) < 0)
                return -1;

            counts[nfilled] = nfree;
            nfilled++;
        }
    }

    if (nfilled == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("no suitable info found"));
        return -1;
    }

    return nfilled;
}
2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343

/*
 * nodeAllocPages:
 * @npages: number of entries in @pageSizes and @pageCounts
 * @pageSizes: page sizes whose pools are adjusted
 * @pageCounts: page count for the corresponding entry of @pageSizes
 * @startCell: first NUMA cell to adjust
 * @cellCount: maximum number of cells to adjust
 * @add: passed through to virNumaSetPagePoolSize()
 *
 * For each cell from @startCell to the last requested cell (clamped to
 * the highest existing node), calls virNumaSetPagePoolSize() once per
 * (size, count) pair.
 *
 * Returns the number of successful pool adjustments, 0 if the highest
 * node number cannot be determined, or -1 on error.
 */
int
nodeAllocPages(unsigned int npages,
               unsigned int *pageSizes,
               unsigned long long *pageCounts,
               int startCell,
               unsigned int cellCount,
               bool add)
{
    int maxNode;
    int lastCell;
    int cell;
    size_t idx;
    size_t nchanged = 0;

    if ((maxNode = virNumaGetMaxNode()) < 0)
        return 0;

    if (startCell > maxNode) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, maxNode);
        return -1;
    }

    lastCell = MIN(maxNode, startCell + (int) cellCount - 1);

    for (cell = startCell; cell <= lastCell; cell++) {
        for (idx = 0; idx < npages; idx++) {
            if (virNumaSetPagePoolSize(cell, pageSizes[idx],
                                       pageCounts[idx], add) < 0)
                return -1;

            nchanged++;
        }
    }

    return nchanged;
}
2344

2345 2346
#if HAVE_LINUX_KVM_H && defined(KVM_CAP_PPC_SMT)

2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395
/* Get the number of threads per subcore.
 *
 * This will be 2, 4 or 8 on POWER hosts, depending on the current
 * micro-threading configuration, and 0 everywhere else.
 *
 * Returns the number of threads per subcore if subcores are in use, zero
 * if subcores are not in use, and a negative value on error */
int
nodeGetThreadsPerSubcore(virArch arch)
{
    const char *kvmpath = "/dev/kvm";
    int threads_per_subcore;
    int kvmfd;

    /* Subcores only exist on ppc64 */
    if (!ARCH_IS_PPC64(arch))
        return 0;

    /* It's okay if /dev/kvm doesn't exist, because
     *   a. we might be running in a guest
     *   b. the kvm module might not be installed or enabled
     * In either case, falling back to the subcore-unaware thread
     * counting logic is the right thing to do */
    if (!virFileExists(kvmpath))
        return 0;

    kvmfd = open(kvmpath, O_RDONLY);
    if (kvmfd < 0) {
        /* This can happen when running as a regular user if
         * permissions are tight enough, in which case erroring out
         * is better than silently falling back and reporting
         * different nodeinfo depending on the user */
        virReportSystemError(errno,
                             _("Failed to open '%s'"),
                             kvmpath);
        return -1;
    }

    /* For Phyp and KVM based guests the ioctl for KVM_CAP_PPC_SMT
     * returns zero and both primary and secondary threads will be
     * online */
    threads_per_subcore = ioctl(kvmfd,
                                KVM_CHECK_EXTENSION,
                                KVM_CAP_PPC_SMT);

    VIR_FORCE_CLOSE(kvmfd);

    return threads_per_subcore;
}
2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407

#else

/* Fallback for nodeGetThreadsPerSubcore() used when the KVM headers
 * are not available on the system or do not define KVM_CAP_PPC_SMT.
 *
 * Always returns 0, which the KVM-aware variant uses to mean that
 * subcores are not in use; @arch is ignored. */
int
nodeGetThreadsPerSubcore(virArch arch ATTRIBUTE_UNUSED)
{
    return 0;
}

#endif /* HAVE_LINUX_KVM_H && defined(KVM_CAP_PPC_SMT) */