nodeinfo.c 30.2 KB
Newer Older
1 2 3
/*
 * nodeinfo.c: Helper routines for OS specific node information
 *
 * Copyright (C) 2006-2008, 2010-2012 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

24
#include <config.h>
J
Jim Meyering 已提交
25

26 27 28
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
29
#include <stdint.h>
30
#include <errno.h>
31
#include <dirent.h>
E
Eric Blake 已提交
32
#include <sys/utsname.h>
33
#include <sched.h>
34
#include "conf/domain_conf.h"
35 36 37 38 39

#if HAVE_NUMACTL
# define NUMA_VERSION1_COMPATIBILITY 1
# include <numa.h>
#endif
40

41 42
#include "c-ctype.h"
#include "memory.h"
43
#include "nodeinfo.h"
44
#include "physmem.h"
45
#include "util.h"
46
#include "logging.h"
47
#include "virterror_internal.h"
48
#include "count-one-bits.h"
E
Eric Blake 已提交
49
#include "intprops.h"
E
Eric Blake 已提交
50
#include "virfile.h"
51

52 53 54

#define VIR_FROM_THIS VIR_FROM_NONE

55
#ifdef __linux__
56
# define CPUINFO_PATH "/proc/cpuinfo"
57
# define SYSFS_SYSTEM_PATH "/sys/devices/system"
58
# define PROCSTAT_PATH "/proc/stat"
59
# define MEMINFO_PATH "/proc/meminfo"
60 61

# define LINUX_NB_CPU_STATS 4
62 63
# define LINUX_NB_MEMORY_STATS_ALL 4
# define LINUX_NB_MEMORY_STATS_CELL 2
64

65
/* NB, this is not static as we need to call it from the testsuite */
66
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
67
                             const char *sysfs_dir,
68
                             virNodeInfoPtr nodeinfo);
69

70 71
static int linuxNodeGetCPUStats(FILE *procstat,
                                int cpuNum,
72
                                virNodeCPUStatsPtr params,
73
                                int *nparams);
74 75
static int linuxNodeGetMemoryStats(FILE *meminfo,
                                   int cellNum,
76
                                   virNodeMemoryStatsPtr params,
77
                                   int *nparams);
78

E
Eric Blake 已提交
79
/* Return the positive decimal contents of the given
E
Eric Blake 已提交
80
 * DIR/cpu%u/FILE, or -1 on error.  If MISSING_OK and the
E
Eric Blake 已提交
81
 * file could not be found, return 1 instead of an error; this is
82 83
 * because some machines cannot hot-unplug cpu0, or because
 * hot-unplugging is disabled.  */
E
Eric Blake 已提交
84
static int
E
Eric Blake 已提交
85 86
virNodeGetCpuValue(const char *dir, unsigned int cpu, const char *file,
                   bool missing_ok)
E
Eric Blake 已提交
87 88 89 90 91 92 93
{
    char *path;
    FILE *pathfp;
    int value = -1;
    char value_str[INT_BUFSIZE_BOUND(value)];
    char *tmp;

E
Eric Blake 已提交
94
    if (virAsprintf(&path, "%s/cpu%u/%s", dir, cpu, file) < 0) {
E
Eric Blake 已提交
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
        virReportOOMError();
        return -1;
    }

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
        if (missing_ok && errno == ENOENT)
            value = 1;
        else
            virReportSystemError(errno, _("cannot open %s"), path);
        goto cleanup;
    }

    if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }
    if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
113 114 115
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("could not convert '%s' to an integer"),
                       value_str);
E
Eric Blake 已提交
116 117 118 119
        goto cleanup;
    }

cleanup:
120
    VIR_FORCE_FCLOSE(pathfp);
E
Eric Blake 已提交
121 122 123 124 125
    VIR_FREE(path);

    return value;
}

E
Eric Blake 已提交
126 127
/* Count the hardware threads that share a core with CPU by counting the
 * bits set in the hexadecimal mask DIR/cpu<CPU>/topology/thread_siblings.
 *
 * Returns the number of set bits, or 0 after reporting an error when the
 * path cannot be built or the file cannot be opened or read.
 *
 * The mask is consumed in chunks until end of file, so a mask longer than
 * the local buffer is counted in full rather than being cut off after the
 * first chunk (which would drop the right-most, low-order part).  */
static unsigned long
virNodeCountThreadSiblings(const char *dir, unsigned int cpu)
{
    unsigned long ret = 0;
    char *path;
    FILE *pathfp;
    char str[1024];
    bool got_data = false;
    int i;

    if (virAsprintf(&path, "%s/cpu%u/topology/thread_siblings",
                    dir, cpu) < 0) {
        virReportOOMError();
        return 0;
    }

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
        virReportSystemError(errno, _("cannot open %s"), path);
        VIR_FREE(path);
        return 0;
    }

    while (fgets(str, sizeof(str), pathfp) != NULL) {
        got_data = true;

        /* Every hex digit contributes its population count; separators
         * (commas, newline) are skipped. */
        for (i = 0; str[i] != '\0'; i++) {
            if (c_isdigit(str[i]))
                ret += count_one_bits(str[i] - '0');
            else if (str[i] >= 'A' && str[i] <= 'F')
                ret += count_one_bits(str[i] - 'A' + 10);
            else if (str[i] >= 'a' && str[i] <= 'f')
                ret += count_one_bits(str[i] - 'a' + 10);
        }
    }

    /* Nothing readable at all, or a read error part-way through */
    if (!got_data || ferror(pathfp)) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        ret = 0;
    }

    VIR_FORCE_FCLOSE(pathfp);
    VIR_FREE(path);

    return ret;
}

E
Eric Blake 已提交
171 172
/* Return the socket (physical package) id of CPU, read from
 * DIR/cpu<CPU>/topology/physical_package_id.  The file is mandatory, so
 * the result is whatever virNodeGetCpuValue() yields for it, including
 * its -1 failure value.  On ppc and s390(x) builds a negative result is
 * mapped to socket 0.  */
static int
virNodeParseSocket(const char *dir, unsigned int cpu)
{
    int socket_id;

    socket_id = virNodeGetCpuValue(dir, cpu, "topology/physical_package_id",
                                   false);

# if defined(__powerpc__) || \
    defined(__powerpc64__) || \
    defined(__s390__) || \
    defined(__s390x__)
    /* ppc and s390(x) has -1 */
    if (socket_id < 0)
        socket_id = 0;
# endif

    return socket_id;
}

187 188
/* parses a node entry, returning number of processors in the node and
 * filling arguments */
189
static int
190 191 192
virNodeParseNode(const char *node, int *sockets, int *cores, int *threads)
    ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2)
    ATTRIBUTE_NONNULL(3) ATTRIBUTE_NONNULL(4)
193 194
{
    int ret = -1;
195 196 197 198 199 200 201 202 203 204 205 206
    int processors = 0;
    DIR *cpudir = NULL;
    struct dirent *cpudirent = NULL;
    int sock_max = 0;
    cpu_set_t sock_map;
    int sock;
    cpu_set_t *core_maps = NULL;
    int core;
    int i;
    int siblings;
    unsigned int cpu;
    int online;
207

208 209 210 211 212 213
    *threads = 0;
    *cores = 0;
    *sockets = 0;

    if (!(cpudir = opendir(node))) {
        virReportSystemError(errno, _("cannot opendir %s"), node);
214 215
        goto cleanup;
    }
216 217 218 219 220 221 222 223

    /* enumerate sockets in the node */
    CPU_ZERO(&sock_map);
    errno = 0;
    while ((cpudirent = readdir(cpudir))) {
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
            continue;

224 225 226 227 228 229
        if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0)
            goto cleanup;

        if (!online)
            continue;

230 231 232 233 234 235 236 237 238 239 240 241
        /* Parse socket */
        sock = virNodeParseSocket(node, cpu);
        CPU_SET(sock, &sock_map);

        if (sock > sock_max)
            sock_max = sock;

        errno = 0;
    }

    if (errno) {
        virReportSystemError(errno, _("problem reading %s"), node);
242 243
        goto cleanup;
    }
244 245 246 247 248 249

    sock_max++;

    /* allocate cpu maps for each socket */
    if (VIR_ALLOC_N(core_maps, sock_max) < 0) {
        virReportOOMError();
250
        goto cleanup;
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
    }

    for (i = 0; i < sock_max; i++)
        CPU_ZERO(&core_maps[i]);

    /* iterate over all CPU's in the node */
    rewinddir(cpudir);
    errno = 0;
    while ((cpudirent = readdir(cpudir))) {
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
            continue;

        if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0)
            goto cleanup;

        if (!online)
            continue;

        processors++;

        /* Parse socket */
        sock = virNodeParseSocket(node, cpu);
        if (!CPU_ISSET(sock, &sock_map)) {
274 275
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("CPU socket topology has changed"));
276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
            goto cleanup;
        }

        /* Parse core */
# if defined(__s390__) || \
    defined(__s390x__)
        /* logical cpu is equivalent to a core on s390 */
        core = cpu;
# else
        core = virNodeGetCpuValue(node, cpu, "topology/core_id", false);
# endif

        CPU_SET(core, &core_maps[sock]);

        if (!(siblings = virNodeCountThreadSiblings(node, cpu)))
            goto cleanup;

        if (siblings > *threads)
            *threads = siblings;

        errno = 0;
    }

    if (errno) {
        virReportSystemError(errno, _("problem reading %s"), node);
301 302
        goto cleanup;
    }
303 304 305 306 307 308 309 310 311 312 313 314 315 316

    /* finalize the returned data */
    *sockets = CPU_COUNT(&sock_map);

    for (i = 0; i < sock_max; i++) {
        if (!CPU_ISSET(i, &sock_map))
            continue;

        core = CPU_COUNT(&core_maps[i]);
        if (core > *cores)
            *cores = core;
    }

    ret = processors;
317 318

cleanup:
319 320 321 322 323 324 325
    /* don't shadow a more serious error */
    if (cpudir && closedir(cpudir) < 0 && ret >= 0) {
        virReportSystemError(errno, _("problem closing %s"), node);
        ret = -1;
    }
    VIR_FREE(core_maps);

326 327 328
    return ret;
}

329
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
330
                             const char *sysfs_dir,
331
                             virNodeInfoPtr nodeinfo)
332
{
333
    char line[1024];
334 335 336 337
    DIR *nodedir = NULL;
    struct dirent *nodedirent = NULL;
    int cpus, cores, socks, threads;
    unsigned int node;
338
    int ret = -1;
339
    char *sysfs_nodedir = NULL;
340
    char *sysfs_cpudir = NULL;
341 342 343

    nodeinfo->cpus = 0;
    nodeinfo->mhz = 0;
344
    nodeinfo->cores = 0;
345
    nodeinfo->nodes = 0;
346

347
    /* Start with parsing CPU clock speed from /proc/cpuinfo */
348
    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
E
Eric Blake 已提交
349
# if defined(__x86_64__) || \
350 351
    defined(__amd64__)  || \
    defined(__i386__)
E
Eric Blake 已提交
352
        char *buf = line;
353
        if (STRPREFIX(buf, "cpu MHz")) {
354 355
            char *p;
            unsigned int ui;
356

357
            buf += 7;
358
            while (*buf && c_isspace(*buf))
359
                buf++;
360

361
            if (*buf != ':' || !buf[1]) {
362 363
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("parsing cpu MHz from cpuinfo"));
364
                goto cleanup;
365
            }
366

367
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
368
                /* Accept trailing fractional part.  */
369
                (*p == '\0' || *p == '.' || c_isspace(*p)))
370
                nodeinfo->mhz = ui;
371
        }
372

E
Eric Blake 已提交
373
# elif defined(__powerpc__) || \
374
      defined(__powerpc64__)
E
Eric Blake 已提交
375
        char *buf = line;
376
        if (STRPREFIX(buf, "clock")) {
377 378
            char *p;
            unsigned int ui;
379

380 381 382
            buf += 5;
            while (*buf && c_isspace(*buf))
                buf++;
383

384
            if (*buf != ':' || !buf[1]) {
385 386
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("parsing cpu MHz from cpuinfo"));
387
                goto cleanup;
388
            }
389

390
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 &&
391
                /* Accept trailing fractional part.  */
392
                (*p == '\0' || *p == '.' || c_isspace(*p)))
393
                nodeinfo->mhz = ui;
394 395 396 397 398
            /* No other interesting infos are available in /proc/cpuinfo.
             * However, there is a line identifying processor's version,
             * identification and machine, but we don't want it to be caught
             * and parsed in next iteration, because it is not in expected
             * format and thus lead to error. */
399
        }
400 401 402 403 404
# elif defined(__s390__) || \
      defined(__s390x__)
        /* s390x has no realistic value for CPU speed,
         * assign a value of zero to signify this */
        nodeinfo->mhz = 0;
E
Eric Blake 已提交
405 406 407
# else
#  warning Parser for /proc/cpuinfo needs to be adapted for your architecture
# endif
408 409
    }

410 411
    /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the
     * core, node, socket, thread and topology information from /sys
412
     */
413
    if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0) {
414 415 416
        virReportOOMError();
        goto cleanup;
    }
417

418 419 420 421
    if (!(nodedir = opendir(sysfs_nodedir))) {
        /* the host isn't probably running a NUMA architecture */
        goto fallback;
    }
422

423 424 425
    errno = 0;
    while ((nodedirent = readdir(nodedir))) {
        if (sscanf(nodedirent->d_name, "node%u", &node) != 1)
426 427
            continue;

428 429 430 431 432
        nodeinfo->nodes++;

        if (virAsprintf(&sysfs_cpudir, "%s/node/%s",
                        sysfs_dir, nodedirent->d_name) < 0) {
            virReportOOMError();
433
            goto cleanup;
E
Eric Blake 已提交
434 435
        }

436 437 438
        if ((cpus = virNodeParseNode(sysfs_cpudir, &socks,
                                     &cores, &threads)) < 0)
            goto cleanup;
439

440
        VIR_FREE(sysfs_cpudir);
441

442 443 444 445 446 447 448 449 450 451 452 453
        nodeinfo->cpus += cpus;

        if (socks > nodeinfo->sockets)
            nodeinfo->sockets = socks;

        if (cores > nodeinfo->cores)
            nodeinfo->cores = cores;

        if (threads > nodeinfo->threads)
            nodeinfo->threads = threads;

        errno = 0;
454
    }
455

E
Eric Blake 已提交
456
    if (errno) {
457
        virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir);
458 459
        goto cleanup;
    }
460 461 462 463 464 465 466 467 468

    if (nodeinfo->cpus && nodeinfo->nodes)
        goto done;

fallback:
    VIR_FREE(sysfs_cpudir);

    if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) {
        virReportOOMError();
469
        goto cleanup;
E
Eric Blake 已提交
470
    }
471

472
    if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores, &threads)) < 0)
473
        goto cleanup;
474

475 476 477 478 479 480 481
    nodeinfo->nodes = 1;
    nodeinfo->cpus = cpus;
    nodeinfo->sockets = socks;
    nodeinfo->cores = cores;
    nodeinfo->threads = threads;

done:
482
    /* There should always be at least one cpu, socket, node, and thread. */
483
    if (nodeinfo->cpus == 0) {
484
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found"));
485
        goto cleanup;
486
    }
487

C
Chris Lalancette 已提交
488
    if (nodeinfo->sockets == 0) {
489
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found"));
490
        goto cleanup;
C
Chris Lalancette 已提交
491
    }
492

C
Chris Lalancette 已提交
493
    if (nodeinfo->threads == 0) {
494
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found"));
495
        goto cleanup;
C
Chris Lalancette 已提交
496 497
    }

498 499 500
    ret = 0;

cleanup:
501 502 503 504 505 506 507
    /* don't shadow a more serious error */
    if (nodedir && closedir(nodedir) < 0 && ret >= 0) {
        virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir);
        ret = -1;
    }

    VIR_FREE(sysfs_nodedir);
508 509
    VIR_FREE(sysfs_cpudir);
    return ret;
510 511
}

512 513 514 515
# define TICK_TO_NSEC (1000ull * 1000ull * 1000ull / sysconf(_SC_CLK_TCK))

int linuxNodeGetCPUStats(FILE *procstat,
                         int cpuNum,
516
                         virNodeCPUStatsPtr params,
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
                         int *nparams)
{
    int ret = -1;
    char line[1024];
    unsigned long long usr, ni, sys, idle, iowait;
    unsigned long long irq, softirq, steal, guest, guest_nice;
    char cpu_header[3 + INT_BUFSIZE_BOUND(cpuNum)];

    if ((*nparams) == 0) {
        /* Current number of cpu stats supported by linux */
        *nparams = LINUX_NB_CPU_STATS;
        ret = 0;
        goto cleanup;
    }

    if ((*nparams) != LINUX_NB_CPU_STATS) {
533 534 535
        virReportInvalidArg(*nparams,
                            _("nparams in %s must be equal to %d"),
                            __FUNCTION__, LINUX_NB_CPU_STATS);
536 537 538
        goto cleanup;
    }

539
    if (cpuNum == VIR_NODE_CPU_STATS_ALL_CPUS) {
540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
        strcpy(cpu_header, "cpu");
    } else {
        snprintf(cpu_header, sizeof(cpu_header), "cpu%d", cpuNum);
    }

    while (fgets(line, sizeof(line), procstat) != NULL) {
        char *buf = line;

        if (STRPREFIX(buf, cpu_header)) { /* aka logical CPU time */
            int i;

            if (sscanf(buf,
                       "%*s %llu %llu %llu %llu %llu" // user ~ iowait
                       "%llu %llu %llu %llu %llu",    // irq  ~ guest_nice
                       &usr, &ni, &sys, &idle, &iowait,
                       &irq, &softirq, &steal, &guest, &guest_nice) < 4) {
                continue;
            }

            for (i = 0; i < *nparams; i++) {
560
                virNodeCPUStatsPtr param = &params[i];
561 562 563

                switch (i) {
                case 0: /* fill kernel cpu time here */
564
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_KERNEL) == NULL) {
565 566
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
567 568 569 570 571 572
                        goto cleanup;
                    }
                    param->value = (sys + irq + softirq) * TICK_TO_NSEC;
                    break;

                case 1: /* fill user cpu time here */
573
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_USER) == NULL) {
574 575
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
576 577 578 579 580 581
                        goto cleanup;
                    }
                    param->value = (usr + ni) * TICK_TO_NSEC;
                    break;

                case 2: /* fill idle cpu time here */
582
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_IDLE) == NULL) {
583 584
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
585 586 587 588 589 590
                        goto cleanup;
                    }
                    param->value = idle * TICK_TO_NSEC;
                    break;

                case 3: /* fill iowait cpu time here */
591
                    if (virStrcpyStatic(param->field, VIR_NODE_CPU_STATS_IOWAIT) == NULL) {
592 593
                        virReportError(VIR_ERR_INTERNAL_ERROR,
                                       "%s", _("Field kernel cpu time too long for destination"));
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
                        goto cleanup;
                    }
                    param->value = iowait * TICK_TO_NSEC;
                    break;

                default:
                    break;
                    /* should not hit here */
                }
            }
            ret = 0;
            goto cleanup;
        }
    }

609 610 611
    virReportInvalidArg(cpuNum,
                        _("Invalid cpuNum in %s"),
                        __FUNCTION__);
612

613 614 615 616 617 618
cleanup:
    return ret;
}

int linuxNodeGetMemoryStats(FILE *meminfo,
                            int cellNum,
619
                            virNodeMemoryStatsPtr params,
620 621 622 623 624 625 626
                            int *nparams)
{
    int ret = -1;
    int i = 0, j = 0, k = 0;
    int found = 0;
    int nr_param;
    char line[1024];
627
    char meminfo_hdr[VIR_NODE_MEMORY_STATS_FIELD_LENGTH];
628 629 630 631 632
    unsigned long val;
    struct field_conv {
        const char *meminfo_hdr;  // meminfo header
        const char *field;        // MemoryStats field name
    } field_conv[] = {
633 634 635 636
        {"MemTotal:", VIR_NODE_MEMORY_STATS_TOTAL},
        {"MemFree:",  VIR_NODE_MEMORY_STATS_FREE},
        {"Buffers:",  VIR_NODE_MEMORY_STATS_BUFFERS},
        {"Cached:",   VIR_NODE_MEMORY_STATS_CACHED},
637 638 639
        {NULL,        NULL}
    };

640
    if (cellNum == VIR_NODE_MEMORY_STATS_ALL_CELLS) {
641 642 643 644 645 646 647 648 649 650 651 652 653
        nr_param = LINUX_NB_MEMORY_STATS_ALL;
    } else {
        nr_param = LINUX_NB_MEMORY_STATS_CELL;
    }

    if ((*nparams) == 0) {
        /* Current number of memory stats supported by linux */
        *nparams = nr_param;
        ret = 0;
        goto cleanup;
    }

    if ((*nparams) != nr_param) {
654 655 656
        virReportInvalidArg(nparams,
                            _("nparams in %s must be %d"),
                            __FUNCTION__, nr_param);
657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
        goto cleanup;
    }

    while (fgets(line, sizeof(line), meminfo) != NULL) {
        char *buf = line;

        if (STRPREFIX(buf, "Node ")) {
            /*
             * /sys/devices/system/node/nodeX/meminfo format is below.
             * So, skip prefix "Node XX ".
             *
             * Node 0 MemTotal:        8386980 kB
             * Node 0 MemFree:         5300920 kB
             *         :
             */
            char *p;

            p = buf;
            for (i = 0; i < 2; i++) {
                p = strchr(p, ' ');
                if (p == NULL) {
678 679
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   "%s", _("no prefix found"));
680 681 682 683 684 685 686 687 688 689 690 691 692 693
                    goto cleanup;
                }
                p++;
            }
            buf = p;
        }

        if (sscanf(buf, "%s %lu kB", meminfo_hdr, &val) < 2)
            continue;

        for (j = 0; field_conv[j].meminfo_hdr != NULL; j++) {
            struct field_conv *convp = &field_conv[j];

            if (STREQ(meminfo_hdr, convp->meminfo_hdr)) {
694
                virNodeMemoryStatsPtr param = &params[k++];
695 696

                if (virStrcpyStatic(param->field, convp->field) == NULL) {
697 698
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   "%s", _("Field kernel memory too long for destination"));
699 700 701 702 703 704 705 706 707 708 709 710
                    goto cleanup;
                }
                param->value = val;
                found++;
                break;
            }
        }
        if (found >= nr_param)
            break;
    }

    if (found == 0) {
711 712
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("no available memory line found"));
713 714 715 716 717
        goto cleanup;
    }

    ret = 0;

718 719 720
cleanup:
    return ret;
}
721 722 723 724 725 726 727 728 729 730 731

/*
 * Linux maintains cpu bit map. For example, if cpuid=5's flag is not set
 * and max cpu is 7. The map file shows 0-4,6-7. This function parses
 * it and returns cpumap.
 */
static char *
linuxParseCPUmap(int *max_cpuid, const char *path)
{
    char *map = NULL;
    char *str = NULL;
732
    int max_id = 0, i;
733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754

    if (virFileReadAll(path, 5 * VIR_DOMAIN_CPUMASK_LEN, &str) < 0) {
        virReportOOMError();
        goto error;
    }

    if (VIR_ALLOC_N(map, VIR_DOMAIN_CPUMASK_LEN) < 0) {
        virReportOOMError();
        goto error;
    }
    if (virDomainCpuSetParse(str, 0, map,
                             VIR_DOMAIN_CPUMASK_LEN) < 0) {
        goto error;
    }

    for (i = 0; i < VIR_DOMAIN_CPUMASK_LEN; i++) {
        if (map[i]) {
            max_id = i;
        }
    }
    *max_cpuid = max_id;

H
Hu Tao 已提交
755
    VIR_FREE(str);
756 757 758 759 760 761 762
    return map;

error:
    VIR_FREE(str);
    VIR_FREE(map);
    return NULL;
}
763 764
#endif

765
/* Describe the local host in NODEINFO.
 *
 * The structure is cleared and its model set from the machine name that
 * uname() reports.  On Linux the CPU fields are then filled from
 * CPUINFO_PATH and SYSFS_SYSTEM_PATH, and memory is set to
 * physmem_total() divided by 1024 (KB).  On other platforms an
 * unsupported error is reported.
 *
 * Returns 0 on success, -1 on failure.  */
int nodeGetInfo(virConnectPtr conn ATTRIBUTE_UNUSED, virNodeInfoPtr nodeinfo)
{
    struct utsname uts;
#ifdef __linux__
    FILE *fp;
    int rc;
#endif

    memset(nodeinfo, 0, sizeof(*nodeinfo));
    uname(&uts);

    if (virStrcpyStatic(nodeinfo->model, uts.machine) == NULL)
        return -1;

#ifdef __linux__
    fp = fopen(CPUINFO_PATH, "r");
    if (fp == NULL) {
        virReportSystemError(errno,
                             _("cannot open %s"), CPUINFO_PATH);
        return -1;
    }

    rc = linuxNodeInfoCPUPopulate(fp, SYSFS_SYSTEM_PATH, nodeinfo);
    if (rc >= 0) {
        /* Convert to KB. */
        nodeinfo->memory = physmem_total () / 1024;
    }

    VIR_FORCE_FCLOSE(fp);
    return rc;
#else
    /* XXX Solaris will need an impl later if they port QEMU driver */
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node info not implemented on this platform"));
    return -1;
#endif
}
802

803
int nodeGetCPUStats(virConnectPtr conn ATTRIBUTE_UNUSED,
804
                    int cpuNum ATTRIBUTE_UNUSED,
805
                    virNodeCPUStatsPtr params ATTRIBUTE_UNUSED,
806
                    int *nparams ATTRIBUTE_UNUSED,
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
                    unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
    {
        int ret;
        FILE *procstat = fopen(PROCSTAT_PATH, "r");
        if (!procstat) {
            virReportSystemError(errno,
                                 _("cannot open %s"), PROCSTAT_PATH);
            return -1;
        }
        ret = linuxNodeGetCPUStats(procstat, cpuNum, params, nparams);
        VIR_FORCE_FCLOSE(procstat);

        return ret;
    }
#else
826 827
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node CPU stats not implemented on this platform"));
828 829 830 831
    return -1;
#endif
}

832
int nodeGetMemoryStats(virConnectPtr conn ATTRIBUTE_UNUSED,
833
                       int cellNum ATTRIBUTE_UNUSED,
834
                       virNodeMemoryStatsPtr params ATTRIBUTE_UNUSED,
835
                       int *nparams ATTRIBUTE_UNUSED,
836 837 838 839 840 841 842 843 844 845
                       unsigned int flags)
{
    virCheckFlags(0, -1);

#ifdef __linux__
    {
        int ret;
        char *meminfo_path = NULL;
        FILE *meminfo;

846
        if (cellNum == VIR_NODE_MEMORY_STATS_ALL_CELLS) {
847 848 849 850 851 852
            meminfo_path = strdup(MEMINFO_PATH);
            if (!meminfo_path) {
                virReportOOMError();
                return -1;
            }
        } else {
853
# if HAVE_NUMACTL
854
            if (numa_available() < 0) {
855
# endif
856 857
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               "%s", _("NUMA not supported on this host"));
858
                return -1;
859
# if HAVE_NUMACTL
860
            }
861
# endif
862

863
# if HAVE_NUMACTL
864
            if (cellNum > numa_max_node()) {
865 866 867
                virReportInvalidArg(cellNum,
                                    _("cellNum in %s must be less than or equal to %d"),
                                    __FUNCTION__, numa_max_node());
868 869
                return -1;
            }
870
# endif
871

872 873
            if (virAsprintf(&meminfo_path, "%s/node/node%d/meminfo",
                            SYSFS_SYSTEM_PATH, cellNum) < 0) {
874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
                virReportOOMError();
                return -1;
            }
        }
        meminfo = fopen(meminfo_path, "r");

        if (!meminfo) {
            virReportSystemError(errno,
                                 _("cannot open %s"), meminfo_path);
            VIR_FREE(meminfo_path);
            return -1;
        }
        ret = linuxNodeGetMemoryStats(meminfo, cellNum, params, nparams);
        VIR_FORCE_FCLOSE(meminfo);
        VIR_FREE(meminfo_path);

        return ret;
    }
#else
893 894
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node memory stats not implemented on this platform"));
895 896 897 898
    return -1;
#endif
}

899 900 901 902 903 904 905 906 907
/* Return the CPU map stored in the file MAPNAME under
 * SYSFS_SYSTEM_PATH "/cpu/", as parsed by linuxParseCPUmap(), which also
 * sets *MAX_ID.  Returns NULL on failure, and always on non-Linux
 * platforms, where an unsupported error is reported.  */
char *
nodeGetCPUmap(virConnectPtr conn ATTRIBUTE_UNUSED,
              int *max_id ATTRIBUTE_UNUSED,
              const char *mapname ATTRIBUTE_UNUSED)
{
#ifdef __linux__
    char *map_file = NULL;
    char *result;

    if (virAsprintf(&map_file, SYSFS_SYSTEM_PATH "/cpu/%s", mapname) < 0) {
        virReportOOMError();
        return NULL;
    }

    result = linuxParseCPUmap(max_id, map_file);
    VIR_FREE(map_file);

    return result;
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node cpumap not implemented on this platform"));
    return NULL;
#endif
}

923 924 925 926 927 928 929 930 931 932 933 934
#if HAVE_NUMACTL
# if LIBNUMA_API_VERSION <= 1
#  define NUMA_MAX_N_CPUS 4096
# else
#  define NUMA_MAX_N_CPUS (numa_all_cpus_ptr->size)
# endif

# define n_bits(var) (8 * sizeof(var))
# define MASK_CPU_ISSET(mask, cpu) \
  (((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)

int
935
nodeCapsInitNUMA(virCapsPtr caps)
936 937
{
    int n;
938
    unsigned long *mask = NULL;
939
    unsigned long *allonesmask = NULL;
940 941 942 943 944 945 946 947
    int *cpus = NULL;
    int ret = -1;
    int max_n_cpus = NUMA_MAX_N_CPUS;

    if (numa_available() < 0)
        return 0;

    int mask_n_bytes = max_n_cpus / 8;
948
    if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof(*mask)) < 0)
949
        goto cleanup;
950
    if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof(*mask)) < 0)
951 952
        goto cleanup;
    memset(allonesmask, 0xff, mask_n_bytes);
953 954 955 956

    for (n = 0 ; n <= numa_max_node() ; n++) {
        int i;
        int ncpus;
957
        /* The first time this returns -1, ENOENT if node doesn't exist... */
958 959
        if (numa_node_to_cpus(n, mask, mask_n_bytes) < 0) {
            VIR_WARN("NUMA topology for cell %d of %d not available, ignoring",
960 961 962 963 964 965 966
                     n, numa_max_node()+1);
            continue;
        }
        /* second, third... times it returns an all-1's mask */
        if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
            VIR_DEBUG("NUMA topology for cell %d of %d is all ones, ignoring",
                      n, numa_max_node()+1);
967 968
            continue;
        }
969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994

        for (ncpus = 0, i = 0 ; i < max_n_cpus ; i++)
            if (MASK_CPU_ISSET(mask, i))
                ncpus++;

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;

        for (ncpus = 0, i = 0 ; i < max_n_cpus ; i++)
            if (MASK_CPU_ISSET(mask, i))
                cpus[ncpus++] = i;

        if (virCapabilitiesAddHostNUMACell(caps,
                                           n,
                                           ncpus,
                                           cpus) < 0)
            goto cleanup;

        VIR_FREE(cpus);
    }

    ret = 0;

cleanup:
    VIR_FREE(cpus);
    VIR_FREE(mask);
995
    VIR_FREE(allonesmask);
996 997
    return ret;
}
998 999 1000


int
1001
nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
1002 1003 1004 1005 1006 1007 1008 1009 1010
                       unsigned long long *freeMems,
                       int startCell,
                       int maxCells)
{
    int n, lastCell, numCells;
    int ret = -1;
    int maxCell;

    if (numa_available() < 0) {
1011 1012
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("NUMA not supported on this host"));
1013 1014 1015 1016
        goto cleanup;
    }
    maxCell = numa_max_node();
    if (startCell > maxCell) {
1017 1018 1019
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("start cell %d out of range (0-%d)"),
                       startCell, maxCell);
1020 1021 1022 1023 1024 1025 1026 1027 1028
        goto cleanup;
    }
    lastCell = startCell + maxCells - 1;
    if (lastCell > maxCell)
        lastCell = maxCell;

    for (numCells = 0, n = startCell ; n <= lastCell ; n++) {
        long long mem;
        if (numa_node_size64(n, &mem) < 0) {
1029
            virReportError(VIR_ERR_INTERNAL_ERROR,
1030 1031
                           _("Failed to query NUMA free memory for node: %d"),
                           n);
1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
            goto cleanup;
        }
        freeMems[numCells++] = mem;
    }
    ret = numCells;

cleanup:
    return ret;
}

unsigned long long
1043
nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
1044 1045 1046 1047 1048
{
    unsigned long long freeMem = 0;
    int n;

    if (numa_available() < 0) {
1049 1050
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("NUMA not supported on this host"));
1051 1052 1053 1054 1055 1056
        goto cleanup;
    }

    for (n = 0 ; n <= numa_max_node() ; n++) {
        long long mem;
        if (numa_node_size64(n, &mem) < 0) {
1057 1058
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("Failed to query NUMA free memory"));
1059 1060 1061 1062 1063 1064 1065 1066 1067
            goto cleanup;
        }
        freeMem += mem;
    }

cleanup:
    return freeMem;
}

1068
#else
1069 1070 1071 1072
/* Build without NUMA support: there are no cells to add to CAPS, which
 * is not an error, so simply report success.  */
int
nodeCapsInitNUMA(virCapsPtr caps ATTRIBUTE_UNUSED)
{
    return 0;
}

1073
/* Build without NUMA support: per-cell memory cannot be queried, so
 * report an unsupported error and fail with -1.  */
int
nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
                       unsigned long long *freeMems ATTRIBUTE_UNUSED,
                       int startCell ATTRIBUTE_UNUSED,
                       int maxCells ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("NUMA memory information not available on this platform"));

    return -1;
}

1083
/* Build without NUMA support: free memory cannot be queried, so report
 * an unsupported error and return 0.  */
unsigned long long
nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("NUMA memory information not available on this platform"));

    return 0;
}
1089
#endif