/*
 * nodeinfo.c: Helper routines for OS specific node information
 *
 * Copyright (C) 2006, 2007, 2008, 2010 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

24
#include <config.h>
J
Jim Meyering 已提交
25

26 27 28
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
29
#include <stdint.h>
30
#include <errno.h>
31
#include <dirent.h>
E
Eric Blake 已提交
32
#include <sys/utsname.h>
33 34 35 36 37

#if HAVE_NUMACTL
# define NUMA_VERSION1_COMPATIBILITY 1
# include <numa.h>
#endif
38

39 40
#include "c-ctype.h"
#include "memory.h"
41
#include "nodeinfo.h"
42
#include "physmem.h"
43
#include "util.h"
44
#include "logging.h"
45
#include "virterror_internal.h"
46
#include "count-one-bits.h"
47

48 49 50

#define VIR_FROM_THIS VIR_FROM_NONE

51 52
#define nodeReportError(code, ...)                                      \
    virReportErrorHelper(NULL, VIR_FROM_NONE, code, __FILE__,           \
53
                         __FUNCTION__, __LINE__, __VA_ARGS__)
54

55
#ifdef __linux__
56 57
# define CPUINFO_PATH "/proc/cpuinfo"
# define CPU_SYS_PATH "/sys/devices/system/cpu"
58

59
/* NB, this is not static as we need to call it from the testsuite */
60
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
61
                             virNodeInfoPtr nodeinfo);
62

C
Chris Lalancette 已提交
63
static unsigned long count_thread_siblings(unsigned int cpu)
64 65
{
    unsigned long ret = 0;
C
Chris Lalancette 已提交
66 67
    char *path;
    FILE *pathfp;
68 69 70
    char str[1024];
    int i;

C
Chris Lalancette 已提交
71
    if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/topology/thread_siblings",
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
                    cpu) < 0) {
        virReportOOMError();
        return 0;
    }

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
        virReportSystemError(errno, _("cannot open %s"), path);
        VIR_FREE(path);
        return 0;
    }

    if (fgets(str, sizeof(str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }

    i = 0;
    while (str[i] != '\0') {
C
Chris Lalancette 已提交
91
        if (c_isdigit(str[i]))
92
            ret += count_one_bits(str[i] - '0');
C
Chris Lalancette 已提交
93 94 95 96
        else if (str[i] >= 'A' && str[i] <= 'F')
            ret += count_one_bits(str[i] - 'A' + 10);
        else if (str[i] >= 'a' && str[i] <= 'f')
            ret += count_one_bits(str[i] - 'a' + 10);
97 98 99 100 101 102 103 104 105 106
        i++;
    }

cleanup:
    fclose(pathfp);
    VIR_FREE(path);

    return ret;
}

C
Chris Lalancette 已提交
107
static int parse_socket(unsigned int cpu)
108
{
C
Chris Lalancette 已提交
109
    char *path;
110 111 112
    FILE *pathfp;
    char socket_str[1024];
    char *tmp;
C
Chris Lalancette 已提交
113
    int socket = -1;
114

C
Chris Lalancette 已提交
115 116
    if (virAsprintf(&path, CPU_SYS_PATH "/cpu%u/topology/physical_package_id",
                    cpu) < 0) {
117 118 119 120 121 122 123
        virReportOOMError();
        return -1;
    }

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
        virReportSystemError(errno, _("cannot open %s"), path);
C
Chris Lalancette 已提交
124 125
        VIR_FREE(path);
        return -1;
126 127 128 129 130 131 132
    }

    if (fgets(socket_str, sizeof(socket_str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }
    if (virStrToLong_i(socket_str, &tmp, 10, &socket) < 0) {
133
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
134 135 136 137 138 139 140 141 142 143 144 145
                        _("could not convert '%s' to an integer"),
                        socket_str);
        goto cleanup;
    }

cleanup:
    fclose(pathfp);
    VIR_FREE(path);

    return socket;
}

146
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
147 148
                             virNodeInfoPtr nodeinfo)
{
149
    char line[1024];
150 151
    DIR *cpudir = NULL;
    struct dirent *cpudirent = NULL;
C
Chris Lalancette 已提交
152
    unsigned int cpu;
153 154 155
    unsigned long cur_threads;
    int socket;
    unsigned long long socket_mask = 0;
156 157 158

    nodeinfo->cpus = 0;
    nodeinfo->mhz = 0;
159
    nodeinfo->cores = 1;
160 161

    nodeinfo->nodes = 1;
J
Jiri Denemark 已提交
162
# if HAVE_NUMACTL
163
    if (numa_available() >= 0)
164
        nodeinfo->nodes = numa_max_node() + 1;
J
Jiri Denemark 已提交
165
# endif
166 167

    /* NB: It is impossible to fill our nodes, since cpuinfo
C
Chris Lalancette 已提交
168
     * has no knowledge of NUMA nodes */
169

170
    /* NOTE: hyperthreads are ignored here; they are parsed out of /sys */
171 172
    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        char *buf = line;
173
        if (STRPREFIX(buf, "processor")) { /* aka a single logical CPU */
174
            buf += 9;
175
            while (*buf && c_isspace(*buf))
176 177
                buf++;
            if (*buf != ':') {
178
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
179
                                "%s", _("parsing cpuinfo processor"));
180 181 182
                return -1;
            }
            nodeinfo->cpus++;
183
        } else if (STRPREFIX(buf, "cpu MHz")) {
184 185
            char *p;
            unsigned int ui;
186
            buf += 9;
187
            while (*buf && c_isspace(*buf))
188 189
                buf++;
            if (*buf != ':' || !buf[1]) {
190
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
191
                                "%s", _("parsing cpuinfo cpu MHz"));
192 193
                return -1;
            }
194
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
195
                /* Accept trailing fractional part.  */
196
                && (*p == '\0' || *p == '.' || c_isspace(*p)))
197
                nodeinfo->mhz = ui;
198
        } else if (STRPREFIX(buf, "cpu cores")) { /* aka cores */
199
            char *p;
200 201
            unsigned int id;
            buf += 9;
202
            while (*buf && c_isspace(*buf))
203 204
                buf++;
            if (*buf != ':' || !buf[1]) {
205
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
206
                                "parsing cpuinfo cpu cores %c", *buf);
207 208
                return -1;
            }
209
            if (virStrToLong_ui(buf+1, &p, 10, &id) == 0
210
                && (*p == '\0' || c_isspace(*p))
211
                && id > nodeinfo->cores)
212 213 214 215 216
                nodeinfo->cores = id;
        }
    }

    if (!nodeinfo->cpus) {
217
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
218
                        "%s", _("no cpus found"));
219 220 221
        return -1;
    }

222 223
    /* OK, we've parsed what we can out of /proc/cpuinfo.  Get the socket
     * and thread information from /sys
224
     */
225 226 227 228 229 230
    cpudir = opendir(CPU_SYS_PATH);
    if (cpudir == NULL) {
        virReportSystemError(errno, _("cannot opendir %s"), CPU_SYS_PATH);
        return -1;
    }
    while ((cpudirent = readdir(cpudir))) {
C
Chris Lalancette 已提交
231
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
            continue;

        socket = parse_socket(cpu);
        if (socket < 0) {
            closedir(cpudir);
            return -1;
        }
        if (!(socket_mask & (1 << socket))) {
            socket_mask |= (1 << socket);
            nodeinfo->sockets++;
        }

        cur_threads = count_thread_siblings(cpu);
        if (cur_threads == 0) {
            closedir(cpudir);
            return -1;
        }
        if (cur_threads > nodeinfo->threads)
            nodeinfo->threads = cur_threads;
    }

    closedir(cpudir);
254

C
Chris Lalancette 已提交
255 256
    /* there should always be at least one socket and one thread */
    if (nodeinfo->sockets == 0) {
257
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
C
Chris Lalancette 已提交
258 259 260 261
                        "%s", _("no sockets found"));
        return -1;
    }
    if (nodeinfo->threads == 0) {
262
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
C
Chris Lalancette 已提交
263 264 265 266
                        "%s", _("no threads found"));
        return -1;
    }

267 268 269 270 271
    return 0;
}

#endif

272
/*
 * nodeGetInfo:
 * @conn: unused
 * @nodeinfo: node information structure to fill in
 *
 * Zeroes @nodeinfo, copies the machine name reported by uname() into
 * its model field and, on Linux, fills in the CPU details from
 * CPUINFO_PATH and the total physical memory in KB.  On other
 * platforms an unsupported error is reported.
 *
 * Returns 0 on success, -1 on failure.
 */
int nodeGetInfo(virConnectPtr conn ATTRIBUTE_UNUSED, virNodeInfoPtr nodeinfo) {
    struct utsname info;

    memset(nodeinfo, 0, sizeof(*nodeinfo));

    /* If uname() fails, info is not filled in and must not be read */
    if (uname(&info) < 0) {
        virReportSystemError(errno, "%s",
                             _("cannot get host machine type"));
        return -1;
    }

    if (virStrcpyStatic(nodeinfo->model, info.machine) == NULL)
        return -1;

#ifdef __linux__
    {
        int ret;
        FILE *cpuinfo = fopen(CPUINFO_PATH, "r");

        if (!cpuinfo) {
            virReportSystemError(errno,
                                 _("cannot open %s"), CPUINFO_PATH);
            return -1;
        }
        ret = linuxNodeInfoCPUPopulate(cpuinfo, nodeinfo);
        fclose(cpuinfo);
        if (ret < 0)
            return -1;

        /* Convert to KB. */
        nodeinfo->memory = physmem_total () / 1024;

        return ret;
    }
#else
    /* XXX Solaris will need an impl later if they port QEMU driver */
    nodeReportError(VIR_ERR_NO_SUPPORT, "%s",
                    _("node info not implemented on this platform"));
    return -1;
#endif
}
307 308 309 310 311 312 313 314 315 316 317 318 319

#if HAVE_NUMACTL
/* Number of CPU bits scanned per NUMA node mask: a fixed 4096 when
 * LIBNUMA_API_VERSION is 1 or lower, otherwise the size field of
 * libnuma's numa_all_cpus_ptr (presumably a bit count; confirm
 * against numa(3)). */
# if LIBNUMA_API_VERSION <= 1
#  define NUMA_MAX_N_CPUS 4096
# else
#  define NUMA_MAX_N_CPUS (numa_all_cpus_ptr->size)
# endif

/* Number of bits in an object of the type of var, taking 8 bits per byte */
# define n_bits(var) (8 * sizeof(var))
/* Evaluates to 1 if bit number cpu is set in the word array mask, else 0.
 * Both arguments are evaluated more than once. */
# define MASK_CPU_ISSET(mask, cpu) \
  (((mask)[((cpu) / n_bits(*(mask)))] >> ((cpu) % n_bits(*(mask)))) & 1)

int
320
nodeCapsInitNUMA(virCapsPtr caps)
321 322
{
    int n;
323
    unsigned long *mask = NULL;
324 325 326 327 328 329 330 331 332 333 334 335 336 337
    int *cpus = NULL;
    int ret = -1;
    int max_n_cpus = NUMA_MAX_N_CPUS;

    if (numa_available() < 0)
        return 0;

    int mask_n_bytes = max_n_cpus / 8;
    if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof *mask) < 0)
        goto cleanup;

    for (n = 0 ; n <= numa_max_node() ; n++) {
        int i;
        int ncpus;
338 339 340 341 342
        if (numa_node_to_cpus(n, mask, mask_n_bytes) < 0) {
            VIR_WARN("NUMA topology for cell %d of %d not available, ignoring",
                     n, numa_max_node());
            continue;
        }
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370

        for (ncpus = 0, i = 0 ; i < max_n_cpus ; i++)
            if (MASK_CPU_ISSET(mask, i))
                ncpus++;

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;

        for (ncpus = 0, i = 0 ; i < max_n_cpus ; i++)
            if (MASK_CPU_ISSET(mask, i))
                cpus[ncpus++] = i;

        if (virCapabilitiesAddHostNUMACell(caps,
                                           n,
                                           ncpus,
                                           cpus) < 0)
            goto cleanup;

        VIR_FREE(cpus);
    }

    ret = 0;

cleanup:
    VIR_FREE(cpus);
    VIR_FREE(mask);
    return ret;
}
371 372 373


int
374
nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
375 376 377 378 379 380 381 382 383
                       unsigned long long *freeMems,
                       int startCell,
                       int maxCells)
{
    int n, lastCell, numCells;
    int ret = -1;
    int maxCell;

    if (numa_available() < 0) {
384
        nodeReportError(VIR_ERR_NO_SUPPORT,
385 386 387 388 389
                        "%s", _("NUMA not supported on this host"));
        goto cleanup;
    }
    maxCell = numa_max_node();
    if (startCell > maxCell) {
390
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
391 392 393 394 395 396 397 398 399 400 401
                        _("start cell %d out of range (0-%d)"),
                        startCell, maxCell);
        goto cleanup;
    }
    lastCell = startCell + maxCells - 1;
    if (lastCell > maxCell)
        lastCell = maxCell;

    for (numCells = 0, n = startCell ; n <= lastCell ; n++) {
        long long mem;
        if (numa_node_size64(n, &mem) < 0) {
402
            nodeReportError(VIR_ERR_INTERNAL_ERROR,
403 404 405 406 407 408 409 410 411 412 413 414
                            "%s", _("Failed to query NUMA free memory"));
            goto cleanup;
        }
        freeMems[numCells++] = mem;
    }
    ret = numCells;

cleanup:
    return ret;
}

unsigned long long
415
nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
416 417 418 419 420
{
    unsigned long long freeMem = 0;
    int n;

    if (numa_available() < 0) {
421
        nodeReportError(VIR_ERR_NO_SUPPORT,
422 423 424 425 426 427 428
                        "%s", _("NUMA not supported on this host"));
        goto cleanup;
    }

    for (n = 0 ; n <= numa_max_node() ; n++) {
        long long mem;
        if (numa_node_size64(n, &mem) < 0) {
429
            nodeReportError(VIR_ERR_INTERNAL_ERROR,
430 431 432 433 434 435 436 437 438 439
                            "%s", _("Failed to query NUMA free memory"));
            goto cleanup;
        }
        freeMem += mem;
    }

cleanup:
    return freeMem;
}

440
#else
441 442 443 444
/*
 * nodeCapsInitNUMA:
 * @caps: unused
 *
 * Variant built without NUMA support: adds nothing to @caps.
 *
 * Returns 0.
 */
int
nodeCapsInitNUMA(virCapsPtr caps ATTRIBUTE_UNUSED)
{
    return 0;
}

445
/*
 * nodeGetCellsFreeMemory:
 * @conn: unused
 * @freeMems: unused
 * @startCell: unused
 * @maxCells: unused
 *
 * Variant built without NUMA support: reports an unsupported error.
 *
 * Returns -1.
 */
int
nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
                       unsigned long long *freeMems ATTRIBUTE_UNUSED,
                       int startCell ATTRIBUTE_UNUSED,
                       int maxCells ATTRIBUTE_UNUSED)
{
    nodeReportError(VIR_ERR_NO_SUPPORT,
                    "%s",
                    _("NUMA memory information not available on this platform"));
    return -1;
}

455
/*
 * nodeGetFreeMemory:
 * @conn: unused
 *
 * Variant built without NUMA support: reports an unsupported error.
 *
 * Returns 0.
 */
unsigned long long
nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
{
    nodeReportError(VIR_ERR_NO_SUPPORT,
                    "%s",
                    _("NUMA memory information not available on this platform"));
    return 0;
}
461
#endif