/*
 * nodeinfo.c: Helper routines for OS specific node information
 *
 * Copyright (C) 2006-2008, 2010-2015 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

24
#include <config.h>
J
Jim Meyering 已提交
25

26 27 28
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
29
#include <stdint.h>
30
#include <errno.h>
E
Eric Blake 已提交
31
#include <sys/utsname.h>
32
#include "conf/domain_conf.h"
33 34
#include <fcntl.h>
#include <sys/ioctl.h>
M
Michal Privoznik 已提交
35
#include <unistd.h>
36

37
#include "viralloc.h"
38
#include "nodeinfo.h"
39
#include "virhostcpu.h"
40
#include "virhostmem.h"
41
#include "physmem.h"
42
#include "virerror.h"
43
#include "count-one-bits.h"
E
Eric Blake 已提交
44
#include "intprops.h"
45
#include "virarch.h"
E
Eric Blake 已提交
46
#include "virfile.h"
47
#include "virtypedparam.h"
48
#include "virstring.h"
49
#include "virnuma.h"
50
#include "virlog.h"
51 52 53

#define VIR_FROM_THIS VIR_FROM_NONE

54 55
VIR_LOG_INIT("nodeinfo");

56

R
Roman Bogorodskiy 已提交
57

58
#ifdef __linux__
59
# define SYSFS_SYSTEM_PATH "/sys/devices/system"
60
# define SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX 8192
61

62

E
Eric Blake 已提交
63
/* Return the positive decimal contents of the given
64 65 66 67 68
 * DIR/cpu%u/FILE, or -1 on error.  If DEFAULT_VALUE is non-negative
 * and the file could not be found, return that instead of an error;
 * this is useful for machines that cannot hot-unplug cpu0, or where
 * hot-unplugging is disabled, or where the kernel is too old
 * to support NUMA cells, etc.  */
E
Eric Blake 已提交
69
static int
E
Eric Blake 已提交
70
virNodeGetCpuValue(const char *dir, unsigned int cpu, const char *file,
71
                   int default_value)
E
Eric Blake 已提交
72 73 74 75 76 77 78
{
    char *path;
    FILE *pathfp;
    int value = -1;
    char value_str[INT_BUFSIZE_BOUND(value)];
    char *tmp;

79
    if (virAsprintf(&path, "%s/cpu%u/%s", dir, cpu, file) < 0)
E
Eric Blake 已提交
80 81 82 83
        return -1;

    pathfp = fopen(path, "r");
    if (pathfp == NULL) {
84 85
        if (default_value >= 0 && errno == ENOENT)
            value = default_value;
E
Eric Blake 已提交
86 87 88 89 90 91 92 93 94 95
        else
            virReportSystemError(errno, _("cannot open %s"), path);
        goto cleanup;
    }

    if (fgets(value_str, sizeof(value_str), pathfp) == NULL) {
        virReportSystemError(errno, _("cannot read from %s"), path);
        goto cleanup;
    }
    if (virStrToLong_i(value_str, &tmp, 10, &value) < 0) {
96 97 98
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("could not convert '%s' to an integer"),
                       value_str);
E
Eric Blake 已提交
99 100 101
        goto cleanup;
    }

102
 cleanup:
103
    VIR_FORCE_FCLOSE(pathfp);
E
Eric Blake 已提交
104 105 106 107 108
    VIR_FREE(path);

    return value;
}

109

E
Eric Blake 已提交
110
static virBitmapPtr
111
virNodeGetSiblingsListLinux(const char *dir, int cpu_id)
E
Eric Blake 已提交
112 113 114 115 116 117 118 119 120 121 122 123
{
    char *path = NULL;
    char *buf = NULL;
    virBitmapPtr ret = NULL;

    if (virAsprintf(&path, "%s/cpu%u/topology/thread_siblings_list",
                    dir, cpu_id) < 0)
        goto cleanup;

    if (virFileReadAll(path, SYSFS_THREAD_SIBLINGS_LIST_LENGTH_MAX, &buf) < 0)
        goto cleanup;

124
    if (virBitmapParse(buf, &ret, virNumaGetMaxCPUs()) < 0)
E
Eric Blake 已提交
125 126
        goto cleanup;

127
 cleanup:
E
Eric Blake 已提交
128 129 130 131
    VIR_FREE(buf);
    VIR_FREE(path);
    return ret;
}
132 133
#else
# define SYSFS_SYSTEM_PATH "fake"
134 135
#endif

136

137 138 139 140 141 142 143 144 145 146 147
/* Fill NODEINFO with a description of the host: architecture name as
 * the model, total memory in KiB, and the CPU count/frequency/topology
 * reported by virHostCPUGetInfo().
 *
 * NODEINFO is zeroed first.  Returns 0 on success, -1 on failure; on
 * failure the fields filled in before the failing step keep their
 * values.  */
int
nodeGetInfo(virNodeInfoPtr nodeinfo)
{
    unsigned long long mem_bytes;
    virArch arch = virArchFromHost();

    memset(nodeinfo, 0, sizeof(*nodeinfo));

    if (!virStrcpyStatic(nodeinfo->model, virArchToString(arch)))
        return -1;

    if (virHostMemGetInfo(&mem_bytes, NULL) < 0)
        return -1;

    /* virNodeInfo stores memory in KiB */
    nodeinfo->memory = mem_bytes / 1024;

    return virHostCPUGetInfo(arch,
                             &nodeinfo->cpus, &nodeinfo->mhz,
                             &nodeinfo->nodes, &nodeinfo->sockets,
                             &nodeinfo->cores, &nodeinfo->threads) < 0 ? -1 : 0;
}

161

162
static int
163
nodeCapsInitNUMAFake(const char *cpupath ATTRIBUTE_UNUSED,
164
                     virCapsPtr caps ATTRIBUTE_UNUSED)
165 166 167 168 169
{
    virNodeInfo nodeinfo;
    virCapsHostNUMACellCPUPtr cpus;
    int ncpus;
    int s, c, t;
170 171
    int id, cid;
    int onlinecpus ATTRIBUTE_UNUSED;
172

173
    if (nodeGetInfo(&nodeinfo) < 0)
174 175 176
        return -1;

    ncpus = VIR_NODEINFO_MAXCPUS(nodeinfo);
177
    onlinecpus = nodeinfo.cpus;
178

179
    if (VIR_ALLOC_N(cpus, ncpus) < 0)
180 181
        return -1;

182
    id = cid = 0;
183 184 185
    for (s = 0; s < nodeinfo.sockets; s++) {
        for (c = 0; c < nodeinfo.cores; c++) {
            for (t = 0; t < nodeinfo.threads; t++) {
186
#ifdef __linux__
187
                if (virNodeGetCpuValue(cpupath, id, "online", 1)) {
188 189 190 191 192 193 194 195 196 197 198 199
#endif
                    cpus[cid].id = id;
                    cpus[cid].socket_id = s;
                    cpus[cid].core_id = c;
                    if (!(cpus[cid].siblings = virBitmapNew(ncpus)))
                        goto error;
                    ignore_value(virBitmapSetBit(cpus[cid].siblings, id));
                    cid++;
#ifdef __linux__
                }
#endif

200 201 202 203 204 205 206
                id++;
            }
        }
    }

    if (virCapabilitiesAddHostNUMACell(caps, 0,
                                       nodeinfo.memory,
207 208 209
#ifdef __linux__
                                       onlinecpus, cpus,
#else
210
                                       ncpus, cpus,
211
#endif
M
Michal Privoznik 已提交
212
                                       0, NULL,
213
                                       0, NULL) < 0)
214 215 216 217 218
        goto error;

    return 0;

 error:
219
    for (; id >= 0; id--)
220 221 222 223 224 225
        virBitmapFree(cpus[id].siblings);
    VIR_FREE(cpus);
    return -1;
}


226 227
/* returns 1 on success, 0 if the detection failed and -1 on hard error */
static int
228 229
virNodeCapsFillCPUInfo(const char *cpupath ATTRIBUTE_UNUSED,
                       int cpu_id ATTRIBUTE_UNUSED,
E
Eric Blake 已提交
230
                       virCapsHostNUMACellCPUPtr cpu ATTRIBUTE_UNUSED)
231
{
E
Eric Blake 已提交
232
#ifdef __linux__
233 234 235
    int tmp;
    cpu->id = cpu_id;

236
    if ((tmp = virNodeGetCpuValue(cpupath, cpu_id,
237 238 239 240 241
                                  "topology/physical_package_id", -1)) < 0)
        return 0;

    cpu->socket_id = tmp;

242
    if ((tmp = virNodeGetCpuValue(cpupath, cpu_id,
243 244 245 246 247
                                  "topology/core_id", -1)) < 0)
        return 0;

    cpu->core_id = tmp;

248
    if (!(cpu->siblings = virNodeGetSiblingsListLinux(cpupath, cpu_id)))
249 250 251
        return -1;

    return 0;
E
Eric Blake 已提交
252 253 254 255 256
#else
    virReportError(VIR_ERR_NO_SUPPORT, "%s",
                   _("node cpu info not implemented on this platform"));
    return -1;
#endif
257 258
}

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
/* Collect the distances from NUMA cell NODE to the other cells.
 *
 * On success *SIBLINGS receives a newly allocated array holding one
 * entry (cell index and distance) for every non-zero distance returned
 * by virNumaGetDistances(), and *NSIBLINGS its length.  If no distance
 * table is returned, *SIBLINGS is set to NULL and *NSIBLINGS to 0.
 *
 * Returns 0 on success, -1 on failure.  */
static int
virNodeCapsGetSiblingInfo(int node,
                          virCapsHostNUMACellSiblingInfoPtr *siblings,
                          int *nsiblings)
{
    virCapsHostNUMACellSiblingInfoPtr cells = NULL;
    int *dist = NULL;
    int ndist = 0;
    int ncells = 0;
    int ret = -1;
    size_t i;

    if (virNumaGetDistances(node, &dist, &ndist) < 0)
        goto cleanup;

    if (dist == NULL) {
        /* Nothing to report for this cell */
        *siblings = NULL;
        *nsiblings = 0;
        ret = 0;
        goto cleanup;
    }

    if (VIR_ALLOC_N(cells, ndist) < 0)
        goto cleanup;

    /* Keep only the cells with a non-zero distance */
    for (i = 0; i < ndist; i++) {
        if (dist[i]) {
            cells[ncells].node = i;
            cells[ncells].distance = dist[i];
            ncells++;
        }
    }

    /* Shrink the array to the number of entries actually used */
    if (VIR_REALLOC_N(cells, ncells) < 0)
        goto cleanup;

    /* Hand the array over to the caller */
    *siblings = cells;
    *nsiblings = ncells;
    cells = NULL;
    ret = 0;

 cleanup:
    VIR_FREE(dist);
    VIR_FREE(cells);
    return ret;
}

M
Michal Privoznik 已提交
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
/* Collect the page sizes and page counts of NUMA cell NODE as reported
 * by virNumaGetPages().
 *
 * On success *PAGEINFO receives a newly allocated array with one
 * size/avail pair per reported page size and *NPAGEINFO its length.
 *
 * Returns 0 on success, -1 on failure.  */
static int
virNodeCapsGetPagesInfo(int node,
                        virCapsHostNUMACellPageInfoPtr *pageinfo,
                        int *npageinfo)
{
    unsigned int *sizes = NULL;
    unsigned int *avail = NULL;
    size_t count;
    size_t i;
    int ret = -1;

    if (virNumaGetPages(node, &sizes, &avail, NULL, &count) < 0)
        goto cleanup;

    if (VIR_ALLOC_N(*pageinfo, count) < 0)
        goto cleanup;
    *npageinfo = count;

    /* Zip the two parallel arrays into the output records */
    for (i = 0; i < count; i++) {
        virCapsHostNUMACellPageInfoPtr entry = *pageinfo + i;

        entry->size = sizes[i];
        entry->avail = avail[i];
    }

    ret = 0;

 cleanup:
    VIR_FREE(avail);
    VIR_FREE(sizes);
    return ret;
}

335
int
336
nodeCapsInitNUMA(virCapsPtr caps)
337 338
{
    int n;
339
    unsigned long long memory;
340
    virCapsHostNUMACellCPUPtr cpus = NULL;
341
    virBitmapPtr cpumap = NULL;
342
    virCapsHostNUMACellSiblingInfoPtr siblings = NULL;
343
    int nsiblings = 0;
M
Michal Privoznik 已提交
344 345
    virCapsHostNUMACellPageInfoPtr pageinfo = NULL;
    int npageinfo;
346
    int ret = -1;
347
    int ncpus = 0;
348
    int cpu;
349
    bool topology_failed = false;
350
    int max_node;
351

352
    if (!virNumaIsAvailable()) {
353
        ret = nodeCapsInitNUMAFake(SYSFS_SYSTEM_PATH "/cpu", caps);
354 355
        goto cleanup;
    }
356

357 358 359 360
    if ((max_node = virNumaGetMaxNode()) < 0)
        goto cleanup;

    for (n = 0; n <= max_node; n++) {
361
        size_t i;
362

363 364 365
        if ((ncpus = virNumaGetNodeCPUs(n, &cpumap)) < 0) {
            if (ncpus == -2)
                continue;
366

367 368
            goto cleanup;
        }
369 370 371

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;
372
        cpu = 0;
373

374
        for (i = 0; i < virBitmapSize(cpumap); i++) {
J
Ján Tomko 已提交
375
            if (virBitmapIsBitSet(cpumap, i)) {
376 377
                if (virNodeCapsFillCPUInfo(SYSFS_SYSTEM_PATH "/cpu",
                                           i, cpus + cpu++) < 0) {
378 379 380 381 382
                    topology_failed = true;
                    virResetLastError();
                }
            }
        }
383

384 385 386
        if (virNodeCapsGetSiblingInfo(n, &siblings, &nsiblings) < 0)
            goto cleanup;

M
Michal Privoznik 已提交
387 388 389
        if (virNodeCapsGetPagesInfo(n, &pageinfo, &npageinfo) < 0)
            goto cleanup;

390 391 392 393
        /* Detect the amount of memory in the numa cell in KiB */
        virNumaGetNodeMemory(n, &memory, NULL);
        memory >>= 10;

394 395
        if (virCapabilitiesAddHostNUMACell(caps, n, memory,
                                           ncpus, cpus,
M
Michal Privoznik 已提交
396 397
                                           nsiblings, siblings,
                                           npageinfo, pageinfo) < 0)
398
            goto cleanup;
399 400

        cpus = NULL;
401
        siblings = NULL;
M
Michal Privoznik 已提交
402
        pageinfo = NULL;
403 404
        virBitmapFree(cpumap);
        cpumap = NULL;
405 406 407 408
    }

    ret = 0;

409
 cleanup:
410
    if ((topology_failed || ret < 0) && cpus)
411 412
        virCapabilitiesClearHostNUMACellCPUTopology(cpus, ncpus);

413 414
    virBitmapFree(cpumap);
    VIR_FREE(cpus);
415
    VIR_FREE(siblings);
M
Michal Privoznik 已提交
416
    VIR_FREE(pageinfo);
417 418
    return ret;
}