vircgroup.c 130.2 KB
Newer Older
1
/*
2
 * vircgroup.c: methods for managing control cgroups
3
 *
4
 * Copyright (C) 2010-2015 Red Hat, Inc.
5 6
 * Copyright IBM Corp. 2008
 *
O
Osier Yang 已提交
7 8 9 10 11 12 13 14 15 16 17
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with this library.  If not, see
O
Osier Yang 已提交
19
 * <http://www.gnu.org/licenses/>.
20 21 22 23 24 25 26
 *
 * Authors:
 *  Dan Smith <danms@us.ibm.com>
 */
#include <config.h>

#include <stdio.h>
27 28
#if defined HAVE_MNTENT_H && defined HAVE_SYS_MOUNT_H \
    && defined HAVE_GETMNTENT_R
29
# include <mntent.h>
30 31
# include <sys/mount.h>
#endif
32 33 34 35 36
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/stat.h>
37 38 39 40 41 42 43

#ifdef MAJOR_IN_MKDEV
# include <sys/mkdev.h>
#elif MAJOR_IN_SYSMACROS
# include <sys/sysmacros.h>
#endif

44
#include <sys/types.h>
45
#include <signal.h>
46
#include <dirent.h>
M
Michal Privoznik 已提交
47
#include <unistd.h>
48

49 50 51
#define __VIR_CGROUP_ALLOW_INCLUDE_PRIV_H__
#include "vircgrouppriv.h"

52
#include "virutil.h"
53
#include "viralloc.h"
54
#include "virerror.h"
55
#include "virlog.h"
E
Eric Blake 已提交
56
#include "virfile.h"
57
#include "virhash.h"
58
#include "virhashcode.h"
59
#include "virstring.h"
60
#include "virsystemd.h"
61
#include "virtypedparam.h"
62
#include "virhostcpu.h"
63
#include "virthread.h"
64

65 66
VIR_LOG_INIT("util.cgroup");

67 68
#define CGROUP_MAX_VAL 512

69 70
#define VIR_FROM_THIS VIR_FROM_CGROUP

71
#define CGROUP_NB_TOTAL_CPU_STAT_PARAM 3
72
#define CGROUP_NB_PER_CPU_STAT_PARAM   1
73

74
#if defined(__linux__) && defined(HAVE_GETMNTENT_R) && \
75
    defined(_DIRENT_HAVE_D_TYPE) && defined(_SC_CLK_TCK)
76 77 78
# define VIR_CGROUP_SUPPORTED
#endif

79
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
R
Ryota Ozaki 已提交
80
              "cpu", "cpuacct", "cpuset", "memory", "devices",
81 82
              "freezer", "blkio", "net_cls", "perf_event",
              "name=systemd");
83

84 85 86 87 88 89 90 91
/*
 * Bit flags describing how a cgroup should be set up. The values are
 * distinct bits so they can be OR-ed together.
 * NOTE(review): the consumers of these flags are outside the part of
 * the file visible here; the per-flag notes below are the original
 * author's description.
 */
typedef enum {
    VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
    VIR_CGROUP_MEM_HIERACHY = 1 << 0, /* call virCgroupSetMemoryUseHierarchy
                                       * before creating subcgroups and
                                       * attaching tasks
                                       */
} virCgroupFlags;

E
Eric Blake 已提交
92

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
/**
 * virCgroupGetDevicePermsString:
 *
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits
 *
 * Translates the read, write and mknod permission bits into a short
 * string made of the letters 'r', 'w' and 'm' (always in that order).
 * Any other bits in @perms are ignored.
 *
 * Returns a constant string for the bits set; "" when none of the
 * three bits is set. The result must not be freed.
 */
const char *
virCgroupGetDevicePermsString(int perms)
{
    /* Table index layout: bit 0 = read, bit 1 = write, bit 2 = mknod */
    static const char *const permStrings[] = {
        "", "r", "w", "rw", "m", "rm", "wm", "rwm",
    };
    unsigned int idx = 0;

    if (perms & VIR_CGROUP_DEVICE_READ)
        idx |= 1U << 0;
    if (perms & VIR_CGROUP_DEVICE_WRITE)
        idx |= 1U << 1;
    if (perms & VIR_CGROUP_DEVICE_MKNOD)
        idx |= 1U << 2;

    return permStrings[idx];
}


131
#ifdef VIR_CGROUP_SUPPORTED
E
Eric Blake 已提交
132 133
bool
virCgroupAvailable(void)
134
{
135
    bool ret = false;
136 137 138 139 140 141 142 143 144 145 146
    FILE *mounts = NULL;
    struct mntent entry;
    char buf[CGROUP_MAX_VAL];

    if (!virFileExists("/proc/cgroups"))
        return false;

    if (!(mounts = fopen("/proc/mounts", "r")))
        return false;

    while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
147 148 149 150
        /* We're looking for at least one 'cgroup' fs mount,
         * which is *not* a named mount. */
        if (STREQ(entry.mnt_type, "cgroup") &&
            !strstr(entry.mnt_opts, "name=")) {
151 152 153 154 155 156 157 158 159
            ret = true;
            break;
        }
    }

    VIR_FORCE_FCLOSE(mounts);
    return ret;
}

E
Eric Blake 已提交
160 161 162 163 164 165

static int
virCgroupPartitionNeedsEscaping(const char *path)
{
    FILE *fp = NULL;
    int ret = 0;
166
    VIR_AUTOFREE(char *) line = NULL;
E
Eric Blake 已提交
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
    size_t buflen;

    /* If it starts with 'cgroup.' or a '_' of any
     * of the controller names from /proc/cgroups,
     * then we must prefix a '_'
     */
    if (STRPREFIX(path, "cgroup."))
        return 1;

    if (path[0] == '_' ||
        path[0] == '.')
        return 1;

    if (!(fp = fopen("/proc/cgroups", "r"))) {
        /* The API contract is that we return ENXIO
         * if cgroups are not available on a host */
        if (errno == ENOENT)
            errno = ENXIO;
        virReportSystemError(errno, "%s",
                             _("Cannot open /proc/cgroups"));
        return -1;
    }

    /*
     * Data looks like this:
     * #subsys_name hierarchy num_cgroups enabled
     * cpuset  2 4  1
     * cpu     3 48 1
     * cpuacct 3 48 1
     * memory  4 4  1
     * devices 5 4  1
     * freezer 6 4  1
     * net_cls 7 1  1
     */
    while (getline(&line, &buflen, fp) > 0) {
        char *tmp;
        size_t len;

        if (STRPREFIX(line, "#subsys_name"))
            continue;

        tmp = strchrnul(line, ' ');
        *tmp = '\0';
        len = tmp - line;

        if (STRPREFIX(path, line) &&
            path[len] == '.') {
            ret = 1;
            goto cleanup;
        }
    }

    if (ferror(fp)) {
        virReportSystemError(errno, "%s",
                             _("Error while reading /proc/cgroups"));
        goto cleanup;
    }

225
 cleanup:
E
Eric Blake 已提交
226 227 228 229 230 231 232 233 234
    VIR_FORCE_FCLOSE(fp);
    return ret;
}


/*
 * virCgroupPartitionEscape:
 * @path: pointer to a heap allocated partition name, updated in place
 *
 * If virCgroupPartitionNeedsEscaping() says so, replaces *@path with
 * a newly allocated copy prefixed by '_' and frees the old string.
 *
 * Returns 0 on success (whether or not escaping was applied) and a
 * negative value on failure, in which case *@path is left untouched.
 */
static int
virCgroupPartitionEscape(char **path)
{
    char *escaped = NULL;
    int needed = virCgroupPartitionNeedsEscaping(*path);

    /* 0 => nothing to do, < 0 => error already reported */
    if (needed <= 0)
        return needed;

    if (virAsprintf(&escaped, "_%s", *path) < 0)
        return -1;

    VIR_FREE(*path);
    *path = escaped;
    return 0;
}
E
Eric Blake 已提交
248 249


250
static bool
251 252 253
virCgroupValidateMachineGroup(virCgroupPtr group,
                              const char *name,
                              const char *drivername,
254
                              const char *machinename)
255 256
{
    size_t i;
257 258 259 260
    VIR_AUTOFREE(char *) partname = NULL;
    VIR_AUTOFREE(char *) scopename_old = NULL;
    VIR_AUTOFREE(char *) scopename_new = NULL;
    VIR_AUTOFREE(char *) partmachinename = NULL;
261 262 263

    if (virAsprintf(&partname, "%s.libvirt-%s",
                    name, drivername) < 0)
264
        return false;
265 266

    if (virCgroupPartitionEscape(&partname) < 0)
267
        return false;
268

269 270 271 272
    if (machinename &&
        (virAsprintf(&partmachinename, "%s.libvirt-%s",
                     machinename, drivername) < 0 ||
         virCgroupPartitionEscape(&partmachinename) < 0))
273
        return false;
274

275
    if (!(scopename_old = virSystemdMakeScopeName(name, drivername, true)))
276
        return false;
277

278 279 280 281 282
    /* We should keep trying even if this failed */
    if (!machinename)
        virResetLastError();
    else if (!(scopename_new = virSystemdMakeScopeName(machinename,
                                                       drivername, false)))
283
        return false;
284 285

    if (virCgroupPartitionEscape(&scopename_old) < 0)
286
        return false;
287 288 289

    if (scopename_new &&
        virCgroupPartitionEscape(&scopename_new) < 0)
290
        return false;
291

292 293 294
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        char *tmp;

295 296 297
        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
            continue;

298 299 300 301 302
        if (!group->controllers[i].placement)
            continue;

        tmp = strrchr(group->controllers[i].placement, '/');
        if (!tmp)
303
            return false;
304

305 306 307
        if (i == VIR_CGROUP_CONTROLLER_CPU ||
            i == VIR_CGROUP_CONTROLLER_CPUACCT ||
            i == VIR_CGROUP_CONTROLLER_CPUSET) {
308 309 310 311
            if (STREQ(tmp, "/emulator"))
                *tmp = '\0';
            tmp = strrchr(group->controllers[i].placement, '/');
            if (!tmp)
312
                return false;
313 314
        }

315 316 317
        tmp++;

        if (STRNEQ(tmp, name) &&
318
            STRNEQ_NULLABLE(tmp, machinename) &&
319
            STRNEQ(tmp, partname) &&
320
            STRNEQ_NULLABLE(tmp, partmachinename) &&
321 322
            STRNEQ(tmp, scopename_old) &&
            STRNEQ_NULLABLE(tmp, scopename_new)) {
E
Eric Blake 已提交
323
            VIR_DEBUG("Name '%s' for controller '%s' does not match "
324
                      "'%s', '%s', '%s', '%s' or '%s'",
E
Eric Blake 已提交
325
                      tmp, virCgroupControllerTypeToString(i),
326 327
                      name, NULLSTR(machinename), partname,
                      scopename_old, NULLSTR(scopename_new));
328
            return false;
329
        }
330 331
    }

332
    return true;
333
}
E
Eric Blake 已提交
334

L
Lai Jiangshan 已提交
335

E
Eric Blake 已提交
336 337 338
/*
 * virCgroupCopyMounts:
 * @group: cgroup receiving the mount information
 * @parent: cgroup to copy from
 *
 * Duplicates the mount point and link point of every controller that
 * has a mount point in @parent into @group.
 *
 * Returns 0 on success, -1 if a string could not be duplicated.
 */
static int
virCgroupCopyMounts(virCgroupPtr group,
                    virCgroupPtr parent)
{
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        virCgroupControllerPtr src = &parent->controllers[i];
        virCgroupControllerPtr dst = &group->controllers[i];

        if (!src->mountPoint)
            continue;

        if (VIR_STRDUP(dst->mountPoint, src->mountPoint) < 0)
            return -1;

        if (VIR_STRDUP(dst->linkPoint, src->linkPoint) < 0)
            return -1;
    }

    return 0;
}

E
Eric Blake 已提交
356

357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
/*
 * virCgroupResolveMountLink:
 * @mntDir: mount directory of the controller; modified temporarily
 *          but always restored before returning
 * @typeStr: controller name
 * @controller: controller whose linkPoint is filled in
 *
 * For a co-mounted controller (the last component of @mntDir contains
 * a ',', e.g. ".../cpu,cpuacct") looks for a symlink named @typeStr
 * next to the mount directory and, if it exists, stores its path in
 * @controller->linkPoint. A missing entry or an entry that is not a
 * symlink only produces a warning.
 *
 * Returns 0 on success (with or without a link found), -1 on error.
 */
static int
virCgroupResolveMountLink(char *mntDir,
                          const char *typeStr,
                          virCgroupControllerPtr controller)
{
    VIR_AUTOFREE(char *) linkSrc = NULL;
    char *dirName;
    struct stat sb;
    int rc;

    dirName = strrchr(mntDir, '/');
    if (!dirName) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Missing '/' separator in cgroup mount '%s'"), mntDir);
        return -1;
    }

    /* Not a co-mount, nothing to resolve */
    if (!strchr(dirName + 1, ','))
        return 0;

    /* Temporarily cut off the last component to build the sibling
     * path. The separator is put back before checking the result so
     * that the caller's string is intact on the failure path too. */
    *dirName = '\0';
    rc = virAsprintf(&linkSrc, "%s/%s", mntDir, typeStr);
    *dirName = '/';
    if (rc < 0)
        return -1;

    if (lstat(linkSrc, &sb) < 0) {
        if (errno == ENOENT) {
            VIR_WARN("Controller %s co-mounted at %s is missing symlink at %s",
                     typeStr, mntDir, linkSrc);
        } else {
            virReportSystemError(errno, _("Cannot stat %s"), linkSrc);
            return -1;
        }
    } else {
        if (!S_ISLNK(sb.st_mode)) {
            VIR_WARN("Expecting a symlink at %s for controller %s",
                     linkSrc, typeStr);
        } else {
            /* Ownership of the string moves to the controller */
            VIR_STEAL_PTR(controller->linkPoint, linkSrc);
        }
    }

    return 0;
}


402 403 404 405
/*
 * Process /proc/mounts figuring out what controllers are
 * mounted and where
 */
406 407 408 409
int
virCgroupDetectMountsFromFile(virCgroupPtr group,
                              const char *path,
                              bool checkLinks)
410
{
411
    size_t i;
412
    FILE *mounts = NULL;
413 414
    struct mntent entry;
    char buf[CGROUP_MAX_VAL];
415
    int ret = -1;
416

417
    mounts = fopen(path, "r");
418
    if (mounts == NULL) {
419
        virReportSystemError(errno, _("Unable to open %s"), path);
420
        return -1;
421 422 423
    }

    while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
424 425
        if (STRNEQ(entry.mnt_type, "cgroup"))
            continue;
426

427
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
428 429 430
            const char *typestr = virCgroupControllerTypeToString(i);
            int typelen = strlen(typestr);
            char *tmp = entry.mnt_opts;
431
            virCgroupControllerPtr controller = &group->controllers[i];
432 433 434 435 436 437 438 439 440
            while (tmp) {
                char *next = strchr(tmp, ',');
                int len;
                if (next) {
                    len = next-tmp;
                    next++;
                } else {
                    len = strlen(tmp);
                }
441 442

                if (typelen == len && STREQLEN(typestr, tmp, len)) {
443

444 445 446 447 448 449 450 451 452 453
                    /* Note that the lines in /proc/mounts have the same
                     * order than the mount operations, and that there may
                     * be duplicates due to bind mounts. This means
                     * that the same mount point may be processed more than
                     * once. We need to save the results of the last one,
                     * and we need to be careful to release the memory used
                     * by previous processing. */
                    VIR_FREE(controller->mountPoint);
                    VIR_FREE(controller->linkPoint);
                    if (VIR_STRDUP(controller->mountPoint, entry.mnt_dir) < 0)
454
                        goto cleanup;
455 456 457

                    /* If it is a co-mount it has a filename like "cpu,cpuacct"
                     * and we must identify the symlink path */
458 459 460
                    if (checkLinks &&
                        virCgroupResolveMountLink(entry.mnt_dir, typestr,
                                                  controller) < 0) {
461
                            goto cleanup;
462 463
                    }
                }
464 465 466
                tmp = next;
            }
        }
467 468
    }

469 470
    ret = 0;
 cleanup:
471
    VIR_FORCE_FCLOSE(mounts);
472
    return ret;
473 474
}

475 476 477 478 479 480
/*
 * virCgroupDetectMounts:
 * @group: cgroup whose controller mounts are detected
 *
 * Detects controller mounts from the live mount table of the host,
 * resolving co-mount symlinks as well.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virCgroupDetectMounts(virCgroupPtr group)
{
    const char *mountsFile = "/proc/mounts";

    return virCgroupDetectMountsFromFile(group, mountsFile, true);
}

481

E
Eric Blake 已提交
482 483 484 485
/*
 * virCgroupCopyPlacement:
 * @group: cgroup whose controller placements are filled in
 * @path: absolute path, or path relative to the parent placement
 * @parent: parent cgroup; only dereferenced when @path is relative
 *
 * Sets the placement of every mounted controller (except the systemd
 * one). An absolute @path is used as is; a relative @path is appended
 * to the placement of the same controller in @parent.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int
virCgroupCopyPlacement(virCgroupPtr group,
                       const char *path,
                       virCgroupPtr parent)
{
    size_t i;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        const char *parentPlacement;
        const char *sep;

        if (!group->controllers[i].mountPoint)
            continue;

        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
            continue;

        if (path[0] == '/') {
            if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
                return -1;
            continue;
        }

        /*
         * parent == "/" + path="" => "/"
         * parent == "/libvirt.service" + path == "" => "/libvirt.service"
         * parent == "/libvirt.service" + path == "foo" => "/libvirt.service/foo"
         */
        parentPlacement = parent->controllers[i].placement;
        if (STREQ(parentPlacement, "/") || STREQ(path, ""))
            sep = "";
        else
            sep = "/";

        if (virAsprintf(&group->controllers[i].placement,
                        "%s%s%s",
                        parentPlacement, sep, path) < 0)
            return -1;
    }

    return 0;
}


518
/*
519 520 521 522
 * virCgroupDetectPlacement:
 * @group: the group to process
 * @path: the relative path to append, not starting with '/'
 *
523 524
 * Process /proc/self/cgroup figuring out what cgroup
 * sub-path the current process is assigned to. ie not
525 526 527 528 529 530 531 532 533 534 535 536 537 538
 * necessarily in the root. The contents of this file
 * looks like
 *
 * 9:perf_event:/
 * 8:blkio:/
 * 7:net_cls:/
 * 6:freezer:/
 * 5:devices:/
 * 4:memory:/
 * 3:cpuacct,cpu:/
 * 2:cpuset:/
 * 1:name=systemd:/user/berrange/2
 *
 * It then appends @path to each detected path.
539
 */
E
Eric Blake 已提交
540 541 542 543
static int
virCgroupDetectPlacement(virCgroupPtr group,
                         pid_t pid,
                         const char *path)
544
{
545
    size_t i;
546 547
    FILE *mapping  = NULL;
    char line[1024];
548
    int ret = -1;
549
    VIR_AUTOFREE(char *) procfile = NULL;
550

551
    VIR_DEBUG("Detecting placement for pid %lld path %s",
M
Michal Privoznik 已提交
552
              (long long) pid, path);
553 554 555 556
    if (pid == -1) {
        if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
            goto cleanup;
    } else {
M
Michal Privoznik 已提交
557 558
        if (virAsprintf(&procfile, "/proc/%lld/cgroup",
                        (long long) pid) < 0)
559 560 561 562
            goto cleanup;
    }

    mapping = fopen(procfile, "r");
563
    if (mapping == NULL) {
564 565 566 567
        virReportSystemError(errno,
                             _("Unable to open '%s'"),
                             procfile);
        goto cleanup;
568 569
    }

570 571
    while (fgets(line, sizeof(line), mapping) != NULL) {
        char *controllers = strchr(line, ':');
572 573
        char *selfpath = controllers ? strchr(controllers + 1, ':') : NULL;
        char *nl = selfpath ? strchr(selfpath, '\n') : NULL;
574

575
        if (!controllers || !selfpath)
576 577 578 579 580
            continue;

        if (nl)
            *nl = '\0';

581
        *selfpath = '\0';
582
        controllers++;
583
        selfpath++;
584

585
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
586 587 588
            const char *typestr = virCgroupControllerTypeToString(i);
            int typelen = strlen(typestr);
            char *tmp = controllers;
589

590 591 592 593
            while (tmp) {
                char *next = strchr(tmp, ',');
                int len;
                if (next) {
594
                    len = next - tmp;
595 596 597 598
                    next++;
                } else {
                    len = strlen(tmp);
                }
599 600

                /*
601 602 603
                 * selfpath == "/" + path="" -> "/"
                 * selfpath == "/libvirt.service" + path == "" -> "/libvirt.service"
                 * selfpath == "/libvirt.service" + path == "foo" -> "/libvirt.service/foo"
604
                 */
605
                if (typelen == len && STREQLEN(typestr, tmp, len) &&
606 607 608 609 610 611 612 613 614 615 616 617 618 619
                    group->controllers[i].mountPoint != NULL &&
                    group->controllers[i].placement == NULL) {
                    if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
                        if (VIR_STRDUP(group->controllers[i].placement,
                                       selfpath) < 0)
                            goto cleanup;
                    } else {
                        if (virAsprintf(&group->controllers[i].placement,
                                        "%s%s%s", selfpath,
                                        (STREQ(selfpath, "/") ||
                                         STREQ(path, "") ? "" : "/"),
                                        path) < 0)
                            goto cleanup;
                    }
620
                }
621 622 623 624 625 626

                tmp = next;
            }
        }
    }

627
    ret = 0;
628

629
 cleanup:
630
    VIR_FORCE_FCLOSE(mapping);
631
    return ret;
632 633
}

E
Eric Blake 已提交
634 635 636 637 638 639 640

/*
 * virCgroupDetect:
 * @group: cgroup to fill in
 * @pid: process whose placement is looked up, passed through to
 *       virCgroupDetectPlacement()
 * @controllers: bitmask of (1 << VIR_CGROUP_CONTROLLER_*) values to
 *               use, or a negative value to use every mounted one
 * @path: path of the group, absolute or relative
 * @parent: parent cgroup to inherit mounts from, or NULL to detect
 *          them from the host
 *
 * Determines the mount point and placement of each controller of
 * @group. Requested controllers that are not mounted are silently
 * dropped; excluding a controller that is co-mounted with a wanted
 * one is an error.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virCgroupDetect(virCgroupPtr group,
                pid_t pid,
                int controllers,
                const char *path,
                virCgroupPtr parent)
{
    size_t i;
    size_t j;
    VIR_DEBUG("group=%p controllers=%d path=%s parent=%p",
              group, controllers, path, parent);

    if (parent) {
        if (virCgroupCopyMounts(group, parent) < 0)
            return -1;
    } else {
        if (virCgroupDetectMounts(group) < 0)
            return -1;
    }

    if (controllers >= 0) {
        VIR_DEBUG("Filtering controllers %d", controllers);
        /* First mark requested but non-existing controllers to be ignored */
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            if (((1 << i) & controllers)) {
                /* Remove non-existent controllers  */
                if (!group->controllers[i].mountPoint) {
                    VIR_DEBUG("Requested controller '%s' not mounted, ignoring",
                              virCgroupControllerTypeToString(i));
                    controllers &= ~(1 << i);
                }
            }
        }
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            VIR_DEBUG("Controller '%s' wanted=%s, mount='%s'",
                      virCgroupControllerTypeToString(i),
                      (1 << i) & controllers ? "yes" : "no",
                      NULLSTR(group->controllers[i].mountPoint));
            if (!((1 << i) & controllers) &&
                group->controllers[i].mountPoint) {
                /* Check whether a request to disable a controller
                 * clashes with co-mounting of controllers */
                for (j = 0; j < VIR_CGROUP_CONTROLLER_LAST; j++) {
                    if (j == i)
                        continue;
                    if (!((1 << j) & controllers))
                        continue;

                    if (STREQ_NULLABLE(group->controllers[i].mountPoint,
                                       group->controllers[j].mountPoint)) {
                        virReportSystemError(EINVAL,
                                             _("Controller '%s' is not wanted, but '%s' is co-mounted"),
                                             virCgroupControllerTypeToString(i),
                                             virCgroupControllerTypeToString(j));
                        return -1;
                    }
                }
                /* Unwanted and not co-mounted: forget the mount */
                VIR_FREE(group->controllers[i].mountPoint);
            }
        }
    } else {
        VIR_DEBUG("Auto-detecting controllers");
        controllers = 0;
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            VIR_DEBUG("Controller '%s' present=%s",
                      virCgroupControllerTypeToString(i),
                      group->controllers[i].mountPoint ? "yes" : "no");
            if (group->controllers[i].mountPoint == NULL)
                continue;
            controllers |= (1 << i);
        }
    }

    /* Check that at least 1 controller is available */
    if (!controllers) {
        virReportSystemError(ENXIO, "%s",
                             _("At least one cgroup controller is required"));
        return -1;
    }

    /* In some cases we can copy part of the placement info
     * based on the parent cgroup...
     */
    if ((parent || path[0] == '/') &&
        virCgroupCopyPlacement(group, path, parent) < 0)
        return -1;

    /* ... but use the per-process cgroup file (/proc/<pid>/cgroup)
     * to fill in the rest */
    if (virCgroupDetectPlacement(group, pid, path) < 0)
        return -1;

    /* Check that for every mounted controller, we found our placement */
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (!group->controllers[i].mountPoint)
            continue;

        if (!group->controllers[i].placement) {
            /* The placement is NULL on this path, so the location to
             * report is the mount point of the controller */
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Could not find placement for controller %s at %s"),
                           virCgroupControllerTypeToString(i),
                           group->controllers[i].mountPoint);
            return -1;
        }

        VIR_DEBUG("Detected mount/mapping %zu:%s at %s in %s for pid %lld",
                  i,
                  virCgroupControllerTypeToString(i),
                  group->controllers[i].mountPoint,
                  group->controllers[i].placement,
                  (long long) pid);
    }

    return 0;
}

750

751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
/*
 * virCgroupGetBlockDevString:
 * @path: path to a block device node
 *
 * Builds the "major:minor " string identifying the block device at
 * @path. The trailing space is included on purpose since all callers
 * append a value right after it.
 *
 * Returns a newly allocated string the caller must free, or NULL with
 * an error reported if @path cannot be stat'ed, is not a block device
 * or the allocation fails.
 */
static char *
virCgroupGetBlockDevString(const char *path)
{
    struct stat st;
    char *devstr = NULL;

    if (stat(path, &st) < 0) {
        virReportSystemError(errno,
                             _("Path '%s' is not accessible"),
                             path);
        return NULL;
    }

    if (!S_ISBLK(st.st_mode)) {
        virReportSystemError(EINVAL,
                             _("Path '%s' must be a block device"),
                             path);
        return NULL;
    }

    if (virAsprintf(&devstr, "%d:%d ",
                    major(st.st_rdev), minor(st.st_rdev)) < 0)
        return NULL;

    return devstr;
}


E
Eric Blake 已提交
780 781 782 783 784
/*
 * virCgroupSetValueStr:
 * @group: cgroup to modify
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @value: string to write
 *
 * Writes @value to the control file @key of @controller in @group.
 *
 * Returns 0 on success, -1 with an error reported on failure. An
 * EINVAL from the write is reported as an invalid value for the file.
 */
static int
virCgroupSetValueStr(virCgroupPtr group,
                     int controller,
                     const char *key,
                     const char *value)
{
    VIR_AUTOFREE(char *) keypath = NULL;

    if (virCgroupPathOfController(group, controller, key, &keypath) < 0)
        return -1;

    VIR_DEBUG("Set value '%s' to '%s'", keypath, value);
    if (virFileWriteStr(keypath, value, 0) >= 0)
        return 0;

    if (errno == EINVAL) {
        const char *slash = strrchr(keypath, '/');

        /* Name just the file, not the whole path, in the message */
        if (slash) {
            virReportSystemError(errno,
                                 _("Invalid value '%s' for '%s'"),
                                 value, slash + 1);
            return -1;
        }
    }

    virReportSystemError(errno,
                         _("Unable to write to '%s'"), keypath);
    return -1;
}

E
Eric Blake 已提交
809 810 811 812 813 814

/*
 * virCgroupGetValueStr:
 * @group: cgroup to query
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @value: filled with the file content (up to 1 MiB)
 *
 * Reads the control file @key of @controller in @group. A newline at
 * the very end of the content is stripped.
 *
 * Returns 0 on success, -1 with an error reported on failure; *@value
 * is reset to NULL before anything else is attempted.
 */
static int
virCgroupGetValueStr(virCgroupPtr group,
                     int controller,
                     const char *key,
                     char **value)
{
    VIR_AUTOFREE(char *) keypath = NULL;
    int len;

    *value = NULL;

    if (virCgroupPathOfController(group, controller, key, &keypath) < 0)
        return -1;

    VIR_DEBUG("Get value %s", keypath);

    len = virFileReadAll(keypath, 1024*1024, value);
    if (len < 0) {
        virReportSystemError(errno,
                             _("Unable to read from '%s'"), keypath);
        return -1;
    }

    /* Terminated with '\n' has sometimes harmful effects to the caller */
    if (len > 0 && (*value)[len - 1] == '\n')
        (*value)[len - 1] = '\0';

    return 0;
}

E
Eric Blake 已提交
839

840 841 842 843 844 845 846
/*
 * virCgroupGetValueForBlkDev:
 * @group: cgroup to query
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @path: path to the block device of interest
 * @value: filled with a copy of the matching line
 *
 * Reads the control file @key and duplicates into *@value the first
 * line starting with the "major:minor " string of the block device
 * at @path.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virCgroupGetValueForBlkDev(virCgroupPtr group,
                           int controller,
                           const char *key,
                           const char *path,
                           char **value)
{
    VIR_AUTOFREE(char *) devPrefix = NULL;
    VIR_AUTOFREE(char *) content = NULL;
    char **entries = NULL;
    int ret = -1;

    if (virCgroupGetValueStr(group, controller, key, &content) < 0)
        goto cleanup;

    devPrefix = virCgroupGetBlockDevString(path);
    if (!devPrefix)
        goto cleanup;

    entries = virStringSplit(content, "\n", -1);
    if (!entries)
        goto cleanup;

    if (VIR_STRDUP(*value,
                   virStringListGetFirstWithPrefix(entries, devPrefix)) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virStringListFree(entries);
    return ret;
}


E
Eric Blake 已提交
871 872 873 874 875
/*
 * virCgroupSetValueU64:
 * @group: cgroup to modify
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @value: number to write
 *
 * Formats @value as an unsigned decimal number and writes it through
 * virCgroupSetValueStr().
 *
 * Returns the result of the write, or -1 if formatting failed.
 */
static int
virCgroupSetValueU64(virCgroupPtr group,
                     int controller,
                     const char *key,
                     unsigned long long int value)
{
    VIR_AUTOFREE(char *) text = NULL;
    int rc = virAsprintf(&text, "%llu", value);

    if (rc < 0)
        return -1;

    return virCgroupSetValueStr(group, controller, key, text);
}


E
Eric Blake 已提交
886 887 888 889 890
/*
 * virCgroupSetValueI64:
 * @group: cgroup to modify
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @value: number to write
 *
 * Formats @value as a signed decimal number and writes it through
 * virCgroupSetValueStr().
 *
 * Returns the result of the write, or -1 if formatting failed.
 */
static int
virCgroupSetValueI64(virCgroupPtr group,
                     int controller,
                     const char *key,
                     long long int value)
{
    VIR_AUTOFREE(char *) text = NULL;
    int rc = virAsprintf(&text, "%lld", value);

    if (rc < 0)
        return -1;

    return virCgroupSetValueStr(group, controller, key, text);
}

E
Eric Blake 已提交
900 901 902 903 904 905

/*
 * virCgroupGetValueI64:
 * @group: cgroup to query
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @value: filled with the parsed number
 *
 * Reads the control file through virCgroupGetValueStr() and parses
 * its content as a signed base 10 integer.
 *
 * Returns 0 on success, -1 with an error reported if the file cannot
 * be read or its content is not an integer.
 */
static int
virCgroupGetValueI64(virCgroupPtr group,
                     int controller,
                     const char *key,
                     long long int *value)
{
    VIR_AUTOFREE(char *) text = NULL;

    if (virCgroupGetValueStr(group, controller, key, &text) < 0)
        return -1;

    if (virStrToLong_ll(text, NULL, 10, value) >= 0)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unable to parse '%s' as an integer"),
                   text);
    return -1;
}

E
Eric Blake 已提交
922 923 924 925 926 927

/*
 * virCgroupGetValueU64:
 * @group: cgroup to query
 * @controller: controller the setting belongs to
 * @key: name of the control file
 * @value: filled with the parsed number
 *
 * Reads the control file through virCgroupGetValueStr() and parses
 * its content as an unsigned base 10 integer.
 *
 * Returns 0 on success, -1 with an error reported if the file cannot
 * be read or its content is not an integer.
 */
static int
virCgroupGetValueU64(virCgroupPtr group,
                     int controller,
                     const char *key,
                     unsigned long long int *value)
{
    VIR_AUTOFREE(char *) text = NULL;

    if (virCgroupGetValueStr(group, controller, key, &text) < 0)
        return -1;

    if (virStrToLong_ull(text, NULL, 10, value) >= 0)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("Unable to parse '%s' as an integer"),
                   text);
    return -1;
}


E
Eric Blake 已提交
945 946
/* Copy a fixed list of cpuset settings from @parent to @group: each
 * value is read from @parent and written unchanged to @group.
 *
 * Returns 0 when every setting was copied, -1 on the first read or
 * write that fails. */
static int
virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group)
{
    const char *inherit_values[] = {
        "cpuset.cpus",
        "cpuset.mems",
        "cpuset.memory_migrate",
    };
    size_t idx;

    VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path);
    for (idx = 0; idx < ARRAY_CARDINALITY(inherit_values); idx++) {
        VIR_AUTOFREE(char *) value = NULL;
        const char *setting = inherit_values[idx];

        if (virCgroupGetValueStr(parent, VIR_CGROUP_CONTROLLER_CPUSET,
                                 setting, &value) < 0)
            return -1;

        VIR_DEBUG("Inherit %s = %s", setting, value);

        if (virCgroupSetValueStr(group, VIR_CGROUP_CONTROLLER_CPUSET,
                                 setting, value) < 0)
            return -1;
    }

    return 0;
}

E
Eric Blake 已提交
977 978 979

/* Make sure "memory.use_hierarchy" reads 1 for the memory controller
 * of @group, writing it only when the current value is different.
 *
 * Returns 0 on success (including when already enabled), -1 if the
 * value cannot be read or written. */
static int
virCgroupSetMemoryUseHierarchy(virCgroupPtr group)
{
    const char *filename = "memory.use_hierarchy";
    unsigned long long current;

    if (virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                             filename, &current) < 0)
        return -1;

    /* Setting twice causes error, so if already enabled, skip setting */
    if (current == 1)
        return 0;

    VIR_DEBUG("Setting up %s/%s", group->path, filename);

    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                filename, 1) < 0 ? -1 : 0;
}

E
Eric Blake 已提交
1002 1003 1004 1005 1006 1007

/* Walk every controller of @group and make sure its directory exists.
 *
 * The systemd controller and controllers without a mount point are
 * skipped.  For each remaining controller whose path does not exist
 * yet, the directory is created when @create is true; afterwards the
 * cpuset settings are inherited from @parent and, when @flags carries
 * VIR_CGROUP_MEM_HIERACHY, memory.use_hierarchy is enabled, provided
 * the controller is (or shares a mount point with) cpuset / memory.
 *
 * Returns 0 on success, -1 on error. */
static int
virCgroupMakeGroup(virCgroupPtr parent,
                   virCgroupPtr group,
                   bool create,
                   unsigned int flags)
{
    size_t i;

    VIR_DEBUG("Make group %s", group->path);
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        VIR_AUTOFREE(char *) path = NULL;
        const char *sharedMount;

        /* We must never mkdir() in systemd's hierarchy */
        if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
            VIR_DEBUG("Not creating systemd controller group");
            continue;
        }

        /* Skip over controllers that aren't mounted */
        if (group->controllers[i].mountPoint == NULL) {
            VIR_DEBUG("Skipping unmounted controller %s",
                      virCgroupControllerTypeToString(i));
            continue;
        }

        if (virCgroupPathOfController(group, i, "", &path) < 0)
            return -1;

        /* As of Feb 2011, clang can't see that the above function
         * call did not modify group. */
        sa_assert(group->controllers[i].mountPoint);

        VIR_DEBUG("Make controller %s", path);

        /* Nothing left to do for a directory that is already there. */
        if (virFileExists(path))
            continue;

        /* NOTE(review): when @create is false mkdir() is never called,
         * so errno below is whatever virFileExists() left behind;
         * confirm that this is intended. */
        if (!create ||
            mkdir(path, 0755) < 0) {
            if (errno == EEXIST)
                continue;

            if (i != VIR_CGROUP_CONTROLLER_BLKIO) {
                virReportSystemError(errno,
                                     _("Failed to create controller %s for group"),
                                     virCgroupControllerTypeToString(i));
                return -1;
            }

            /* With a kernel that doesn't support multi-level directory
             * for blkio controller, libvirt will fail and disable all
             * other controllers even though they are available. So
             * treat blkio as unmounted if mkdir fails. */
            VIR_DEBUG("Ignoring mkdir failure with blkio controller. Kernel probably too old");
            VIR_FREE(group->controllers[i].mountPoint);
            continue;
        }

        sharedMount = group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint;
        if (sharedMount != NULL &&
            (i == VIR_CGROUP_CONTROLLER_CPUSET ||
             STREQ(group->controllers[i].mountPoint, sharedMount))) {
            if (virCgroupCpuSetInherit(parent, group) < 0)
                return -1;
        }

        /*
         * Note that virCgroupSetMemoryUseHierarchy should always be
         * called prior to creating subcgroups and attaching tasks.
         */
        sharedMount = group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint;
        if ((flags & VIR_CGROUP_MEM_HIERACHY) &&
            sharedMount != NULL &&
            (i == VIR_CGROUP_CONTROLLER_MEMORY ||
             STREQ(group->controllers[i].mountPoint, sharedMount))) {
            if (virCgroupSetMemoryUseHierarchy(group) < 0)
                return -1;
        }
    }

    VIR_DEBUG("Done making controllers for group");
    return 0;
}

1082

1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
/**
 * virCgroupNew:
 * @path: path for the new group
 * @parent: parent group, or NULL
 * @controllers: bitmask of controllers to activate
 *
 * Create a new cgroup storing it in @group.
 *
 * If @path starts with a '/' it is treated as an
 * absolute path, and @parent is ignored. Otherwise
 * it is treated as being relative to @parent. If
 * @parent is NULL, then the placement of the current
 * process is used.
 *
1097
 * Returns 0 on success, -1 on error
1098
 */
E
Eric Blake 已提交
1099 1100 1101 1102 1103 1104
static int
virCgroupNew(pid_t pid,
             const char *path,
             virCgroupPtr parent,
             int controllers,
             virCgroupPtr *group)
1105
{
1106 1107
    VIR_DEBUG("pid=%lld path=%s parent=%p controllers=%d group=%p",
              (long long) pid, path, parent, controllers, group);
1108
    *group = NULL;
1109

1110 1111
    if (VIR_ALLOC((*group)) < 0)
        goto error;
1112

1113
    if (path[0] == '/' || !parent) {
1114 1115
        if (VIR_STRDUP((*group)->path, path) < 0)
            goto error;
1116 1117 1118 1119
    } else {
        if (virAsprintf(&(*group)->path, "%s%s%s",
                        parent->path,
                        STREQ(parent->path, "") ? "" : "/",
1120 1121
                        path) < 0)
            goto error;
1122 1123
    }

1124
    if (virCgroupDetect(*group, pid, controllers, path, parent) < 0)
1125
        goto error;
1126

1127 1128
    return 0;

1129
 error:
1130
    virCgroupFree(group);
1131
    *group = NULL;
1132

1133
    return -1;
1134
}
1135

1136

1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165
/**
 * virCgroupAddTaskController:
 *
 * @group: The cgroup to add a task to
 * @pid: The pid of the task to add
 * @controller: The cgroup controller to be operated on
 *
 * Validates @controller (it must be in range and have a mount
 * point in @group) and then writes @pid to the controller's
 * "tasks" entry.
 *
 * Returns: 0 on success or -1 on error
 */
static int
virCgroupAddTaskController(virCgroupPtr group, pid_t pid, int controller)
{
    bool inRange = controller >= 0 && controller < VIR_CGROUP_CONTROLLER_LAST;

    if (!inRange) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Controller %d out of range"), controller);
        return -1;
    }

    if (group->controllers[controller].mountPoint == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Controller '%s' not mounted"),
                       virCgroupControllerTypeToString(controller));
        return -1;
    }

    return virCgroupSetValueI64(group, controller, "tasks", pid);
}


1166 1167
/* Add @pid to every mounted controller of @group.  The systemd
 * controller is only touched when @withSystemd is true.
 *
 * Returns 0 on success, -1 as soon as one controller fails. */
static int
virCgroupAddTaskInternal(virCgroupPtr group, pid_t pid, bool withSystemd)
{
    size_t ctrl;

    for (ctrl = 0; ctrl < VIR_CGROUP_CONTROLLER_LAST; ctrl++) {
        /* Skip over controllers not mounted */
        if (group->controllers[ctrl].mountPoint == NULL)
            continue;

        /* We must never add tasks in systemd's hierarchy
         * unless we're intentionally trying to move a
         * task into a systemd machine scope */
        if (!withSystemd && ctrl == VIR_CGROUP_CONTROLLER_SYSTEMD)
            continue;

        if (virCgroupAddTaskController(group, pid, ctrl) < 0)
            return -1;
    }

    return 0;
}

1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225
/**
 * virCgroupAddTask:
 *
 * @group: The cgroup to add a task to
 * @pid: The pid of the task to add
 *
 * Adds the task to every mounted controller of @group while
 * leaving the systemd unit controller untouched.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupAddTask(virCgroupPtr group, pid_t pid)
{
    const bool withSystemd = false;

    return virCgroupAddTaskInternal(group, pid, withSystemd);
}

/**
 * virCgroupAddMachineTask:
 *
 * @group: The cgroup to add a task to
 * @pid: The pid of the task to add
 *
 * Adds the task to every mounted controller of @group, the
 * systemd unit controller included.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupAddMachineTask(virCgroupPtr group, pid_t pid)
{
    const bool withSystemd = true;

    return virCgroupAddTaskInternal(group, pid, withSystemd);
}

E
Eric Blake 已提交
1226 1227 1228

/* Build in @res a copy of @path in which every non-empty component
 * lacking a '.' gets ".partition" appended, and every processed
 * component is passed through virCgroupPartitionEscape().  The
 * component at index 1 is left alone when it is one of the fixed
 * top-level names "machine", "system" or "user".
 *
 * Returns 0 on success with @res set, -1 on failure. */
static int
virCgroupSetPartitionSuffix(const char *path, char **res)
{
    const char *suffix = ".partition";
    char **tokens = virStringSplit(path, "/", 0);
    size_t i;
    int ret = -1;

    if (tokens == NULL)
        return ret;

    for (i = 0; tokens[i] != NULL; i++) {
        bool fixedTopLevel;

        /* Whitelist the 3 top level fixed dirs
         * NB i == 0 is "", since we have leading '/'
         */
        fixedTopLevel = (i == 1 &&
                         (STREQ(tokens[i], "machine") ||
                          STREQ(tokens[i], "system") ||
                          STREQ(tokens[i], "user")));
        if (fixedTopLevel)
            continue;

        /* If there is no suffix set already, then
         * add ".partition"
         */
        if (tokens[i][0] != '\0' && strchr(tokens[i], '.') == NULL) {
            size_t needed = strlen(tokens[i]) + strlen(suffix) + 1;

            if (VIR_REALLOC_N(tokens[i], needed) < 0)
                goto cleanup;
            strcat(tokens[i], suffix);
        }

        if (virCgroupPartitionEscape(&(tokens[i])) < 0)
            goto cleanup;
    }

    *res = virStringListJoin((const char **)tokens, "/");
    if (*res == NULL)
        goto cleanup;

    ret = 0;

 cleanup:
    virStringListFree(tokens);
    return ret;
}

E
Eric Blake 已提交
1272

1273 1274 1275 1276 1277 1278 1279
/**
 * virCgroupNewPartition:
 * @path: path for the partition
 * @create: true to create the cgroup tree
 * @controllers: mask of controllers to create
 *
 * Creates a new cgroup to represent the resource
1280
 * partition path identified by @path.
1281
 *
1282
 * Returns 0 on success, -1 on failure
1283
 */
E
Eric Blake 已提交
1284 1285 1286 1287 1288
int
virCgroupNewPartition(const char *path,
                      bool create,
                      int controllers,
                      virCgroupPtr *group)
1289
{
1290
    int ret = -1;
1291 1292
    VIR_AUTOFREE(char *) parentPath = NULL;
    VIR_AUTOFREE(char *) newPath = NULL;
1293
    virCgroupPtr parent = NULL;
1294 1295 1296
    VIR_DEBUG("path=%s create=%d controllers=%x",
              path, create, controllers);

1297 1298 1299 1300 1301 1302
    if (path[0] != '/') {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Partition path '%s' must start with '/'"),
                       path);
        return -1;
    }
1303

1304
    if (virCgroupSetPartitionSuffix(path, &newPath) < 0)
1305
        goto cleanup;
1306

1307 1308
    if (virCgroupNew(-1, newPath, NULL, controllers, group) < 0)
        goto cleanup;
1309

1310
    if (STRNEQ(newPath, "/")) {
1311
        char *tmp;
1312
        if (VIR_STRDUP(parentPath, newPath) < 0)
1313
            goto cleanup;
1314 1315 1316 1317 1318

        tmp = strrchr(parentPath, '/');
        tmp++;
        *tmp = '\0';

1319
        if (virCgroupNew(-1, parentPath, NULL, controllers, &parent) < 0)
1320
            goto cleanup;
1321

1322 1323 1324
        if (virCgroupMakeGroup(parent, *group, create, VIR_CGROUP_NONE) < 0) {
            virCgroupRemove(*group);
            goto cleanup;
1325 1326 1327
        }
    }

1328 1329 1330
    ret = 0;
 cleanup:
    if (ret != 0)
1331 1332
        virCgroupFree(group);
    virCgroupFree(&parent);
1333
    return ret;
1334 1335
}

1336

G
Gao feng 已提交
1337
/**
1338
* virCgroupNewSelf:
G
Gao feng 已提交
1339 1340 1341
*
* @group: Pointer to returned virCgroupPtr
*
1342 1343 1344
* Obtain a cgroup representing the config of the
* current process
*
1345
* Returns 0 on success, or -1 on error
G
Gao feng 已提交
1346
*/
E
Eric Blake 已提交
1347 1348
int
virCgroupNewSelf(virCgroupPtr *group)
G
Gao feng 已提交
1349
{
1350
    return virCgroupNewDetect(-1, -1, group);
G
Gao feng 已提交
1351
}
1352

1353

1354 1355 1356 1357 1358 1359 1360 1361
/**
 * virCgroupNewDomainPartition:
 *
 * @partition: partition holding the domain
 * @driver: name of the driver
 * @name: name of the domain
 * @group: Pointer to returned virCgroupPtr
 *
1362
 * Returns 0 on success, or -1 on error
1363
 */
E
Eric Blake 已提交
1364 1365 1366 1367 1368 1369
int
virCgroupNewDomainPartition(virCgroupPtr partition,
                            const char *driver,
                            const char *name,
                            bool create,
                            virCgroupPtr *group)
1370
{
1371
    VIR_AUTOFREE(char *)grpname = NULL;
1372

1373
    if (virAsprintf(&grpname, "%s.libvirt-%s",
1374
                    name, driver) < 0)
1375
        return -1;
1376

1377
    if (virCgroupPartitionEscape(&grpname) < 0)
1378
        return -1;
1379

1380
    if (virCgroupNew(-1, grpname, partition, -1, group) < 0)
1381
        return -1;
1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392

    /*
     * Create a cgroup with memory.use_hierarchy enabled to
     * surely account memory usage of lxc with ns subsystem
     * enabled. (To be exact, memory and ns subsystems are
     * enabled at the same time.)
     *
     * The reason why doing it here, not a upper group, say
     * a group for driver, is to avoid overhead to track
     * cumulative usage that we don't need.
     */
E
Eric Blake 已提交
1393 1394
    if (virCgroupMakeGroup(partition, *group, create,
                           VIR_CGROUP_MEM_HIERACHY) < 0) {
1395
        virCgroupRemove(*group);
1396
        virCgroupFree(group);
1397
        return -1;
1398 1399
    }

1400
    return 0;
1401
}
1402

E
Eric Blake 已提交
1403

1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421
/**
 * virCgroupNewThread:
 *
 * @domain: group for the domain
 * @nameval: enum to generate the name for the new thread
 * @id: id of the vcpu or iothread
 * @create: true to create if not already existing
 * @group: Pointer to returned virCgroupPtr
 *
 * The new group is named "vcpu<id>", "emulator" or "iothread<id>"
 * depending on @nameval and is restricted to the cpu, cpuacct and
 * cpuset controllers.
 *
 * Returns 0 on success, or -1 on error
 */
int
virCgroupNewThread(virCgroupPtr domain,
                   virCgroupThreadName nameval,
                   int id,
                   bool create,
                   virCgroupPtr *group)
{
    VIR_AUTOFREE(char *) threadName = NULL;
    const int controllers = ((1 << VIR_CGROUP_CONTROLLER_CPU) |
                             (1 << VIR_CGROUP_CONTROLLER_CPUACCT) |
                             (1 << VIR_CGROUP_CONTROLLER_CPUSET));
    int rc = 0;

    switch (nameval) {
    case VIR_CGROUP_THREAD_VCPU:
        rc = virAsprintf(&threadName, "vcpu%d", id);
        break;
    case VIR_CGROUP_THREAD_EMULATOR:
        rc = VIR_STRDUP(threadName, "emulator");
        break;
    case VIR_CGROUP_THREAD_IOTHREAD:
        rc = virAsprintf(&threadName, "iothread%d", id);
        break;
    case VIR_CGROUP_THREAD_LAST:
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("unexpected name value %d"), nameval);
        return -1;
    }

    if (rc < 0)
        return -1;

    if (virCgroupNew(-1, threadName, domain, controllers, group) < 0)
        return -1;

    if (virCgroupMakeGroup(domain, *group, create, VIR_CGROUP_NONE) >= 0)
        return 0;

    virCgroupRemove(*group);
    virCgroupFree(group);
    return -1;
}


E
Eric Blake 已提交
1461 1462 1463 1464
/* Build a group for @pid with an empty path and no parent, leaving
 * the rest to virCgroupNew().  Returns what virCgroupNew() returns. */
int
virCgroupNewDetect(pid_t pid,
                   int controllers,
                   virCgroupPtr *group)
{
    const char *emptyPath = "";

    return virCgroupNew(pid, emptyPath, NULL, controllers, group);
}

E
Eric Blake 已提交
1469

1470 1471 1472
/*
 * Returns 0 on success (but @group may be NULL), -1 on fatal error
 */
E
Eric Blake 已提交
1473 1474 1475 1476 1477
int
virCgroupNewDetectMachine(const char *name,
                          const char *drivername,
                          pid_t pid,
                          int controllers,
1478
                          char *machinename,
E
Eric Blake 已提交
1479
                          virCgroupPtr *group)
1480
{
1481
    if (virCgroupNewDetect(pid, controllers, group) < 0) {
1482 1483 1484 1485 1486
        if (virCgroupNewIgnoreError())
            return 0;
        return -1;
    }

1487
    if (!virCgroupValidateMachineGroup(*group, name, drivername, machinename)) {
1488 1489
        VIR_DEBUG("Failed to validate machine name for '%s' driver '%s'",
                  name, drivername);
1490
        virCgroupFree(group);
1491 1492 1493 1494 1495 1496
        return 0;
    }

    return 0;
}

E
Eric Blake 已提交
1497

1498 1499 1500 1501 1502 1503 1504 1505 1506 1507
/*
 * Register the machine with systemd through virSystemdCreateMachine(),
 * then build a group for every level of the systemd controller
 * placement detected for @pidleader, and finally add @pidleader to
 * the innermost group, which is stored in @group.
 *
 * Returns 0 on success, -1 on fatal error, -2 on systemd not available.
 * A negative result of virSystemdCreateMachine() is returned unchanged.
 * If adding the task fails, the group is removed and freed, the error
 * is preserved, and 0 is still returned.
 */
static int
virCgroupNewMachineSystemd(const char *name,
                           const char *drivername,
                           const unsigned char *uuid,
                           const char *rootdir,
                           pid_t pidleader,
                           bool isContainer,
                           size_t nnicindexes,
                           int *nicindexes,
                           const char *partition,
                           int controllers,
                           virCgroupPtr *group)
{
    int ret = -1;
    int rv;
    virCgroupPtr init;
    virCgroupPtr parent = NULL;
    VIR_AUTOFREE(char *) path = NULL;
    char *cursor;

    VIR_DEBUG("Trying to setup machine '%s' via systemd", name);
    rv = virSystemdCreateMachine(name, drivername, uuid, rootdir,
                                 pidleader, isContainer,
                                 nnicindexes, nicindexes,
                                 partition);
    if (rv < 0)
        return rv;

    if (controllers != -1)
        controllers |= (1 << VIR_CGROUP_CONTROLLER_SYSTEMD);

    VIR_DEBUG("Detecting systemd placement");
    if (virCgroupNewDetect(pidleader, controllers, &init) < 0)
        return -1;

    /* Take ownership of the systemd placement string before the
     * temporary group is released. */
    path = init->controllers[VIR_CGROUP_CONTROLLER_SYSTEMD].placement;
    init->controllers[VIR_CGROUP_CONTROLLER_SYSTEMD].placement = NULL;
    virCgroupFree(&init);

    if (!path || STREQ(path, "/") || path[0] != '/') {
        VIR_DEBUG("Systemd didn't setup its controller");
        ret = -2;
        goto cleanup;
    }

    cursor = path;

    if (virCgroupNew(pidleader, "", NULL, controllers, &parent) < 0)
        goto cleanup;

    for (;;) {
        virCgroupPtr level;
        char *sep = strchr(cursor + 1, '/');

        /* Temporarily cut the path after the current component. */
        if (sep)
            *sep = '\0';

        if (virCgroupNew(pidleader, path, parent, controllers, &level) < 0)
            goto cleanup;

        if (virCgroupMakeGroup(parent, level, true, VIR_CGROUP_NONE) < 0) {
            virCgroupFree(&level);
            goto cleanup;
        }

        if (!sep) {
            /* Last component: this is the group handed to the caller. */
            *group = level;
            break;
        }

        *sep = '/';
        cursor = sep;
        virCgroupFree(&parent);
        parent = level;
    }

    if (virCgroupAddTask(*group, pidleader) < 0) {
        virErrorPtr saved = virSaveLastError();
        virCgroupRemove(*group);
        virCgroupFree(group);
        if (saved) {
            virSetError(saved);
            virFreeError(saved);
        }
    }

    ret = 0;
 cleanup:
    virCgroupFree(&parent);
    return ret;
}
1604

E
Eric Blake 已提交
1605

1606 1607 1608
/*
 * Forwards @name to virSystemdTerminateMachine() and passes its
 * result back.
 *
 * Returns 0 on success, -1 on fatal error
 */
int
virCgroupTerminateMachine(const char *name)
{
    int rc = virSystemdTerminateMachine(name);

    return rc;
}


1615 1616 1617
/* Non-systemd setup: create the partition group for @partition (the
 * tree is created only when @partition is "/machine"), create the
 * domain group below it and add @pidleader to it.
 *
 * Returns 0 on success, -1 on error.  A partition error accepted by
 * virCgroupNewIgnoreError() yields 0 without creating a domain group.
 * If adding the task fails, the group is removed and freed, the error
 * is preserved, and 0 is still returned. */
static int
virCgroupNewMachineManual(const char *name,
                          const char *drivername,
                          pid_t pidleader,
                          const char *partition,
                          int controllers,
                          virCgroupPtr *group)
{
    virCgroupPtr parent = NULL;
    int ret = -1;
    bool createTree;

    VIR_DEBUG("Fallback to non-systemd setup");
    createTree = STREQ(partition, "/machine");
    if (virCgroupNewPartition(partition, createTree,
                              controllers, &parent) < 0) {
        ret = virCgroupNewIgnoreError() ? 0 : -1;
        goto cleanup;
    }

    if (virCgroupNewDomainPartition(parent, drivername, name,
                                    true, group) < 0)
        goto cleanup;

    if (virCgroupAddTask(*group, pidleader) < 0) {
        virErrorPtr saved = virSaveLastError();
        virCgroupRemove(*group);
        virCgroupFree(group);
        if (saved) {
            virSetError(saved);
            virFreeError(saved);
        }
    }

    ret = 0;

 cleanup:
    virCgroupFree(&parent);
    return ret;
}

E
Eric Blake 已提交
1662 1663 1664 1665 1666 1667 1668 1669

/* Create the cgroup for a machine.  The systemd path is tried first;
 * its result is final when it is 0 or -1.  Any other result falls
 * back to virCgroupNewMachineManual().  @group is reset to NULL
 * before anything else happens. */
int
virCgroupNewMachine(const char *name,
                    const char *drivername,
                    const unsigned char *uuid,
                    const char *rootdir,
                    pid_t pidleader,
                    bool isContainer,
                    size_t nnicindexes,
                    int *nicindexes,
                    const char *partition,
                    int controllers,
                    virCgroupPtr *group)
{
    int rv;

    *group = NULL;

    rv = virCgroupNewMachineSystemd(name, drivername, uuid, rootdir,
                                    pidleader, isContainer,
                                    nnicindexes, nicindexes,
                                    partition, controllers, group);
    if (rv == 0 || rv == -1)
        return rv;

    return virCgroupNewMachineManual(name, drivername, pidleader,
                                     partition, controllers, group);
}

E
Eric Blake 已提交
1704 1705 1706

/* Report whether the last error is a system error with errno ENXIO,
 * EPERM or EACCES.  If so, the last error is reset and true is
 * returned; otherwise the error is left in place and false is
 * returned. */
bool
virCgroupNewIgnoreError(void)
{
    if (!virLastErrorIsSystemErrno(ENXIO) &&
        !virLastErrorIsSystemErrno(EPERM) &&
        !virLastErrorIsSystemErrno(EACCES))
        return false;

    virResetLastError();
    VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
    return true;
}

E
Eric Blake 已提交
1718

E
Eric Blake 已提交
1719 1720 1721 1722 1723 1724
/**
 * virCgroupFree:
 *
 * @group: The group structure to free
 */
void
1725
virCgroupFree(virCgroupPtr *group)
E
Eric Blake 已提交
1726 1727 1728
{
    size_t i;

1729
    if (*group == NULL)
E
Eric Blake 已提交
1730 1731 1732
        return;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
1733 1734 1735
        VIR_FREE((*group)->controllers[i].mountPoint);
        VIR_FREE((*group)->controllers[i].linkPoint);
        VIR_FREE((*group)->controllers[i].placement);
E
Eric Blake 已提交
1736 1737
    }

1738 1739
    VIR_FREE((*group)->path);
    VIR_FREE(*group);
E
Eric Blake 已提交
1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813
}


/**
 * virCgroupHasController: query whether a cgroup controller is present
 *
 * @cgroup: The group structure to be queried, or NULL
 * @controller: cgroup subsystem id
 *
 * Returns true if @cgroup is non-NULL, @controller is within range
 * and that controller has a mount point recorded in @cgroup;
 * false otherwise.
 */
bool
virCgroupHasController(virCgroupPtr cgroup, int controller)
{
    return cgroup != NULL &&
           controller >= 0 &&
           controller < VIR_CGROUP_CONTROLLER_LAST &&
           cgroup->controllers[controller].mountPoint != NULL;
}


/**
 * virCgroupPathOfController:
 *
 * @group: The cgroup to build a path for
 * @controller: controller id, or -1 to use the first controller that
 *              is mounted and whose placement is not "/"
 * @key: entry name appended to the path; NULL is treated as ""
 * @path: filled with the newly allocated "<mountPoint><placement>/<key>"
 *
 * Returns 0 on success, -1 on error: no usable controller found for
 * -1, @controller out of range, controller not mounted, controller
 * without placement, or allocation failure.
 */
int
virCgroupPathOfController(virCgroupPtr group,
                          int controller,
                          const char *key,
                          char **path)
{
    if (controller == -1) {
        size_t i;
        for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
            /* Reject any controller with a placement
             * of '/' to avoid doing bad stuff to the root
             * cgroup
             */
            if (group->controllers[i].mountPoint &&
                group->controllers[i].placement &&
                STRNEQ(group->controllers[i].placement, "/")) {
                controller = i;
                break;
            }
        }
    }
    if (controller == -1) {
        virReportSystemError(ENOSYS, "%s",
                             _("No controllers are mounted"));
        return -1;
    }

    /* @controller indexes group->controllers[] below, so refuse any
     * value outside the array instead of reading out of bounds. */
    if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Controller %d out of range"), controller);
        return -1;
    }

    if (group->controllers[controller].mountPoint == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Controller '%s' is not mounted"),
                       virCgroupControllerTypeToString(controller));
        return -1;
    }

    if (group->controllers[controller].placement == NULL) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Controller '%s' is not enabled for group"),
                       virCgroupControllerTypeToString(controller));
        return -1;
    }

    if (virAsprintf(path, "%s%s/%s",
                    group->controllers[controller].mountPoint,
                    group->controllers[controller].placement,
                    key ? key : "") < 0)
        return -1;

    return 0;
}


1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832
/**
 * virCgroupGetBlkioIoServiced:
 *
 * @group: The cgroup to get throughput for
 * @bytes_read: Pointer to returned bytes read
 * @bytes_write: Pointer to returned bytes written
 * @requests_read: Pointer to returned read io ops
 * @requests_write: Pointer to returned write io ops
 *
 * Reads "blkio.throttle.io_service_bytes" and
 * "blkio.throttle.io_serviced" and adds up every "Read " and
 * "Write " entry found in each of them. All four outputs are
 * zeroed first; a negative entry or a sum exceeding LLONG_MAX is
 * reported as VIR_ERR_OVERFLOW.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioIoServiced(virCgroupPtr group,
                            long long *bytes_read,
                            long long *bytes_write,
                            long long *requests_read,
                            long long *requests_write)
{
    const char *value_names[] = {
        "Read ",
        "Write "
    };
    long long *bytes_ptrs[] = {
        bytes_read,
        bytes_write
    };
    long long *requests_ptrs[] = {
        requests_read,
        requests_write
    };
    VIR_AUTOFREE(char *) byteStats = NULL;
    VIR_AUTOFREE(char *) reqStats = NULL;
    long long entry;
    size_t i;

    *bytes_read = 0;
    *bytes_write = 0;
    *requests_read = 0;
    *requests_write = 0;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_service_bytes", &byteStats) < 0)
        return -1;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_serviced", &reqStats) < 0)
        return -1;

    /* sum up all entries of the same kind, from all devices */
    for (i = 0; i < ARRAY_CARDINALITY(value_names); i++) {
        const char *label = value_names[i];
        char *byteCursor = byteStats;
        char *reqCursor = reqStats;

        for (byteCursor = strstr(byteCursor, label);
             byteCursor != NULL;
             byteCursor = strstr(byteCursor, label)) {
            /* Step over the label so the number is parsed next. */
            byteCursor += strlen(label);
            if (virStrToLong_ll(byteCursor, &byteCursor, 10, &entry) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Cannot parse byte %sstat '%s'"),
                               label,
                               byteCursor);
                return -1;
            }

            if (entry < 0 ||
                (entry > 0 && *bytes_ptrs[i] > (LLONG_MAX - entry))) {
                virReportError(VIR_ERR_OVERFLOW,
                               _("Sum of byte %sstat overflows"),
                               label);
                return -1;
            }
            *bytes_ptrs[i] += entry;
        }

        for (reqCursor = strstr(reqCursor, label);
             reqCursor != NULL;
             reqCursor = strstr(reqCursor, label)) {
            reqCursor += strlen(label);
            if (virStrToLong_ll(reqCursor, &reqCursor, 10, &entry) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Cannot parse %srequest stat '%s'"),
                               label,
                               reqCursor);
                return -1;
            }

            if (entry < 0 ||
                (entry > 0 && *requests_ptrs[i] > (LLONG_MAX - entry))) {
                virReportError(VIR_ERR_OVERFLOW,
                               _("Sum of %srequest stat overflows"),
                               label);
                return -1;
            }
            *requests_ptrs[i] += entry;
        }
    }

    return 0;
}


/**
 * virCgroupGetBlkioIoDeviceServiced:
 *
 * @group: The cgroup to get throughput for
 * @path: The device to get throughput for
 * @bytes_read: Pointer to returned bytes read
 * @bytes_write: Pointer to returned bytes written
 * @requests_read: Pointer to returned read io ops
 * @requests_write: Pointer to returned write io ops
 *
 * Reads "blkio.throttle.io_service_bytes" and "blkio.throttle.io_serviced"
 * from @group, searches each for the device string obtained from
 * virCgroupGetBlockDevString(@path), and then parses the number that
 * follows the "Read " and "Write " keywords found after that position.
 *
 * NOTE(review): the device entry is located with a plain substring
 * search, so a device string could presumably also match inside a longer
 * one (e.g. "8:0 " inside "18:0 ") -- confirm against the format produced
 * by virCgroupGetBlockDevString().
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioIoDeviceServiced(virCgroupPtr group,
                                  const char *path,
                                  long long *bytes_read,
                                  long long *bytes_write,
                                  long long *requests_read,
                                  long long *requests_write)
{
    VIR_AUTOFREE(char *) str1 = NULL;
    VIR_AUTOFREE(char *) str2 = NULL;
    VIR_AUTOFREE(char *) str3 = NULL;
    char *p1 = NULL;
    char *p2 = NULL;
    size_t i;

    const char *value_names[] = {
        "Read ",
        "Write "
    };
    long long *bytes_ptrs[] = {
        bytes_read,
        bytes_write
    };
    long long *requests_ptrs[] = {
        requests_read,
        requests_write
    };

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_service_bytes", &str1) < 0)
        return -1;

    if (virCgroupGetValueStr(group,
                             VIR_CGROUP_CONTROLLER_BLKIO,
                             "blkio.throttle.io_serviced", &str2) < 0)
        return -1;

    if (!(str3 = virCgroupGetBlockDevString(path)))
        return -1;

    if (!(p1 = strstr(str1, str3))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot find byte stats for block device '%s'"),
                       str3);
        return -1;
    }

    if (!(p2 = strstr(str2, str3))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot find request stats for block device '%s'"),
                       str3);
        return -1;
    }

    for (i = 0; i < ARRAY_CARDINALITY(value_names); i++) {
        const char *val;

        if (!(p1 = strstr(p1, value_names[i]))) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot find byte %sstats for block device '%s'"),
                           value_names[i], str3);
            return -1;
        }

        /* Remember where the number starts: p1 doubles as the parser's
         * end-pointer output, so after the call it may no longer point
         * at the keyword and must not be used to build the message. */
        val = p1 + strlen(value_names[i]);
        if (virStrToLong_ll(val, &p1, 10, bytes_ptrs[i]) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot parse %sstat '%s'"),
                           value_names[i], val);
            return -1;
        }

        if (!(p2 = strstr(p2, value_names[i]))) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot find request %sstats for block device '%s'"),
                           value_names[i], str3);
            return -1;
        }

        /* Same precaution as above for the request counters. */
        val = p2 + strlen(value_names[i]);
        if (virStrToLong_ll(val, &p2, 10, requests_ptrs[i]) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Cannot parse %sstat '%s'"),
                           value_names[i], val);
            return -1;
        }
    }

    return 0;
}


2020 2021 2022 2023 2024 2025
/**
 * virCgroupSetBlkioWeight:
 *
 * @group: The cgroup to change io weight for
 * @weight: The Weight for this cgroup
 *
2026
 * Returns: 0 on success, -1 on error
2027
 */
E
Eric Blake 已提交
2028 2029
int
virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight)
2030 2031 2032 2033 2034 2035 2036
{
    return virCgroupSetValueU64(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.weight",
                                weight);
}

E
Eric Blake 已提交
2037

2038 2039 2040 2041 2042 2043
/**
 * virCgroupGetBlkioWeight:
 *
 * @group: The cgroup to get weight for
 * @Weight: Pointer to returned weight
 *
2044
 * Returns: 0 on success, -1 on error
2045
 */
E
Eric Blake 已提交
2046 2047
int
virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight)
2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058
{
    unsigned long long tmp;
    int ret;
    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_BLKIO,
                               "blkio.weight", &tmp);
    if (ret == 0)
        *weight = tmp;
    return ret;
}

2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071
/**
 * virCgroupSetBlkioDeviceReadIops:
 * @group: The cgroup to change block io setting for
 * @path: The path of device
 * @riops: The new device read iops throttle, or 0 to clear
 *
 * Appends @riops to the device string returned by
 * virCgroupGetBlockDevString() and writes the result to
 * "blkio.throttle.read_iops_device".
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupSetBlkioDeviceReadIops(virCgroupPtr group,
                                const char *path,
                                unsigned int riops)
{
    VIR_AUTOFREE(char *) devid = NULL;
    VIR_AUTOFREE(char *) setting = NULL;

    devid = virCgroupGetBlockDevString(path);
    if (devid == NULL)
        return -1;

    if (virAsprintf(&setting, "%s%u", devid, riops) < 0)
        return -1;

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.read_iops_device",
                                setting);
}

E
Eric Blake 已提交
2087

2088
/**
 * virCgroupSetBlkioDeviceWriteIops:
 * @group: The cgroup to change block io setting for
 * @path: The path of device
 * @wiops: The new device write iops throttle, or 0 to clear
 *
 * Appends @wiops to the device string returned by
 * virCgroupGetBlockDevString() and writes the result to
 * "blkio.throttle.write_iops_device".
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupSetBlkioDeviceWriteIops(virCgroupPtr group,
                                 const char *path,
                                 unsigned int wiops)
{
    VIR_AUTOFREE(char *) devid = NULL;
    VIR_AUTOFREE(char *) setting = NULL;

    devid = virCgroupGetBlockDevString(path);
    if (devid == NULL)
        return -1;

    if (virAsprintf(&setting, "%s%u", devid, wiops) < 0)
        return -1;

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.write_iops_device",
                                setting);
}


/**
 * virCgroupSetBlkioDeviceReadBps:
 * @group: The cgroup to change block io setting for
 * @path: The path of device
 * @rbps: The new device read bps throttle, or 0 to clear
 *
 * Appends @rbps to the device string returned by
 * virCgroupGetBlockDevString() and writes the result to
 * "blkio.throttle.read_bps_device".
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupSetBlkioDeviceReadBps(virCgroupPtr group,
                               const char *path,
                               unsigned long long rbps)
{
    VIR_AUTOFREE(char *) devid = NULL;
    VIR_AUTOFREE(char *) setting = NULL;

    devid = virCgroupGetBlockDevString(path);
    if (devid == NULL)
        return -1;

    if (virAsprintf(&setting, "%s%llu", devid, rbps) < 0)
        return -1;

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.read_bps_device",
                                setting);
}

/**
 * virCgroupSetBlkioDeviceWriteBps:
 * @group: The cgroup to change block io setting for
 * @path: The path of device
 * @wbps: The new device write bps throttle, or 0 to clear
 *
 * Appends @wbps to the device string returned by
 * virCgroupGetBlockDevString() and writes the result to
 * "blkio.throttle.write_bps_device".
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupSetBlkioDeviceWriteBps(virCgroupPtr group,
                                const char *path,
                                unsigned long long wbps)
{
    VIR_AUTOFREE(char *) devid = NULL;
    VIR_AUTOFREE(char *) setting = NULL;

    devid = virCgroupGetBlockDevString(path);
    if (devid == NULL)
        return -1;

    if (virAsprintf(&setting, "%s%llu", devid, wbps) < 0)
        return -1;

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.throttle.write_bps_device",
                                setting);
}


/**
 * virCgroupSetBlkioDeviceWeight:
 * @group: The cgroup to change block io setting for
 * @path: The path of device
2178 2179
 * @weight: The new device weight (100-1000),
 * (10-1000) after kernel 2.6.39, or 0 to clear
2180
 *
2181
 * Returns: 0 on success, -1 on error
2182
 */
E
Eric Blake 已提交
2183 2184 2185 2186
int
virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
                              const char *path,
                              unsigned int weight)
2187
{
2188 2189
    VIR_AUTOFREE(char *) str = NULL;
    VIR_AUTOFREE(char *) blkstr = NULL;
2190

2191
    if (!(blkstr = virCgroupGetBlockDevString(path)))
2192
        return -1;
2193

2194
    if (virAsprintf(&str, "%s%d", blkstr, weight) < 0)
2195
        return -1;
2196

2197
    return virCgroupSetValueStr(group,
2198 2199 2200 2201
                               VIR_CGROUP_CONTROLLER_BLKIO,
                               "blkio.weight_device",
                               str);
}
2202

2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215
/**
 * virCgroupGetBlkioDeviceReadIops:
 * @group: The cgroup to gather block io setting for
 * @path: The path of device
 * @riops: Returned device read iops throttle, 0 if there is none
 *
 * Looks up the entry for @path in "blkio.throttle.read_iops_device"
 * and parses it as an unsigned integer.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioDeviceReadIops(virCgroupPtr group,
                                const char *path,
                                unsigned int *riops)
{
    VIR_AUTOFREE(char *) value = NULL;
    int rc;

    rc = virCgroupGetValueForBlkDev(group,
                                    VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.throttle.read_iops_device",
                                    path,
                                    &value);
    if (rc < 0)
        return -1;

    /* A lookup that succeeds without a string is reported as 0. */
    if (value == NULL) {
        *riops = 0;
        return 0;
    }

    if (virStrToLong_ui(value, NULL, 10, riops) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       value);
        return -1;
    }

    return 0;
}

/**
 * virCgroupGetBlkioDeviceWriteIops:
 * @group: The cgroup to gather block io setting for
 * @path: The path of device
 * @wiops: Returned device write iops throttle, 0 if there is none
 *
 * Looks up the entry for @path in "blkio.throttle.write_iops_device"
 * and parses it as an unsigned integer.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioDeviceWriteIops(virCgroupPtr group,
                                 const char *path,
                                 unsigned int *wiops)
{
    VIR_AUTOFREE(char *) value = NULL;
    int rc;

    rc = virCgroupGetValueForBlkDev(group,
                                    VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.throttle.write_iops_device",
                                    path,
                                    &value);
    if (rc < 0)
        return -1;

    /* A lookup that succeeds without a string is reported as 0. */
    if (value == NULL) {
        *wiops = 0;
        return 0;
    }

    if (virStrToLong_ui(value, NULL, 10, wiops) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       value);
        return -1;
    }

    return 0;
}

/**
 * virCgroupGetBlkioDeviceReadBps:
 * @group: The cgroup to gather block io setting for
 * @path: The path of device
 * @rbps: Returned device read bps throttle, 0 if there is none
 *
 * Looks up the entry for @path in "blkio.throttle.read_bps_device"
 * and parses it as an unsigned long long.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioDeviceReadBps(virCgroupPtr group,
                               const char *path,
                               unsigned long long *rbps)
{
    VIR_AUTOFREE(char *) value = NULL;
    int rc;

    rc = virCgroupGetValueForBlkDev(group,
                                    VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.throttle.read_bps_device",
                                    path,
                                    &value);
    if (rc < 0)
        return -1;

    /* A lookup that succeeds without a string is reported as 0. */
    if (value == NULL) {
        *rbps = 0;
        return 0;
    }

    if (virStrToLong_ull(value, NULL, 10, rbps) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       value);
        return -1;
    }

    return 0;
}

/**
 * virCgroupGetBlkioDeviceWriteBps:
 * @group: The cgroup to gather block io setting for
 * @path: The path of device
 * @wbps: Returned device write bps throttle, 0 if there is none
 *
 * Looks up the entry for @path in "blkio.throttle.write_bps_device"
 * and parses it as an unsigned long long.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioDeviceWriteBps(virCgroupPtr group,
                                const char *path,
                                unsigned long long *wbps)
{
    VIR_AUTOFREE(char *) value = NULL;
    int rc;

    rc = virCgroupGetValueForBlkDev(group,
                                    VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.throttle.write_bps_device",
                                    path,
                                    &value);
    if (rc < 0)
        return -1;

    /* A lookup that succeeds without a string is reported as 0. */
    if (value == NULL) {
        *wbps = 0;
        return 0;
    }

    if (virStrToLong_ull(value, NULL, 10, wbps) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       value);
        return -1;
    }

    return 0;
}

/**
 * virCgroupGetBlkioDeviceWeight:
 * @group: The cgroup to gather block io setting for
 * @path: The path of device
 * @weight: Returned device weight, 0 if there is none
 *
 * Looks up the entry for @path in "blkio.weight_device" and parses it
 * as an unsigned integer.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetBlkioDeviceWeight(virCgroupPtr group,
                              const char *path,
                              unsigned int *weight)
{
    VIR_AUTOFREE(char *) value = NULL;
    int rc;

    rc = virCgroupGetValueForBlkDev(group,
                                    VIR_CGROUP_CONTROLLER_BLKIO,
                                    "blkio.weight_device",
                                    path,
                                    &value);
    if (rc < 0)
        return -1;

    /* A lookup that succeeds without a string is reported as 0. */
    if (value == NULL) {
        *weight = 0;
        return 0;
    }

    if (virStrToLong_ui(value, NULL, 10, weight) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unable to parse '%s' as an integer"),
                       value);
        return -1;
    }

    return 0;
}

2373

2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387
/*
 * Retrieve the "memory.limit_in_bytes" value from the memory controller
 * root dir. This value cannot be modified by userspace and therefore
 * is the maximum limit value supported by cgroups on the local system.
 * Returns this value scaled to KB or falls back to the original
 * VIR_DOMAIN_MEMORY_PARAM_UNLIMITED. Either way, remember the return
 * value to avoid unnecessary cgroup filesystem access.
 */
static unsigned long long int virCgroupMemoryUnlimitedKB;
static virOnceControl virCgroupMemoryOnce = VIR_ONCE_CONTROL_INITIALIZER;

static void
virCgroupMemoryOnceInit(void)
{
2388
    virCgroupPtr group;
2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401
    unsigned long long int mem_unlimited = 0ULL;

    if (virCgroupNew(-1, "/", NULL, -1, &group) < 0)
        goto cleanup;

    if (!virCgroupHasController(group, VIR_CGROUP_CONTROLLER_MEMORY))
        goto cleanup;

    ignore_value(virCgroupGetValueU64(group,
                                      VIR_CGROUP_CONTROLLER_MEMORY,
                                      "memory.limit_in_bytes",
                                      &mem_unlimited));
 cleanup:
2402
    virCgroupFree(&group);
2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418
    virCgroupMemoryUnlimitedKB = mem_unlimited >> 10;
}

/*
 * Return the cached root "memory.limit_in_bytes" value in KB, running
 * the one-time initializer first. A cached value of 0 is replaced by
 * VIR_DOMAIN_MEMORY_PARAM_UNLIMITED.
 */
static unsigned long long int
virCgroupGetMemoryUnlimitedKB(void)
{
    if (virOnce(&virCgroupMemoryOnce, virCgroupMemoryOnceInit) < 0)
        VIR_DEBUG("Init failed, will fall back to defaults.");

    if (virCgroupMemoryUnlimitedKB == 0)
        return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

    return virCgroupMemoryUnlimitedKB;
}


2419 2420 2421 2422 2423 2424 2425 2426
/**
 * virCgroupSetMemory:
 *
 * @group: The cgroup to change memory for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2427 2428
int
virCgroupSetMemory(virCgroupPtr group, unsigned long long kb)
2429
{
2430 2431
    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

2432 2433 2434 2435 2436 2437 2438 2439
    if (kb > maxkb) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("Memory '%llu' must be less than %llu"),
                       kb, maxkb);
        return -1;
    }

    if (kb == maxkb)
2440 2441 2442 2443 2444 2445 2446 2447 2448
        return virCgroupSetValueI64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.limit_in_bytes",
                                    -1);
    else
        return virCgroupSetValueU64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.limit_in_bytes",
                                    kb << 10);
2449 2450
}

E
Eric Blake 已提交
2451

R
Ryota Ozaki 已提交
2452 2453 2454 2455 2456 2457 2458 2459
/**
 * virCgroupGetMemoryUsage:
 *
 * @group: The cgroup to change memory for
 * @kb: Pointer to returned used memory in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2460 2461
int
virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb)
R
Ryota Ozaki 已提交
2462
{
C
Cole Robinson 已提交
2463
    long long unsigned int usage_in_bytes;
R
Ryota Ozaki 已提交
2464 2465 2466 2467 2468 2469 2470 2471 2472
    int ret;
    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               "memory.usage_in_bytes", &usage_in_bytes);
    if (ret == 0)
        *kb = (unsigned long) usage_in_bytes >> 10;
    return ret;
}

E
Eric Blake 已提交
2473

2474 2475 2476 2477 2478 2479 2480 2481
/**
 * virCgroupSetMemoryHardLimit:
 *
 * @group: The cgroup to change memory hard limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2482 2483
int
virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb)
2484 2485 2486 2487
{
    return virCgroupSetMemory(group, kb);
}

E
Eric Blake 已提交
2488

2489 2490 2491 2492 2493 2494 2495 2496
/**
 * virCgroupGetMemoryHardLimit:
 *
 * @group: The cgroup to get the memory hard limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2497 2498
int
virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb)
2499 2500
{
    long long unsigned int limit_in_bytes;
2501 2502 2503 2504

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.limit_in_bytes", &limit_in_bytes) < 0)
2505
        return -1;
2506 2507

    *kb = limit_in_bytes >> 10;
2508
    if (*kb >= virCgroupGetMemoryUnlimitedKB())
2509 2510
        *kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

2511
    return 0;
2512 2513
}

E
Eric Blake 已提交
2514

2515 2516 2517 2518 2519 2520 2521 2522
/**
 * virCgroupSetMemorySoftLimit:
 *
 * @group: The cgroup to change memory soft limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2523 2524
int
virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb)
2525
{
2526 2527
    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

2528 2529 2530 2531 2532 2533 2534 2535
    if (kb > maxkb) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("Memory '%llu' must be less than %llu"),
                       kb, maxkb);
        return -1;
    }

    if (kb == maxkb)
2536 2537 2538 2539 2540 2541 2542 2543 2544
        return virCgroupSetValueI64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.soft_limit_in_bytes",
                                    -1);
    else
        return virCgroupSetValueU64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.soft_limit_in_bytes",
                                    kb << 10);
2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555
}


/**
 * virCgroupGetMemorySoftLimit:
 *
 * @group: The cgroup to get the memory soft limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2556 2557
int
virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb)
2558 2559
{
    long long unsigned int limit_in_bytes;
2560 2561 2562 2563

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.soft_limit_in_bytes", &limit_in_bytes) < 0)
2564
        return -1;
2565 2566

    *kb = limit_in_bytes >> 10;
2567
    if (*kb >= virCgroupGetMemoryUnlimitedKB())
2568 2569
        *kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

2570
    return 0;
2571 2572
}

E
Eric Blake 已提交
2573

2574
/**
2575
 * virCgroupSetMemSwapHardLimit:
2576
 *
2577 2578
 * @group: The cgroup to change mem+swap hard limit for
 * @kb: The mem+swap amount in kilobytes
2579 2580 2581
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2582 2583
int
virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb)
2584
{
2585 2586
    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

2587 2588 2589 2590 2591 2592 2593 2594
    if (kb > maxkb) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("Memory '%llu' must be less than %llu"),
                       kb, maxkb);
        return -1;
    }

    if (kb == maxkb)
2595 2596 2597 2598 2599 2600 2601 2602 2603
        return virCgroupSetValueI64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.memsw.limit_in_bytes",
                                    -1);
    else
        return virCgroupSetValueU64(group,
                                    VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.memsw.limit_in_bytes",
                                    kb << 10);
2604 2605
}

E
Eric Blake 已提交
2606

2607
/**
2608
 * virCgroupGetMemSwapHardLimit:
2609
 *
2610 2611
 * @group: The cgroup to get mem+swap hard limit for
 * @kb: The mem+swap amount in kilobytes
2612 2613 2614
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2615 2616
int
virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb)
2617 2618
{
    long long unsigned int limit_in_bytes;
2619 2620 2621 2622

    if (virCgroupGetValueU64(group,
                             VIR_CGROUP_CONTROLLER_MEMORY,
                             "memory.memsw.limit_in_bytes", &limit_in_bytes) < 0)
2623
        return -1;
2624 2625

    *kb = limit_in_bytes >> 10;
2626
    if (*kb >= virCgroupGetMemoryUnlimitedKB())
2627 2628
        *kb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;

2629
    return 0;
2630 2631
}

E
Eric Blake 已提交
2632

G
Gao feng 已提交
2633 2634 2635 2636 2637 2638 2639 2640
/**
 * virCgroupGetMemSwapUsage:
 *
 * @group: The cgroup to get mem+swap usage for
 * @kb: The mem+swap amount in kilobytes
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2641 2642
int
virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb)
G
Gao feng 已提交
2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653
{
    long long unsigned int usage_in_bytes;
    int ret;
    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               "memory.memsw.usage_in_bytes", &usage_in_bytes);
    if (ret == 0)
        *kb = usage_in_bytes >> 10;
    return ret;
}

E
Eric Blake 已提交
2654

2655 2656 2657 2658 2659 2660 2661 2662
/**
 * virCgroupSetCpusetMems:
 *
 * @group: The cgroup to set cpuset.mems for
 * @mems: the numa nodes to set
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2663 2664
int
virCgroupSetCpusetMems(virCgroupPtr group, const char *mems)
2665 2666 2667 2668 2669 2670 2671
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.mems",
                                mems);
}

E
Eric Blake 已提交
2672

2673 2674 2675 2676 2677 2678 2679 2680
/**
 * virCgroupGetCpusetMems:
 *
 * @group: The cgroup to get cpuset.mems for
 * @mems: the numa nodes to get
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2681 2682
int
virCgroupGetCpusetMems(virCgroupPtr group, char **mems)
2683 2684 2685 2686 2687 2688 2689
{
    return virCgroupGetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.mems",
                                mems);
}

E
Eric Blake 已提交
2690

2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729
/**
 * virCgroupSetCpusetMemoryMigrate:
 *
 * @group: The cgroup to set cpuset.memory_migrate for
 * @migrate: Whether to migrate the memory on change or not
 *
 * Writes "1" or "0" to "cpuset.memory_migrate" depending on @migrate.
 *
 * Returns: 0 on success
 */
int
virCgroupSetCpusetMemoryMigrate(virCgroupPtr group, bool migrate)
{
    const char *flag = "0";

    if (migrate)
        flag = "1";

    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.memory_migrate",
                                flag);
}


/**
 * virCgroupGetCpusetMemoryMigrate:
 *
 * @group: The cgroup to get cpuset.memory_migrate for
 * @migrate: Migration setting
 *
 * Reads "cpuset.memory_migrate" and reports any non-zero value as true.
 * @migrate is written regardless of whether the read succeeded.
 *
 * Returns: 0 on success
 */
int
virCgroupGetCpusetMemoryMigrate(virCgroupPtr group, bool *migrate)
{
    unsigned long long raw = 0;
    int rc;

    rc = virCgroupGetValueU64(group,
                              VIR_CGROUP_CONTROLLER_CPUSET,
                              "cpuset.memory_migrate",
                              &raw);

    /* Stored unconditionally, matching the long-standing behaviour. */
    *migrate = (raw != 0);
    return rc;
}


2730 2731 2732 2733 2734 2735
/**
 * virCgroupSetCpusetCpus:
 *
 * @group: The cgroup to set cpuset.cpus for
 * @cpus: the cpus to set
 *
N
Nitesh Konkar 已提交
2736
 * Returns: 0 on success
2737
 */
E
Eric Blake 已提交
2738 2739
int
virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus)
2740 2741 2742 2743 2744 2745 2746
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.cpus",
                                cpus);
}

E
Eric Blake 已提交
2747

2748 2749 2750 2751 2752 2753
/**
 * virCgroupGetCpusetCpus:
 *
 * @group: The cgroup to get cpuset.cpus for
 * @cpus: the cpus to get
 *
N
Nitesh Konkar 已提交
2754
 * Returns: 0 on success
2755
 */
E
Eric Blake 已提交
2756 2757
int
virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus)
2758 2759 2760 2761 2762 2763 2764
{
    return virCgroupGetValueStr(group,
                                VIR_CGROUP_CONTROLLER_CPUSET,
                                "cpuset.cpus",
                                cpus);
}

E
Eric Blake 已提交
2765

2766 2767 2768
/**
 * virCgroupDenyAllDevices:
 *
2769
 * @group: The cgroup to deny all permissions, for all devices
2770 2771 2772
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2773 2774
int
virCgroupDenyAllDevices(virCgroupPtr group)
2775 2776
{
    return virCgroupSetValueStr(group,
2777 2778 2779
                                VIR_CGROUP_CONTROLLER_DEVICES,
                                "devices.deny",
                                "a");
2780 2781
}

2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812
/**
 * virCgroupAllowAllDevices:
 *
 * Allows the permissions for all devices by setting lines similar
 * to these ones (obviously the 'm' permission is an example):
 *
 * 'b *:* m'
 * 'c *:* m'
 *
 * @group: The cgroup to allow devices for
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
 *
 * Block devices are handled first; character devices are only
 * attempted when that succeeded.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupAllowAllDevices(virCgroupPtr group, int perms)
{
    if (virCgroupAllowDevice(group, 'b', -1, -1, perms) < 0 ||
        virCgroupAllowDevice(group, 'c', -1, -1, perms) < 0)
        return -1;

    return 0;
}

E
Eric Blake 已提交
2813

2814 2815 2816 2817 2818
/**
 * virCgroupAllowDevice:
 *
 * @group: The cgroup to allow a device for
 * @type: The device type (i.e., 'c' or 'b')
2819 2820
 * @major: The major number of the device, a negative value means '*'
 * @minor: The minor number of the device, a negative value means '*'
2821
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
2822 2823 2824
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2825 2826 2827
int
virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor,
                     int perms)
2828
{
2829 2830 2831
    VIR_AUTOFREE(char *) devstr = NULL;
    VIR_AUTOFREE(char *) majorstr = NULL;
    VIR_AUTOFREE(char *) minorstr = NULL;
2832

2833
    if ((major < 0 && VIR_STRDUP(majorstr, "*") < 0) ||
2834
        (major >= 0 && virAsprintf(&majorstr, "%i", major) < 0))
2835
        return -1;
2836 2837

    if ((minor < 0 && VIR_STRDUP(minorstr, "*") < 0) ||
2838
        (minor >= 0 && virAsprintf(&minorstr, "%i", minor) < 0))
2839
        return -1;
2840 2841

    if (virAsprintf(&devstr, "%c %s:%s %s", type, majorstr, minorstr,
2842
                    virCgroupGetDevicePermsString(perms)) < 0)
2843
        return -1;
2844

2845 2846 2847 2848
    if (virCgroupSetValueStr(group,
                             VIR_CGROUP_CONTROLLER_DEVICES,
                             "devices.allow",
                             devstr) < 0)
2849
        return -1;
2850

2851
    return 0;
2852
}
2853

E
Eric Blake 已提交
2854

2855 2856 2857 2858 2859
/**
 * virCgroupAllowDevicePath:
 *
 * @group: The cgroup to allow the device for
 * @path: the device to allow
2860
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
2861
 * @ignoreEacces: Ignore lack of permission (mostly for NFS mounts)
2862 2863 2864 2865
 *
 * Queries the type of device and its major/minor number, and
 * adds that to the cgroup ACL
 *
2866 2867
 * Returns: 0 on success, 1 if path exists but is not a device or is not
 * accesible, or * -1 on error
2868
 */
E
Eric Blake 已提交
2869
int
2870 2871 2872 2873
virCgroupAllowDevicePath(virCgroupPtr group,
                         const char *path,
                         int perms,
                         bool ignoreEacces)
2874 2875 2876
{
    struct stat sb;

2877
    if (stat(path, &sb) < 0) {
2878 2879 2880
        if (errno == EACCES && ignoreEacces)
            return 1;

2881 2882 2883 2884 2885
        virReportSystemError(errno,
                             _("Path '%s' is not accessible"),
                             path);
        return -1;
    }
2886 2887

    if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
2888
        return 1;
2889 2890 2891 2892

    return virCgroupAllowDevice(group,
                                S_ISCHR(sb.st_mode) ? 'c' : 'b',
                                major(sb.st_rdev),
2893 2894
                                minor(sb.st_rdev),
                                perms);
2895
}
D
Daniel P. Berrange 已提交
2896

2897 2898 2899 2900 2901 2902

/**
 * virCgroupDenyDevice:
 *
 * @group: The cgroup to deny a device for
 * @type: The device type (i.e., 'c' or 'b')
2903 2904
 * @major: The major number of the device, a negative value means '*'
 * @minor: The minor number of the device, a negative value means '*'
2905
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny
2906 2907 2908
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
2909 2910 2911
int
virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor,
                    int perms)
2912
{
2913 2914 2915
    VIR_AUTOFREE(char *) devstr = NULL;
    VIR_AUTOFREE(char *) majorstr = NULL;
    VIR_AUTOFREE(char *) minorstr = NULL;
2916 2917 2918

    if ((major < 0 && VIR_STRDUP(majorstr, "*") < 0) ||
        (major >= 0 && virAsprintf(&majorstr, "%i", major) < 0))
2919
        return -1;
2920

2921 2922
    if ((minor < 0 && VIR_STRDUP(minorstr, "*") < 0) ||
        (minor >= 0 && virAsprintf(&minorstr, "%i", minor) < 0))
2923
        return -1;
2924 2925

    if (virAsprintf(&devstr, "%c %s:%s %s", type, majorstr, minorstr,
2926
                    virCgroupGetDevicePermsString(perms)) < 0)
2927
        return -1;
2928

2929 2930 2931 2932
    if (virCgroupSetValueStr(group,
                             VIR_CGROUP_CONTROLLER_DEVICES,
                             "devices.deny",
                             devstr) < 0)
2933
        return -1;
2934

2935
    return 0;
2936 2937
}

E
Eric Blake 已提交
2938

2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952
/**
 * virCgroupDenyDevicePath:
 *
 * @group: The cgroup to deny the device for
 * @path: the device to deny
 * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow
 * @ignoreEacces: Ignore lack of permission (mostly for NFS mounts)
 *
 * Queries the type of device and its major/minor number, and
 * removes it from the cgroup ACL
 *
 * Returns: 0 on success, 1 if path exists but is not a device or is not
 * accessible, or -1 on error.
 */
E
Eric Blake 已提交
2953
int
2954 2955 2956 2957
virCgroupDenyDevicePath(virCgroupPtr group,
                        const char *path,
                        int perms,
                        bool ignoreEacces)
2958 2959 2960
{
    struct stat sb;

2961
    if (stat(path, &sb) < 0) {
2962 2963 2964
        if (errno == EACCES && ignoreEacces)
            return 1;

2965 2966 2967 2968 2969
        virReportSystemError(errno,
                             _("Path '%s' is not accessible"),
                             path);
        return -1;
    }
2970 2971

    if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
2972
        return 1;
2973 2974 2975 2976

    return virCgroupDenyDevice(group,
                               S_ISCHR(sb.st_mode) ? 'c' : 'b',
                               major(sb.st_rdev),
2977 2978
                               minor(sb.st_rdev),
                               perms);
2979 2980
}

E
Eric Blake 已提交
2981

2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997
/* Accumulate, per physical cpu, the cpu time consumed by all vcpus.
 *
 * Every vcpu set in @guestvcpus has its own thread cgroup whose
 * cpuacct.usage_percpu holds one value per present physical cpu:
 *   t0 t1 t2 t3
 * With 4 physical cpus and 2 vcpus there are two such rows:
 *   v\p   0   1   2   3
 *   0   t00 t01 t02 t03
 *   1   t10 t11 t12 t13
 * and the column sums are added into @sum_cpu_time:
 *   s0 = t00 + t10
 *   s1 = t01 + t11
 *   s2 = t02 + t12
 *   s3 = t03 + t13
 *
 * Only cpus that are set in @cpumap and whose index is below @nsum are
 * parsed.  Values are added to the existing contents of @sum_cpu_time.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virCgroupGetPercpuVcpuSum(virCgroupPtr group,
                          virBitmapPtr guestvcpus,
                          unsigned long long *sum_cpu_time,
                          size_t nsum,
                          virBitmapPtr cpumap)
{
    int ret = -1;
    ssize_t vcpu = -1;
    virCgroupPtr vcpugroup = NULL;

    while ((vcpu = virBitmapNextSetBit(guestvcpus, vcpu)) >= 0) {
        VIR_AUTOFREE(char *) usage = NULL;
        char *cursor;
        ssize_t pcpu;

        if (virCgroupNewThread(group, VIR_CGROUP_THREAD_VCPU, vcpu,
                               false, &vcpugroup) < 0)
            goto cleanup;

        if (virCgroupGetCpuacctPercpuUsage(vcpugroup, &usage) < 0)
            goto cleanup;

        /* one number is consumed from the string per cpu set in @cpumap */
        cursor = usage;
        pcpu = virBitmapNextSetBit(cpumap, -1);
        while (pcpu >= 0 && pcpu < nsum) {
            unsigned long long value;

            if (virStrToLong_ull(cursor, &cursor, 10, &value) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("cpuacct parse error"));
                goto cleanup;
            }
            sum_cpu_time[pcpu] += value;

            pcpu = virBitmapNextSetBit(cpumap, pcpu);
        }

        virCgroupFree(&vcpugroup);
    }

    ret = 0;

 cleanup:
    virCgroupFree(&vcpugroup);
    return ret;
}


3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061
/**
 * virCgroupGetPercpuStats:
 * @cgroup: cgroup data structure
 * @params: typed parameter array where data is returned
 * @nparams: cardinality of @params
 * @start_cpu: offset of physical CPU to get data for
 * @ncpus: number of physical CPUs to get data for
 * @nvcpupids: number of vCPU threads for a domain (actual number of vcpus)
 *
 * This function is the worker that retrieves data in the appropriate format
 * for the terribly designed 'virDomainGetCPUStats' API. Sharing semantics with
 * the API, this function has two modes of operation depending on magic settings
 * of the input arguments. Please refer to docs of 'virDomainGetCPUStats' for
 * the usage patterns of the similarly named arguments.
 *
 * @nvcpupids determines the count of active vcpu threads for the vm. If the
 * threads could not be detected the percpu data is skipped.
 *
 * Please DON'T use this function anywhere else.
 */
3062 3063 3064 3065 3066
int
virCgroupGetPercpuStats(virCgroupPtr group,
                        virTypedParameterPtr params,
                        unsigned int nparams,
                        int start_cpu,
3067
                        unsigned int ncpus,
3068
                        virBitmapPtr guestvcpus)
3069
{
3070
    int ret = -1;
3071
    size_t i;
3072
    int need_cpus, total_cpus;
3073
    char *pos;
3074 3075
    VIR_AUTOFREE(char *) buf = NULL;
    VIR_AUTOFREE(unsigned long long *) sum_cpu_time = NULL;
3076 3077 3078
    virTypedParameterPtr ent;
    int param_idx;
    unsigned long long cpu_time;
3079
    virBitmapPtr cpumap = NULL;
3080 3081

    /* return the number of supported params */
3082
    if (nparams == 0 && ncpus != 0) {
3083
        if (!guestvcpus)
3084 3085 3086 3087
            return CGROUP_NB_PER_CPU_STAT_PARAM;
        else
            return CGROUP_NB_PER_CPU_STAT_PARAM + 1;
    }
3088 3089

    /* To parse account file, we need to know how many cpus are present.  */
3090
    if (!(cpumap = virHostCPUGetPresentBitmap()))
3091
        return -1;
3092

3093 3094
    total_cpus = virBitmapSize(cpumap);

3095
    /* return total number of cpus */
3096 3097 3098 3099
    if (ncpus == 0) {
        ret = total_cpus;
        goto cleanup;
    }
3100

3101
    if (start_cpu >= total_cpus) {
3102 3103
        virReportError(VIR_ERR_INVALID_ARG,
                       _("start_cpu %d larger than maximum of %d"),
3104
                       start_cpu, total_cpus - 1);
3105
        goto cleanup;
3106 3107 3108 3109
    }

    /* we get percpu cputime accounting info. */
    if (virCgroupGetCpuacctPercpuUsage(group, &buf))
3110
        goto cleanup;
3111 3112 3113 3114 3115 3116
    pos = buf;

    /* return percpu cputime in index 0 */
    param_idx = 0;

    /* number of cpus to compute */
J
Ján Tomko 已提交
3117
    need_cpus = MIN(total_cpus, start_cpu + ncpus);
3118

J
Ján Tomko 已提交
3119
    for (i = 0; i < need_cpus; i++) {
J
Ján Tomko 已提交
3120
        if (!virBitmapIsBitSet(cpumap, i)) {
3121 3122
            cpu_time = 0;
        } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) {
3123 3124
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("cpuacct parse error"));
3125
            goto cleanup;
3126 3127 3128 3129 3130 3131
        }
        if (i < start_cpu)
            continue;
        ent = &params[(i - start_cpu) * nparams + param_idx];
        if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME,
                                    VIR_TYPED_PARAM_ULLONG, cpu_time) < 0)
3132
            goto cleanup;
3133 3134
    }

3135
    /* return percpu vcputime in index 1 */
3136
    param_idx = 1;
3137

3138
    if (guestvcpus && param_idx < nparams) {
3139
        if (VIR_ALLOC_N(sum_cpu_time, need_cpus) < 0)
3140
            goto cleanup;
3141 3142
        if (virCgroupGetPercpuVcpuSum(group, guestvcpus, sum_cpu_time,
                                      need_cpus, cpumap) < 0)
3143
            goto cleanup;
3144 3145 3146 3147 3148 3149 3150

        for (i = start_cpu; i < need_cpus; i++) {
            if (virTypedParameterAssign(&params[(i - start_cpu) * nparams +
                                                param_idx],
                                        VIR_DOMAIN_CPU_STATS_VCPUTIME,
                                        VIR_TYPED_PARAM_ULLONG,
                                        sum_cpu_time[i]) < 0)
3151
                goto cleanup;
3152 3153 3154
        }

        param_idx++;
3155 3156
    }

3157 3158 3159 3160 3161
    ret = param_idx;

 cleanup:
    virBitmapFree(cpumap);
    return ret;
3162 3163
}

3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213

/**
 * virCgroupGetDomainTotalCpuStats:
 * @group: cgroup to read the cpu accounting data from
 * @params: typed parameter array where data is returned
 * @nparams: number of entries available in @params
 *
 * Stores the total cputime in entry 0 and, when @nparams leaves room for
 * them, the user time in entry 1 and the system time in entry 2.
 *
 * The accounting getters return -1 (not a negative errno) and report
 * their own error, so failures are propagated as-is rather than being
 * re-reported through virReportSystemError(), which would replace the
 * specific error with a bogus errno value.
 *
 * Returns: CGROUP_NB_TOTAL_CPU_STAT_PARAM when @nparams is 0, otherwise
 * @nparams capped at CGROUP_NB_TOTAL_CPU_STAT_PARAM, or -1 on error.
 */
int
virCgroupGetDomainTotalCpuStats(virCgroupPtr group,
                                virTypedParameterPtr params,
                                int nparams)
{
    unsigned long long cpu_time;

    if (nparams == 0) /* return supported number of params */
        return CGROUP_NB_TOTAL_CPU_STAT_PARAM;

    /* entry 0 is cputime */
    if (virCgroupGetCpuacctUsage(group, &cpu_time) < 0)
        return -1;

    if (virTypedParameterAssign(&params[0], VIR_DOMAIN_CPU_STATS_CPUTIME,
                                VIR_TYPED_PARAM_ULLONG, cpu_time) < 0)
        return -1;

    if (nparams > 1) {
        unsigned long long user;
        unsigned long long sys;

        if (virCgroupGetCpuacctStat(group, &user, &sys) < 0)
            return -1;

        /* entry 1 is usertime */
        if (virTypedParameterAssign(&params[1],
                                    VIR_DOMAIN_CPU_STATS_USERTIME,
                                    VIR_TYPED_PARAM_ULLONG, user) < 0)
            return -1;

        /* entry 2 is systemtime, only if the caller has room for it */
        if (nparams > 2 &&
            virTypedParameterAssign(&params[2],
                                    VIR_DOMAIN_CPU_STATS_SYSTEMTIME,
                                    VIR_TYPED_PARAM_ULLONG, sys) < 0)
            return -1;

        if (nparams > CGROUP_NB_TOTAL_CPU_STAT_PARAM)
            nparams = CGROUP_NB_TOTAL_CPU_STAT_PARAM;
    }

    return nparams;
}


E
Eric Blake 已提交
3214 3215
/**
 * virCgroupSetCpuShares:
 *
 * @group: The cgroup to change cpu.shares for
 * @shares: The value to write
 *
 * Returns: the result of virCgroupSetValueU64()
 */
int
virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares)
{
    const char *key = "cpu.shares";

    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_CPU,
                                key, shares);
}

E
Eric Blake 已提交
3222 3223 3224

/**
 * virCgroupGetCpuShares:
 *
 * @group: The cgroup to get cpu.shares for
 * @shares: Pointer to the returned value
 *
 * Returns: the result of virCgroupGetValueU64()
 */
int
virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares)
{
    const char *key = "cpu.shares";

    return virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_CPU,
                                key, shares);
}
3230

E
Eric Blake 已提交
3231

3232 3233 3234 3235 3236 3237 3238 3239
/**
 * virCgroupSetCpuCfsPeriod:
 *
 * @group: The cgroup to change cpu.cfs_period_us for
 * @cfs_period: The bandwidth period in usecs
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
3240 3241
int
virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period)
3242
{
3243
    /* The cfs_period should be greater or equal than 1ms, and less or equal
3244 3245
     * than 1s.
     */
3246 3247 3248 3249 3250 3251
    if (cfs_period < 1000 || cfs_period > 1000000) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("cfs_period '%llu' must be in range (1000, 1000000)"),
                       cfs_period);
        return -1;
    }
3252 3253 3254 3255 3256 3257

    return virCgroupSetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_period_us", cfs_period);
}

E
Eric Blake 已提交
3258

3259 3260 3261 3262 3263 3264 3265 3266
/**
 * virCgroupGetCpuCfsPeriod:
 *
 * @group: The cgroup to get cpu.cfs_period_us for
 * @cfs_period: Pointer to the returned bandwidth period in usecs
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
3267 3268
int
virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period)
3269 3270 3271 3272 3273 3274
{
    return virCgroupGetValueU64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_period_us", cfs_period);
}

E
Eric Blake 已提交
3275

3276 3277 3278 3279 3280 3281 3282 3283 3284
/**
 * virCgroupSetCpuCfsQuota:
 *
 * @group: The cgroup to change cpu.cfs_quota_us for
 * @cfs_quota: the cpu bandwidth (in usecs) that this tg will be allowed to
 *             consume over period
 *
 * Returns: 0 on success
 */
E
Eric Blake 已提交
3285 3286
int
virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota)
3287
{
3288 3289 3290 3291 3292 3293 3294 3295
    /* The cfs_quota should be greater or equal than 1ms */
    if (cfs_quota >= 0 &&
        (cfs_quota < 1000 ||
         cfs_quota > ULLONG_MAX / 1000)) {
        virReportError(VIR_ERR_INVALID_ARG,
                       _("cfs_quota '%lld' must be in range (1000, %llu)"),
                       cfs_quota, ULLONG_MAX / 1000);
        return -1;
3296 3297 3298 3299 3300 3301 3302
    }

    return virCgroupSetValueI64(group,
                                VIR_CGROUP_CONTROLLER_CPU,
                                "cpu.cfs_quota_us", cfs_quota);
}

E
Eric Blake 已提交
3303 3304 3305

/**
 * virCgroupGetCpuacctPercpuUsage:
 *
 * @group: The cgroup to read cpuacct.usage_percpu from
 * @usage: Pointer to the returned string
 *
 * Returns: the result of virCgroupGetValueStr()
 */
int
virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage)
{
    const char *key = "cpuacct.usage_percpu";

    return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                                key, usage);
}

E
Eric Blake 已提交
3311

3312
static int
E
Eric Blake 已提交
3313 3314 3315 3316 3317
virCgroupRemoveRecursively(char *grppath)
{
    DIR *grpdir;
    struct dirent *ent;
    int rc = 0;
E
Eric Blake 已提交
3318
    int direrr;
E
Eric Blake 已提交
3319

J
Ján Tomko 已提交
3320
    if (virDirOpenQuiet(&grpdir, grppath) < 0) {
E
Eric Blake 已提交
3321 3322 3323 3324 3325 3326 3327
        if (errno == ENOENT)
            return 0;
        rc = -errno;
        VIR_ERROR(_("Unable to open %s (%d)"), grppath, errno);
        return rc;
    }

E
Eric Blake 已提交
3328 3329 3330
    /* This is best-effort cleanup: we want to log failures with just
     * VIR_ERROR instead of normal virReportError */
    while ((direrr = virDirRead(grpdir, &ent, NULL)) > 0) {
3331
        VIR_AUTOFREE(char *) path = NULL;
E
Eric Blake 已提交
3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342

        if (ent->d_type != DT_DIR) continue;

        if (virAsprintf(&path, "%s/%s", grppath, ent->d_name) == -1) {
            rc = -ENOMEM;
            break;
        }
        rc = virCgroupRemoveRecursively(path);
        if (rc != 0)
            break;
    }
E
Eric Blake 已提交
3343 3344 3345 3346 3347
    if (direrr < 0) {
        rc = -errno;
        VIR_ERROR(_("Failed to readdir for %s (%d)"), grppath, errno);
    }

J
Ján Tomko 已提交
3348
    VIR_DIR_CLOSE(grpdir);
E
Eric Blake 已提交
3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379

    VIR_DEBUG("Removing cgroup %s", grppath);
    if (rmdir(grppath) != 0 && errno != ENOENT) {
        rc = -errno;
        VIR_ERROR(_("Unable to remove %s (%d)"), grppath, errno);
    }

    return rc;
}


/**
 * virCgroupRemove:
 *
 * @group: The group to be removed
 *
 * For every mounted controller the child groups are removed
 * recursively in depth first order before @group itself, because the
 * presence of child groups prevents removing @group.  The systemd
 * hierarchy and a placement of "/" are never touched.
 *
 * Returns: 0 on success; the value is the result for the last
 * controller that was actually processed
 */
int
virCgroupRemove(virCgroupPtr group)
{
    int ret = 0;
    size_t ctrl;

    VIR_DEBUG("Removing cgroup %s", group->path);

    for (ctrl = 0; ctrl < VIR_CGROUP_CONTROLLER_LAST; ctrl++) {
        VIR_AUTOFREE(char *) dirpath = NULL;

        /* Skip over controllers not mounted; never rmdir() in systemd's
         * hierarchy; don't delete the root group, if we accidentally
         * ended up in it for some reason */
        if (!group->controllers[ctrl].mountPoint ||
            ctrl == VIR_CGROUP_CONTROLLER_SYSTEMD ||
            STREQ(group->controllers[ctrl].placement, "/"))
            continue;

        if (virCgroupPathOfController(group, ctrl, NULL, &dirpath) != 0)
            continue;

        VIR_DEBUG("Removing cgroup %s and all child cgroups", dirpath);
        ret = virCgroupRemoveRecursively(dirpath);
    }

    VIR_DEBUG("Done removing cgroup %s", group->path);

    return ret;
}


3410 3411 3412
/*
 * Returns 1 if some PIDs are killed, 0 if none are killed, or -1 on error
 */
E
Eric Blake 已提交
3413 3414
static int
virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr pids)
3415
{
3416
    int ret = -1;
3417
    bool killedAny = false;
3418
    VIR_AUTOFREE(char *) keypath = NULL;
3419
    bool done = false;
E
Eric Blake 已提交
3420 3421 3422
    FILE *fp = NULL;
    VIR_DEBUG("group=%p path=%s signum=%d pids=%p",
              group, group->path, signum, pids);
3423

3424
    if (virCgroupPathOfController(group, -1, "tasks", &keypath) < 0)
3425
        return -1;
3426 3427 3428 3429 3430 3431 3432

    /* PIDs may be forking as we kill them, so loop
     * until there are no new PIDs found
     */
    while (!done) {
        done = true;
        if (!(fp = fopen(keypath, "r"))) {
3433 3434 3435 3436 3437 3438
            if (errno == ENOENT) {
                VIR_DEBUG("No file %s, assuming done", keypath);
                killedAny = false;
                goto done;
            }

3439 3440 3441
            virReportSystemError(errno,
                                 _("Failed to read %s"),
                                 keypath);
3442 3443 3444
            goto cleanup;
        } else {
            while (!feof(fp)) {
M
Michal Privoznik 已提交
3445 3446
                long pid_value;
                if (fscanf(fp, "%ld", &pid_value) != 1) {
3447 3448
                    if (feof(fp))
                        break;
3449 3450 3451
                    virReportSystemError(errno,
                                         _("Failed to read %s"),
                                         keypath);
E
Eric Blake 已提交
3452
                    goto cleanup;
3453
                }
3454
                if (virHashLookup(pids, (void*)pid_value))
3455 3456
                    continue;

M
Michal Privoznik 已提交
3457
                VIR_DEBUG("pid=%ld", pid_value);
3458 3459
                /* Cgroups is a Linux concept, so this cast is safe.  */
                if (kill((pid_t)pid_value, signum) < 0) {
3460
                    if (errno != ESRCH) {
3461
                        virReportSystemError(errno,
M
Michal Privoznik 已提交
3462
                                             _("Failed to kill process %ld"),
3463
                                             pid_value);
3464 3465 3466 3467
                        goto cleanup;
                    }
                    /* Leave RC == 0 since we didn't kill one */
                } else {
3468
                    killedAny = true;
3469 3470 3471
                    done = false;
                }

3472
                ignore_value(virHashAddEntry(pids, (void*)pid_value, (void*)1));
3473 3474 3475 3476 3477
            }
            VIR_FORCE_FCLOSE(fp);
        }
    }

3478
 done:
3479
    ret = killedAny ? 1 : 0;
3480

3481
 cleanup:
E
Eric Blake 已提交
3482
    VIR_FORCE_FCLOSE(fp);
3483

3484
    return ret;
3485 3486 3487
}


E
Eric Blake 已提交
3488 3489
/* Hash callback for the PID table: the key pointer itself carries the
 * PID value, which is converted back to a long and hashed with @seed. */
static uint32_t
virCgroupPidCode(const void *name, uint32_t seed)
{
    intptr_t raw = (intptr_t)name;
    long pid_value = (long)raw;

    return virHashCodeGen(&pid_value, sizeof(pid_value), seed);
}
E
Eric Blake 已提交
3494 3495 3496 3497


/* Key comparison callback for the PID table: two keys are equal when
 * the pointer values are identical; nothing is dereferenced. */
static bool
virCgroupPidEqual(const void *namea, const void *nameb)
{
    if (namea != nameb)
        return false;

    return true;
}
E
Eric Blake 已提交
3501 3502 3503 3504


/* Key copy callback for the PID table: the key is returned as-is with
 * its const qualifier dropped; no memory is allocated. */
static void *
virCgroupPidCopy(const void *name)
{
    void *copy = (void *)name;

    return copy;
}

E
Eric Blake 已提交
3509 3510 3511 3512 3513 3514

/*
 * virCgroupKillRecursiveInternal:
 * @group: cgroup to start from
 * @signum: signal number to deliver
 * @pids: hash table of PIDs that were already handled
 * @dormdir: when true, each child cgroup is removed with virCgroupRemove()
 *           after it has been processed
 *
 * Signals the tasks of @group and then of every child cgroup found as a
 * sub-directory of the group's path.  The recursion into children always
 * passes true for @dormdir.
 *
 * Returns 1 if some PIDs are killed, 0 if none are killed, or -1 on error
 */
static int
virCgroupKillRecursiveInternal(virCgroupPtr group,
                               int signum,
                               virHashTablePtr pids,
                               bool dormdir)
{
    int ret = -1;
    int rc;
    int readrc;
    bool anyKilled = false;
    DIR *dir = NULL;
    struct dirent *entry;
    virCgroupPtr child = NULL;
    VIR_AUTOFREE(char *) grouppath = NULL;

    VIR_DEBUG("group=%p path=%s signum=%d pids=%p",
              group, group->path, signum, pids);

    if (virCgroupPathOfController(group, -1, "", &grouppath) < 0)
        return -1;

    rc = virCgroupKillInternal(group, signum, pids);
    if (rc < 0)
        goto cleanup;
    anyKilled = (rc == 1);

    VIR_DEBUG("Iterate over children of %s (killedAny=%d)", grouppath, anyKilled);
    rc = virDirOpenIfExists(&dir, grouppath);
    if (rc < 0)
        goto cleanup;

    if (rc == 0) {
        VIR_DEBUG("Path %s does not exist, assuming done", grouppath);
        anyKilled = false;
        goto done;
    }

    while ((readrc = virDirRead(dir, &entry, grouppath)) > 0) {
        /* child cgroups show up as sub-directories */
        if (entry->d_type != DT_DIR)
            continue;

        VIR_DEBUG("Process subdir %s", entry->d_name);

        if (virCgroupNew(-1, entry->d_name, group, -1, &child) < 0)
            goto cleanup;

        rc = virCgroupKillRecursiveInternal(child, signum, pids, true);
        if (rc < 0)
            goto cleanup;
        if (rc == 1)
            anyKilled = true;

        if (dormdir)
            virCgroupRemove(child);

        virCgroupFree(&child);
    }
    if (readrc < 0)
        goto cleanup;

 done:
    ret = anyKilled ? 1 : 0;

 cleanup:
    virCgroupFree(&child);
    VIR_DIR_CLOSE(dir);
    return ret;
}

E
Eric Blake 已提交
3577 3578 3579

/**
 * virCgroupKillRecursive:
 *
 * @group: The cgroup whose tasks (and those of all child cgroups) are
 *         signalled
 * @signum: The signal number to deliver
 *
 * Creates the table used to remember which PIDs were already handled
 * and runs the recursive kill with it.  The table is required for the
 * kill loop to make progress: without it no PID is ever recorded as
 * handled, so a failed allocation is treated as an error instead of
 * being passed down.
 *
 * Returns: 1 if some PIDs are killed, 0 if none are killed, or -1 on error
 */
int
virCgroupKillRecursive(virCgroupPtr group, int signum)
{
    int ret;
    virHashTablePtr pids;

    VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum);

    pids = virHashCreateFull(100,
                             NULL,
                             virCgroupPidCode,
                             virCgroupPidEqual,
                             virCgroupPidCopy,
                             NULL);
    if (!pids)
        return -1;

    ret = virCgroupKillRecursiveInternal(group, signum, pids, false);

    virHashFree(pids);

    return ret;
}


E
Eric Blake 已提交
3598 3599
/**
 * virCgroupKillPainfully:
 *
 * @group: The cgroup whose tasks are to be killed
 *
 * Makes up to 15 passes over the cgroup, 200ms apart: the first pass
 * sends SIGTERM, the ninth sends SIGKILL and all others use signal 0 to
 * merely check for existence.  Stops early on error or once a pass
 * finds no PIDs.
 *
 * Returns: the result of the last virCgroupKillRecursive() call
 */
int
virCgroupKillPainfully(virCgroupPtr group)
{
    size_t attempt;
    int ret;

    VIR_DEBUG("cgroup=%p path=%s", group, group->path);

    for (attempt = 0; attempt < 15; attempt++) {
        int signum = 0; /* Just check for existence */

        switch (attempt) {
        case 0:
            signum = SIGTERM;
            break;
        case 8:
            signum = SIGKILL;
            break;
        default:
            break;
        }

        ret = virCgroupKillRecursive(group, signum);
        VIR_DEBUG("Iteration %zu rc=%d", attempt, ret);
        /* If ret == -1 we hit error, if 0 we ran out of PIDs */
        if (ret <= 0)
            break;

        usleep(200 * 1000);
    }

    VIR_DEBUG("Complete %d", ret);
    return ret;
}
3624

E
Eric Blake 已提交
3625 3626 3627

/*
 * virCgroupIdentifyRoot:
 * @group: cgroup whose controller mount points are inspected
 *
 * Takes the mount point of the first mounted controller and returns a
 * newly allocated copy of everything before its last '/'.
 *
 * Returns the allocated string, or NULL with an error reported.
 */
static char *
virCgroupIdentifyRoot(virCgroupPtr group)
{
    char *root = NULL;
    const char *mnt = NULL;
    const char *sep;
    size_t i;

    /* pick the first controller that is mounted */
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        if (group->controllers[i].mountPoint) {
            mnt = group->controllers[i].mountPoint;
            break;
        }
    }

    if (!mnt) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Could not find any mounted controllers"));
        return NULL;
    }

    if (!(sep = strrchr(mnt, '/'))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Could not find directory separator in %s"),
                       mnt);
        return NULL;
    }

    if (VIR_STRNDUP(root, mnt, sep - mnt) < 0)
        return NULL;

    return root;
}


3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685
/**
 * virCgroupGetCpuCfsQuota:
 *
 * @group: The cgroup to get cpu.cfs_quota_us for
 * @cfs_quota: Pointer to the returned cpu bandwidth (in usecs) that this tg
 *             will be allowed to consume over period
 *
 * Returns: 0 on success
 */
int
virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota)
{
    const char *key = "cpu.cfs_quota_us";

    return virCgroupGetValueI64(group, VIR_CGROUP_CONTROLLER_CPU,
                                key, cfs_quota);
}


/**
 * virCgroupGetCpuacctUsage:
 *
 * @group: The cgroup to read cpuacct.usage from
 * @usage: Pointer to the returned value
 *
 * Returns: the result of virCgroupGetValueU64()
 */
int
virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage)
{
    const char *key = "cpuacct.usage";

    return virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                                key, usage);
}


/**
 * virCgroupGetCpuacctStat:
 *
 * @group: The cgroup to read cpuacct.stat from
 * @user: Pointer to the returned user time
 * @sys: Pointer to the returned system time
 *
 * Parses the "user <n>\nsystem <n>" content of cpuacct.stat.  The values
 * are in clock ticks and are scaled to approximate nanoseconds using
 * sysconf(_SC_CLK_TCK); the scale factor is computed once and cached in
 * a static variable.
 *
 * Returns: 0 on success, -1 on error
 */
int
virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user,
                        unsigned long long *sys)
{
    VIR_AUTOFREE(char *) str = NULL;
    char *p;
    char *rest;
    static double scale = -1.0;

    if (virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                             "cpuacct.stat", &str) < 0)
        return -1;

    /* p is NULL when the prefix is missing; never hand NULL to '%s',
     * show the text that failed to match instead */
    if (!(p = STRSKIP(str, "user ")) ||
        virStrToLong_ull(p, &p, 10, user) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot parse user stat '%s'"),
                       p ? p : str);
        return -1;
    }

    /* remember where the user value ended so it can still be reported
     * if the system prefix does not follow */
    rest = p;
    if (!(p = STRSKIP(rest, "\nsystem ")) ||
        virStrToLong_ull(p, NULL, 10, sys) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Cannot parse sys stat '%s'"),
                       p ? p : rest);
        return -1;
    }

    /* times reported are in system ticks (generally 100 Hz), but that
     * rate can theoretically vary between machines.  Scale things
     * into approximate nanoseconds.  */
    if (scale < 0) {
        long ticks_per_sec = sysconf(_SC_CLK_TCK);
        if (ticks_per_sec == -1) {
            virReportSystemError(errno, "%s",
                                 _("Cannot determine system clock HZ"));
            return -1;
        }
        scale = 1000000000.0 / ticks_per_sec;
    }
    *user *= scale;
    *sys *= scale;

    return 0;
}


/**
 * virCgroupSetFreezerState:
 *
 * @group: The cgroup to change freezer.state for
 * @state: The string to write
 *
 * Returns: the result of virCgroupSetValueStr()
 */
int
virCgroupSetFreezerState(virCgroupPtr group, const char *state)
{
    const char *key = "freezer.state";

    return virCgroupSetValueStr(group, VIR_CGROUP_CONTROLLER_FREEZER,
                                key, state);
}


/**
 * virCgroupGetFreezerState:
 *
 * @group: The cgroup to read freezer.state from
 * @state: Pointer to the returned string
 *
 * Returns: the result of virCgroupGetValueStr()
 */
int
virCgroupGetFreezerState(virCgroupPtr group, char **state)
{
    const char *key = "freezer.state";

    return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_FREEZER,
                                key, state);
}


E
Eric Blake 已提交
3745
int
3746 3747
virCgroupBindMount(virCgroupPtr group, const char *oldroot,
                   const char *mountopts)
3748 3749
{
    size_t i;
3750 3751
    VIR_AUTOFREE(char *) opts = NULL;
    VIR_AUTOFREE(char *) root = NULL;
3752 3753 3754 3755 3756 3757 3758 3759 3760 3761

    if (!(root = virCgroupIdentifyRoot(group)))
        return -1;

    VIR_DEBUG("Mounting cgroups at '%s'", root);

    if (virFileMakePath(root) < 0) {
        virReportSystemError(errno,
                             _("Unable to create directory %s"),
                             root);
3762
        return -1;
3763 3764 3765
    }

    if (virAsprintf(&opts,
3766
                    "mode=755,size=65536%s", mountopts) < 0)
3767
        return -1;
3768 3769 3770 3771 3772

    if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_NODEV|MS_NOEXEC, opts) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount %s on %s type %s"),
                             "tmpfs", root, "tmpfs");
3773
        return -1;
3774 3775
    }

3776
    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
3777 3778 3779 3780
        if (!group->controllers[i].mountPoint)
            continue;

        if (!virFileExists(group->controllers[i].mountPoint)) {
3781
            VIR_AUTOFREE(char *) src = NULL;
3782
            if (virAsprintf(&src, "%s%s",
3783
                            oldroot,
3784
                            group->controllers[i].mountPoint) < 0)
3785
                return -1;
3786

E
Eric Blake 已提交
3787 3788
            VIR_DEBUG("Create mount point '%s'",
                      group->controllers[i].mountPoint);
3789 3790 3791 3792
            if (virFileMakePath(group->controllers[i].mountPoint) < 0) {
                virReportSystemError(errno,
                                     _("Unable to create directory %s"),
                                     group->controllers[i].mountPoint);
3793
                return -1;
3794 3795
            }

3796
            if (mount(src, group->controllers[i].mountPoint, "none", MS_BIND,
E
Eric Blake 已提交
3797
                      NULL) < 0) {
3798 3799 3800
                virReportSystemError(errno,
                                     _("Failed to bind cgroup '%s' on '%s'"),
                                     src, group->controllers[i].mountPoint);
3801
                return -1;
3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814
            }
        }

        if (group->controllers[i].linkPoint) {
            VIR_DEBUG("Link mount point '%s' to '%s'",
                      group->controllers[i].mountPoint,
                      group->controllers[i].linkPoint);
            if (symlink(group->controllers[i].mountPoint,
                        group->controllers[i].linkPoint) < 0) {
                virReportSystemError(errno,
                                     _("Unable to symlink directory %s to %s"),
                                     group->controllers[i].mountPoint,
                                     group->controllers[i].linkPoint);
3815
                return -1;
3816 3817 3818 3819
            }
        }
    }

3820
    return 0;
3821
}
3822 3823


3824 3825 3826 3827 3828 3829 3830 3831
/**
 * virCgroupSetOwner:
 * @cgroup: cgroup whose per-controller directories are re-owned
 * @uid: user ID passed to chown()
 * @gid: group ID passed to chown()
 * @controllers: bitmask; bit (1 << i) selects controller index i
 *
 * For every selected controller that has a mount point, chown()s each
 * entry returned by virDirRead() for the directory named by the
 * controller's mountPoint followed by its placement, and afterwards the
 * directory itself.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
int virCgroupSetOwner(virCgroupPtr cgroup,
                      uid_t uid,
                      gid_t gid,
                      int controllers)
{
    int ret = -1;
    size_t i;
    DIR *dh = NULL;
    int direrr;

    for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
        VIR_AUTOFREE(char *) base = NULL;
        struct dirent *de;

        /* Only touch controllers the caller asked for. */
        if (!((1 << i) & controllers))
            continue;

        if (!cgroup->controllers[i].mountPoint)
            continue;

        if (virAsprintf(&base, "%s%s", cgroup->controllers[i].mountPoint,
                        cgroup->controllers[i].placement) < 0)
            goto cleanup;

        if (virDirOpen(&dh, base) < 0)
            goto cleanup;

        /* Entries first, the containing directory afterwards. */
        while ((direrr = virDirRead(dh, &de, base)) > 0) {
            VIR_AUTOFREE(char *) entry = NULL;

            if (virAsprintf(&entry, "%s/%s", base, de->d_name) < 0)
                goto cleanup;

            if (chown(entry, uid, gid) < 0) {
                virReportSystemError(errno,
                                     _("cannot chown '%s' to (%u, %u)"),
                                     entry, uid, gid);
                goto cleanup;
            }
        }
        if (direrr < 0)
            goto cleanup;

        if (chown(base, uid, gid) < 0) {
            virReportSystemError(errno,
                                 _("cannot chown '%s' to (%u, %u)"),
                                 base, uid, gid);
            goto cleanup;
        }

        /* Close per iteration so the handle can be reused for the next
         * controller; the cleanup label covers the error paths. */
        VIR_DIR_CLOSE(dh);
    }

    ret = 0;

 cleanup:
    VIR_DIR_CLOSE(dh);
    return ret;
}


3885 3886 3887 3888 3889 3890 3891 3892 3893 3894
/**
 * virCgroupSupportsCpuBW():
 * @cgroup: cgroup to probe; may be NULL
 *
 * Check whether the host supports CFS bandwidth by testing for the
 * existence of the "cpu.cfs_period_us" file of the CPU controller.
 *
 * Return true when CFS bandwidth is supported,
 * false when CFS bandwidth is not supported, when @cgroup is NULL,
 * or when the controller path cannot be determined (the error from
 * that lookup is reset).
 */
bool
virCgroupSupportsCpuBW(virCgroupPtr cgroup)
{
    VIR_AUTOFREE(char *) path = NULL;

    if (!cgroup)
        return false;

    if (virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_CPU,
                                  "cpu.cfs_period_us", &path) < 0) {
        /* This is only a probe: do not leave an error behind. */
        virResetLastError();
        return false;
    }

    return virFileExists(path);
}

3909 3910 3911 3912
/**
 * virCgroupHasEmptyTasks:
 * @cgroup: cgroup to inspect
 * @controller: controller whose "tasks" value is read
 *
 * Returns 1 if the "tasks" value read for @controller is an empty
 * string, 0 if it is non-empty, and -1 if @cgroup is NULL.  Any
 * non-zero result of virCgroupGetValueStr() is returned unchanged.
 */
int
virCgroupHasEmptyTasks(virCgroupPtr cgroup, int controller)
{
    int ret = -1;
    VIR_AUTOFREE(char *) content = NULL;

    if (!cgroup)
        return -1;

    ret = virCgroupGetValueStr(cgroup, controller, "tasks", &content);

    /* content is only inspected when the read succeeded. */
    if (ret == 0 && content[0] == '\0')
        ret = 1;

    return ret;
}
3925

3926 3927 3928
bool
virCgroupControllerAvailable(int controller)
{
3929 3930
    virCgroupPtr cgroup;
    bool ret = false;
3931 3932

    if (virCgroupNewSelf(&cgroup) < 0)
3933
        return ret;
3934

3935
    ret = virCgroupHasController(cgroup, controller);
3936
    virCgroupFree(&cgroup);
3937
    return ret;
3938 3939
}

3940 3941
#else /* !VIR_CGROUP_SUPPORTED */

3942 3943 3944 3945 3946 3947 3948
/* !VIR_CGROUP_SUPPORTED stub: always returns false. */
bool
virCgroupAvailable(void)
{
    return false;
}


3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupDetectMountsFromFile(virCgroupPtr group ATTRIBUTE_UNUSED,
                              const char *path ATTRIBUTE_UNUSED,
                              bool checkLinks ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewPartition(const char *path ATTRIBUTE_UNUSED,
                      bool create ATTRIBUTE_UNUSED,
                      int controllers ATTRIBUTE_UNUSED,
                      virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewSelf(virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewDomainPartition(virCgroupPtr partition ATTRIBUTE_UNUSED,
                            const char *driver ATTRIBUTE_UNUSED,
                            const char *name ATTRIBUTE_UNUSED,
                            bool create ATTRIBUTE_UNUSED,
                            virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewThread(virCgroupPtr domain ATTRIBUTE_UNUSED,
                   virCgroupThreadName nameval ATTRIBUTE_UNUSED,
                   int id ATTRIBUTE_UNUSED,
                   bool create ATTRIBUTE_UNUSED,
                   virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewDetect(pid_t pid ATTRIBUTE_UNUSED,
                   int controllers ATTRIBUTE_UNUSED,
                   virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4018 4019 4020 4021 4022
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewDetectMachine(const char *name ATTRIBUTE_UNUSED,
                          const char *drivername ATTRIBUTE_UNUSED,
                          pid_t pid ATTRIBUTE_UNUSED,
                          int controllers ATTRIBUTE_UNUSED,
                          char *machinename ATTRIBUTE_UNUSED,
                          virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

J
Ján Tomko 已提交
4031

4032
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int virCgroupTerminateMachine(const char *name ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4040 4041 4042 4043 4044 4045 4046
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupNewMachine(const char *name ATTRIBUTE_UNUSED,
                    const char *drivername ATTRIBUTE_UNUSED,
                    const unsigned char *uuid ATTRIBUTE_UNUSED,
                    const char *rootdir ATTRIBUTE_UNUSED,
                    pid_t pidleader ATTRIBUTE_UNUSED,
                    bool isContainer ATTRIBUTE_UNUSED,
                    size_t nnicindexes ATTRIBUTE_UNUSED,
                    int *nicindexes ATTRIBUTE_UNUSED,
                    const char *partition ATTRIBUTE_UNUSED,
                    int controllers ATTRIBUTE_UNUSED,
                    virCgroupPtr *group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: logs a debug message and returns true. */
bool
virCgroupNewIgnoreError(void)
{
    VIR_DEBUG("No cgroups present/configured/accessible, ignoring error");
    return true;
}

4066 4067

void
4068
virCgroupFree(virCgroupPtr *group ATTRIBUTE_UNUSED)
4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
}


/* !VIR_CGROUP_SUPPORTED stub: always returns false. */
bool
virCgroupHasController(virCgroupPtr cgroup ATTRIBUTE_UNUSED,
                       int controller ATTRIBUTE_UNUSED)
{
    return false;
}


4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupPathOfController(virCgroupPtr group ATTRIBUTE_UNUSED,
                          int controller ATTRIBUTE_UNUSED,
                          const char *key ATTRIBUTE_UNUSED,
                          char **path ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4095 4096 4097 4098 4099 4100 4101 4102 4103 4104
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupAddTask(virCgroupPtr group ATTRIBUTE_UNUSED,
                 pid_t pid ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4105 4106 4107 4108 4109 4110 4111 4112 4113 4114
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupAddMachineTask(virCgroupPtr group ATTRIBUTE_UNUSED,
                        pid_t pid ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupGetBlkioIoServiced(virCgroupPtr group ATTRIBUTE_UNUSED,
                            long long *bytes_read ATTRIBUTE_UNUSED,
                            long long *bytes_write ATTRIBUTE_UNUSED,
                            long long *requests_read ATTRIBUTE_UNUSED,
                            long long *requests_write ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupGetBlkioIoDeviceServiced(virCgroupPtr group ATTRIBUTE_UNUSED,
                                  const char *path ATTRIBUTE_UNUSED,
                                  long long *bytes_read ATTRIBUTE_UNUSED,
                                  long long *bytes_write ATTRIBUTE_UNUSED,
                                  long long *requests_read ATTRIBUTE_UNUSED,
                                  long long *requests_write ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161
/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupSetBlkioWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
                        unsigned int weight ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupGetBlkioWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
                        unsigned int *weight ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4162 4163 4164 4165 4166 4167 4168 4169 4170 4171
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetBlkioDeviceWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
                              const char *path ATTRIBUTE_UNUSED,
                              unsigned int weight ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetBlkioDeviceReadIops(virCgroupPtr group ATTRIBUTE_UNUSED,
                                const char *path ATTRIBUTE_UNUSED,
                                unsigned int riops ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetBlkioDeviceWriteIops(virCgroupPtr group ATTRIBUTE_UNUSED,
                                 const char *path ATTRIBUTE_UNUSED,
                                 unsigned int wiops ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetBlkioDeviceReadBps(virCgroupPtr group ATTRIBUTE_UNUSED,
                               const char *path ATTRIBUTE_UNUSED,
                               unsigned long long rbps ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetBlkioDeviceWriteBps(virCgroupPtr group ATTRIBUTE_UNUSED,
                                const char *path ATTRIBUTE_UNUSED,
                                unsigned long long wbps ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4212 4213 4214
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetBlkioDeviceWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
                              const char *path ATTRIBUTE_UNUSED,
                              unsigned int *weight ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetBlkioDeviceReadIops(virCgroupPtr group ATTRIBUTE_UNUSED,
                                const char *path ATTRIBUTE_UNUSED,
                                unsigned int *riops ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetBlkioDeviceWriteIops(virCgroupPtr group ATTRIBUTE_UNUSED,
                                 const char *path ATTRIBUTE_UNUSED,
                                 unsigned int *wiops ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetBlkioDeviceReadBps(virCgroupPtr group ATTRIBUTE_UNUSED,
                               const char *path ATTRIBUTE_UNUSED,
                               unsigned long long *rbps ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetBlkioDeviceWriteBps(virCgroupPtr group ATTRIBUTE_UNUSED,
                                const char *path ATTRIBUTE_UNUSED,
                                unsigned long long *wbps ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}
4261

4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetMemory(virCgroupPtr group ATTRIBUTE_UNUSED,
                   unsigned long long kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetMemoryUsage(virCgroupPtr group ATTRIBUTE_UNUSED,
                        unsigned long *kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetMemoryHardLimit(virCgroupPtr group ATTRIBUTE_UNUSED,
                            unsigned long long kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetMemoryHardLimit(virCgroupPtr group ATTRIBUTE_UNUSED,
                            unsigned long long *kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetMemorySoftLimit(virCgroupPtr group ATTRIBUTE_UNUSED,
                            unsigned long long kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetMemorySoftLimit(virCgroupPtr group ATTRIBUTE_UNUSED,
                            unsigned long long *kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetMemSwapHardLimit(virCgroupPtr group ATTRIBUTE_UNUSED,
                             unsigned long long kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetMemSwapHardLimit(virCgroupPtr group ATTRIBUTE_UNUSED,
                             unsigned long long *kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetMemSwapUsage(virCgroupPtr group ATTRIBUTE_UNUSED,
                         unsigned long long *kb ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetCpusetMems(virCgroupPtr group ATTRIBUTE_UNUSED,
                       const char *mems ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpusetMems(virCgroupPtr group ATTRIBUTE_UNUSED,
                       char **mems ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetCpusetMemoryMigrate(virCgroupPtr group ATTRIBUTE_UNUSED,
                                bool migrate ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpusetMemoryMigrate(virCgroupPtr group ATTRIBUTE_UNUSED,
                                bool *migrate ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetCpusetCpus(virCgroupPtr group ATTRIBUTE_UNUSED,
                       const char *cpus ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpusetCpus(virCgroupPtr group ATTRIBUTE_UNUSED,
                       char **cpus ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4411 4412 4413 4414 4415 4416 4417 4418
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupAllowAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED,
                         int perms ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}
4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupDenyAllDevices(virCgroupPtr group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupAllowDevice(virCgroupPtr group ATTRIBUTE_UNUSED,
                     char type ATTRIBUTE_UNUSED,
                     int major ATTRIBUTE_UNUSED,
                     int minor ATTRIBUTE_UNUSED,
                     int perms ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4442 4443 4444
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupAllowDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
                         const char *path ATTRIBUTE_UNUSED,
                         int perms ATTRIBUTE_UNUSED,
                         bool ignoreEaccess ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupDenyDevice(virCgroupPtr group ATTRIBUTE_UNUSED,
                    char type ATTRIBUTE_UNUSED,
                    int major ATTRIBUTE_UNUSED,
                    int minor ATTRIBUTE_UNUSED,
                    int perms ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4467 4468 4469
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupDenyDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
                        const char *path ATTRIBUTE_UNUSED,
                        int perms ATTRIBUTE_UNUSED,
                        bool ignoreEacces ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetCpuShares(virCgroupPtr group ATTRIBUTE_UNUSED,
                      unsigned long long shares ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpuShares(virCgroupPtr group ATTRIBUTE_UNUSED,
                      unsigned long long *shares ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetCpuCfsPeriod(virCgroupPtr group ATTRIBUTE_UNUSED,
                         unsigned long long cfs_period ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpuCfsPeriod(virCgroupPtr group ATTRIBUTE_UNUSED,
                         unsigned long long *cfs_period ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetCpuCfsQuota(virCgroupPtr group ATTRIBUTE_UNUSED,
                        long long cfs_quota ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENXIO and returns -1. */
int
virCgroupRemove(virCgroupPtr group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENXIO, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupKillRecursive(virCgroupPtr group ATTRIBUTE_UNUSED,
                       int signum ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupKillPainfully(virCgroupPtr group ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpuCfsQuota(virCgroupPtr group ATTRIBUTE_UNUSED,
                        long long *cfs_quota ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpuacctUsage(virCgroupPtr group ATTRIBUTE_UNUSED,
                         unsigned long long *usage ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpuacctPercpuUsage(virCgroupPtr group ATTRIBUTE_UNUSED,
                               char **usage ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetCpuacctStat(virCgroupPtr group ATTRIBUTE_UNUSED,
                        unsigned long long *user ATTRIBUTE_UNUSED,
                        unsigned long long *sys ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetDomainTotalCpuStats(virCgroupPtr group ATTRIBUTE_UNUSED,
                                virTypedParameterPtr params ATTRIBUTE_UNUSED,
                                int nparams ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetFreezerState(virCgroupPtr group ATTRIBUTE_UNUSED,
                         const char *state ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetFreezerState(virCgroupPtr group ATTRIBUTE_UNUSED,
                         char **state ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


E
Eric Blake 已提交
4629
int
4630 4631 4632
virCgroupBindMount(virCgroupPtr group ATTRIBUTE_UNUSED,
                   const char *oldroot ATTRIBUTE_UNUSED,
                   const char *mountopts ATTRIBUTE_UNUSED)
4633
{
4634 4635 4636
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
4637
}
4638

4639 4640 4641 4642 4643 4644 4645 4646

/* !VIR_CGROUP_SUPPORTED stub: logs a debug message and returns false. */
bool
virCgroupSupportsCpuBW(virCgroupPtr cgroup ATTRIBUTE_UNUSED)
{
    VIR_DEBUG("Control groups not supported on this platform");
    return false;
}

E
Eric Blake 已提交
4647 4648 4649 4650 4651 4652

/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupGetPercpuStats(virCgroupPtr group ATTRIBUTE_UNUSED,
                        virTypedParameterPtr params ATTRIBUTE_UNUSED,
                        unsigned int nparams ATTRIBUTE_UNUSED,
                        int start_cpu ATTRIBUTE_UNUSED,
                        unsigned int ncpus ATTRIBUTE_UNUSED,
                        virBitmapPtr guestvcpus ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}


/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupSetOwner(virCgroupPtr cgroup ATTRIBUTE_UNUSED,
                  uid_t uid ATTRIBUTE_UNUSED,
                  gid_t gid ATTRIBUTE_UNUSED,
                  int controllers ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4673 4674 4675 4676 4677 4678 4679 4680 4681
/* !VIR_CGROUP_SUPPORTED stub: reports ENOSYS and returns -1. */
int
virCgroupHasEmptyTasks(virCgroupPtr cgroup ATTRIBUTE_UNUSED,
                       int controller ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Control groups not supported on this platform"));
    return -1;
}

4682 4683 4684 4685 4686
/* !VIR_CGROUP_SUPPORTED stub: always returns false. */
bool
virCgroupControllerAvailable(int controller ATTRIBUTE_UNUSED)
{
    return false;
}
4687
#endif /* !VIR_CGROUP_SUPPORTED */
4688 4689 4690 4691 4692 4693 4694


/**
 * virCgroupDelThread:
 * @cgroup: parent cgroup; when NULL nothing is done
 * @nameval: thread name kind passed to virCgroupNewThread()
 * @idx: thread index passed to virCgroupNewThread()
 *
 * Looks up (without creating) the thread cgroup identified by
 * @nameval and @idx under @cgroup and removes it.
 *
 * Returns 0 on success or when @cgroup is NULL, -1 if the thread
 * cgroup object cannot be obtained.  The result of the removal
 * itself is not reflected in the return value.
 */
int
virCgroupDelThread(virCgroupPtr cgroup,
                   virCgroupThreadName nameval,
                   int idx)
{
    virCgroupPtr new_cgroup = NULL;

    if (cgroup) {
        if (virCgroupNewThread(cgroup, nameval, idx, false, &new_cgroup) < 0)
            return -1;

        /* Remove the offlined cgroup */
        virCgroupRemove(new_cgroup);
        virCgroupFree(&new_cgroup);
    }

    return 0;
}