cgroup.c 32.3 KB
Newer Older
1 2 3
/*
 * cgroup.c: Tools for managing cgroups
 *
4
 * Copyright (C) 2010-2011 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15
 * Copyright IBM Corp. 2008
 *
 * See COPYING.LIB for the License of this software
 *
 * Authors:
 *  Dan Smith <danms@us.ibm.com>
 */
#include <config.h>

#include <stdio.h>
#include <stdint.h>
16
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
17
# include <mntent.h>
D
Daniel P. Berrange 已提交
18
#endif
19 20 21 22 23 24 25 26
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <libgen.h>
27
#include <dirent.h>
28 29 30 31 32

#include "internal.h"
#include "util.h"
#include "memory.h"
#include "cgroup.h"
33
#include "logging.h"
34
#include "files.h"
35 36 37

#define CGROUP_MAX_VAL 512

38
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
R
Ryota Ozaki 已提交
39
              "cpu", "cpuacct", "cpuset", "memory", "devices",
40
              "freezer", "blkio");
41 42 43 44 45 46

struct virCgroupController {
    int type;
    char *mountPoint;
    char *placement;
};
47 48 49

struct virCgroup {
    char *path;
50 51

    struct virCgroupController controllers[VIR_CGROUP_CONTROLLER_LAST];
52 53 54 55 56 57 58 59 60
};

/**
 * virCgroupFree:
 *
 * @group: The group structure to free
 */
void virCgroupFree(virCgroupPtr *group)
{
    struct virCgroupController *ctrl;
    int idx;

    /* Nothing was allocated, so there is nothing to release */
    if (!*group)
        return;

    /* Drop the strings owned by every controller slot */
    for (idx = 0; idx < VIR_CGROUP_CONTROLLER_LAST; idx++) {
        ctrl = &(*group)->controllers[idx];
        VIR_FREE(ctrl->mountPoint);
        VIR_FREE(ctrl->placement);
    }

    /* Finally the path and the structure itself */
    VIR_FREE((*group)->path);
    VIR_FREE(*group);
}

L
Lai Jiangshan 已提交
75 76 77 78 79 80 81 82 83 84 85 86 87
/**
 * virCgroupMounted: query whether a cgroup subsystem is mounted or not
 *
 * @cgroup: The group structure to be queried
 * @controller: cgroup subsystem id
 *
 * Returns true if the cgroup subsystem is mounted.
 */
bool virCgroupMounted(virCgroupPtr cgroup, int controller)
{
    /* A controller counts as mounted once a mount point is recorded for it */
    const char *mnt = cgroup->controllers[controller].mountPoint;

    return mnt ? true : false;
}

88
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
89 90 91 92 93
/*
 * Process /proc/mounts figuring out what controllers are
 * mounted and where
 */
static int virCgroupDetectMounts(virCgroupPtr group)
{
    int i;
    int rc = 0;
    FILE *mounts = NULL;
    struct mntent entry;
    char buf[CGROUP_MAX_VAL];

    mounts = fopen("/proc/mounts", "r");
    if (mounts == NULL) {
        VIR_ERROR0(_("Unable to open /proc/mounts"));
        return -ENOENT;
    }

    while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) {
        /* Only mounts of filesystem type "cgroup" are of interest */
        if (STRNEQ(entry.mnt_type, "cgroup"))
            continue;

        for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
            const char *typestr = virCgroupControllerTypeToString(i);
            int typelen = strlen(typestr);
            char *tmp = entry.mnt_opts;

            /* Walk the comma separated mount options looking for an
             * option that is exactly this controller's name */
            while (tmp) {
                char *next = strchr(tmp, ',');
                int len;

                if (next) {
                    len = next - tmp;
                    next++;
                } else {
                    len = strlen(tmp);
                }

                if (typelen == len && STREQLEN(typestr, tmp, len)) {
                    char *dir = strdup(entry.mnt_dir);

                    if (!dir) {
                        rc = -ENOMEM;
                        goto cleanup;
                    }

                    /* The same controller may show up in more than one
                     * mount entry; the last one still wins, but release
                     * the earlier copy instead of leaking it */
                    VIR_FREE(group->controllers[i].mountPoint);
                    group->controllers[i].mountPoint = dir;
                }

                tmp = next;
            }
        }
    }

cleanup:
    VIR_FORCE_FCLOSE(mounts);

    return rc;
}

140 141 142 143 144

/*
 * Process /proc/self/cgroup figuring out what cgroup
 * sub-path the current process is assigned to. ie not
 * necessarily in the root
145
 */
146
static int virCgroupDetectPlacement(virCgroupPtr group)
{
    int i;
    int rc = 0;
    FILE *mapping = NULL;
    char line[1024];

    mapping = fopen("/proc/self/cgroup", "r");
    if (mapping == NULL) {
        VIR_ERROR0(_("Unable to open /proc/self/cgroup"));
        return -ENOENT;
    }

    while (fgets(line, sizeof(line), mapping) != NULL) {
        /* Each line is split at its first two ':' characters into
         * <ignored>:<controller list>:<path> */
        char *controllers = strchr(line, ':');
        char *path = controllers ? strchr(controllers + 1, ':') : NULL;
        char *nl = path ? strchr(path, '\n') : NULL;

        if (!controllers || !path)
            continue;

        if (nl)
            *nl = '\0';

        /* Terminate the controller list and step past both separators */
        *path = '\0';
        controllers++;
        path++;

        for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
            const char *typestr = virCgroupControllerTypeToString(i);
            int typelen = strlen(typestr);
            char *tmp = controllers;

            /* Look for an exact match of the controller name among the
             * comma separated names on this line */
            while (tmp) {
                char *next = strchr(tmp, ',');
                int len;

                if (next) {
                    len = next - tmp;
                    next++;
                } else {
                    len = strlen(tmp);
                }

                if (typelen == len && STREQLEN(typestr, tmp, len)) {
                    /* The root placement "/" is stored as "" */
                    char *placement = strdup(STREQ(path, "/") ? "" : path);

                    if (!placement) {
                        rc = -ENOMEM;
                        goto cleanup;
                    }

                    /* Release any value recorded by an earlier line
                     * rather than leaking it; the last match wins */
                    VIR_FREE(group->controllers[i].placement);
                    group->controllers[i].placement = placement;
                }

                tmp = next;
            }
        }
    }

cleanup:
    VIR_FORCE_FCLOSE(mapping);

    return rc;
}

205
/*
 * Fill in the mount point and placement of every controller of @group.
 *
 * Returns 0 on success, -ENXIO when no controller is mounted at all,
 * -ENOENT when a mounted controller has no placement, or the negative
 * value returned by the mount/placement detection helpers.
 */
static int virCgroupDetect(virCgroupPtr group)
{
    int any = 0;
    int rc;
    int i;

    rc = virCgroupDetectMounts(group);
    if (rc < 0) {
        VIR_ERROR(_("Failed to detect mounts for %s"), group->path);
        return rc;
    }

    /* Check that at least 1 controller is available */
    for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
        if (group->controllers[i].mountPoint != NULL)
            any = 1;
    }
    if (!any)
        return -ENXIO;

    rc = virCgroupDetectPlacement(group);

    if (rc == 0) {
        /* Check that for every mounted controller, we found our placement */
        for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
            if (!group->controllers[i].mountPoint)
                continue;

            if (!group->controllers[i].placement) {
                /* placement is NULL on this path, so report the mount
                 * point; handing NULL to %s is not valid */
                VIR_ERROR(_("Could not find placement for controller %s at %s"),
                          virCgroupControllerTypeToString(i),
                          group->controllers[i].mountPoint);
                rc = -ENOENT;
                break;
            }

            VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i,
                      virCgroupControllerTypeToString(i),
                      group->controllers[i].mountPoint,
                      group->controllers[i].placement);
        }
    } else {
        VIR_ERROR(_("Failed to detect mapping for %s"), group->path);
    }

    return rc;
}
D
Daniel P. Berrange 已提交
253
#endif
254

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277

/*
 * Build "<mountPoint><placement><group path>/<key>" for @controller and
 * store the newly allocated string in @path.  A group path of "/" and a
 * NULL @key each contribute an empty component.
 *
 * Returns 0 on success, -ENOENT if the controller has no mount point or
 * no placement, -ENOMEM if the string could not be built.
 */
static int virCgroupPathOfController(virCgroupPtr group,
                                     int controller,
                                     const char *key,
                                     char **path)
{
    const struct virCgroupController *ctrl = &group->controllers[controller];
    const char *subdir;
    const char *leaf;

    if (!ctrl->mountPoint)
        return -ENOENT;

    if (!ctrl->placement)
        return -ENOENT;

    subdir = STREQ(group->path, "/") ? "" : group->path;
    leaf = key ? key : "";

    if (virAsprintf(path, "%s%s%s/%s",
                    ctrl->mountPoint, ctrl->placement, subdir, leaf) == -1)
        return -ENOMEM;

    return 0;
}


278
/*
 * Write the string @value to the file @key of @controller in @group.
 *
 * Returns 0 on success, the negative value from
 * virCgroupPathOfController if the path cannot be built, or -errno if
 * the write fails.
 */
static int virCgroupSetValueStr(virCgroupPtr group,
                                int controller,
                                const char *key,
                                const char *value)
{
    int rc = 0;
    char *keypath = NULL;

    rc = virCgroupPathOfController(group, controller, key, &keypath);
    if (rc != 0)
        return rc;

    VIR_DEBUG("Set value '%s' to '%s'", keypath, value);
    rc = virFileWriteStr(keypath, value, 0);
    if (rc < 0) {
        /* Record errno before logging, which may overwrite it */
        rc = -errno;
        DEBUG("Failed to write value '%s': %m", value);
    } else {
        rc = 0;
    }

    VIR_FREE(keypath);

    return rc;
}

/*
 * Read the file @key of @controller in @group into a newly allocated
 * string stored in @value, cut at the first newline.  At most 1024 is
 * passed to virFileReadAll as the read limit.
 *
 * @value is set to NULL before anything else is attempted.
 *
 * Returns 0 on success, the negative value from
 * virCgroupPathOfController if the path cannot be built, or -errno if
 * the read fails.
 */
static int virCgroupGetValueStr(virCgroupPtr group,
                                int controller,
                                const char *key,
                                char **value)
{
    int rc;
    char *keypath = NULL;

    *value = NULL;

    rc = virCgroupPathOfController(group, controller, key, &keypath);
    if (rc != 0) {
        DEBUG("No path of %s, %s", group->path, key);
        return rc;
    }

    VIR_DEBUG("Get value %s", keypath);

    rc = virFileReadAll(keypath, 1024, value);
    if (rc < 0) {
        /* Record errno before logging, which may overwrite it */
        rc = -errno;
        DEBUG("Failed to read %s: %m\n", keypath);
    } else {
        /* Terminated with '\n' has sometimes harmful effects to the caller */
        char *p = strchr(*value, '\n');
        if (p)
            *p = '\0';

        rc = 0;
    }

    VIR_FREE(keypath);

    return rc;
}

339
/*
 * Format @value as an unsigned decimal and write it with
 * virCgroupSetValueStr.  Returns that call's result, or -ENOMEM if the
 * number could not be formatted.
 */
static int virCgroupSetValueU64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                unsigned long long int value)
{
    char *text = NULL;
    int ret;

    if (virAsprintf(&text, "%llu", value) == -1)
        return -ENOMEM;

    ret = virCgroupSetValueStr(group, controller, key, text);
    VIR_FREE(text);

    return ret;
}


358 359

/*
 * Format @value as a signed decimal and write it with
 * virCgroupSetValueStr.  Returns that call's result, or -ENOMEM if the
 * number could not be formatted.
 */
static int virCgroupSetValueI64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                long long int value)
{
    char *text = NULL;
    int ret;

    if (virAsprintf(&text, "%lld", value) == -1)
        return -ENOMEM;

    ret = virCgroupSetValueStr(group, controller, key, text);
    VIR_FREE(text);

    return ret;
}

377 378
#if 0
/* This is included for completeness, but not yet used */
379
static int virCgroupGetValueI64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                long long int *value)
{
    char *text = NULL;
    int ret = virCgroupGetValueStr(group, controller, key, &text);

    /* Only parse when the read worked; a parse failure becomes -EINVAL */
    if (ret == 0 && virStrToLong_ll(text, NULL, 10, value) < 0)
        ret = -EINVAL;

    VIR_FREE(text);

    return ret;
}
398
#endif
399

400
/*
 * Read the file @key with virCgroupGetValueStr and parse it as a base-10
 * unsigned number into @value.  Returns 0 on success, the read error if
 * reading failed, or -EINVAL if the text could not be parsed.
 */
static int virCgroupGetValueU64(virCgroupPtr group,
                                int controller,
                                const char *key,
                                unsigned long long int *value)
{
    char *text = NULL;
    int ret = virCgroupGetValueStr(group, controller, key, &text);

    if (ret == 0 && virStrToLong_ull(text, NULL, 10, value) < 0)
        ret = -EINVAL;

    VIR_FREE(text);

    return ret;
}


421
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
422
/*
 * Copy the cpuset.cpus and cpuset.mems values of @parent into @group.
 * Stops at the first read or write that fails and returns its error;
 * returns 0 when both values were copied.
 */
static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group)
{
    const char *keys[] = {
        "cpuset.cpus",
        "cpuset.mems",
    };
    int ret = 0;
    int idx;

    VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path);

    for (idx = 0; idx < ARRAY_CARDINALITY(keys); idx++) {
        const char *key = keys[idx];
        char *inherited;

        ret = virCgroupGetValueStr(parent, VIR_CGROUP_CONTROLLER_CPUSET,
                                   key, &inherited);
        if (ret != 0) {
            VIR_ERROR(_("Failed to get %s %d"), key, ret);
            return ret;
        }

        VIR_DEBUG("Inherit %s = %s", key, inherited);

        ret = virCgroupSetValueStr(group, VIR_CGROUP_CONTROLLER_CPUSET,
                                   key, inherited);
        VIR_FREE(inherited);

        if (ret != 0) {
            VIR_ERROR(_("Failed to set %s %d"), key, ret);
            return ret;
        }
    }

    return ret;
}

461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
/*
 * Make sure memory.use_hierarchy is 1 for @group.  The current value is
 * read first and the write is skipped when it is already 1.
 * Returns 0 on success or the error of the failing read/write.
 */
static int virCgroupSetMemoryUseHierarchy(virCgroupPtr group)
{
    const char *filename = "memory.use_hierarchy";
    unsigned long long current;
    int ret;

    ret = virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                               filename, &current);
    if (ret != 0) {
        VIR_ERROR(_("Failed to read %s/%s (%d)"), group->path, filename, ret);
        return ret;
    }

    /* Setting twice causes error, so if already enabled, skip setting */
    if (current == 1)
        return 0;

    VIR_DEBUG("Setting up %s/%s", group->path, filename);

    ret = virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                               filename, 1);
    if (ret != 0)
        VIR_ERROR(_("Failed to set %s/%s (%d)"), group->path, filename, ret);

    return ret;
}

/*
 * For every mounted controller make sure the directory of @group exists.
 * A missing directory is created only when @create is non-zero; after a
 * creation the cpuset values are inherited from @parent and, if
 * @memory_hierarchy is set, memory.use_hierarchy is enabled, whenever
 * the controller shares its mount point with cpuset / memory.
 *
 * Returns 0 on success, a negative errno-style value otherwise.
 */
static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group,
                              int create, bool memory_hierarchy)
{
    int ret = 0;
    int idx;

    VIR_DEBUG("Make group %s", group->path);

    for (idx = 0; idx < VIR_CGROUP_CONTROLLER_LAST; idx++) {
        const char *mnt = group->controllers[idx].mountPoint;
        const char *cpusetMnt;
        const char *memoryMnt;
        char *dir = NULL;

        /* Skip over controllers that aren't mounted */
        if (mnt == NULL)
            continue;

        ret = virCgroupPathOfController(group, idx, "", &dir);
        if (ret < 0)
            return ret;
        /* As of Feb 2011, clang can't see that the above function
         * call did not modify group. */
        sa_assert(group->controllers[idx].mountPoint);

        VIR_DEBUG("Make controller %s", dir);

        /* Directory already present: nothing more to do for this one */
        if (access(dir, F_OK) == 0) {
            VIR_FREE(dir);
            continue;
        }

        if (!create ||
            mkdir(dir, 0755) < 0) {
            ret = -errno;
            VIR_FREE(dir);
            break;
        }

        cpusetMnt = group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint;
        if (cpusetMnt != NULL &&
            (idx == VIR_CGROUP_CONTROLLER_CPUSET || STREQ(mnt, cpusetMnt))) {
            ret = virCgroupCpuSetInherit(parent, group);
            if (ret != 0) {
                VIR_FREE(dir);
                break;
            }
        }

        /*
         * Note that virCgroupSetMemoryUseHierarchy should always be
         * called prior to creating subcgroups and attaching tasks.
         */
        memoryMnt = group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint;
        if (memory_hierarchy &&
            memoryMnt != NULL &&
            (idx == VIR_CGROUP_CONTROLLER_MEMORY || STREQ(mnt, memoryMnt))) {
            ret = virCgroupSetMemoryUseHierarchy(group);
            if (ret != 0) {
                VIR_FREE(dir);
                break;
            }
        }

        VIR_FREE(dir);
    }

    return ret;
}

551 552 553

/*
 * Allocate a cgroup handle for @path and detect its controllers.
 *
 * On success the new handle is stored in @group and 0 is returned.
 * On failure @group is set to NULL and a negative value is returned:
 * -ENOMEM on allocation failure, otherwise the error from
 * virCgroupDetect.
 */
static int virCgroupNew(const char *path,
                        virCgroupPtr *group)
{
    int rc = 0;

    VIR_DEBUG("New group %s", path);
    *group = NULL;

    if (VIR_ALLOC((*group)) != 0) {
        rc = -ENOMEM;
        goto err;
    }

    if (!((*group)->path = strdup(path))) {
        rc = -ENOMEM;
        goto err;
    }

    rc = virCgroupDetect(*group);
    if (rc < 0)
        goto err;

    return rc;

err:
    virCgroupFree(group);
    *group = NULL;

    return rc;
}

585
static int virCgroupAppRoot(int privileged,
586 587
                            virCgroupPtr *group,
                            int create)
588
{
589 590
    virCgroupPtr rootgrp = NULL;
    int rc;
591

592 593 594
    rc = virCgroupNew("/", &rootgrp);
    if (rc != 0)
        return rc;
595

596 597 598 599 600
    if (privileged) {
        rc = virCgroupNew("/libvirt", group);
    } else {
        char *rootname;
        char *username;
601
        username = virGetUserName(getuid());
602 603 604 605 606 607 608 609 610 611
        if (!username) {
            rc = -ENOMEM;
            goto cleanup;
        }
        rc = virAsprintf(&rootname, "/libvirt-%s", username);
        VIR_FREE(username);
        if (rc < 0) {
            rc = -ENOMEM;
            goto cleanup;
        }
612

613 614
        rc = virCgroupNew(rootname, group);
        VIR_FREE(rootname);
615 616
    }
    if (rc != 0)
617
        goto cleanup;
618

619
    rc = virCgroupMakeGroup(rootgrp, *group, create, false);
620

621 622
cleanup:
    virCgroupFree(&rootgrp);
623 624
    return rc;
}
D
Daniel P. Berrange 已提交
625
#endif
626

627
#if defined _DIRENT_HAVE_D_TYPE
628 629 630 631 632 633 634 635
/*
 * Remove the directory @grppath after removing, depth first, every
 * sub-directory whose name does not start with '.'.
 *
 * A directory that does not exist is treated as already removed.
 * Returns 0 on success or a negative errno-style value.
 */
static int virCgroupRemoveRecursively(char *grppath)
{
    DIR *grpdir;
    struct dirent *ent;
    int rc = 0;

    grpdir = opendir(grppath);
    if (grpdir == NULL) {
        if (errno == ENOENT)
            return 0;
        /* Record errno before logging, which may overwrite it */
        rc = -errno;
        VIR_ERROR(_("Unable to open %s (%d)"), grppath, errno);
        return rc;
    }

    for (;;) {
        char *path;

        /* readdir returns NULL both at the end and on error; clearing
         * errno first tells the two apart */
        errno = 0;
        ent = readdir(grpdir);
        if (ent == NULL) {
            if ((rc = -errno))
                VIR_ERROR(_("Failed to readdir for %s (%d)"), grppath, errno);
            break;
        }

        if (ent->d_name[0] == '.') continue;
        if (ent->d_type != DT_DIR) continue;

        if (virAsprintf(&path, "%s/%s", grppath, ent->d_name) == -1) {
            rc = -ENOMEM;
            break;
        }
        rc = virCgroupRemoveRecursively(path);
        VIR_FREE(path);
        if (rc != 0)
            break;
    }
    closedir(grpdir);

    DEBUG("Removing cgroup %s", grppath);
    if (rmdir(grppath) != 0 && errno != ENOENT) {
        rc = -errno;
        VIR_ERROR(_("Unable to remove %s (%d)"), grppath, errno);
    }

    return rc;
}
676 677 678 679 680 681 682
#else
static int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED)
{
    /* Built without _DIRENT_HAVE_D_TYPE: report removal as unsupported */
    const int unsupported = -ENXIO;

    return unsupported;
}
#endif
683

684 685 686 687 688
/**
 * virCgroupRemove:
 *
 * @group: The group to be removed
 *
689 690 691 692 693
 * It first removes all child groups recursively
 * in depth first order and then removes @group
 * because the presence of the child groups
 * prevents removing @group.
 *
694 695 696 697 698 699 700 701
 * Returns: 0 on success
 */
int virCgroupRemove(virCgroupPtr group)
{
    int rc = 0;
    int i;
    char *grppath = NULL;

    for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
        int tmp;

        /* Skip over controllers not mounted */
        if (!group->controllers[i].mountPoint)
            continue;

        if (virCgroupPathOfController(group,
                                      i,
                                      NULL,
                                      &grppath) != 0)
            continue;

        DEBUG("Removing cgroup %s and all child cgroups", grppath);
        tmp = virCgroupRemoveRecursively(grppath);
        VIR_FREE(grppath);

        /* Keep going so every controller is attempted, but remember the
         * first failure instead of letting a later success hide it */
        if (tmp != 0 && rc == 0)
            rc = tmp;
    }

    return rc;
}

/**
 * virCgroupAddTask:
 *
 * @group: The cgroup to add a task to
 * @pid: The pid of the task to add
 *
 * Returns: 0 on success
 */
int virCgroupAddTask(virCgroupPtr group, pid_t pid)
{
    const unsigned long long task = (unsigned long long)pid;
    int ret = 0;
    int idx;

    /* Write the pid to the "tasks" file of each mounted controller,
     * giving up at the first write that fails */
    for (idx = 0; idx < VIR_CGROUP_CONTROLLER_LAST; idx++) {
        if (group->controllers[idx].mountPoint == NULL)
            continue;

        ret = virCgroupSetValueU64(group, idx, "tasks", task);
        if (ret != 0)
            return ret;
    }

    return ret;
}

747

748
/**
749
 * virCgroupForDriver:
750
 *
751
 * @name: name of this driver (e.g., xen, qemu, lxc)
752 753 754 755
 * @group: Pointer to returned virCgroupPtr
 *
 * Returns 0 on success
 */
756
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
757 758 759 760
int virCgroupForDriver(const char *name,
                       virCgroupPtr *group,
                       int privileged,
                       int create)
{
    virCgroupPtr approot = NULL;
    char *driverPath = NULL;
    int ret;

    /* The driver group lives directly below the application root group */
    ret = virCgroupAppRoot(privileged, &approot, create);
    if (ret != 0)
        goto done;

    if (virAsprintf(&driverPath, "%s/%s", approot->path, name) < 0) {
        ret = -ENOMEM;
        goto done;
    }

    ret = virCgroupNew(driverPath, group);
    VIR_FREE(driverPath);
    if (ret != 0)
        goto done;

    /* Release the new handle again if its directories can't be set up */
    ret = virCgroupMakeGroup(approot, *group, create, false);
    if (ret != 0)
        virCgroupFree(group);

done:
    virCgroupFree(&approot);

    return ret;
}
D
Daniel P. Berrange 已提交
789 790 791 792 793 794 795 796 797 798
#else
int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED,
                       virCgroupPtr *group ATTRIBUTE_UNUSED,
                       int privileged ATTRIBUTE_UNUSED,
                       int create ATTRIBUTE_UNUSED)
{
    /* Built without mntent.h / getmntent_r: claim no support */
    const int unsupported = -ENXIO;

    return unsupported;
}
#endif
799

800 801 802 803 804 805 806 807 808 809

/**
 * virCgroupForDomain:
 *
 * @driver: group for driver owning the domain
 * @name: name of the domain
 * @group: Pointer to returned virCgroupPtr
 *
 * Returns 0 on success
 */
810
#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
811 812 813 814 815 816 817 818
int virCgroupForDomain(virCgroupPtr driver,
                       const char *name,
                       virCgroupPtr *group,
                       int create)
{
    char *domainPath;
    int ret;

    if (!driver)
        return -EINVAL;

    /* The domain group lives directly below the driver's group */
    if (virAsprintf(&domainPath, "%s/%s", driver->path, name) < 0)
        return -ENOMEM;

    ret = virCgroupNew(domainPath, group);
    VIR_FREE(domainPath);
    if (ret != 0)
        return ret;

    /*
     * Create a cgroup with memory.use_hierarchy enabled to
     * surely account memory usage of lxc with ns subsystem
     * enabled. (To be exact, memory and ns subsystems are
     * enabled at the same time.)
     *
     * The reason for doing it here, not in an upper group, say
     * a group for driver, is to avoid overhead to track
     * cumulative usage that we don't need.
     */
    ret = virCgroupMakeGroup(driver, *group, create, true);
    if (ret != 0)
        virCgroupFree(group);

    return ret;
}
D
Daniel P. Berrange 已提交
846 847 848 849 850 851 852 853 854
#else
int virCgroupForDomain(virCgroupPtr driver ATTRIBUTE_UNUSED,
                       const char *name ATTRIBUTE_UNUSED,
                       virCgroupPtr *group ATTRIBUTE_UNUSED,
                       int create ATTRIBUTE_UNUSED)
{
    /* Built without mntent.h / getmntent_r: claim no support */
    const int unsupported = -ENXIO;

    return unsupported;
}
#endif
855

856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894
/**
 * virCgroupSetBlkioWeight:
 *
 * @group: The cgroup to change io weight for
 * @weight: The Weight for this cgroup
 *
 * Returns: 0 on success
 */
int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight)
{
    /* Only weights from 100 to 1000 inclusive are accepted */
    if (weight < 100 || weight > 1000)
        return -EINVAL;

    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                "blkio.weight", weight);
}

/**
 * virCgroupGetBlkioWeight:
 *
 * @group: The cgroup to get weight for
 * @weight: Pointer to returned weight
 *
 * Returns: 0 on success
 */
int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight)
{
    unsigned long long raw;
    int rc = virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_BLKIO,
                                  "blkio.weight", &raw);

    /* @weight is only written when the read succeeded */
    if (rc != 0)
        return rc;

    *weight = raw;
    return rc;
}

895 896 897 898 899 900 901 902
/**
 * virCgroupSetMemory:
 *
 * @group: The cgroup to change memory for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
903
int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb)
{
    const unsigned long long unlimited = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
    const char *key = "memory.limit_in_bytes";

    if (kb > unlimited)
        return -EINVAL;

    /* The "unlimited" marker is written as -1 rather than a byte count */
    if (kb == unlimited)
        return virCgroupSetValueI64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                    key, -1);

    /* Kilobytes to bytes */
    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                key, kb << 10);
}

R
Ryota Ozaki 已提交
921 922 923 924 925 926 927 928 929 930
/**
 * virCgroupGetMemoryUsage:
 *
 * @group: The cgroup to get the memory usage for
 * @kb: Pointer to returned used memory in kilobytes
 *
 * Returns: 0 on success
 */
int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb)
{
    long long unsigned int usage_in_bytes;
    int ret;

    ret = virCgroupGetValueU64(group,
                               VIR_CGROUP_CONTROLLER_MEMORY,
                               "memory.usage_in_bytes", &usage_in_bytes);
    if (ret == 0) {
        /* Shift in the 64-bit type first and narrow afterwards: a cast
         * binds tighter than >>, so narrowing first would drop the high
         * bits wherever unsigned long is narrower than 64 bits */
        *kb = (unsigned long)(usage_in_bytes >> 10);
    }
    return ret;
}

941 942 943 944 945 946 947 948
/**
 * virCgroupSetMemoryHardLimit:
 *
 * @group: The cgroup to change memory hard limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
949
int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb)
{
    /* The hard limit is the plain memory limit; hand over to that setter */
    int ret = virCgroupSetMemory(group, kb);

    return ret;
}

/**
 * virCgroupGetMemoryHardLimit:
 *
 * @group: The cgroup to get the memory hard limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
962
int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb)
{
    unsigned long long bytes;
    int rc = virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                  "memory.limit_in_bytes", &bytes);

    /* @kb is only written when the read succeeded */
    if (rc != 0)
        return rc;

    /* Bytes to kilobytes */
    *kb = bytes >> 10;
    return rc;
}

/**
 * virCgroupSetMemorySoftLimit:
 *
 * @group: The cgroup to change memory soft limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
982
int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb)
{
    const unsigned long long unlimited = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
    const char *key = "memory.soft_limit_in_bytes";

    if (kb > unlimited)
        return -EINVAL;

    /* The "unlimited" marker is written as -1 rather than a byte count */
    if (kb == unlimited)
        return virCgroupSetValueI64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                    key, -1);

    /* Kilobytes to bytes */
    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                key, kb << 10);
}


/**
 * virCgroupGetMemorySoftLimit:
 *
 * @group: The cgroup to get the memory soft limit for
 * @kb: The memory amount in kilobytes
 *
 * Returns: 0 on success
 */
1009
int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb)
{
    unsigned long long bytes;
    int rc = virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                  "memory.soft_limit_in_bytes", &bytes);

    /* @kb is only written when the read succeeded */
    if (rc != 0)
        return rc;

    /* Bytes to kilobytes */
    *kb = bytes >> 10;
    return rc;
}

/**
 * virCgroupSetSwapHardLimit:
 *
 * @group: The cgroup to change swap hard limit for
 * @kb: The swap amount in kilobytes
 *
 * Returns: 0 on success
 */
1029
int virCgroupSetSwapHardLimit(virCgroupPtr group, unsigned long long kb)
{
    const unsigned long long unlimited = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
    const char *key = "memory.memsw.limit_in_bytes";

    if (kb > unlimited)
        return -EINVAL;

    /* The "unlimited" marker is written as -1 rather than a byte count */
    if (kb == unlimited)
        return virCgroupSetValueI64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                    key, -1);

    /* Kilobytes to bytes */
    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                key, kb << 10);
}

/**
 * virCgroupGetSwapHardLimit:
 *
 * @group: The cgroup to get swap hard limit for
 * @kb: The swap amount in kilobytes
 *
 * Returns: 0 on success
 */
1055
int virCgroupGetSwapHardLimit(virCgroupPtr group, unsigned long long *kb)
{
    unsigned long long bytes;
    int rc = virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_MEMORY,
                                  "memory.memsw.limit_in_bytes", &bytes);

    /* @kb is only written when the read succeeded */
    if (rc != 0)
        return rc;

    /* Bytes to kilobytes */
    *kb = bytes >> 10;
    return rc;
}

1067 1068 1069 1070 1071 1072 1073 1074 1075 1076
/**
 * virCgroupDenyAllDevices:
 *
 * @group: The cgroup to deny devices for
 *
 * Writes the rule "a" to "devices.deny" through the devices
 * controller.
 *
 * Returns: 0 on success
 */
int virCgroupDenyAllDevices(virCgroupPtr group)
{
    const char *key = "devices.deny";
    const char *rule = "a";

    return virCgroupSetValueStr(group, VIR_CGROUP_CONTROLLER_DEVICES,
                                key, rule);
}

/**
 * virCgroupAllowDevice:
 *
 * @group: The cgroup to allow a device for
 * @type: The device type (i.e., 'c' or 'b')
 * @major: The major number of the device
 * @minor: The minor number of the device
 *
 * Returns: 0 on success
 */
1092
int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor)
1093 1094 1095 1096
{
    int rc;
    char *devstr = NULL;

1097
    if (virAsprintf(&devstr, "%c %i:%i rwm", type, major, minor) == -1) {
1098 1099 1100 1101 1102
        rc = -ENOMEM;
        goto out;
    }

    rc = virCgroupSetValueStr(group,
1103
                              VIR_CGROUP_CONTROLLER_DEVICES,
1104 1105 1106 1107 1108 1109 1110
                              "devices.allow",
                              devstr);
out:
    VIR_FREE(devstr);

    return rc;
}
1111

1112 1113 1114 1115 1116 1117 1118 1119 1120
/**
 * virCgroupAllowDeviceMajor:
 *
 * @group: The cgroup to allow an entire device major type for
 * @type: The device type (i.e., 'c' or 'b')
 * @major: The major number of the device type
 *
 * Returns: 0 on success
 */
1121
int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major)
1122 1123 1124 1125
{
    int rc;
    char *devstr = NULL;

1126
    if (virAsprintf(&devstr, "%c %i:* rwm", type, major) == -1) {
1127 1128 1129 1130 1131
        rc = -ENOMEM;
        goto out;
    }

    rc = virCgroupSetValueStr(group,
1132
                              VIR_CGROUP_CONTROLLER_DEVICES,
1133 1134 1135 1136 1137 1138 1139 1140
                              "devices.allow",
                              devstr);
 out:
    VIR_FREE(devstr);

    return rc;
}

1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151
/**
 * virCgroupAllowDevicePath:
 *
 * @group: The cgroup to allow the device for
 * @path: the device to allow
 *
 * Queries the type of device and its major/minor number, and
 * adds that to the cgroup ACL
 *
 * Returns: 0 on success
 */
D
Daniel P. Berrange 已提交
1152
#if defined(major) && defined(minor)
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
int virCgroupAllowDevicePath(virCgroupPtr group, const char *path)
{
    struct stat sb;

    if (stat(path, &sb) < 0)
        return -errno;

    if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode))
        return -EINVAL;

    return virCgroupAllowDevice(group,
                                S_ISCHR(sb.st_mode) ? 'c' : 'b',
                                major(sb.st_rdev),
                                minor(sb.st_rdev));
}
D
Daniel P. Berrange 已提交
1168 1169 1170 1171 1172 1173 1174 1175
#else
int virCgroupAllowDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
                             const char *path ATTRIBUTE_UNUSED)
{
    return -ENOSYS;
}
#endif

1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235

/**
 * virCgroupDenyDevice:
 *
 * @group: The cgroup to deny a device for
 * @type: The device type (i.e., 'c' or 'b')
 * @major: The major number of the device
 * @minor: The minor number of the device
 *
 * Formats the rule "<type> <major>:<minor> rwm" and writes it to
 * "devices.deny" through the devices controller.
 *
 * Returns: 0 on success, -ENOMEM if the rule string could not be
 * allocated
 */
int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor)
{
    char *rule = NULL;
    int ret;

    if (virAsprintf(&rule, "%c %i:%i rwm", type, major, minor) == -1) {
        ret = -ENOMEM;
    } else {
        ret = virCgroupSetValueStr(group,
                                   VIR_CGROUP_CONTROLLER_DEVICES,
                                   "devices.deny",
                                   rule);
    }

    /* Released on both the success and the failure path */
    VIR_FREE(rule);
    return ret;
}

/**
 * virCgroupDenyDeviceMajor:
 *
 * @group: The cgroup to deny an entire device major type for
 * @type: The device type (i.e., 'c' or 'b')
 * @major: The major number of the device type
 *
 * Formats the rule "<type> <major>:* rwm" and writes it to
 * "devices.deny" through the devices controller.
 *
 * Returns: 0 on success, -ENOMEM if the rule string could not be
 * allocated
 */
int virCgroupDenyDeviceMajor(virCgroupPtr group, char type, int major)
{
    char *rule = NULL;
    int ret;

    if (virAsprintf(&rule, "%c %i:* rwm", type, major) == -1) {
        ret = -ENOMEM;
    } else {
        ret = virCgroupSetValueStr(group,
                                   VIR_CGROUP_CONTROLLER_DEVICES,
                                   "devices.deny",
                                   rule);
    }

    /* Released on both the success and the failure path */
    VIR_FREE(rule);
    return ret;
}

D
Daniel P. Berrange 已提交
1236
/**
 * virCgroupDenyDevicePath:
 *
 * @group: The cgroup to deny the device for
 * @path: the device to deny
 *
 * Queries the type of device and its major/minor number with
 * stat(), and passes them to virCgroupDenyDevice().
 *
 * Returns: 0 on success, -errno if stat() fails, -EINVAL if @path is
 * neither a character nor a block device, -ENOSYS when the major()
 * and minor() macros are not available at build time
 */
#if defined(major) && defined(minor)
int virCgroupDenyDevicePath(virCgroupPtr group, const char *path)
{
    struct stat sb;
    char kind;

    if (stat(path, &sb) < 0)
        return -errno;

    /* Only character and block devices can appear in the ACL */
    if (S_ISCHR(sb.st_mode))
        kind = 'c';
    else if (S_ISBLK(sb.st_mode))
        kind = 'b';
    else
        return -EINVAL;

    return virCgroupDenyDevice(group, kind,
                               major(sb.st_rdev),
                               minor(sb.st_rdev));
}
#else
int virCgroupDenyDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED,
                            const char *path ATTRIBUTE_UNUSED)
{
    return -ENOSYS;
}
#endif
1259

1260
/**
 * virCgroupSetCpuShares:
 *
 * @group: The cgroup to change the cpu shares for
 * @shares: The value to write to "cpu.shares"
 *
 * Returns: the result of virCgroupSetValueU64() on the cpu controller
 */
int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares)
{
    const char *key = "cpu.shares";

    return virCgroupSetValueU64(group, VIR_CGROUP_CONTROLLER_CPU,
                                key, shares);
}

1267
/**
 * virCgroupGetCpuShares:
 *
 * @group: The cgroup to read the cpu shares from
 * @shares: Where the value of "cpu.shares" is stored
 *
 * Returns: the result of virCgroupGetValueU64() on the cpu controller
 */
int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares)
{
    const char *key = "cpu.shares";

    return virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_CPU,
                                key, shares);
}
1273 1274 1275

/**
 * virCgroupGetCpuacctUsage:
 *
 * @group: The cgroup to read the cpu accounting usage from
 * @usage: Where the value of "cpuacct.usage" is stored
 *
 * Returns: the result of virCgroupGetValueU64() on the cpuacct
 * controller
 */
int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage)
{
    const char *key = "cpuacct.usage";

    return virCgroupGetValueU64(group, VIR_CGROUP_CONTROLLER_CPUACCT,
                                key, usage);
}
R
Ryota Ozaki 已提交
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289

/**
 * virCgroupSetFreezerState:
 *
 * @group: The cgroup to change the freezer state for
 * @state: The string to write to "freezer.state"
 *
 * The value is written through the freezer controller, since
 * "freezer.state" is a file of that controller and not of the cpu
 * controller.
 *
 * Returns: the result of virCgroupSetValueStr()
 */
int virCgroupSetFreezerState(virCgroupPtr group, const char *state)
{
    return virCgroupSetValueStr(group,
                                VIR_CGROUP_CONTROLLER_FREEZER,
                                "freezer.state", state);
}

/**
 * virCgroupGetFreezerState:
 *
 * @group: The cgroup to read the freezer state from
 * @state: Where the content of "freezer.state" is stored
 *
 * The value is read through the freezer controller, since
 * "freezer.state" is a file of that controller and not of the cpu
 * controller.
 *
 * NOTE(review): @state is presumably allocated by
 * virCgroupGetValueStr() and owned by the caller afterwards; confirm
 * against that helper.
 *
 * Returns: the result of virCgroupGetValueStr()
 */
int virCgroupGetFreezerState(virCgroupPtr group, char **state)
{
    return virCgroupGetValueStr(group,
                                VIR_CGROUP_CONTROLLER_FREEZER,
                                "freezer.state", state);
}