/*
 * Copyright (C) 2010-2014 Red Hat, Inc.
 * Copyright IBM Corp. 2008
 *
 * lxc_cgroup.c: LXC cgroup helpers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#include "lxc_cgroup.h"
#include "lxc_container.h"
26
#include "virfile.h"
27
#include "virerror.h"
28
#include "virlog.h"
29
#include "viralloc.h"
30
#include "vircgroup.h"
31
#include "virstring.h"
32 33 34

#define VIR_FROM_THIS VIR_FROM_LXC

35 36
VIR_LOG_INIT("lxc.lxc_cgroup");

37 38 39 40
/*
 * Apply the CPU tuning settings of @def to @cgroup.
 *
 * CPU shares are written only when def->cputune.sharesSpecified is set;
 * the value is then read back from the cgroup and stored in
 * def->cputune.shares.  The CFS quota and period are written only when
 * they are non-zero.
 *
 * Returns 0 on success, -1 as soon as any cgroup call fails.
 */
static int
virLXCCgroupSetupCpuTune(virDomainDefPtr def,
                         virCgroupPtr cgroup)
{
    if (def->cputune.sharesSpecified) {
        unsigned long long effective;

        if (virCgroupSetCpuShares(cgroup, def->cputune.shares) < 0 ||
            virCgroupGetCpuShares(cgroup, &effective) < 0)
            return -1;

        /* keep the definition in sync with what the cgroup reports */
        def->cputune.shares = effective;
    }

    if (def->cputune.quota != 0) {
        if (virCgroupSetCpuCfsQuota(cgroup, def->cputune.quota) < 0)
            return -1;
    }

    if (def->cputune.period != 0) {
        if (virCgroupSetCpuCfsPeriod(cgroup, def->cputune.period) < 0)
            return -1;
    }

    return 0;
}


66 67 68 69
/*
 * Apply the cpuset related settings of @def to @cgroup.
 *
 * The CPU mask is written (via virCgroupSetCpusetCpus) when the placement
 * mode is not automatic and def->cpumask is set.  The memory node set is
 * written (via virCgroupSetCpusetMems) only when the NUMA tuning mode can
 * be queried and is not VIR_DOMAIN_NUMATUNE_MEM_STRICT, and only when
 * virDomainNumatuneMaybeFormatNodeset produced a string.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virLXCCgroupSetupCpusetTune(virDomainDefPtr def,
                            virCgroupPtr cgroup,
                            virBitmapPtr nodemask)
{
    int ret = -1;
    char *cpus = NULL;
    char *mems = NULL;
    virDomainNumatuneMemMode mode;

    if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO &&
        def->cpumask) {
        int rc;

        if (!(cpus = virBitmapFormat(def->cpumask)))
            return -1;

        rc = virCgroupSetCpusetCpus(cgroup, cpus);
        /* the CPU string is only needed for this one call */
        VIR_FREE(cpus);
        if (rc < 0)
            return -1;
    }

    /* nothing more to do when the mode is unavailable or strict */
    if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 ||
        mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT)
        return 0;

    if (virDomainNumatuneMaybeFormatNodeset(def->numa, nodemask,
                                            &mems, -1) < 0)
        goto cleanup;

    if (mems && virCgroupSetCpusetMems(cgroup, mems) < 0)
        goto cleanup;

    ret = 0;
 cleanup:
    VIR_FREE(mems);
    return ret;
}


105 106 107
/*
 * Apply the block I/O tuning of @def to @cgroup.
 *
 * The global weight is written when it is non-zero.  For every entry in
 * def->blkio.devices, each non-zero setting (weight, read/write IOPS,
 * read/write bytes per second) is written to the cgroup and then read
 * back into the same field of the device entry.
 *
 * Returns 0 on success, -1 as soon as any cgroup call fails.
 */
static int
virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
                           virCgroupPtr cgroup)
{
    size_t idx;

    if (def->blkio.weight) {
        if (virCgroupSetBlkioWeight(cgroup, def->blkio.weight) < 0)
            return -1;
    }

    for (idx = 0; idx < def->blkio.ndevices; idx++) {
        virBlkioDevicePtr blkdev = &def->blkio.devices[idx];

        if (blkdev->weight) {
            if (virCgroupSetBlkioDeviceWeight(cgroup, blkdev->path,
                                              blkdev->weight) < 0)
                return -1;
            if (virCgroupGetBlkioDeviceWeight(cgroup, blkdev->path,
                                              &blkdev->weight) < 0)
                return -1;
        }

        if (blkdev->riops) {
            if (virCgroupSetBlkioDeviceReadIops(cgroup, blkdev->path,
                                                blkdev->riops) < 0)
                return -1;
            if (virCgroupGetBlkioDeviceReadIops(cgroup, blkdev->path,
                                                &blkdev->riops) < 0)
                return -1;
        }

        if (blkdev->wiops) {
            if (virCgroupSetBlkioDeviceWriteIops(cgroup, blkdev->path,
                                                 blkdev->wiops) < 0)
                return -1;
            if (virCgroupGetBlkioDeviceWriteIops(cgroup, blkdev->path,
                                                 &blkdev->wiops) < 0)
                return -1;
        }

        if (blkdev->rbps) {
            if (virCgroupSetBlkioDeviceReadBps(cgroup, blkdev->path,
                                               blkdev->rbps) < 0)
                return -1;
            if (virCgroupGetBlkioDeviceReadBps(cgroup, blkdev->path,
                                               &blkdev->rbps) < 0)
                return -1;
        }

        if (blkdev->wbps) {
            if (virCgroupSetBlkioDeviceWriteBps(cgroup, blkdev->path,
                                                blkdev->wbps) < 0)
                return -1;
            if (virCgroupGetBlkioDeviceWriteBps(cgroup, blkdev->path,
                                                &blkdev->wbps) < 0)
                return -1;
        }
    }

    return 0;
}


/*
 * Apply the memory tuning of @def to @cgroup.
 *
 * The initial memory size of the domain is always written; the hard,
 * soft and swap hard limits are written only when virMemoryLimitIsSet()
 * reports them as set.
 *
 * Returns 0 on success, -1 as soon as any cgroup call fails.
 */
static int
virLXCCgroupSetupMemTune(virDomainDefPtr def,
                         virCgroupPtr cgroup)
{
    if (virCgroupSetMemory(cgroup, virDomainDefGetMemoryInitial(def)) < 0)
        return -1;

    if (virMemoryLimitIsSet(def->mem.hard_limit) &&
        virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit) < 0)
        return -1;

    if (virMemoryLimitIsSet(def->mem.soft_limit) &&
        virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit) < 0)
        return -1;

    if (virMemoryLimitIsSet(def->mem.swap_hard_limit) &&
        virCgroupSetMemSwapHardLimit(cgroup, def->mem.swap_hard_limit) < 0)
        return -1;

    return 0;
}


185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
/*
 * Store the value reported by virCgroupGetMemSwapUsage() for @cgroup in
 * meminfo->swapusage.  Returns that function's result unchanged.
 */
static int
virLXCCgroupGetMemSwapUsage(virCgroupPtr cgroup,
                            virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapUsage(cgroup, &meminfo->swapusage);

    return rc;
}


/*
 * Store the value reported by virCgroupGetMemSwapHardLimit() for @cgroup
 * in meminfo->swaptotal.  Returns that function's result unchanged.
 */
static int
virLXCCgroupGetMemSwapTotal(virCgroupPtr cgroup,
                            virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapHardLimit(cgroup, &meminfo->swaptotal);

    return rc;
}


/*
 * Query the memory usage of @cgroup and store it in meminfo->memusage.
 *
 * The value is fetched as an unsigned long and widened to unsigned long
 * long.  meminfo->memusage is only updated when the query does not fail,
 * so a failed query never publishes an indeterminate value.
 *
 * Returns the result of virCgroupGetMemoryUsage().
 */
static int virLXCCgroupGetMemUsage(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    int ret;
    unsigned long memUsage = 0;

    ret = virCgroupGetMemoryUsage(cgroup, &memUsage);
    if (ret < 0)
        return ret;

    meminfo->memusage = (unsigned long long) memUsage;

    return ret;
}


/*
 * Store the value reported by virCgroupGetMemoryHardLimit() for @cgroup
 * in meminfo->memtotal.  Returns that function's result unchanged.
 */
static int
virLXCCgroupGetMemTotal(virCgroupPtr cgroup,
                        virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemoryHardLimit(cgroup, &meminfo->memtotal);

    return rc;
}


/*
 * Parse the "memory.stat" file of the memory controller of @cgroup and
 * fill the cached, inactive_anon, active_anon, inactive_file, active_file
 * and unevictable fields of @meminfo.
 *
 * Each line is expected to look like "<name> <number>".  Lines without a
 * space are skipped and unknown names are ignored.  Every stored value
 * is the parsed number shifted right by 10 bits, i.e. divided by 1024
 * (presumably bytes to KiB; confirm against the kernel documentation).
 *
 * Returns 0 on success.  On failure returns the non-zero result of
 * virCgroupPathOfController(), -errno if the file cannot be opened, or
 * -EINVAL if a value cannot be parsed as an unsigned number.
 */
static int virLXCCgroupGetMemStat(virCgroupPtr cgroup,
                                  virLXCMeminfoPtr meminfo)
{
    int ret = 0;
    FILE *statfd = NULL;
    char *statFile = NULL;
    char *line = NULL;
    /* getline() takes buffer and capacity as a pair: start with none */
    size_t n = 0;

    ret = virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.stat", &statFile);
    if (ret != 0) {
        virReportSystemError(-ret, "%s",
                             _("cannot get the path of MEMORY cgroup controller"));
        return ret;
    }

    statfd = fopen(statFile, "r");
    if (statfd == NULL) {
        ret = -errno;
        goto cleanup;
    }

    while (getline(&line, &n, statfd) > 0) {
        char *value = strchr(line, ' ');
        char *nl = value ? strchr(line, '\n') : NULL;
        unsigned long long stat_value;

        if (!value)
            continue;

        /* strip the trailing newline so the number parses cleanly */
        if (nl)
            *nl = '\0';

        /* split "<name> <number>" into two NUL-terminated strings */
        *value = '\0';

        if (virStrToLong_ull(value + 1, NULL, 10, &stat_value) < 0) {
            ret = -EINVAL;
            goto cleanup;
        }
        if (STREQ(line, "cache"))
            meminfo->cached = stat_value >> 10;
        else if (STREQ(line, "inactive_anon"))
            meminfo->inactive_anon = stat_value >> 10;
        else if (STREQ(line, "active_anon"))
            meminfo->active_anon = stat_value >> 10;
        else if (STREQ(line, "inactive_file"))
            meminfo->inactive_file = stat_value >> 10;
        else if (STREQ(line, "active_file"))
            meminfo->active_file = stat_value >> 10;
        else if (STREQ(line, "unevictable"))
            meminfo->unevictable = stat_value >> 10;
    }
    ret = 0;

 cleanup:
    VIR_FREE(line);
    VIR_FREE(statFile);
    VIR_FORCE_FCLOSE(statfd);
    return ret;
}


/*
 * Fill @meminfo with the memory statistics of the calling process' own
 * cgroup (obtained with virCgroupNewSelf): memory.stat counters, memory
 * total and usage, swap total and usage, collected in that order.
 *
 * Returns 0 when every query succeeds, -1 otherwise.
 */
int
virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo)
{
    virCgroupPtr self;
    int ret = -1;

    if (virCgroupNewSelf(&self) < 0)
        return -1;

    /* short-circuit evaluation stops at the first failing query */
    if (virLXCCgroupGetMemStat(self, meminfo) >= 0 &&
        virLXCCgroupGetMemTotal(self, meminfo) >= 0 &&
        virLXCCgroupGetMemUsage(self, meminfo) >= 0 &&
        virLXCCgroupGetMemSwapTotal(self, meminfo) >= 0 &&
        virLXCCgroupGetMemSwapUsage(self, meminfo) >= 0)
        ret = 0;

    virCgroupFree(&self);
    return ret;
}



314 315 316 317 318 319 320 321 322 323
/*
 * One entry of the table of devices that virLXCCgroupSetupDeviceACL()
 * allows in a container cgroup.
 */
struct _virLXCCgroupDevicePolicy {
    char type;  /* device type character, e.g. 'c'; 0 ends the table */
    int major;  /* device major number */
    int minor;  /* device minor number */
};

typedef struct _virLXCCgroupDevicePolicy virLXCCgroupDevicePolicy;
typedef virLXCCgroupDevicePolicy *virLXCCgroupDevicePolicyPtr;


324
int
325
virLXCSetupHostUSBDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
326 327 328 329 330 331
                               const char *path,
                               void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
332
    if (virCgroupAllowDevicePath(cgroup, path,
333
                                 VIR_CGROUP_DEVICE_RWM) < 0)
334 335 336 337 338 339 340
        return -1;

    return 0;
}


int
341
virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
342 343 344 345 346 347
                                  const char *path,
                                  void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
348
    if (virCgroupDenyDevicePath(cgroup, path,
349
                                VIR_CGROUP_DEVICE_RWM) < 0)
350 351 352 353 354
        return -1;

    return 0;
}

355 356 357 358

/*
 * Build the device access list of @cgroup for the container described
 * by @def.
 *
 * All devices are denied first.  Then, in order:
 *  - mknod is allowed for all devices when the MKNOD capability feature
 *    of the domain is switched on;
 *  - a fixed table of character devices is allowed with RWM access;
 *  - block-type disks are allowed (read-only or read-write as configured,
 *    plus mknod);
 *  - block-type filesystems are allowed (read-only or read-write);
 *  - USB subsystem hostdevs (unless marked missing) and storage/misc
 *    capability hostdevs are allowed;
 *  - the whole LXC_DEV_MAJ_PTY character major is allowed with RWM access.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    int capMknod = def->caps_features[VIR_DOMAIN_CAPS_FEATURE_MKNOD];
    int ret = -1;
    size_t i;
    /* read-only lookup table, terminated by an entry with type == 0 */
    static const virLXCCgroupDevicePolicy devices[] = {
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX},
        {'c', LXC_DEV_MAJ_FUSE, LXC_DEV_MIN_FUSE},
        {0,   0, 0}};

    if (virCgroupDenyAllDevices(cgroup) < 0)
        goto cleanup;

    /* white list mknod if CAP_MKNOD has to be kept */
    if (capMknod == VIR_TRISTATE_SWITCH_ON) {
        if (virCgroupAllowAllDevices(cgroup,
                                    VIR_CGROUP_DEVICE_MKNOD) < 0)
            goto cleanup;
    }

    for (i = 0; devices[i].type != 0; i++) {
        const virLXCCgroupDevicePolicy *dev = &devices[i];
        if (virCgroupAllowDevice(cgroup,
                                 dev->type,
                                 dev->major,
                                 dev->minor,
                                 VIR_CGROUP_DEVICE_RWM) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Allowing any disk block devs");
    for (i = 0; i < def->ndisks; i++) {
        if (!virDomainDiskSourceIsBlockType(def->disks[i]->src, false))
            continue;

        if (virCgroupAllowDevicePath(cgroup,
                                     virDomainDiskGetSource(def->disks[i]),
                                     (def->disks[i]->src->readonly ?
                                      VIR_CGROUP_DEVICE_READ :
                                      VIR_CGROUP_DEVICE_RW) |
                                     VIR_CGROUP_DEVICE_MKNOD) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Allowing any filesystem block devs");
    for (i = 0; i < def->nfss; i++) {
        if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_BLOCK)
            continue;

        if (virCgroupAllowDevicePath(cgroup,
                                     def->fss[i]->src,
                                     def->fss[i]->readonly ?
                                     VIR_CGROUP_DEVICE_READ :
                                     VIR_CGROUP_DEVICE_RW) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Allowing any hostdev block devs");
    for (i = 0; i < def->nhostdevs; i++) {
        virDomainHostdevDefPtr hostdev = def->hostdevs[i];
        virDomainHostdevSubsysUSBPtr usbsrc = &hostdev->source.subsys.u.usb;
        virUSBDevicePtr usb;

        switch (hostdev->mode) {
        case VIR_DOMAIN_HOSTDEV_MODE_SUBSYS:
            /* only USB subsystem devices that are present are handled */
            if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
                continue;
            if (hostdev->missing)
                continue;

            if ((usb = virUSBDeviceNew(usbsrc->bus, usbsrc->device,
                                       NULL)) == NULL)
                goto cleanup;

            if (virUSBDeviceFileIterate(usb, virLXCSetupHostUSBDeviceCgroup,
                                        cgroup) < 0) {
                virUSBDeviceFree(usb);
                goto cleanup;
            }
            virUSBDeviceFree(usb);
            break;
        case VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES:
            switch (hostdev->source.caps.type) {
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.storage.block,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD) < 0)
                    goto cleanup;
                break;
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.misc.chardev,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD) < 0)
                    goto cleanup;
                break;
            default:
                break;
            }
            /* explicit break: do not rely on falling into "default" */
            break;
        default:
            break;
        }
    }

    if (virCgroupAllowDeviceMajor(cgroup, 'c', LXC_DEV_MAJ_PTY,
                                  VIR_CGROUP_DEVICE_RWM) < 0)
        goto cleanup;

    VIR_DEBUG("Device whitelist complete");

    ret = 0;
 cleanup:
    return ret;
}


480
/*
 * Create the machine cgroup for the LXC container described by @def.
 *
 * @initpid, @nnicindexes and @nicindexes are forwarded unchanged to
 * virCgroupNewMachine().  The resource partition of @def must start with
 * '/', otherwise VIR_ERR_CONFIG_UNSUPPORTED is reported.  When @def has a
 * uid map, ownership of the cgroup is changed to the first uid/gid map
 * targets via virCgroupSetOwner(); if that fails the cgroup is freed.
 *
 * Returns the new cgroup, or NULL on failure.
 */
virCgroupPtr
virLXCCgroupCreate(virDomainDefPtr def,
                   pid_t initpid,
                   size_t nnicindexes,
                   int *nicindexes)
{
    virCgroupPtr machine = NULL;

    if (def->resource->partition[0] != '/') {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Resource partition '%s' must start with '/'"),
                       def->resource->partition);
        return NULL;
    }

    /* on failure hand back whatever the call left in 'machine' */
    if (virCgroupNewMachine(def->name,
                            "lxc",
                            true,
                            def->uuid,
                            NULL,
                            initpid,
                            true,
                            nnicindexes, nicindexes,
                            def->resource->partition,
                            -1,
                            &machine) < 0 || !machine)
        return machine;

    /* setup control group permissions for user namespace */
    if (!def->idmap.uidmap)
        return machine;

    if (virCgroupSetOwner(machine,
                          def->idmap.uidmap[0].target,
                          def->idmap.gidmap[0].target,
                          (1 << VIR_CGROUP_CONTROLLER_SYSTEMD)) < 0) {
        virCgroupFree(&machine);
        return NULL;
    }

    return machine;
}


int virLXCCgroupSetup(virDomainDefPtr def,
525 526
                      virCgroupPtr cgroup,
                      virBitmapPtr nodemask)
527 528 529
{
    int ret = -1;

530 531 532
    if (virLXCCgroupSetupCpuTune(def, cgroup) < 0)
        goto cleanup;

533 534 535
    if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask) < 0)
        goto cleanup;

536 537 538 539 540 541 542 543 544
    if (virLXCCgroupSetupBlkioTune(def, cgroup) < 0)
        goto cleanup;

    if (virLXCCgroupSetupMemTune(def, cgroup) < 0)
        goto cleanup;

    if (virLXCCgroupSetupDeviceACL(def, cgroup) < 0)
        goto cleanup;

545 546
    ret = 0;

547
 cleanup:
548
    return ret;
549
}