/*
 * Copyright (C) 2010-2014 Red Hat, Inc.
 * Copyright IBM Corp. 2008
 *
 * lxc_cgroup.c: LXC cgroup helpers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#include "lxc_cgroup.h"
#include "lxc_container.h"
26
#include "virfile.h"
27
#include "virerror.h"
28
#include "virlog.h"
29
#include "viralloc.h"
30
#include "vircgroup.h"
31
#include "virstring.h"
32 33 34

#define VIR_FROM_THIS VIR_FROM_LXC

35 36
VIR_LOG_INIT("lxc.lxc_cgroup");

37 38 39 40
/**
 * virLXCCgroupSetupCpuTune:
 * @def: domain definition supplying the cputune settings
 * @cgroup: cgroup the settings are applied to
 *
 * Applies the CPU shares (only when flagged as specified) and any
 * non-zero CFS quota / period from @def to @cgroup.  After the shares
 * are written they are read back from the cgroup and the value read is
 * stored into @def->cputune.shares.
 *
 * Returns 0 on success, -1 as soon as any cgroup call fails.
 */
static int virLXCCgroupSetupCpuTune(virDomainDefPtr def,
                                    virCgroupPtr cgroup)
{
    if (def->cputune.sharesSpecified) {
        unsigned long long effective;

        if (virCgroupSetCpuShares(cgroup, def->cputune.shares) < 0 ||
            virCgroupGetCpuShares(cgroup, &effective) < 0)
            return -1;

        /* Remember what the cgroup reports, not what was requested. */
        def->cputune.shares = effective;
    }

    if (def->cputune.quota != 0) {
        if (virCgroupSetCpuCfsQuota(cgroup, def->cputune.quota) < 0)
            return -1;
    }

    if (def->cputune.period != 0) {
        if (virCgroupSetCpuCfsPeriod(cgroup, def->cputune.period) < 0)
            return -1;
    }

    return 0;
}


66 67 68 69
/**
 * virLXCCgroupSetupCpusetTune:
 * @def: domain definition supplying CPU placement and NUMA tuning
 * @cgroup: cgroup the cpuset settings are applied to
 * @nodemask: node bitmap used when NUMA memory placement mode is "auto"
 *
 * Writes the cpuset CPU list (when CPU placement is not automatic and a
 * cpumask is present) and the cpuset memory node list (when the NUMA
 * memory mode is strict and either an explicit nodemask exists or the
 * memory placement mode is automatic) to @cgroup.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def,
                                       virCgroupPtr cgroup,
                                       virBitmapPtr nodemask)
{
    int ret = -1;
    char *mask = NULL;

    if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO &&
        def->cpumask) {
        mask = virBitmapFormat(def->cpumask);
        if (!mask) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("failed to convert cpumask"));
            goto cleanup;
        }

        if (virCgroupSetCpusetCpus(cgroup, mask) < 0)
            goto cleanup;

        /* The same variable is reused for the memory node string below;
         * release the CPU string now so it is not leaked when mask is
         * overwritten. */
        VIR_FREE(mask);
    }

    if ((def->numatune.memory.nodemask ||
         (def->numatune.memory.placement_mode ==
          VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)) &&
          def->numatune.memory.mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        /* Automatic placement uses the caller-supplied nodemask,
         * otherwise the one configured in the domain definition. */
        if (def->numatune.memory.placement_mode ==
            VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO)
            mask = virBitmapFormat(nodemask);
        else
            mask = virBitmapFormat(def->numatune.memory.nodemask);

        if (!mask) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("failed to convert memory nodemask"));
            goto cleanup;
        }

        if (virCgroupSetCpusetMems(cgroup, mask) < 0)
            goto cleanup;
    }

    ret = 0;
 cleanup:
    VIR_FREE(mask);
    return ret;
}


113 114 115
/**
 * virLXCCgroupSetupBlkioTune:
 * @def: domain definition supplying the blkio settings
 * @cgroup: cgroup the settings are applied to
 *
 * Applies the global blkio weight (when non-zero) and, for every
 * configured blkio device, each of its non-zero tunables: weight,
 * read/write IOPS and read/write bytes per second.
 *
 * Returns 0 on success, -1 as soon as any cgroup call fails.
 */
static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    size_t idx;

    if (def->blkio.weight) {
        if (virCgroupSetBlkioWeight(cgroup, def->blkio.weight) < 0)
            return -1;
    }

    /* With zero devices the loop body simply never runs. */
    for (idx = 0; idx < def->blkio.ndevices; idx++) {
        virBlkioDevicePtr blkdev = def->blkio.devices + idx;

        if (blkdev->weight) {
            if (virCgroupSetBlkioDeviceWeight(cgroup, blkdev->path,
                                              blkdev->weight) < 0)
                return -1;
        }

        if (blkdev->riops) {
            if (virCgroupSetBlkioDeviceReadIops(cgroup, blkdev->path,
                                                blkdev->riops) < 0)
                return -1;
        }

        if (blkdev->wiops) {
            if (virCgroupSetBlkioDeviceWriteIops(cgroup, blkdev->path,
                                                 blkdev->wiops) < 0)
                return -1;
        }

        if (blkdev->rbps) {
            if (virCgroupSetBlkioDeviceReadBps(cgroup, blkdev->path,
                                               blkdev->rbps) < 0)
                return -1;
        }

        if (blkdev->wbps) {
            if (virCgroupSetBlkioDeviceWriteBps(cgroup, blkdev->path,
                                                blkdev->wbps) < 0)
                return -1;
        }
    }

    return 0;
}


/**
 * virLXCCgroupSetupMemTune:
 * @def: domain definition supplying the memory settings
 * @cgroup: cgroup the settings are applied to
 *
 * Always sets the cgroup memory value from @def->mem.max_balloon, then
 * applies the hard, soft and swap hard limits, each only when non-zero.
 *
 * Returns 0 on success, -1 as soon as any cgroup call fails.
 */
static int virLXCCgroupSetupMemTune(virDomainDefPtr def,
                                    virCgroupPtr cgroup)
{
    if (virCgroupSetMemory(cgroup, def->mem.max_balloon) < 0)
        return -1;

    if (def->mem.hard_limit) {
        if (virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit) < 0)
            return -1;
    }

    if (def->mem.soft_limit) {
        if (virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit) < 0)
            return -1;
    }

    if (def->mem.swap_hard_limit) {
        if (virCgroupSetMemSwapHardLimit(cgroup, def->mem.swap_hard_limit) < 0)
            return -1;
    }

    return 0;
}


183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
/**
 * virLXCCgroupGetMemSwapUsage:
 * @cgroup: cgroup to query
 * @meminfo: receives the result in its swapusage field
 *
 * Returns whatever virCgroupGetMemSwapUsage() returns.
 */
static int virLXCCgroupGetMemSwapUsage(virCgroupPtr cgroup,
                                       virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapUsage(cgroup, &meminfo->swapusage);

    return rc;
}


/**
 * virLXCCgroupGetMemSwapTotal:
 * @cgroup: cgroup to query
 * @meminfo: receives the swap hard limit in its swaptotal field
 *
 * Returns whatever virCgroupGetMemSwapHardLimit() returns.
 */
static int virLXCCgroupGetMemSwapTotal(virCgroupPtr cgroup,
                                       virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapHardLimit(cgroup, &meminfo->swaptotal);

    return rc;
}


/**
 * virLXCCgroupGetMemUsage:
 * @cgroup: cgroup to query
 * @meminfo: receives the memory usage in its memusage field
 *
 * Queries the memory usage of @cgroup via virCgroupGetMemoryUsage() and
 * stores it, widened to unsigned long long, in @meminfo->memusage.
 * On failure @meminfo->memusage is left untouched rather than being
 * overwritten with an indeterminate value.
 *
 * Returns the value returned by virCgroupGetMemoryUsage().
 */
static int virLXCCgroupGetMemUsage(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    int ret;
    unsigned long memUsage = 0;

    ret = virCgroupGetMemoryUsage(cgroup, &memUsage);
    if (ret < 0)
        return ret;

    meminfo->memusage = (unsigned long long) memUsage;

    return ret;
}


/**
 * virLXCCgroupGetMemTotal:
 * @cgroup: cgroup to query
 * @meminfo: receives the memory hard limit in its memtotal field
 *
 * Returns whatever virCgroupGetMemoryHardLimit() returns.
 */
static int virLXCCgroupGetMemTotal(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemoryHardLimit(cgroup, &meminfo->memtotal);

    return rc;
}


/**
 * virLXCCgroupGetMemStat:
 * @cgroup: cgroup whose memory controller statistics are read
 * @meminfo: receives the parsed counters
 *
 * Resolves the path of the "memory.stat" file of the memory controller
 * for @cgroup and parses it line by line.  Each line is expected to be
 * "<name> <value>"; lines without a space are skipped.  The counters
 * "cache", "inactive_anon", "active_anon", "inactive_file",
 * "active_file" and "unevictable" are stored in @meminfo after being
 * shifted right by 10 bits (i.e. divided by 1024; presumably bytes to
 * KiB - confirm against the consumer of virLXCMeminfo).  Other names
 * are ignored.
 *
 * Returns 0 on success, the non-zero value from
 * virCgroupPathOfController() if the path lookup fails, -errno if the
 * file cannot be opened, or -EINVAL if a value fails to parse.
 */
static int virLXCCgroupGetMemStat(virCgroupPtr cgroup,
                                  virLXCMeminfoPtr meminfo)
{
    int ret;
    FILE *statfd = NULL;
    char *statFile = NULL;
    char *line = NULL;
    /* getline() is handed a NULL buffer; keep the size consistent with
     * it instead of passing an indeterminate value. */
    size_t n = 0;

    ret = virCgroupPathOfController(cgroup, VIR_CGROUP_CONTROLLER_MEMORY,
                                    "memory.stat", &statFile);
    if (ret != 0) {
        virReportSystemError(-ret, "%s",
                             _("cannot get the path of MEMORY cgroup controller"));
        return ret;
    }

    statfd = fopen(statFile, "r");
    if (statfd == NULL) {
        ret = -errno;
        goto cleanup;
    }

    while (getline(&line, &n, statfd) > 0) {

        char *value = strchr(line, ' ');
        char *nl = value ? strchr(line, '\n') : NULL;
        unsigned long long stat_value;

        if (!value)
            continue;

        /* Strip the trailing newline so the number parses cleanly. */
        if (nl)
            *nl = '\0';

        /* Split the line in place: 'line' is the name, 'value + 1'
         * is the number. */
        *value = '\0';

        if (virStrToLong_ull(value + 1, NULL, 10, &stat_value) < 0) {
            ret = -EINVAL;
            goto cleanup;
        }
        if (STREQ(line, "cache"))
            meminfo->cached = stat_value >> 10;
        else if (STREQ(line, "inactive_anon"))
            meminfo->inactive_anon = stat_value >> 10;
        else if (STREQ(line, "active_anon"))
            meminfo->active_anon = stat_value >> 10;
        else if (STREQ(line, "inactive_file"))
            meminfo->inactive_file = stat_value >> 10;
        else if (STREQ(line, "active_file"))
            meminfo->active_file = stat_value >> 10;
        else if (STREQ(line, "unevictable"))
            meminfo->unevictable = stat_value >> 10;
    }
    ret = 0;

 cleanup:
    VIR_FREE(line);
    VIR_FREE(statFile);
    VIR_FORCE_FCLOSE(statfd);
    return ret;
}


/**
 * virLXCCgroupGetMeminfo:
 * @meminfo: structure filled with memory statistics
 *
 * Obtains a cgroup handle via virCgroupNewSelf() and fills @meminfo
 * with the memory.stat counters, the memory total and the memory usage.
 * The swap total and swap usage are queried afterwards, but their
 * results are not checked, so a failure there does not fail the call.
 *
 * Returns 0 on success, -1 if the cgroup cannot be obtained or any of
 * the mandatory queries fails.
 */
int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo)
{
    virCgroupPtr self;
    int ret = -1;

    if (virCgroupNewSelf(&self) < 0)
        return -1;

    /* Short-circuit evaluation stops at the first failing query. */
    if (!(virLXCCgroupGetMemStat(self, meminfo) < 0 ||
          virLXCCgroupGetMemTotal(self, meminfo) < 0 ||
          virLXCCgroupGetMemUsage(self, meminfo) < 0)) {
        /* Swap figures are best effort: return values are ignored. */
        virLXCCgroupGetMemSwapTotal(self, meminfo);
        virLXCCgroupGetMemSwapUsage(self, meminfo);

        ret = 0;
    }

    virCgroupFree(&self);
    return ret;
}



309 310 311 312 313 314 315 316 317 318
/*
 * One device entry described by a type character and a major/minor
 * number pair.  A type of 0 is used in this file as the terminator of
 * a table of such entries.
 */
typedef struct _virLXCCgroupDevicePolicy {
    char type;  /* device type character, e.g. 'c' */
    int major;  /* device major number */
    int minor;  /* device minor number */
} virLXCCgroupDevicePolicy;

typedef virLXCCgroupDevicePolicy *virLXCCgroupDevicePolicyPtr;


319
int
320
virLXCSetupHostUSBDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
321 322 323 324 325 326
                               const char *path,
                               void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
327
    if (virCgroupAllowDevicePath(cgroup, path,
328
                                 VIR_CGROUP_DEVICE_RWM) < 0)
329 330 331 332 333 334 335
        return -1;

    return 0;
}


int
336
virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
337 338 339 340 341 342
                                  const char *path,
                                  void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
343
    if (virCgroupDenyDevicePath(cgroup, path,
344
                                VIR_CGROUP_DEVICE_RWM) < 0)
345 346 347 348 349
        return -1;

    return 0;
}

350 351 352 353 354 355 356 357 358 359 360 361 362 363

/**
 * virLXCCgroupSetupDeviceACL:
 * @def: domain definition listing disks, filesystems and host devices
 * @cgroup: cgroup whose device access list is configured
 *
 * Builds the device whitelist of @cgroup: all devices are denied first,
 * then the following are allowed in order:
 *   - a fixed table of character devices (null, zero, full, random,
 *     urandom, tty, ptmx, fuse) with read/write/mknod access;
 *   - every disk whose source is a block device, read-only or
 *     read-write according to the disk, plus mknod;
 *   - every filesystem of block type, read-only or read-write;
 *   - every non-missing USB host device (all of its device files) and
 *     every storage / misc capability host device;
 *   - the whole LXC_DEV_MAJ_PTY character major.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    int ret = -1;
    size_t i;
    /* Read-only table, terminated by an entry whose type is 0. */
    static const virLXCCgroupDevicePolicy devices[] = {
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX},
        {'c', LXC_DEV_MAJ_FUSE, LXC_DEV_MIN_FUSE},
        {0,   0, 0}};

    if (virCgroupDenyAllDevices(cgroup) < 0)
        goto cleanup;

    for (i = 0; devices[i].type != 0; i++) {
        const virLXCCgroupDevicePolicy *dev = &devices[i];
        if (virCgroupAllowDevice(cgroup,
                                 dev->type,
                                 dev->major,
                                 dev->minor,
                                 VIR_CGROUP_DEVICE_RWM) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Allowing any disk block devs");
    for (i = 0; i < def->ndisks; i++) {
        if (!virDomainDiskSourceIsBlockType(def->disks[i]))
            continue;

        if (virCgroupAllowDevicePath(cgroup,
                                     virDomainDiskGetSource(def->disks[i]),
                                     (def->disks[i]->readonly ?
                                      VIR_CGROUP_DEVICE_READ :
                                      VIR_CGROUP_DEVICE_RW) |
                                     VIR_CGROUP_DEVICE_MKNOD) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Allowing any filesystem block devs");
    for (i = 0; i < def->nfss; i++) {
        if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_BLOCK)
            continue;

        if (virCgroupAllowDevicePath(cgroup,
                                     def->fss[i]->src,
                                     def->fss[i]->readonly ?
                                     VIR_CGROUP_DEVICE_READ :
                                     VIR_CGROUP_DEVICE_RW) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Allowing any hostdev block devs");
    for (i = 0; i < def->nhostdevs; i++) {
        virDomainHostdevDefPtr hostdev = def->hostdevs[i];
        virUSBDevicePtr usb;

        switch (hostdev->mode) {
        case VIR_DOMAIN_HOSTDEV_MODE_SUBSYS:
            /* Only USB subsystem devices that are present are handled;
             * 'continue' moves on to the next host device. */
            if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
                continue;
            if (hostdev->missing)
                continue;

            if ((usb = virUSBDeviceNew(hostdev->source.subsys.u.usb.bus,
                                       hostdev->source.subsys.u.usb.device,
                                       NULL)) == NULL)
                goto cleanup;

            if (virUSBDeviceFileIterate(usb, virLXCSetupHostUSBDeviceCgroup,
                                        cgroup) < 0) {
                virUSBDeviceFree(usb);
                goto cleanup;
            }
            virUSBDeviceFree(usb);
            break;
        case VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES:
            switch (hostdev->source.caps.type) {
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.storage.block,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD) < 0)
                    goto cleanup;
                break;
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.misc.chardev,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD) < 0)
                    goto cleanup;
                break;
            default:
                break;
            }
            /* Explicit break: do not rely on falling into 'default'. */
            break;
        default:
            break;
        }
    }

    if (virCgroupAllowDeviceMajor(cgroup, 'c', LXC_DEV_MAJ_PTY,
                                  VIR_CGROUP_DEVICE_RWM) < 0)
        goto cleanup;

    VIR_DEBUG("Device whitelist complete");

    ret = 0;
 cleanup:
    return ret;
}


467
/**
 * virLXCCgroupCreate:
 * @def: domain definition (name, uuid, resource partition, id maps)
 *
 * Creates the machine cgroup for the container described by @def.  The
 * resource partition must start with '/'.  When @def carries a uid map,
 * ownership of the cgroup is changed to the target of the first uid and
 * gid map entries.
 *
 * NOTE(review): @def->resource is dereferenced without a NULL check and
 * gidmap[0] is read whenever uidmap is set; presumably the caller
 * guarantees both - confirm.
 *
 * Returns the new cgroup, or NULL on failure.
 */
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def)
{
    virCgroupPtr machineGroup = NULL;
    const char *partition = def->resource->partition;

    if (partition[0] != '/') {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Resource partition '%s' must start with '/'"),
                       partition);
        return NULL;
    }

    /*
     * XXX
     * We should pass the PID of the LXC init process
     * not ourselves, but this requires some more
     * refactoring. We should also pass the root dir
     */
    if (virCgroupNewMachine(def->name,
                            "lxc",
                            true,
                            def->uuid,
                            NULL,
                            getpid(),
                            true,
                            partition,
                            -1,
                            &machineGroup) < 0)
        return machineGroup;

    /* setup control group permissions for user namespace */
    if (def->idmap.uidmap &&
        virCgroupSetOwner(machineGroup,
                          def->idmap.uidmap[0].target,
                          def->idmap.gidmap[0].target,
                          (1 << VIR_CGROUP_CONTROLLER_SYSTEMD)) < 0) {
        virCgroupFree(&machineGroup);
        return NULL;
    }

    return machineGroup;
}


int virLXCCgroupSetup(virDomainDefPtr def,
514 515
                      virCgroupPtr cgroup,
                      virBitmapPtr nodemask)
516 517 518
{
    int ret = -1;

519 520 521
    if (virLXCCgroupSetupCpuTune(def, cgroup) < 0)
        goto cleanup;

522 523 524
    if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask) < 0)
        goto cleanup;

525 526 527 528 529 530 531 532 533
    if (virLXCCgroupSetupBlkioTune(def, cgroup) < 0)
        goto cleanup;

    if (virLXCCgroupSetupMemTune(def, cgroup) < 0)
        goto cleanup;

    if (virLXCCgroupSetupDeviceACL(def, cgroup) < 0)
        goto cleanup;

534 535
    ret = 0;

536
 cleanup:
537
    return ret;
538
}