/*
 * Copyright (C) 2010-2014 Red Hat, Inc.
 * Copyright IBM Corp. 2008
 *
 * lxc_cgroup.c: LXC cgroup helpers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#include "lxc_cgroup.h"
#include "lxc_container.h"
26
#include "virfile.h"
27
#include "virerror.h"
28
#include "virlog.h"
29
#include "viralloc.h"
30
#include "virstring.h"
31
#include "virsystemd.h"
32 33 34

/* NOTE(review): presumably the error domain picked up by virReportError()
 * calls in this file - confirm against virerror.h. */
#define VIR_FROM_THIS VIR_FROM_LXC

35 36
/* NOTE(review): presumably registers the log source name used by the
 * VIR_DEBUG() calls in this file - confirm against virlog.h. */
VIR_LOG_INIT("lxc.lxc_cgroup");

37 38 39 40
/*
 * Apply the CPU tuning values of @def to @cgroup.
 *
 * When def->cputune.sharesSpecified is set, the shares are written and
 * then read back, and the value read back replaces def->cputune.shares.
 * The CFS quota and the CFS period are each written only when non-zero.
 *
 * Returns 0 on success, -1 as soon as one of the cgroup calls fails.
 */
static int virLXCCgroupSetupCpuTune(virDomainDefPtr def,
                                    virCgroupPtr cgroup)
{
    if (def->cputune.sharesSpecified) {
        unsigned long long effective;

        /* Write first, then read back what the cgroup reports. */
        if (virCgroupSetCpuShares(cgroup, def->cputune.shares) < 0 ||
            virCgroupGetCpuShares(cgroup, &effective) < 0)
            return -1;

        def->cputune.shares = effective;
    }

    if (def->cputune.quota != 0) {
        if (virCgroupSetCpuCfsQuota(cgroup, def->cputune.quota) < 0)
            return -1;
    }

    if (def->cputune.period != 0) {
        if (virCgroupSetCpuCfsPeriod(cgroup, def->cputune.period) < 0)
            return -1;
    }

    return 0;
}


66 67 68 69
/*
 * Configure the cpuset controller of @cgroup from @def.
 *
 * cpuset.cpus: written from def->cpumask, unless CPU placement is
 * automatic or no cpumask is defined.
 *
 * cpuset.mems: written only when the domain's NUMA memory mode is
 * strict.  The node set is formatted from def->numa and @nodemask; if
 * the formatter leaves the string unset nothing is written.  A failure
 * to query the NUMA mode is treated as "nothing to configure" and is
 * not an error.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def,
                                       virCgroupPtr cgroup,
                                       virBitmapPtr nodemask)
{
    int ret = -1;
    char *mask = NULL;
    virDomainNumatuneMemMode mode;

    if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO &&
        def->cpumask) {
        if (!(mask = virBitmapFormat(def->cpumask)))
            return -1;

        if (virCgroupSetCpusetCpus(cgroup, mask) < 0)
            goto cleanup;
        /* free mask to make sure we won't use it in a wrong way later */
        VIR_FREE(mask);
    }

    /* cpuset.mems is a hard restriction, so it must only be applied for
     * strict mode; every other mode leaves the cgroup untouched. */
    if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 ||
        mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        ret = 0;
        goto cleanup;
    }

    if (virDomainNumatuneMaybeFormatNodeset(def->numa, nodemask,
                                            &mask, -1) < 0)
        goto cleanup;

    if (mask && virCgroupSetCpusetMems(cgroup, mask) < 0)
        goto cleanup;

    ret = 0;
 cleanup:
    VIR_FREE(mask);
    return ret;
}


105 106 107
/*
 * Apply the block I/O tuning values of @def to @cgroup.
 *
 * The global weight is written when it is non-zero.  Then, for every
 * entry of def->blkio.devices, each non-zero tunable (weight, read/write
 * IOPS, read/write bytes per second) is written for the device path and
 * immediately read back into the same field of the definition.
 *
 * Returns 0 on success, -1 as soon as one of the cgroup calls fails.
 */
static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    size_t idx;

    if (def->blkio.weight) {
        if (virCgroupSetBlkioWeight(cgroup, def->blkio.weight) < 0)
            return -1;
    }

    /* With zero devices the loop body simply never runs. */
    for (idx = 0; idx < def->blkio.ndevices; idx++) {
        virBlkioDevicePtr tune = &def->blkio.devices[idx];

        if (tune->weight) {
            if (virCgroupSetBlkioDeviceWeight(cgroup, tune->path,
                                              tune->weight) < 0 ||
                virCgroupGetBlkioDeviceWeight(cgroup, tune->path,
                                              &tune->weight) < 0)
                return -1;
        }

        if (tune->riops) {
            if (virCgroupSetBlkioDeviceReadIops(cgroup, tune->path,
                                                tune->riops) < 0 ||
                virCgroupGetBlkioDeviceReadIops(cgroup, tune->path,
                                                &tune->riops) < 0)
                return -1;
        }

        if (tune->wiops) {
            if (virCgroupSetBlkioDeviceWriteIops(cgroup, tune->path,
                                                 tune->wiops) < 0 ||
                virCgroupGetBlkioDeviceWriteIops(cgroup, tune->path,
                                                 &tune->wiops) < 0)
                return -1;
        }

        if (tune->rbps) {
            if (virCgroupSetBlkioDeviceReadBps(cgroup, tune->path,
                                               tune->rbps) < 0 ||
                virCgroupGetBlkioDeviceReadBps(cgroup, tune->path,
                                               &tune->rbps) < 0)
                return -1;
        }

        if (tune->wbps) {
            if (virCgroupSetBlkioDeviceWriteBps(cgroup, tune->path,
                                                tune->wbps) < 0 ||
                virCgroupGetBlkioDeviceWriteBps(cgroup, tune->path,
                                                &tune->wbps) < 0)
                return -1;
        }
    }

    return 0;
}


/*
 * Apply the memory tuning values of @def to @cgroup.
 *
 * The initial memory size of the domain is always written.  The hard
 * limit, the soft limit and the swap hard limit are each written only
 * when virMemoryLimitIsSet() reports the corresponding value as set.
 *
 * Returns 0 on success, -1 as soon as one of the cgroup calls fails.
 */
static int virLXCCgroupSetupMemTune(virDomainDefPtr def,
                                    virCgroupPtr cgroup)
{
    if (virCgroupSetMemory(cgroup, virDomainDefGetMemoryInitial(def)) < 0)
        return -1;

    if (virMemoryLimitIsSet(def->mem.hard_limit) &&
        virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit) < 0)
        return -1;

    if (virMemoryLimitIsSet(def->mem.soft_limit) &&
        virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit) < 0)
        return -1;

    if (virMemoryLimitIsSet(def->mem.swap_hard_limit) &&
        virCgroupSetMemSwapHardLimit(cgroup, def->mem.swap_hard_limit) < 0)
        return -1;

    return 0;
}


185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
/*
 * Fill meminfo->swapusage through virCgroupGetMemSwapUsage() for @cgroup.
 *
 * Returns the result of that call unchanged.
 */
static int virLXCCgroupGetMemSwapUsage(virCgroupPtr cgroup,
                                       virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapUsage(cgroup, &meminfo->swapusage);

    return rc;
}


/*
 * Fill meminfo->swaptotal with the swap hard limit of @cgroup, as
 * obtained from virCgroupGetMemSwapHardLimit().
 *
 * Returns the result of that call unchanged.
 */
static int virLXCCgroupGetMemSwapTotal(virCgroupPtr cgroup,
                                       virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapHardLimit(cgroup, &meminfo->swaptotal);

    return rc;
}


/*
 * Fill meminfo->memusage with the memory usage of @cgroup, as obtained
 * from virCgroupGetMemoryUsage().
 *
 * The value is fetched into an unsigned long and widened to the
 * unsigned long long field.  meminfo->memusage is assigned even when the
 * query fails; in that case it receives 0 unless the callee stored
 * something before failing.
 *
 * Returns the result of virCgroupGetMemoryUsage() unchanged.
 */
static int virLXCCgroupGetMemUsage(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    int ret;
    /* Initialised so that a failed query never makes us read an
     * indeterminate value below. */
    unsigned long memUsage = 0;

    ret = virCgroupGetMemoryUsage(cgroup, &memUsage);
    meminfo->memusage = (unsigned long long)memUsage;

    return ret;
}


/*
 * Fill meminfo->memtotal with the memory hard limit of @cgroup, as
 * obtained from virCgroupGetMemoryHardLimit().
 *
 * Returns the result of that call unchanged.
 */
static int virLXCCgroupGetMemTotal(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemoryHardLimit(cgroup, &meminfo->memtotal);

    return rc;
}


/*
 * Fill the cached, inactive_anon, active_anon, inactive_file,
 * active_file and unevictable fields of @meminfo through a single
 * virCgroupGetMemoryStat() call on @cgroup.
 *
 * Returns the result of that call unchanged.
 */
static int virLXCCgroupGetMemStat(virCgroupPtr cgroup,
                                  virLXCMeminfoPtr meminfo)
{
    int rc;

    rc = virCgroupGetMemoryStat(cgroup,
                                &meminfo->cached,
                                &meminfo->inactive_anon,
                                &meminfo->active_anon,
                                &meminfo->inactive_file,
                                &meminfo->active_file,
                                &meminfo->unevictable);

    return rc;
}


/*
 * Populate @meminfo from the cgroup obtained via virCgroupNewSelf().
 *
 * The helpers run in a fixed order (memory stat, memory total, memory
 * usage, swap total, swap usage) and the sequence stops at the first one
 * that fails.  The cgroup handle is released before returning whenever
 * it was obtained.
 *
 * Returns 0 when every helper succeeded, -1 otherwise.
 */
int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo)
{
    virCgroupPtr self;
    int ret = -1;

    if (virCgroupNewSelf(&self) < 0)
        return -1;

    /* && short-circuits, so a failing helper stops the chain. */
    if (virLXCCgroupGetMemStat(self, meminfo) >= 0 &&
        virLXCCgroupGetMemTotal(self, meminfo) >= 0 &&
        virLXCCgroupGetMemUsage(self, meminfo) >= 0 &&
        virLXCCgroupGetMemSwapTotal(self, meminfo) >= 0 &&
        virLXCCgroupGetMemSwapUsage(self, meminfo) >= 0)
        ret = 0;

    virCgroupFree(&self);
    return ret;
}



263 264 265 266 267 268 269 270 271 272
/*
 * One entry of the static device allow-list built in
 * virLXCCgroupSetupDeviceACL().
 */
typedef struct _virLXCCgroupDevicePolicy {
    char type;  /* device type passed to virCgroupAllowDevice() ('c' in the
                 * table); 0 marks the end of the table */
    int major;  /* device major number */
    int minor;  /* device minor number */
} virLXCCgroupDevicePolicy, *virLXCCgroupDevicePolicyPtr;


273
int
J
Ján Tomko 已提交
274
virLXCSetupHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
275 276 277 278 279 280
                               const char *path,
                               void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
281
    if (virCgroupAllowDevicePath(cgroup, path,
282
                                 VIR_CGROUP_DEVICE_RWM, false) < 0)
283 284 285 286 287 288 289
        return -1;

    return 0;
}


int
J
Ján Tomko 已提交
290
virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
291 292 293 294 295 296
                                  const char *path,
                                  void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
297
    if (virCgroupDenyDevicePath(cgroup, path,
298
                                VIR_CGROUP_DEVICE_RWM, false) < 0)
299 300 301 302 303
        return -1;

    return 0;
}

304 305 306 307

/*
 * Build the device access list of @cgroup for the container in @def.
 *
 * Steps, in order:
 *   1. deny all devices;
 *   2. if the mknod capability feature is switched on, allow mknod for
 *      all devices;
 *   3. allow read/write/mknod on a fixed table of character devices;
 *   4. allow every non-empty, local block-backed disk (read-only or
 *      read/write depending on the disk source, plus mknod);
 *   5. allow every filesystem of block type (read-only or read/write);
 *   6. allow host devices: USB subsystem devices that are not marked
 *      missing, and storage/misc capability devices (read/write/mknod);
 *   7. allow read/write/mknod on character devices with major
 *      LXC_DEV_MAJ_PTY and minor -1.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    int capMknod = def->caps_features[VIR_DOMAIN_PROCES_CAPS_FEATURE_MKNOD];
    size_t n;
    virLXCCgroupDevicePolicyPtr entry;
    /* Table is terminated by an entry whose type is 0. */
    static virLXCCgroupDevicePolicy devices[] = {
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX},
        {'c', LXC_DEV_MAJ_FUSE, LXC_DEV_MIN_FUSE},
        {0,   0, 0}};

    if (virCgroupDenyAllDevices(cgroup) < 0)
        return -1;

    /* white list mknod if CAP_MKNOD has to be kept */
    if (capMknod == VIR_TRISTATE_SWITCH_ON &&
        virCgroupAllowAllDevices(cgroup, VIR_CGROUP_DEVICE_MKNOD) < 0)
        return -1;

    for (entry = devices; entry->type != 0; entry++) {
        if (virCgroupAllowDevice(cgroup,
                                 entry->type,
                                 entry->major,
                                 entry->minor,
                                 VIR_CGROUP_DEVICE_RWM) < 0)
            return -1;
    }

    VIR_DEBUG("Allowing any disk block devs");
    for (n = 0; n < def->ndisks; n++) {
        int perms;

        /* Only non-empty sources backed by a local block device count. */
        if (virStorageSourceIsEmpty(def->disks[n]->src) ||
            !virStorageSourceIsBlockLocal(def->disks[n]->src))
            continue;

        if (def->disks[n]->src->readonly)
            perms = VIR_CGROUP_DEVICE_READ;
        else
            perms = VIR_CGROUP_DEVICE_RW;
        perms |= VIR_CGROUP_DEVICE_MKNOD;

        if (virCgroupAllowDevicePath(cgroup,
                                     virDomainDiskGetSource(def->disks[n]),
                                     perms, false) < 0)
            return -1;
    }

    VIR_DEBUG("Allowing any filesystem block devs");
    for (n = 0; n < def->nfss; n++) {
        int perms;

        if (def->fss[n]->type != VIR_DOMAIN_FS_TYPE_BLOCK)
            continue;

        if (def->fss[n]->readonly)
            perms = VIR_CGROUP_DEVICE_READ;
        else
            perms = VIR_CGROUP_DEVICE_RW;

        if (virCgroupAllowDevicePath(cgroup,
                                     def->fss[n]->src->path,
                                     perms, false) < 0)
            return -1;
    }

    VIR_DEBUG("Allowing any hostdev block devs");
    for (n = 0; n < def->nhostdevs; n++) {
        virDomainHostdevDefPtr hostdev = def->hostdevs[n];
        virDomainHostdevSubsysUSBPtr usbsrc = &hostdev->source.subsys.u.usb;
        virUSBDevicePtr usb;
        int rc;

        switch (hostdev->mode) {
        case VIR_DOMAIN_HOSTDEV_MODE_SUBSYS:
            /* Only present USB devices are handled here. */
            if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB ||
                hostdev->missing)
                continue;

            usb = virUSBDeviceNew(usbsrc->bus, usbsrc->device, NULL);
            if (usb == NULL)
                return -1;

            /* The USB handle is released whether or not iteration worked. */
            rc = virUSBDeviceFileIterate(usb, virLXCSetupHostUSBDeviceCgroup,
                                         cgroup);
            virUSBDeviceFree(usb);
            if (rc < 0)
                return -1;
            break;

        case VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES:
            switch (hostdev->source.caps.type) {
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.storage.block,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD, false) < 0)
                    return -1;
                break;

            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.misc.chardev,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD, false) < 0)
                    return -1;
                break;

            default:
                break;
            }
            break;

        default:
            break;
        }
    }

    /* NOTE(review): minor -1 presumably acts as a wildcard for all minors -
     * confirm against virCgroupAllowDevice(). */
    if (virCgroupAllowDevice(cgroup, 'c', LXC_DEV_MAJ_PTY, -1,
                             VIR_CGROUP_DEVICE_RWM) < 0)
        return -1;

    VIR_DEBUG("Device whitelist complete");

    return 0;
}


430
/*
 * Create the machine cgroup for the container described by @def.
 *
 * @def:         domain definition; def->resource->partition must start
 *               with '/'
 * @initpid:     passed through to virCgroupNewMachine()
 * @nnicindexes: number of entries in @nicindexes
 * @nicindexes:  passed through to virCgroupNewMachine()
 *
 * When the definition carries a UID map, ownership of the new cgroup is
 * set to the target of the first UID and GID map entries; if that fails
 * the cgroup is freed again.
 *
 * Returns the new cgroup, or NULL on failure.  The machine name string
 * obtained for the call is always freed before returning.
 */
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
                                pid_t initpid,
                                size_t nnicindexes,
                                int *nicindexes)
{
    virCgroupPtr group = NULL;
    char *name = virLXCDomainGetMachineName(def, 0);

    if (name == NULL)
        goto cleanup;

    if (def->resource->partition[0] != '/') {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Resource partition '%s' must start with '/'"),
                       def->resource->partition);
        goto cleanup;
    }

    if (virCgroupNewMachine(name,
                            "lxc",
                            def->uuid,
                            NULL,
                            initpid,
                            true,
                            nnicindexes, nicindexes,
                            def->resource->partition,
                            -1,
                            0,
                            &group) < 0)
        goto cleanup;

    /* setup control group permissions for user namespace */
    if (def->idmap.uidmap &&
        virCgroupSetOwner(group,
                          def->idmap.uidmap[0].target,
                          def->idmap.gidmap[0].target,
                          (1 << VIR_CGROUP_CONTROLLER_SYSTEMD)) < 0) {
        virCgroupFree(&group);
        group = NULL;
    }

 cleanup:
    VIR_FREE(name);

    return group;
}


int virLXCCgroupSetup(virDomainDefPtr def,
481 482
                      virCgroupPtr cgroup,
                      virBitmapPtr nodemask)
483 484 485
{
    int ret = -1;

486 487 488
    if (virLXCCgroupSetupCpuTune(def, cgroup) < 0)
        goto cleanup;

489 490 491
    if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask) < 0)
        goto cleanup;

492 493 494 495 496 497 498 499 500
    if (virLXCCgroupSetupBlkioTune(def, cgroup) < 0)
        goto cleanup;

    if (virLXCCgroupSetupMemTune(def, cgroup) < 0)
        goto cleanup;

    if (virLXCCgroupSetupDeviceACL(def, cgroup) < 0)
        goto cleanup;

501 502
    ret = 0;

503
 cleanup:
504
    return ret;
505
}