/*
 * Copyright (C) 2010-2014 Red Hat, Inc.
 * Copyright IBM Corp. 2008
 *
 * lxc_cgroup.c: LXC cgroup helpers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#include "lxc_cgroup.h"
#include "lxc_container.h"
26
#include "domain_cgroup.h"
27
#include "virfile.h"
28
#include "virerror.h"
29
#include "virlog.h"
30
#include "viralloc.h"
31
#include "virstring.h"
32
#include "virsystemd.h"
J
Ján Tomko 已提交
33
#include "virutil.h"
34 35 36

#define VIR_FROM_THIS VIR_FROM_LXC

37 38
VIR_LOG_INIT("lxc.lxc_cgroup");

39 40 41
/*
 * Apply the CPU tuning settings of @def to @cgroup.
 *
 * If CPU shares were specified, they are applied first and
 * def->cputune.shares is overwritten with the value the setup helper
 * hands back through its output argument. The period/quota pair is
 * applied afterwards in every case.
 *
 * Returns -1 if applying the shares fails, otherwise the result of
 * virCgroupSetupCpuPeriodQuota().
 */
static int virLXCCgroupSetupCpuTune(virDomainDefPtr def,
                                    virCgroupPtr cgroup)
{
    if (def->cputune.sharesSpecified) {
        unsigned long long applied;
        int rc = virCgroupSetupCpuShares(cgroup, def->cputune.shares, &applied);

        if (rc < 0)
            return -1;

        /* Record the value reported back by the helper. */
        def->cputune.shares = applied;
    }

    return virCgroupSetupCpuPeriodQuota(cgroup,
                                        def->cputune.period,
                                        def->cputune.quota);
}


54 55 56 57
/*
 * Apply the cpuset-related tuning of @def to @cgroup.
 *
 * @def:      domain definition providing the CPU mask and NUMA tuning
 * @cgroup:   cgroup to configure
 * @nodemask: node set passed through to
 *            virDomainNumatuneMaybeFormatNodeset()
 *
 * The CPU mask is applied unless the placement mode is automatic or no
 * mask is set. The memory node set is written to the cpuset controller
 * only when the NUMA memory mode is strict; if the mode cannot be
 * queried or is anything else, cpuset.mems is left untouched.
 *
 * Returns 0 on success (including the "nothing to do" cases), -1 on error.
 */
static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def,
                                       virCgroupPtr cgroup,
                                       virBitmapPtr nodemask)
{
    g_autofree char *mask = NULL;
    virDomainNumatuneMemMode mode;

    if (def->placement_mode != VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO &&
        def->cpumask &&
        virCgroupSetupCpusetCpus(cgroup, def->cpumask) < 0) {
        return -1;
    }

    /* Only a strict memory policy is enforced via cpuset.mems; every
     * other mode must not restrict the allowed memory nodes here.
     * The short-circuit keeps @mode from being read when the query
     * itself failed. */
    if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 ||
        mode != VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        return 0;
    }

    if (virDomainNumatuneMaybeFormatNodeset(def->numa, nodemask,
                                            &mask, -1) < 0)
        return -1;

    /* @mask stays NULL when there is no node set to apply. */
    if (mask && virCgroupSetCpusetMems(cgroup, mask) < 0)
        return -1;

    return 0;
}


83 84 85
/*
 * Apply the block I/O tuning of @def to @cgroup by delegating to
 * virDomainCgroupSetupBlkio(); its return value is passed through.
 */
static int virLXCCgroupSetupBlkioTune(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    int rc = virDomainCgroupSetupBlkio(cgroup, def->blkio);

    return rc;
}


/*
 * Apply the memory settings of @def to @cgroup.
 *
 * The initial memory size of the domain is set on the cgroup first,
 * then the remaining memory tunables in def->mem are applied.
 *
 * Returns -1 if setting the memory size fails, otherwise the result of
 * virDomainCgroupSetupMemtune().
 */
static int virLXCCgroupSetupMemTune(virDomainDefPtr def,
                                    virCgroupPtr cgroup)
{
    int rc = virCgroupSetMemory(cgroup, virDomainDefGetMemoryInitial(def));

    if (rc >= 0)
        rc = virDomainCgroupSetupMemtune(cgroup, def->mem);
    else
        rc = -1;

    return rc;
}


100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
/*
 * Query the memory+swap usage of @cgroup and store it in
 * meminfo->swapusage. Returns the result of virCgroupGetMemSwapUsage().
 */
static int virLXCCgroupGetMemSwapUsage(virCgroupPtr cgroup,
                                       virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapUsage(cgroup, &meminfo->swapusage);

    return rc;
}


/*
 * Query the memory+swap hard limit of @cgroup and store it in
 * meminfo->swaptotal. Returns the result of
 * virCgroupGetMemSwapHardLimit().
 */
static int virLXCCgroupGetMemSwapTotal(virCgroupPtr cgroup,
                                       virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemSwapHardLimit(cgroup, &meminfo->swaptotal);

    return rc;
}


/*
 * Query the memory usage of @cgroup and store it in meminfo->memusage.
 *
 * meminfo->memusage is written in every case. The local is
 * zero-initialised so that a failed query stores 0 rather than an
 * indeterminate value (nothing visible here guarantees that
 * virCgroupGetMemoryUsage() writes its output argument on failure).
 *
 * Returns the result of virCgroupGetMemoryUsage().
 */
static int virLXCCgroupGetMemUsage(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    unsigned long memUsage = 0;
    int ret = virCgroupGetMemoryUsage(cgroup, &memUsage);

    meminfo->memusage = (unsigned long long)memUsage;

    return ret;
}


/*
 * Query the memory hard limit of @cgroup and store it in
 * meminfo->memtotal. Returns the result of
 * virCgroupGetMemoryHardLimit().
 */
static int virLXCCgroupGetMemTotal(virCgroupPtr cgroup,
                                   virLXCMeminfoPtr meminfo)
{
    int rc = virCgroupGetMemoryHardLimit(cgroup, &meminfo->memtotal);

    return rc;
}


/*
 * Fill the per-category memory statistics of @meminfo (cached,
 * inactive/active anon, inactive/active file, unevictable) from
 * @cgroup. Returns the result of virCgroupGetMemoryStat().
 */
static int virLXCCgroupGetMemStat(virCgroupPtr cgroup,
                                  virLXCMeminfoPtr meminfo)
{
    int rc;

    rc = virCgroupGetMemoryStat(cgroup,
                                &meminfo->cached,
                                &meminfo->inactive_anon,
                                &meminfo->active_anon,
                                &meminfo->inactive_file,
                                &meminfo->active_file,
                                &meminfo->unevictable);
    return rc;
}


/*
 * Populate @meminfo from the cgroup obtained via virCgroupNewSelf().
 *
 * The individual queries run in a fixed order (stat, total, usage,
 * swap total, swap usage) and stop at the first failure. The cgroup
 * handle is released before returning once it has been obtained.
 *
 * Returns 0 if every query succeeded, -1 otherwise.
 */
int virLXCCgroupGetMeminfo(virLXCMeminfoPtr meminfo)
{
    virCgroupPtr cgroup;
    int ret;

    if (virCgroupNewSelf(&cgroup) < 0)
        return -1;

    /* Short-circuit evaluation keeps the original query order and
     * skips the remaining queries after the first failure. */
    if (virLXCCgroupGetMemStat(cgroup, meminfo) < 0 ||
        virLXCCgroupGetMemTotal(cgroup, meminfo) < 0 ||
        virLXCCgroupGetMemUsage(cgroup, meminfo) < 0 ||
        virLXCCgroupGetMemSwapTotal(cgroup, meminfo) < 0 ||
        virLXCCgroupGetMemSwapUsage(cgroup, meminfo) < 0)
        ret = -1;
    else
        ret = 0;

    virCgroupFree(&cgroup);
    return ret;
}



178 179 180 181 182 183 184 185 186 187
/*
 * One device entry identified by a type character and a major/minor
 * number pair.
 */
typedef struct _virLXCCgroupDevicePolicy {
    char type;   /* device type character, e.g. 'c' */
    int major;   /* device major number */
    int minor;   /* device minor number */
} virLXCCgroupDevicePolicy;

typedef virLXCCgroupDevicePolicy *virLXCCgroupDevicePolicyPtr;


188
int
J
Ján Tomko 已提交
189
virLXCSetupHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
190 191 192 193 194 195
                               const char *path,
                               void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
196
    if (virCgroupAllowDevicePath(cgroup, path,
197
                                 VIR_CGROUP_DEVICE_RWM, false) < 0)
198 199 200 201 202 203 204
        return -1;

    return 0;
}


int
J
Ján Tomko 已提交
205
virLXCTeardownHostUSBDeviceCgroup(virUSBDevicePtr dev G_GNUC_UNUSED,
206 207 208 209 210 211
                                  const char *path,
                                  void *opaque)
{
    virCgroupPtr cgroup = opaque;

    VIR_DEBUG("Process path '%s' for USB device", path);
212
    if (virCgroupDenyDevicePath(cgroup, path,
213
                                VIR_CGROUP_DEVICE_RWM, false) < 0)
214 215 216 217 218
        return -1;

    return 0;
}

219 220 221 222

/*
 * Build the device access list of @cgroup for the container in @def.
 *
 * Steps, in order:
 *   1. deny all devices;
 *   2. if the MKNOD capability feature is switched on, allow mknod for
 *      all devices;
 *   3. allow RWM on the fixed table of basic character devices;
 *   4. allow local block devices backing the domain's disks;
 *   5. allow block devices backing block-type filesystems;
 *   6. allow host devices (USB subsystem devices, storage and misc
 *      capability devices);
 *   7. allow RWM on the PTY major.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def,
                                      virCgroupPtr cgroup)
{
    /* Read-only table, terminated by an entry whose type is 0. */
    static const virLXCCgroupDevicePolicy devices[] = {
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM},
        {'c', LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY},
        {'c', LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX},
        {'c', LXC_DEV_MAJ_FUSE, LXC_DEV_MIN_FUSE},
        {0,   0, 0}};
    int capMknod = def->caps_features[VIR_DOMAIN_PROCES_CAPS_FEATURE_MKNOD];
    size_t i;

    if (virCgroupDenyAllDevices(cgroup) < 0)
        return -1;

    /* white list mknod if CAP_MKNOD has to be kept */
    if (capMknod == VIR_TRISTATE_SWITCH_ON) {
        if (virCgroupAllowAllDevices(cgroup,
                                    VIR_CGROUP_DEVICE_MKNOD) < 0)
            return -1;
    }

    for (i = 0; devices[i].type != 0; i++) {
        const virLXCCgroupDevicePolicy *dev = &devices[i];

        if (virCgroupAllowDevice(cgroup,
                                 dev->type,
                                 dev->major,
                                 dev->minor,
                                 VIR_CGROUP_DEVICE_RWM) < 0)
            return -1;
    }

    VIR_DEBUG("Allowing any disk block devs");
    for (i = 0; i < def->ndisks; i++) {
        /* Only non-empty, local block sources map to a device node. */
        if (virStorageSourceIsEmpty(def->disks[i]->src) ||
            !virStorageSourceIsBlockLocal(def->disks[i]->src))
            continue;

        /* Read-only disks get read access only; mknod is always added. */
        if (virCgroupAllowDevicePath(cgroup,
                                     virDomainDiskGetSource(def->disks[i]),
                                     (def->disks[i]->src->readonly ?
                                      VIR_CGROUP_DEVICE_READ :
                                      VIR_CGROUP_DEVICE_RW) |
                                     VIR_CGROUP_DEVICE_MKNOD, false) < 0)
            return -1;
    }

    VIR_DEBUG("Allowing any filesystem block devs");
    for (i = 0; i < def->nfss; i++) {
        if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_BLOCK)
            continue;

        if (virCgroupAllowDevicePath(cgroup,
                                     def->fss[i]->src->path,
                                     def->fss[i]->readonly ?
                                     VIR_CGROUP_DEVICE_READ :
                                     VIR_CGROUP_DEVICE_RW, false) < 0)
            return -1;
    }

    VIR_DEBUG("Allowing any hostdev block devs");
    for (i = 0; i < def->nhostdevs; i++) {
        virDomainHostdevDefPtr hostdev = def->hostdevs[i];
        /* Only the address is taken here; it is dereferenced solely in
         * the USB subsystem case below. */
        virDomainHostdevSubsysUSBPtr usbsrc = &hostdev->source.subsys.u.usb;
        virUSBDevicePtr usb;

        switch (hostdev->mode) {
        case VIR_DOMAIN_HOSTDEV_MODE_SUBSYS:
            /* 'continue' moves on to the next host device. */
            if (hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
                continue;
            if (hostdev->missing)
                continue;

            if ((usb = virUSBDeviceNew(usbsrc->bus, usbsrc->device,
                                       NULL)) == NULL)
                return -1;

            if (virUSBDeviceFileIterate(usb, virLXCSetupHostUSBDeviceCgroup,
                                        cgroup) < 0) {
                virUSBDeviceFree(usb);
                return -1;
            }
            virUSBDeviceFree(usb);
            break;
        case VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES:
            switch (hostdev->source.caps.type) {
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.storage.block,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD, false) < 0)
                    return -1;
                break;
            case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC:
                if (virCgroupAllowDevicePath(cgroup,
                                             hostdev->source.caps.u.misc.chardev,
                                             VIR_CGROUP_DEVICE_RW |
                                             VIR_CGROUP_DEVICE_MKNOD, false) < 0)
                    return -1;
                break;
            default:
                break;
            }
            /* Explicit break: do not fall through into 'default'. */
            break;
        default:
            break;
        }
    }

    /* Minor -1 is presumably a wildcard covering every PTY minor;
     * confirm against virCgroupAllowDevice(). */
    if (virCgroupAllowDevice(cgroup, 'c', LXC_DEV_MAJ_PTY, -1,
                             VIR_CGROUP_DEVICE_RWM) < 0)
        return -1;

    VIR_DEBUG("Device whitelist complete");

    return 0;
}


342
/*
 * Create the machine cgroup for the LXC domain described by @def.
 *
 * @def:         domain definition (name, UUID, resource partition, idmap)
 * @initpid:     PID handed to virCgroupNewMachine()
 * @nnicindexes: number of entries in @nicindexes
 * @nicindexes:  NIC indexes handed to virCgroupNewMachine()
 *
 * The resource partition must start with '/'. When a UID map is
 * configured, ownership of the cgroup is changed to the first mapped
 * target UID/GID; if that fails the freshly created cgroup is freed.
 *
 * Returns the new cgroup, or NULL on failure.
 */
virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
                                pid_t initpid,
                                size_t nnicindexes,
                                int *nicindexes)
{
    g_autofree char *machineName = virLXCDomainGetMachineName(def, 0);
    virCgroupPtr cgroup = NULL;

    if (!machineName)
        return NULL;

    if (def->resource->partition[0] != '/') {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Resource partition '%s' must start with '/'"),
                       def->resource->partition);
        return NULL;
    }

    if (virCgroupNewMachine(machineName,
                            "lxc",
                            def->uuid,
                            NULL,
                            initpid,
                            true,
                            nnicindexes, nicindexes,
                            def->resource->partition,
                            -1,
                            0,
                            &cgroup) < 0)
        return cgroup;

    /* Without a UID map there is no ownership to adjust. */
    if (!def->idmap.uidmap)
        return cgroup;

    /* setup control group permissions for user namespace */
    if (virCgroupSetOwner(cgroup,
                          def->idmap.uidmap[0].target,
                          def->idmap.gidmap[0].target,
                          (1 << VIR_CGROUP_CONTROLLER_SYSTEMD)) < 0) {
        virCgroupFree(&cgroup);
        cgroup = NULL;
    }

    return cgroup;
}


int virLXCCgroupSetup(virDomainDefPtr def,
393 394
                      virCgroupPtr cgroup,
                      virBitmapPtr nodemask)
395
{
396
    if (virLXCCgroupSetupCpuTune(def, cgroup) < 0)
397
        return -1;
398

399
    if (virLXCCgroupSetupCpusetTune(def, cgroup, nodemask) < 0)
400
        return -1;
401

402
    if (virLXCCgroupSetupBlkioTune(def, cgroup) < 0)
403
        return -1;
404 405

    if (virLXCCgroupSetupMemTune(def, cgroup) < 0)
406
        return -1;
407 408

    if (virLXCCgroupSetupDeviceACL(def, cgroup) < 0)
409
        return -1;
410

411
    return 0;
412
}