qemu_cgroup.c 39.4 KB
Newer Older
1 2 3
/*
 * qemu_cgroup.c: QEMU cgroup management
 *
 * Copyright (C) 2006-2015 Red Hat, Inc.
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

#include <config.h>

#include "qemu_cgroup.h"
27
#include "qemu_domain.h"
28
#include "qemu_process.h"
29
#include "vircgroup.h"
30
#include "virlog.h"
31
#include "viralloc.h"
32
#include "virerror.h"
33
#include "domain_audit.h"
34
#include "virscsi.h"
35
#include "virstring.h"
36
#include "virfile.h"
37
#include "virtypedparam.h"
38
#include "virnuma.h"
39 40 41

#define VIR_FROM_THIS VIR_FROM_QEMU

42 43
VIR_LOG_INIT("qemu.qemu_cgroup");

44 45 46 47
static const char *const defaultDeviceACL[] = {
    "/dev/null", "/dev/full", "/dev/zero",
    "/dev/random", "/dev/urandom",
    "/dev/ptmx", "/dev/kvm", "/dev/kqemu",
48
    "/dev/rtc", "/dev/hpet", "/dev/vfio/vfio",
49 50 51 52 53
    NULL,
};
#define DEVICE_PTY_MAJOR 136
#define DEVICE_SND_MAJOR 116

54 55 56 57 58
/*
 * Allow or deny access to one storage source in the devices cgroup of @vm.
 *
 * @vm: domain whose cgroup is updated
 * @src: storage source to process
 * @deny: true to deny read/write/mknod, false to allow access
 * @forceReadonly: when allowing, never add write permission
 *
 * Returns 0 without touching the cgroup when the devices controller is
 * not present, or when @src has no path or is not local storage.
 * Otherwise returns the result of the allow/deny call, except that a
 * failure caused by EACCES is reset and reported as 0.
 */
static int
qemuSetImageCgroupInternal(virDomainObjPtr vm,
                           virStorageSourcePtr src,
                           bool deny,
                           bool forceReadonly)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    const char *action = deny ? "deny" : "allow";
    int devPerms = VIR_CGROUP_DEVICE_READ;
    int rc;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    if (!(src->path && virStorageSourceIsLocalStorage(src))) {
        VIR_DEBUG("Not updating cgroups for disk path '%s', type: %s",
                  NULLSTR(src->path), virStorageTypeToString(src->type));
        return 0;
    }

    if (!deny) {
        /* write access only for writable sources not forced read-only */
        if (!(src->readonly || forceReadonly))
            devPerms |= VIR_CGROUP_DEVICE_WRITE;

        VIR_DEBUG("Allow path %s, perms: %s",
                  src->path, virCgroupGetDevicePermsString(devPerms));

        rc = virCgroupAllowDevicePath(priv->cgroup, src->path, devPerms);
    } else {
        devPerms |= VIR_CGROUP_DEVICE_WRITE | VIR_CGROUP_DEVICE_MKNOD;

        VIR_DEBUG("Deny path %s", src->path);

        rc = virCgroupDenyDevicePath(priv->cgroup, src->path, devPerms);
    }

    virDomainAuditCgroupPath(vm, priv->cgroup, action, src->path,
                             virCgroupGetDevicePermsString(devPerms),
                             rc == 0);

    /* Get this for root squash NFS */
    if (rc < 0 && virLastErrorIsSystemErrno(EACCES)) {
        VIR_DEBUG("Ignoring EACCES for %s", src->path);
        virResetLastError();
        rc = 0;
    }

    return rc;
}


108 109 110 111 112 113 114 115 116
/*
 * Allow (@deny false) or deny (@deny true) access to @src in the devices
 * cgroup of @vm, without forcing the source read-only.
 *
 * Returns whatever qemuSetImageCgroupInternal() returns.
 */
int
qemuSetImageCgroup(virDomainObjPtr vm,
                   virStorageSourcePtr src,
                   bool deny)
{
    const bool forceReadonly = false;

    return qemuSetImageCgroupInternal(vm, src, deny, forceReadonly);
}


117 118 119
/*
 * Allow access to every image in the backing chain of @disk.
 *
 * Only the top level image may get write permission; every backing
 * image below it is set up with read-only forced.
 *
 * Returns 0 on success, -1 as soon as one image fails.
 */
int
qemuSetupDiskCgroup(virDomainObjPtr vm,
                    virDomainDiskDefPtr disk)
{
    virStorageSourcePtr image = disk->src;
    bool isTop = true;

    while (image) {
        if (qemuSetImageCgroupInternal(vm, image, false, !isTop) < 0)
            return -1;

        /* everything below the first image is a backing store */
        isTop = false;
        image = image->backingStore;
    }

    return 0;
}


136 137 138
/*
 * Deny access to every image in the backing chain of @disk.
 *
 * Returns 0 on success, -1 as soon as one image fails (the remaining
 * images of the chain are then left untouched).
 */
int
qemuTeardownDiskCgroup(virDomainObjPtr vm,
                       virDomainDiskDefPtr disk)
{
    virStorageSourcePtr image = disk->src;

    while (image) {
        if (qemuSetImageCgroup(vm, image, true) < 0)
            return -1;

        image = image->backingStore;
    }

    return 0;
}

150

151
static int
152
qemuSetupChrSourceCgroup(virDomainDefPtr def ATTRIBUTE_UNUSED,
153
                         virDomainChrSourceDefPtr dev,
154
                         void *opaque)
155
{
156 157
    virDomainObjPtr vm = opaque;
    qemuDomainObjPrivatePtr priv = vm->privateData;
158
    int ret;
159

160
    if (dev->type != VIR_DOMAIN_CHR_TYPE_DEV)
161 162
        return 0;

163
    VIR_DEBUG("Process path '%s' for device", dev->data.file.path);
164

165 166
    ret = virCgroupAllowDevicePath(priv->cgroup, dev->data.file.path,
                                   VIR_CGROUP_DEVICE_RW);
167
    virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
168
                             dev->data.file.path, "rw", ret == 0);
169

170
    return ret;
171 172
}

173 174 175 176 177
/*
 * Per-device callback (passed to virDomainChrDefForeach() by
 * qemuSetupDevicesCgroup()): forwards the source of @dev to
 * qemuSetupChrSourceCgroup() and returns its result.
 */
static int
qemuSetupChardevCgroup(virDomainDefPtr def,
                       virDomainChrDefPtr dev,
                       void *opaque)
{
    virDomainChrSourceDefPtr chrSource = &dev->source;

    return qemuSetupChrSourceCgroup(def, chrSource, opaque);
}


/*
 * Set up the devices cgroup for a TPM device.
 *
 * For a passthrough TPM the character device source is handed to
 * qemuSetupChrSourceCgroup(); nothing is done for any other type.
 *
 * Returns 0 when nothing had to be done, otherwise the result of
 * qemuSetupChrSourceCgroup().
 */
static int
qemuSetupTPMCgroup(virDomainDefPtr def,
                   virDomainTPMDefPtr dev,
                   void *opaque)
{
    switch (dev->type) {
    case VIR_DOMAIN_TPM_TYPE_PASSTHROUGH:
        return qemuSetupChrSourceCgroup(def,
                                        &dev->data.passthrough.source,
                                        opaque);
    case VIR_DOMAIN_TPM_TYPE_LAST:
        break;
    }

    return 0;
}

201

202
static int
203
qemuSetupHostUSBDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED,
204 205
                             const char *path,
                             void *opaque)
206
{
207 208
    virDomainObjPtr vm = opaque;
    qemuDomainObjPrivatePtr priv = vm->privateData;
209
    int ret;
210 211

    VIR_DEBUG("Process path '%s' for USB device", path);
212 213 214
    ret = virCgroupAllowDevicePath(priv->cgroup, path,
                                   VIR_CGROUP_DEVICE_RW);
    virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path, "rw", ret == 0);
215

216
    return ret;
217 218
}

219
static int
220
qemuSetupHostSCSIDeviceCgroup(virSCSIDevicePtr dev ATTRIBUTE_UNUSED,
221 222 223 224 225
                              const char *path,
                              void *opaque)
{
    virDomainObjPtr vm = opaque;
    qemuDomainObjPrivatePtr priv = vm->privateData;
226
    int ret;
227 228 229

    VIR_DEBUG("Process path '%s' for SCSI device", path);

230 231 232 233
    ret = virCgroupAllowDevicePath(priv->cgroup, path,
                                   virSCSIDeviceGetReadonly(dev) ?
                                   VIR_CGROUP_DEVICE_READ :
                                   VIR_CGROUP_DEVICE_RW);
234 235

    virDomainAuditCgroupPath(vm, priv->cgroup, "allow", path,
236
                             virSCSIDeviceGetReadonly(dev) ? "r" : "rw", ret == 0);
237

238
    return ret;
239
}
240

241 242 243 244 245 246
/*
 * Allow the host device nodes needed by a hostdev in the devices cgroup
 * of @vm.
 *
 * Called for every hostdev; only subsystem-mode devices are acted on:
 *  - PCI with the VFIO backend: the IOMMU group device is allowed "rw"
 *  - USB (unless marked missing): every path reported by
 *    virUSBDeviceFileIterate() is allowed
 *  - SCSI host devices: every path reported by
 *    virSCSIDeviceFileIterate() is allowed; iSCSI is skipped
 *
 * Returns 0 on success or when nothing had to be done, -1 on failure.
 */
int
qemuSetupHostdevCGroup(virDomainObjPtr vm,
                       virDomainHostdevDefPtr dev)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainHostdevSubsysUSBPtr usbsrc = &dev->source.subsys.u.usb;
    virDomainHostdevSubsysPCIPtr pcisrc = &dev->source.subsys.u.pci;
    virDomainHostdevSubsysSCSIPtr scsisrc = &dev->source.subsys.u.scsi;
    virPCIDevicePtr pciDev = NULL;
    virUSBDevicePtr usbDev = NULL;
    virSCSIDevicePtr scsiDev = NULL;
    char *groupDev = NULL;
    int ret = -1;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    /* nothing has been allocated yet, so no cleanup is required */
    if (dev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS)
        return 0;

    switch (dev->source.subsys.type) {
    case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI: {
        int rc;

        if (pcisrc->backend != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO)
            break;

        if (!(pciDev = virPCIDeviceNew(pcisrc->addr.domain,
                                       pcisrc->addr.bus,
                                       pcisrc->addr.slot,
                                       pcisrc->addr.function)))
            goto cleanup;

        if (!(groupDev = virPCIDeviceGetIOMMUGroupDev(pciDev)))
            goto cleanup;

        VIR_DEBUG("Cgroup allow %s for PCI device assignment", groupDev);
        rc = virCgroupAllowDevicePath(priv->cgroup, groupDev,
                                      VIR_CGROUP_DEVICE_RW);
        virDomainAuditCgroupPath(vm, priv->cgroup,
                                 "allow", groupDev, "rw", rc == 0);
        if (rc < 0)
            goto cleanup;
        break;
    }

    case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB:
        /* NB: hostdev->missing wasn't previously checked in the
         * case of hotplug, only when starting a domain. Now it is
         * always checked, and the cgroup setup skipped if true.
         */
        if (dev->missing)
            break;

        if (!(usbDev = virUSBDeviceNew(usbsrc->bus, usbsrc->device, NULL)))
            goto cleanup;

        /* oddly, qemuSetupHostUSBDeviceCgroup doesn't ever
         * reference the usb object we just created
         */
        if (virUSBDeviceFileIterate(usbDev, qemuSetupHostUSBDeviceCgroup,
                                    vm) < 0)
            goto cleanup;
        break;

    case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI: {
        virDomainHostdevSubsysSCSIHostPtr scsihostsrc;

        if (scsisrc->protocol ==
            VIR_DOMAIN_HOSTDEV_SCSI_PROTOCOL_TYPE_ISCSI) {
            /* Follow qemuSetupDiskCgroup() and qemuSetImageCgroupInternal()
             * which does nothing for non local storage
             */
            VIR_DEBUG("Not updating cgroups for hostdev iSCSI path '%s'",
                      scsisrc->u.iscsi.path);
            break;
        }

        scsihostsrc = &scsisrc->u.host;
        if (!(scsiDev = virSCSIDeviceNew(NULL,
                                         scsihostsrc->adapter,
                                         scsihostsrc->bus,
                                         scsihostsrc->target,
                                         scsihostsrc->unit,
                                         dev->readonly,
                                         dev->shareable)))
            goto cleanup;

        if (virSCSIDeviceFileIterate(scsiDev,
                                     qemuSetupHostSCSIDeviceCgroup,
                                     vm) < 0)
            goto cleanup;
        break;
    }

    default:
        break;
    }

    ret = 0;

 cleanup:
    virPCIDeviceFree(pciDev);
    virUSBDeviceFree(usbDev);
    virSCSIDeviceFree(scsiDev);
    VIR_FREE(groupDev);
    return ret;
}

/*
 * Revoke the device cgroup access that was granted for a hostdev.
 *
 * Called for every hostdev, but only subsystem-mode PCI devices using
 * the VFIO backend are acted on: their IOMMU group device is denied
 * "rwm".  Nothing is torn down for USB or any other device type.
 *
 * Returns 0 on success or when nothing had to be done, -1 on failure.
 */
int
qemuTeardownHostdevCgroup(virDomainObjPtr vm,
                          virDomainHostdevDefPtr dev)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainHostdevSubsysPCIPtr pcisrc = &dev->source.subsys.u.pci;
    virPCIDevicePtr pciDev = NULL;
    char *groupDev = NULL;
    int ret = -1;
    int rc;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    /* nothing to tear down for USB or anything else that is not a
     * VFIO-assigned PCI device */
    if (dev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
        dev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI ||
        pcisrc->backend != VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO)
        return 0;

    if (!(pciDev = virPCIDeviceNew(pcisrc->addr.domain,
                                   pcisrc->addr.bus,
                                   pcisrc->addr.slot,
                                   pcisrc->addr.function)))
        goto cleanup;

    if (!(groupDev = virPCIDeviceGetIOMMUGroupDev(pciDev)))
        goto cleanup;

    VIR_DEBUG("Cgroup deny %s for PCI device assignment", groupDev);
    rc = virCgroupDenyDevicePath(priv->cgroup, groupDev,
                                 VIR_CGROUP_DEVICE_RWM);
    virDomainAuditCgroupPath(vm, priv->cgroup,
                             "deny", groupDev, "rwm", rc == 0);
    if (rc < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virPCIDeviceFree(pciDev);
    VIR_FREE(groupDev);
    return ret;
}

413 414 415 416
/*
 * Apply the block I/O tuning from the domain definition to the cgroup.
 *
 * Without the blkio controller this succeeds only when the definition
 * requests neither a global weight nor per-device settings.  With it,
 * the global weight and each non-zero per-device weight, read/write
 * iops and read/write bps value are written.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuSetupBlkioCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    size_t idx;

    if (!virCgroupHasController(priv->cgroup,
                                VIR_CGROUP_CONTROLLER_BLKIO)) {
        if (!vm->def->blkio.weight && !vm->def->blkio.ndevices)
            return 0;

        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Block I/O tuning is not available on this host"));
        return -1;
    }

    if (vm->def->blkio.weight != 0 &&
        virCgroupSetBlkioWeight(priv->cgroup, vm->def->blkio.weight) < 0)
        return -1;

    for (idx = 0; idx < vm->def->blkio.ndevices; idx++) {
        virBlkioDevicePtr tune = &vm->def->blkio.devices[idx];

        /* a zero value means "not configured" and is skipped; the
         * first setter that fails aborts the whole setup */
        if ((tune->weight &&
             virCgroupSetBlkioDeviceWeight(priv->cgroup, tune->path,
                                           tune->weight) < 0) ||
            (tune->riops &&
             virCgroupSetBlkioDeviceReadIops(priv->cgroup, tune->path,
                                             tune->riops) < 0) ||
            (tune->wiops &&
             virCgroupSetBlkioDeviceWriteIops(priv->cgroup, tune->path,
                                              tune->wiops) < 0) ||
            (tune->rbps &&
             virCgroupSetBlkioDeviceReadBps(priv->cgroup, tune->path,
                                            tune->rbps) < 0) ||
            (tune->wbps &&
             virCgroupSetBlkioDeviceWriteBps(priv->cgroup, tune->path,
                                             tune->wbps) < 0))
            return -1;
    }

    return 0;
}

467

468 469 470 471 472
/*
 * Apply the memory limits from the domain definition to the cgroup.
 *
 * Without the memory controller this succeeds only when none of the
 * hard, soft and swap hard limits is set.  With it, each limit that is
 * set is written to the cgroup.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuSetupMemoryCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_MEMORY)) {
        if (!(virMemoryLimitIsSet(vm->def->mem.hard_limit) ||
              virMemoryLimitIsSet(vm->def->mem.soft_limit) ||
              virMemoryLimitIsSet(vm->def->mem.swap_hard_limit)))
            return 0;

        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Memory cgroup is not available on this host"));
        return -1;
    }

    if (virMemoryLimitIsSet(vm->def->mem.hard_limit) &&
        virCgroupSetMemoryHardLimit(priv->cgroup,
                                    vm->def->mem.hard_limit) < 0)
        return -1;

    if (virMemoryLimitIsSet(vm->def->mem.soft_limit) &&
        virCgroupSetMemorySoftLimit(priv->cgroup,
                                    vm->def->mem.soft_limit) < 0)
        return -1;

    if (virMemoryLimitIsSet(vm->def->mem.swap_hard_limit) &&
        virCgroupSetMemSwapHardLimit(priv->cgroup,
                                     vm->def->mem.swap_hard_limit) < 0)
        return -1;

    return 0;
}


501 502 503 504 505 506 507
/*
 * Build the device whitelist of the domain in the devices cgroup.
 *
 * All devices are denied first; then, in this order, access is allowed
 * for: every disk, the pty major, the sound major (only when the
 * definition and driver configuration permit host audio), every
 * existing entry of the device ACL (configured list or
 * defaultDeviceACL), character devices, the TPM, host devices and
 * RNG devices using the "random" backend.
 *
 * Returns 0 when the devices controller is absent, when the initial
 * deny fails with EPERM (whitelisting is then skipped with a warning)
 * or on success; -1 on failure.
 */
static int
qemuSetupDevicesCgroup(virQEMUDriverPtr driver,
                       virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = NULL;
    const char *const *acl = NULL;
    const char *const *entry;
    bool allowSound = false;
    int rc = -1;
    int ret = -1;
    size_t i;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES))
        return 0;

    rc = virCgroupDenyAllDevices(priv->cgroup);
    virDomainAuditCgroup(vm, priv->cgroup, "deny", "all", rc == 0);
    if (rc < 0) {
        if (!virLastErrorIsSystemErrno(EPERM))
            goto cleanup;

        virResetLastError();
        VIR_WARN("Group devices ACL is not accessible, disabling whitelisting");
        return 0;
    }

    for (i = 0; i < vm->def->ndisks; i++) {
        if (qemuSetupDiskCgroup(vm, vm->def->disks[i]) < 0)
            goto cleanup;
    }

    rc = virCgroupAllowDeviceMajor(priv->cgroup, 'c', DEVICE_PTY_MAJOR,
                                   VIR_CGROUP_DEVICE_RW);
    virDomainAuditCgroupMajor(vm, priv->cgroup, "allow", DEVICE_PTY_MAJOR,
                              "pty", "rw", rc == 0);
    if (rc < 0)
        goto cleanup;

    cfg = virQEMUDriverGetConfig(driver);
    if (cfg->cgroupDeviceACL)
        acl = (const char *const *)cfg->cgroupDeviceACL;
    else
        acl = defaultDeviceACL;

    /* host audio is needed when the domain has a sound device and
     * either has no graphics (and the config allows audio then), or
     * its first graphics device is VNC with host audio allowed, or SDL */
    if (vm->def->nsounds) {
        if (!vm->def->ngraphics && cfg->nogfxAllowHostAudio) {
            allowSound = true;
        } else if (vm->def->graphics) {
            allowSound =
                (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
                 cfg->vncAllowHostAudio) ||
                (vm->def->graphics[0]->type == VIR_DOMAIN_GRAPHICS_TYPE_SDL);
        }
    }

    if (allowSound) {
        rc = virCgroupAllowDeviceMajor(priv->cgroup, 'c', DEVICE_SND_MAJOR,
                                       VIR_CGROUP_DEVICE_RW);
        virDomainAuditCgroupMajor(vm, priv->cgroup, "allow", DEVICE_SND_MAJOR,
                                  "sound", "rw", rc == 0);
        if (rc < 0)
            goto cleanup;
    }

    for (entry = acl; *entry != NULL; entry++) {
        if (!virFileExists(*entry)) {
            VIR_DEBUG("Ignoring non-existent device %s", *entry);
            continue;
        }

        rc = virCgroupAllowDevicePath(priv->cgroup, *entry,
                                      VIR_CGROUP_DEVICE_RW);
        virDomainAuditCgroupPath(vm, priv->cgroup, "allow", *entry, "rw",
                                 rc == 0);
        /* ENOENT failures are tolerated */
        if (rc < 0 && !virLastErrorIsSystemErrno(ENOENT))
            goto cleanup;
    }

    if (virDomainChrDefForeach(vm->def, true,
                               qemuSetupChardevCgroup, vm) < 0)
        goto cleanup;

    if (vm->def->tpm &&
        qemuSetupTPMCgroup(vm->def, vm->def->tpm, vm) < 0)
        goto cleanup;

    for (i = 0; i < vm->def->nhostdevs; i++) {
        if (qemuSetupHostdevCGroup(vm, vm->def->hostdevs[i]) < 0)
            goto cleanup;
    }

    for (i = 0; i < vm->def->nrngs; i++) {
        if (vm->def->rngs[i]->backend != VIR_DOMAIN_RNG_BACKEND_RANDOM)
            continue;

        VIR_DEBUG("Setting Cgroup ACL for RNG device");
        rc = virCgroupAllowDevicePath(priv->cgroup,
                                      vm->def->rngs[i]->source.file,
                                      VIR_CGROUP_DEVICE_RW);
        virDomainAuditCgroupPath(vm, priv->cgroup, "allow",
                                 vm->def->rngs[i]->source.file,
                                 "rw", rc == 0);
        /* ENOENT failures are tolerated */
        if (rc < 0 && !virLastErrorIsSystemErrno(ENOENT))
            goto cleanup;
    }

    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    return ret;
}


611
int
612
qemuSetupCpusetMems(virDomainObjPtr vm)
613
{
614
    virCgroupPtr cgroup_temp = NULL;
615
    qemuDomainObjPrivatePtr priv = vm->privateData;
616
    char *mem_mask = NULL;
617 618 619 620 621
    int ret = -1;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

622
    if (virDomainNumatuneGetMode(vm->def->numa, -1) !=
623 624 625
        VIR_DOMAIN_NUMATUNE_MEM_STRICT)
        return 0;

626
    if (virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
627
                                            priv->autoNodeset,
628
                                            &mem_mask, -1) < 0)
629
        goto cleanup;
630

631 632
    if (mem_mask)
        if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
633
            virCgroupSetCpusetMems(cgroup_temp, mem_mask) < 0)
634
            goto cleanup;
635

636 637 638
    ret = 0;
 cleanup:
    VIR_FREE(mem_mask);
639
    virCgroupFree(&cgroup_temp);
640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
    return ret;
}


/*
 * Configure the cpuset controller of the domain cgroup.
 *
 * Memory migration is enabled first.  The CPU mask is then taken from
 * the CPUs of the automatic node set when the placement mode is "auto",
 * otherwise from the domain's cpumask if one is defined; with neither,
 * no CPU mask is written.
 *
 * Returns 0 on success or when the cpuset controller is absent, -1 on
 * failure.
 */
static int
qemuSetupCpusetCgroup(virDomainObjPtr vm,
                      virCapsPtr caps)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    char *maskStr = NULL;
    int ret = -1;

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    if (virCgroupSetCpusetMemoryMigrate(priv->cgroup, true) < 0)
        return -1;

    if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
        virBitmapPtr autoCpus;

        autoCpus = virCapabilitiesGetCpusForNodemask(caps, priv->autoNodeset);
        if (!autoCpus)
            goto cleanup;

        maskStr = virBitmapFormat(autoCpus);
        virBitmapFree(autoCpus);
    } else if (vm->def->cpumask) {
        maskStr = virBitmapFormat(vm->def->cpumask);
    } else {
        /* no pinning requested: nothing allocated, nothing to free */
        return 0;
    }

    if (!maskStr ||
        virCgroupSetCpusetCpus(priv->cgroup, maskStr) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_FREE(maskStr);
    return ret;
}


685
static int
686 687
qemuSetupCpuCgroup(virQEMUDriverPtr driver,
                   virDomainObjPtr vm)
688 689
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
690 691 692 693
    virObjectEventPtr event = NULL;
    virTypedParameterPtr eventParams = NULL;
    int eventNparams = 0;
    int eventMaxparams = 0;
694 695

    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
696
       if (vm->def->cputune.sharesSpecified) {
697 698 699 700 701 702 703 704
           virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                          _("CPU tuning is not available on this host"));
           return -1;
       } else {
           return 0;
       }
    }

705 706 707 708 709 710 711
    if (vm->def->cputune.sharesSpecified) {
        unsigned long long val;
        if (virCgroupSetCpuShares(priv->cgroup, vm->def->cputune.shares) < 0)
            return -1;

        if (virCgroupGetCpuShares(priv->cgroup, &val) < 0)
            return -1;
712 713 714 715
        if (vm->def->cputune.shares != val) {
            vm->def->cputune.shares = val;
            if (virTypedParamsAddULLong(&eventParams, &eventNparams,
                                        &eventMaxparams,
716
                                        VIR_DOMAIN_TUNABLE_CPU_CPU_SHARES,
717 718 719 720 721 722 723
                                        val) < 0)
                return -1;

            event = virDomainEventTunableNewFromObj(vm, eventParams, eventNparams);
        }

        if (event)
724
            qemuDomainEventQueue(driver, event);
725
    }
726 727 728 729 730

    return 0;
}


731
static int
732
qemuInitCgroup(virQEMUDriverPtr driver,
733 734 735
               virDomainObjPtr vm,
               size_t nnicindexes,
               int *nicindexes)
736
{
737
    int ret = -1;
738 739 740
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

741 742 743
    if (!cfg->privileged)
        goto done;

744 745 746
    if (!virCgroupAvailable())
        goto done;

747 748
    virCgroupFree(&priv->cgroup);

749
    if (!vm->def->resource) {
750 751
        virDomainResourceDefPtr res;

752
        if (VIR_ALLOC(res) < 0)
753
            goto cleanup;
754

755
        if (VIR_STRDUP(res->partition, "/machine") < 0) {
756 757 758 759 760
            VIR_FREE(res);
            goto cleanup;
        }

        vm->def->resource = res;
761 762
    }

763 764 765 766 767 768
    if (vm->def->resource->partition[0] != '/') {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Resource partition '%s' must start with '/'"),
                       vm->def->resource->partition);
        goto cleanup;
    }
769 770 771 772 773 774 775 776

    if (virCgroupNewMachine(vm->def->name,
                            "qemu",
                            cfg->privileged,
                            vm->def->uuid,
                            NULL,
                            vm->pid,
                            false,
777
                            nnicindexes, nicindexes,
778 779 780
                            vm->def->resource->partition,
                            cfg->cgroupControllers,
                            &priv->cgroup) < 0) {
781 782
        if (virCgroupNewIgnoreError())
            goto done;
783

784 785
        goto cleanup;
    }
786

787
 done:
788
    ret = 0;
789
 cleanup:
790 791 792
    virObjectUnref(cfg);
    return ret;
}
793

794 795 796
/**
 * qemuRestoreCgroupState:
 * @vm: domain object whose private data holds the cgroup to fix up
 *
 * Best-effort helper with no return value.  It formats the host NUMA
 * nodeset and, only when virCgroupHasEmptyTasks() reports a positive
 * value for the cpuset controller of the domain cgroup, writes that
 * nodeset as the domain cgroup's cpuset.mems.  Afterwards every vcpu,
 * iothread and the emulator sub-cgroup gets memory migration enabled
 * and its own current cpuset.mems value written back.
 *
 * Any failure is swallowed: the last error is reset and a debug
 * message is logged, so the caller never sees an error left behind
 * by this function.
 */
static void
qemuRestoreCgroupState(virDomainObjPtr vm)
{
    char *mem_mask = NULL;
    char *nodeset = NULL;
    int empty = -1;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    size_t i = 0;
    virBitmapPtr all_nodes = NULL;
    virCgroupPtr cgroup_temp = NULL;

    if (!(all_nodes = virNumaGetHostNodeset()))
        goto error;

    if (!(mem_mask = virBitmapFormat(all_nodes)))
        goto error;

    if ((empty = virCgroupHasEmptyTasks(priv->cgroup,
                                        VIR_CGROUP_CONTROLLER_CPUSET)) <= 0)
        goto error;

    if (virCgroupSetCpusetMems(priv->cgroup, mem_mask) < 0)
        goto error;

    for (i = 0; i < priv->nvcpupids; i++) {
        if (virCgroupNewVcpu(priv->cgroup, i, false, &cgroup_temp) < 0 ||
            virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
            virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
            virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
            goto error;

        /* nodeset is allocated anew on each pass; release it here so
         * that only the final string is left for the cleanup path */
        VIR_FREE(nodeset);
        virCgroupFree(&cgroup_temp);
    }

    for (i = 0; i < priv->niothreadpids; i++) {
        /* IOThread cgroups are numbered from 1, the pid array from 0 */
        if (virCgroupNewIOThread(priv->cgroup, i + 1, false, &cgroup_temp) < 0 ||
            virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
            virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
            virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
            goto error;

        VIR_FREE(nodeset);
        virCgroupFree(&cgroup_temp);
    }

    if (virCgroupNewEmulator(priv->cgroup, false, &cgroup_temp) < 0 ||
        virCgroupSetCpusetMemoryMigrate(cgroup_temp, true) < 0 ||
        virCgroupGetCpusetMems(cgroup_temp, &nodeset) < 0 ||
        virCgroupSetCpusetMems(cgroup_temp, nodeset) < 0)
        goto error;

 cleanup:
    VIR_FREE(mem_mask);
    VIR_FREE(nodeset);
    virBitmapFree(all_nodes);
    virCgroupFree(&cgroup_temp);
    return;

 error:
    /* This function cannot report failure, so do not leave a stale
     * error for the caller, which carries on regardless */
    virResetLastError();
    VIR_DEBUG("Couldn't restore cgroups to meaningful state");
    goto cleanup;
}
856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872

/**
 * qemuConnectCgroup:
 * @driver: QEMU driver whose configuration is consulted
 * @vm: domain object with a known process ID
 *
 * Drops the cached cgroup of @vm and detects the machine cgroup the
 * process lives in via virCgroupNewDetectMachine(), then makes a
 * best-effort attempt to restore its state.
 *
 * Success is reported without doing anything when the driver is not
 * privileged or when virCgroupAvailable() is false.
 *
 * Returns 0 on success, -1 if the cgroup could not be detected.
 */
int
qemuConnectCgroup(virQEMUDriverPtr driver,
                  virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    int ret = -1;

    /* Nothing to attach to: treated as success */
    if (!cfg->privileged || !virCgroupAvailable()) {
        ret = 0;
        goto cleanup;
    }

    virCgroupFree(&priv->cgroup);

    if (virCgroupNewDetectMachine(vm->def->name,
                                  "qemu",
                                  vm->pid,
                                  vm->def->resource ?
                                  vm->def->resource->partition :
                                  NULL,
                                  cfg->cgroupControllers,
                                  &priv->cgroup) < 0)
        goto cleanup;

    /* Returns nothing; its outcome does not affect our result */
    qemuRestoreCgroupState(vm);

    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    return ret;
}

892 893
int
qemuSetupCgroup(virQEMUDriverPtr driver,
894 895 896
                virDomainObjPtr vm,
                size_t nnicindexes,
                int *nicindexes)
897
{
898
    qemuDomainObjPrivatePtr priv = vm->privateData;
899
    virCapsPtr caps = NULL;
900
    int ret = -1;
901

902 903 904 905 906 907
    if (!vm->pid) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot setup cgroups until process is started"));
        return -1;
    }

908
    if (qemuInitCgroup(driver, vm, nnicindexes, nicindexes) < 0)
909
        return -1;
910

911
    if (!priv->cgroup)
912
        return 0;
913

914 915 916
    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
        goto cleanup;

917 918
    if (qemuSetupDevicesCgroup(driver, vm) < 0)
        goto cleanup;
919

920 921
    if (qemuSetupBlkioCgroup(vm) < 0)
        goto cleanup;
922

923 924
    if (qemuSetupMemoryCgroup(vm) < 0)
        goto cleanup;
925

926
    if (qemuSetupCpuCgroup(driver, vm) < 0)
927
        goto cleanup;
928

929
    if (qemuSetupCpusetCgroup(vm, caps) < 0)
930
        goto cleanup;
931

932
    ret = 0;
933
 cleanup:
934
    virObjectUnref(caps);
935
    return ret;
936 937
}

938 939 940 941
/**
 * qemuSetupCgroupVcpuBW:
 * @cgroup: cgroup to tune
 * @period: CFS period to set, 0 to leave it alone
 * @quota: CFS quota to set, 0 to leave it alone
 *
 * Applies @period and then @quota to @cgroup.  If the period was
 * changed but setting the quota fails, the previous period is put
 * back (ignoring failures of that rollback) while preserving the
 * error raised by the quota call.
 *
 * Returns 0 on success (including nothing to do), -1 on failure.
 */
int
qemuSetupCgroupVcpuBW(virCgroupPtr cgroup,
                      unsigned long long period,
                      long long quota)
{
    unsigned long long prev_period = 0;
    virErrorPtr orig_err;

    if (!period && !quota)
        return 0;

    if (period) {
        /* remember the old period so that it can be rolled back */
        if (virCgroupGetCpuCfsPeriod(cgroup, &prev_period) < 0 ||
            virCgroupSetCpuCfsPeriod(cgroup, period) < 0)
            return -1;
    }

    if (!quota || virCgroupSetCpuCfsQuota(cgroup, quota) >= 0)
        return 0;

    /* Setting the quota failed; without a period change there is
     * nothing to undo */
    if (!period)
        return -1;

    orig_err = virSaveLastError();
    ignore_value(virCgroupSetCpuCfsPeriod(cgroup, prev_period));
    if (orig_err) {
        virSetError(orig_err);
        virFreeError(orig_err);
    }

    return -1;
}

976 977
int
qemuSetupCgroupVcpuPin(virCgroupPtr cgroup,
978
                       virDomainPinDefPtr *vcpupin,
979 980
                       int nvcpupin,
                       int vcpuid)
981
{
982
    size_t i;
983 984

    for (i = 0; i < nvcpupin; i++) {
985
        if (vcpuid == vcpupin[i]->id)
986
            return qemuSetupCgroupEmulatorPin(cgroup, vcpupin[i]->cpumask);
987 988
    }

989 990 991
    return -1;
}

992 993
int
qemuSetupCgroupIOThreadsPin(virCgroupPtr cgroup,
994
                            virDomainPinDefPtr *iothreadspin,
995 996 997 998 999 1000
                            int niothreadspin,
                            int iothreadid)
{
    size_t i;

    for (i = 0; i < niothreadspin; i++) {
1001
        if (iothreadid == iothreadspin[i]->id)
1002 1003 1004 1005 1006 1007
            return qemuSetupCgroupEmulatorPin(cgroup, iothreadspin[i]->cpumask);
    }

    return -1;
}

1008 1009 1010
/**
 * qemuSetupCgroupEmulatorPin:
 * @cgroup: cgroup to restrict
 * @cpumask: bitmap of CPUs
 *
 * Formats @cpumask as a string and writes it as the cpuset CPU list
 * of @cgroup.
 *
 * Returns 0 on success, -1 if formatting or setting fails.
 */
int
qemuSetupCgroupEmulatorPin(virCgroupPtr cgroup,
                           virBitmapPtr cpumask)
{
    char *cpus_str = virBitmapFormat(cpumask);
    int ret = -1;

    if (cpus_str && virCgroupSetCpusetCpus(cgroup, cpus_str) >= 0)
        ret = 0;

    VIR_FREE(cpus_str);
    return ret;
}

1027 1028
/**
 * qemuSetupCgroupForVcpu:
 * @vm: domain object
 *
 * Creates one sub-cgroup per known vcpu thread, moves the thread into
 * it and then applies, in order: the NUMA memory mask (when one was
 * formatted for strict mode), the cputune period/quota, and - if the
 * cpuset controller is present - the pinning entry whose id matches
 * the vcpu.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure;
 * on failure the sub-cgroup being processed at that moment is removed.
 */
int
qemuSetupCgroupForVcpu(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
    virCgroupPtr cgroup_vcpu = NULL;
    unsigned long long period = def->cputune.period;
    long long quota = def->cputune.quota;
    char *mem_mask = NULL;
    size_t vcpu;
    size_t pin;
    int ret = -1;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * Without the CPU controller neither period nor quota is needed
     * at this point, and without the CPUSET controller as well there
     * is nothing left to configure.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    /* CPU pinning can also be achieved with virProcessSetAffinity, so
     * missing cgroups are not fatal here.
     */
    if (!priv->cgroup)
        return 0;

    /* With no VCPU<->PID mapping, or with all vcpus running in the
     * main thread, individual vcpus cannot be controlled.
     */
    if (priv->nvcpupids == 0 || priv->vcpupids[0] == vm->pid)
        return 0;

    if (virDomainNumatuneGetMode(def->numa, -1) ==
        VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
        virDomainNumatuneMaybeFormatNodeset(def->numa,
                                            priv->autoNodeset,
                                            &mem_mask, -1) < 0)
        goto cleanup;

    for (vcpu = 0; vcpu < priv->nvcpupids; vcpu++) {
        if (virCgroupNewVcpu(priv->cgroup, vcpu, true, &cgroup_vcpu) < 0)
            goto cleanup;

        /* move the vcpu thread into its sub-cgroup */
        if (virCgroupAddTask(cgroup_vcpu, priv->vcpupids[vcpu]) < 0)
            goto cleanup;

        if (mem_mask &&
            virCgroupSetCpusetMems(cgroup_vcpu, mem_mask) < 0)
            goto cleanup;

        if ((period || quota) &&
            qemuSetupCgroupVcpuBW(cgroup_vcpu, period, quota) < 0)
            goto cleanup;

        /* Apply the vcpupin setting if one was provided for this vcpu */
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
            /* Only call qemuSetupCgroupVcpuPin when a matching entry
             * exists, as it fails otherwise. */
            for (pin = 0; pin < def->cputune.nvcpupin; pin++) {
                if (def->cputune.vcpupin[pin]->id == vcpu)
                    break;
            }

            if (pin < def->cputune.nvcpupin &&
                qemuSetupCgroupVcpuPin(cgroup_vcpu,
                                       def->cputune.vcpupin,
                                       def->cputune.nvcpupin,
                                       vcpu) < 0)
                goto cleanup;
        }

        virCgroupFree(&cgroup_vcpu);
    }

    ret = 0;

 cleanup:
    /* Only a failed run leaves a half-configured sub-cgroup to undo */
    if (ret < 0 && cgroup_vcpu) {
        virCgroupRemove(cgroup_vcpu);
        virCgroupFree(&cgroup_vcpu);
    }
    VIR_FREE(mem_mask);

    return ret;
}

1125 1126
int
qemuSetupCgroupForEmulator(virQEMUDriverPtr driver,
1127
                           virDomainObjPtr vm)
1128
{
1129
    virBitmapPtr cpumask = NULL;
1130
    virBitmapPtr cpumap = NULL;
1131
    virCgroupPtr cgroup_emulator = NULL;
1132
    virDomainDefPtr def = vm->def;
1133
    qemuDomainObjPrivatePtr priv = vm->privateData;
1134 1135
    unsigned long long period = vm->def->cputune.emulator_period;
    long long quota = vm->def->cputune.emulator_quota;
1136

1137
    if ((period || quota) &&
1138
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
1139 1140 1141 1142 1143
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

1144 1145 1146 1147 1148 1149 1150 1151 1152
    /*
     * If CPU cgroup controller is not initialized here, then we need
     * neither period nor quota settings.  And if CPUSET controller is
     * not initialized either, then there's nothing to do anyway.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

1153
    if (priv->cgroup == NULL)
1154 1155
        return 0; /* Not supported, so claim success */

1156
    if (virCgroupNewEmulator(priv->cgroup, true, &cgroup_emulator) < 0)
1157 1158
        goto cleanup;

1159
    if (virCgroupMoveTask(priv->cgroup, cgroup_emulator) < 0)
1160
        goto cleanup;
1161

1162
    if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
1163
        if (!(cpumap = qemuPrepareCpumap(driver, priv->autoNodeset)))
1164 1165 1166
            goto cleanup;
        cpumask = cpumap;
    } else if (def->cputune.emulatorpin) {
1167
        cpumask = def->cputune.emulatorpin->cpumask;
1168
    } else if (def->cpumask) {
1169
        cpumask = def->cpumask;
1170
    }
1171 1172

    if (cpumask) {
1173 1174 1175
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET) &&
            qemuSetupCgroupEmulatorPin(cgroup_emulator, cpumask) < 0)
            goto cleanup;
H
Hu Tao 已提交
1176
    }
1177

1178
    if (period || quota) {
1179 1180 1181 1182
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
            qemuSetupCgroupVcpuBW(cgroup_emulator, period,
                                  quota) < 0)
            goto cleanup;
1183 1184
    }

1185
    virCgroupFree(&cgroup_emulator);
1186
    virBitmapFree(cpumap);
1187 1188
    return 0;

1189
 cleanup:
1190 1191
    virBitmapFree(cpumap);

1192 1193 1194 1195 1196
    if (cgroup_emulator) {
        virCgroupRemove(cgroup_emulator);
        virCgroupFree(&cgroup_emulator);
    }

1197
    return -1;
1198
}
1199

1200 1201 1202 1203 1204 1205 1206 1207 1208
/**
 * qemuSetupCgroupForIOThreads:
 * @vm: domain object
 *
 * Creates one sub-cgroup per known iothread, moves the thread into it
 * and then applies, in order: the cputune period/quota, the NUMA
 * memory mask (when one was formatted for strict mode) and - if the
 * cpuset controller is present - the pinning entry whose id matches
 * the iothread.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure;
 * on failure the sub-cgroup being processed at that moment is removed.
 */
int
qemuSetupCgroupForIOThreads(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
    virCgroupPtr cgroup_iothread = NULL;
    unsigned long long period = def->cputune.period;
    long long quota = def->cputune.quota;
    char *mem_mask = NULL;
    size_t idx;
    size_t pin;
    int ret = -1;

    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /*
     * Without the CPU controller neither period nor quota is needed
     * at this point, and without the CPUSET controller as well there
     * is nothing left to configure.
     */
    if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET))
        return 0;

    /* CPU pinning can also be achieved with virProcessSetAffinity, so
     * missing cgroups are not fatal here.
     */
    if (!priv->cgroup)
        return 0;

    if (def->iothreads && priv->niothreadpids == 0) {
        VIR_WARN("Unable to get iothreads' pids.");
        return 0;
    }

    if (virDomainNumatuneGetMode(def->numa, -1) ==
        VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
        virDomainNumatuneMaybeFormatNodeset(def->numa,
                                            priv->autoNodeset,
                                            &mem_mask, -1) < 0)
        goto cleanup;

    for (idx = 0; idx < priv->niothreadpids; idx++) {
        /* IOThreads are numbered 1..n while the pid array is indexed
         * 0..n-1, hence the "idx + 1" below.
         */
        if (virCgroupNewIOThread(priv->cgroup, idx + 1, true,
                                 &cgroup_iothread) < 0)
            goto cleanup;

        /* move the iothread into its sub-cgroup */
        if (virCgroupAddTask(cgroup_iothread, priv->iothreadpids[idx]) < 0)
            goto cleanup;

        if ((period || quota) &&
            qemuSetupCgroupVcpuBW(cgroup_iothread, period, quota) < 0)
            goto cleanup;

        if (mem_mask &&
            virCgroupSetCpusetMems(cgroup_iothread, mem_mask) < 0)
            goto cleanup;

        /* Apply the iothreadpin setting if one was provided */
        if (virCgroupHasController(priv->cgroup,
                                   VIR_CGROUP_CONTROLLER_CPUSET)) {
            /* Only call qemuSetupCgroupIOThreadsPin when a matching
             * entry exists, as it fails otherwise. */
            for (pin = 0; pin < def->cputune.niothreadspin; pin++) {
                if (def->cputune.iothreadspin[pin]->id == idx + 1)
                    break;
            }

            if (pin < def->cputune.niothreadspin &&
                qemuSetupCgroupIOThreadsPin(cgroup_iothread,
                                            def->cputune.iothreadspin,
                                            def->cputune.niothreadspin,
                                            idx + 1) < 0)
                goto cleanup;
        }

        virCgroupFree(&cgroup_iothread);
    }

    ret = 0;

 cleanup:
    /* Only a failed run leaves a half-configured sub-cgroup to undo */
    if (ret < 0 && cgroup_iothread) {
        virCgroupRemove(cgroup_iothread);
        virCgroupFree(&cgroup_iothread);
    }
    VIR_FREE(mem_mask);

    return ret;
}

1302
int
1303 1304
qemuRemoveCgroup(virQEMUDriverPtr driver,
                 virDomainObjPtr vm)
1305
{
1306
    qemuDomainObjPrivatePtr priv = vm->privateData;
1307
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
1308

1309
    if (priv->cgroup == NULL)
1310 1311
        return 0; /* Not supported, so claim success */

1312 1313 1314 1315 1316 1317 1318
    if (virCgroupTerminateMachine(vm->def->name,
                                  "qemu",
                                  cfg->privileged) < 0) {
        if (!virCgroupNewIgnoreError())
            VIR_DEBUG("Failed to terminate cgroup for %s", vm->def->name);
    }

1319 1320
    virObjectUnref(cfg);

1321
    return virCgroupRemove(priv->cgroup);
1322 1323
}

1324 1325
/**
 * qemuAddToCgroup:
 * @vm: domain object
 *
 * Currently performs no work: 0 is returned both when the domain has
 * no cgroup and when it has one.
 *
 * Returns 0.
 */
int
qemuAddToCgroup(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;

    /* Not supported, so claim success */
    if (!priv->cgroup)
        return 0;

    return 0;
}