From db44eb1b5fa0acf08b9b10e3c445a802e1482a0c Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Wed, 3 Apr 2013 11:01:49 +0100 Subject: [PATCH] Change default cgroup layout for QEMU/LXC and honour XML config Historically QEMU/LXC guests have been placed in a cgroup layout that is $LOCATION-OF-LIBVIRTD/libvirt/{qemu,lxc}/$VMNAME This is bad for a number of reasons - The cgroup hierarchy gets very deep, which seriously impacts kernel performance due to cgroups scalability limitations. - It is hard to set up cgroup policies which apply across services and virtual machines, since all VMs are underneath the libvirtd service. To address this the default cgroup location is changed to be /system/$VMNAME.{lxc,qemu}.libvirt This puts virtual machines at the same level in the hierarchy as system services, allowing consistent policy to be set up across all of them. This also honours the new resource partition location from the XML configuration, for example a partition of /virtualmachines/production will result in the VM being placed at /virtualmachines/production/$VMNAME.{lxc,qemu}.libvirt NB, with the exception of the default path, /system, which is intended to always exist, libvirt will not attempt to auto-create the partitions in the XML. It is the responsibility of the admin/app to configure the partitions. Later libvirt APIs will provide a way to do this. Signed-off-by: Daniel P. 
Berrange --- src/lxc/lxc_cgroup.c | 91 ++++++++++++++++++++++++++------ src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_process.c | 4 +- src/qemu/qemu_cgroup.c | 114 +++++++++++++++++++++++++++++++--------- src/qemu/qemu_cgroup.h | 3 +- src/qemu/qemu_process.c | 2 +- 6 files changed, 169 insertions(+), 47 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 72940bde00..8f19057acc 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -523,29 +523,88 @@ cleanup: } -virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def) +virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup) { - virCgroupPtr driver = NULL; - virCgroupPtr cgroup = NULL; int rc; + virCgroupPtr parent = NULL; + virCgroupPtr cgroup = NULL; - rc = virCgroupNewDriver("lxc", true, false, -1, &driver); - if (rc != 0) { - virReportSystemError(-rc, "%s", - _("Unable to get cgroup for driver")); - goto cleanup; + if (!def->resource && startup) { + virDomainResourceDefPtr res; + + if (VIR_ALLOC(res) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (!(res->partition = strdup("/system"))) { + virReportOOMError(); + VIR_FREE(res); + goto cleanup; + } + + def->resource = res; } - rc = virCgroupNewDomainDriver(driver, def->name, true, &cgroup); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for domain %s"), - def->name); - goto cleanup; + if (def->resource && + def->resource->partition) { + if (def->resource->partition[0] != '/') { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + def->resource->partition); + goto cleanup; + } + /* We only auto-create the default partition. 
In other + * cases we expec the sysadmin/app to have done so */ + rc = virCgroupNewPartition(def->resource->partition, + STREQ(def->resource->partition, "/system"), + -1, + &parent); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to initialize %s cgroup"), + def->resource->partition); + goto cleanup; + } + + rc = virCgroupNewDomainPartition(parent, + "lxc", + def->name, + true, + &cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + def->name); + goto cleanup; + } + } else { + rc = virCgroupNewDriver("lxc", + true, + true, + -1, + &parent); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + def->name); + goto cleanup; + } + + rc = virCgroupNewDomainDriver(parent, + def->name, + true, + &cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + def->name); + goto cleanup; + } } cleanup: - virCgroupFree(&driver); + virCgroupFree(&parent); return cgroup; } @@ -556,7 +615,7 @@ virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def) int ret = -1; int rc; - if (!(cgroup = virLXCCgroupCreate(def))) + if (!(cgroup = virLXCCgroupCreate(def, true))) return NULL; rc = virCgroupAddTask(cgroup, getpid()); diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index 25a427c546..f040de229d 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -27,7 +27,7 @@ # include "lxc_fuse.h" # include "virusb.h" -virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def); +virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup); virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def); int virLXCCgroupSetup(virDomainDefPtr def, virCgroupPtr cgroup, diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 1bbffa35fc..ab07a1e514 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1049,7 +1049,7 @@ int virLXCProcessStart(virConnectPtr conn, virCgroupFree(&priv->cgroup); - if (!(priv->cgroup = virLXCCgroupCreate(vm->def))) + if (!(priv->cgroup = 
virLXCCgroupCreate(vm->def, true))) return -1; if (!virCgroupHasController(priv->cgroup, @@ -1464,7 +1464,7 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm, if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) goto error; - if (!(priv->cgroup = virLXCCgroupCreate(vm->def))) + if (!(priv->cgroup = virLXCCgroupCreate(vm->def, false))) goto error; if (virLXCUpdateActiveUsbHostdevs(driver, vm->def) < 0) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index e6af69c467..0098d77ed7 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -215,46 +215,108 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED, int qemuInitCgroup(virQEMUDriverPtr driver, - virDomainObjPtr vm) + virDomainObjPtr vm, + bool startup) { - int rc; + int rc = -1; qemuDomainObjPrivatePtr priv = vm->privateData; - virCgroupPtr driverGroup = NULL; + virCgroupPtr parent = NULL; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); virCgroupFree(&priv->cgroup); - rc = virCgroupNewDriver("qemu", - cfg->privileged, - true, - cfg->cgroupControllers, - &driverGroup); - if (rc != 0) { - if (rc == -ENXIO || - rc == -EPERM || - rc == -EACCES) { /* No cgroups mounts == success */ - VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); - goto done; + if (!vm->def->resource && startup) { + virDomainResourceDefPtr res; + + if (VIR_ALLOC(res) < 0) { + virReportOOMError(); + goto cleanup; } - virReportSystemError(-rc, - _("Unable to create cgroup for %s"), - vm->def->name); - goto cleanup; + if (!(res->partition = strdup("/system"))) { + virReportOOMError(); + VIR_FREE(res); + goto cleanup; + } + + vm->def->resource = res; } - rc = virCgroupNewDomainDriver(driverGroup, vm->def->name, true, &priv->cgroup); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for %s"), - vm->def->name); - goto cleanup; + if (vm->def->resource && + vm->def->resource->partition) { + if (vm->def->resource->partition[0] != '/') { + 
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + goto cleanup; + } + /* We only auto-create the default partition. In other + * cases we expec the sysadmin/app to have done so */ + rc = virCgroupNewPartition(vm->def->resource->partition, + STREQ(vm->def->resource->partition, "/system"), + cfg->cgroupControllers, + &parent); + if (rc != 0) { + if (rc == -ENXIO || + rc == -EPERM || + rc == -EACCES) { /* No cgroups mounts == success */ + VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); + goto done; + } + + virReportSystemError(-rc, + _("Unable to initialize %s cgroup"), + vm->def->resource->partition); + goto cleanup; + } + + rc = virCgroupNewDomainPartition(parent, + "qemu", + vm->def->name, + true, + &priv->cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } + } else { + rc = virCgroupNewDriver("qemu", + cfg->privileged, + true, + cfg->cgroupControllers, + &parent); + if (rc != 0) { + if (rc == -ENXIO || + rc == -EPERM || + rc == -EACCES) { /* No cgroups mounts == success */ + VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); + goto done; + } + + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } + + rc = virCgroupNewDomainDriver(parent, + vm->def->name, + true, + &priv->cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } } done: rc = 0; cleanup: - virCgroupFree(&driverGroup); + virCgroupFree(&parent); virObjectUnref(cfg); return rc; } @@ -273,7 +335,7 @@ int qemuSetupCgroup(virQEMUDriverPtr driver, (const char *const *)cfg->cgroupDeviceACL : defaultDeviceACL; - if (qemuInitCgroup(driver, vm) < 0) + if (qemuInitCgroup(driver, vm, true) < 0) return -1; if (!priv->cgroup) diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 
6cbfebcfe9..e63f443ed9 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -37,7 +37,8 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev, const char *path, void *opaque); int qemuInitCgroup(virQEMUDriverPtr driver, - virDomainObjPtr vm); + virDomainObjPtr vm, + bool startup); int qemuSetupCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index da47b437c8..ce9f50171a 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3005,7 +3005,7 @@ qemuProcessReconnect(void *opaque) if (qemuUpdateActiveUsbHostdevs(driver, obj->def) < 0) goto error; - if (qemuInitCgroup(driver, obj) < 0) + if (qemuInitCgroup(driver, obj, false) < 0) goto error; /* XXX: Need to change as long as lock is introduced for -- GitLab