提交 aedd46e7 编写于 作者: D Daniel P. Berrange

Add support for systemd cgroup mount

Systemd uses a named cgroup mount for tracking processes. Add
it as another type of controller, albeit one which we have to
special case in a number of places. In particular we must
never create/delete directories there, nor add tasks. Essentially
the systemd mount is to be considered read-only for libvirt.

With this change both the virCgroupDetectPlacement and
virCgroupCopyPlacement methods must be invoked. The copy
placement method will copy setup for resource controllers
only. The detect placement method will probe for any
named controllers, or resource controllers not already
setup.
Signed-off-by: NDaniel P. Berrange <berrange@redhat.com>
上级 4574b475
......@@ -57,7 +57,8 @@
VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST,
"cpu", "cpuacct", "cpuset", "memory", "devices",
"freezer", "blkio", "net_cls", "perf_event");
"freezer", "blkio", "net_cls", "perf_event",
"name=systemd");
typedef enum {
VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */
......@@ -117,6 +118,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group,
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
char *tmp;
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
if (!group->controllers[i].placement)
continue;
......@@ -331,6 +335,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group,
if (!group->controllers[i].mountPoint)
continue;
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
if (path[0] == '/') {
if (VIR_STRDUP(group->controllers[i].placement, path) < 0)
return -1;
......@@ -386,6 +393,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
int ret = -1;
char *procfile;
VIR_DEBUG("Detecting placement for pid %lld path %s",
(unsigned long long)pid, path);
if (pid == -1) {
if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0)
goto cleanup;
......@@ -422,6 +431,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
const char *typestr = virCgroupControllerTypeToString(i);
int typelen = strlen(typestr);
char *tmp = controllers;
while (tmp) {
char *next = strchr(tmp, ',');
int len;
......@@ -438,13 +448,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group,
* selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo"
*/
if (typelen == len && STREQLEN(typestr, tmp, len) &&
group->controllers[i].mountPoint != NULL) {
if (virAsprintf(&group->controllers[i].placement,
"%s%s%s", selfpath,
(STREQ(selfpath, "/") ||
STREQ(path, "") ? "" : "/"),
path) < 0)
goto cleanup;
group->controllers[i].mountPoint != NULL &&
group->controllers[i].placement == NULL) {
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
if (VIR_STRDUP(group->controllers[i].placement,
selfpath) < 0)
goto cleanup;
} else {
if (virAsprintf(&group->controllers[i].placement,
"%s%s%s", selfpath,
(STREQ(selfpath, "/") ||
STREQ(path, "") ? "" : "/"),
path) < 0)
goto cleanup;
}
}
tmp = next;
......@@ -535,13 +552,16 @@ static int virCgroupDetect(virCgroupPtr group,
return -1;
}
if (parent || path[0] == '/') {
if (virCgroupCopyPlacement(group, path, parent) < 0)
return -1;
} else {
if (virCgroupDetectPlacement(group, pid, path) < 0)
return -1;
}
/* In some cases we can copy part of the placement info
* based on the parent cgroup...
*/
if ((parent || path[0] == '/') &&
virCgroupCopyPlacement(group, path, parent) < 0)
return -1;
/* ... but use /proc/cgroups to fill in the rest */
if (virCgroupDetectPlacement(group, pid, path) < 0)
return -1;
/* Check that for every mounted controller, we found our placement */
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
......@@ -833,6 +853,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent,
for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) {
char *path = NULL;
/* We must never mkdir() in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) {
VIR_DEBUG("Not creating systemd controller group");
continue;
}
/* Skip over controllers that aren't mounted */
if (!group->controllers[i].mountPoint) {
VIR_DEBUG("Skipping unmounted controller %s",
......@@ -1037,6 +1063,10 @@ int virCgroupRemove(virCgroupPtr group)
if (!group->controllers[i].mountPoint)
continue;
/* We must never rmdir() in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
/* Don't delete the root group, if we accidentally
ended up in it for some reason */
if (STREQ(group->controllers[i].placement, "/"))
......@@ -1076,6 +1106,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid)
if (!group->controllers[i].mountPoint)
continue;
/* We must never add tasks in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0)
goto cleanup;
}
......@@ -1177,6 +1211,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group)
!dest_group->controllers[i].mountPoint)
continue;
/* We must never move tasks in systemd's hierarchy */
if (i == VIR_CGROUP_CONTROLLER_SYSTEMD)
continue;
/* New threads are created in the same group as their parent;
* but if a thread is created after we first read we aren't
* aware that it needs to move. Therefore, we must iterate
......
......@@ -40,6 +40,7 @@ enum {
VIR_CGROUP_CONTROLLER_BLKIO,
VIR_CGROUP_CONTROLLER_NET_CLS,
VIR_CGROUP_CONTROLLER_PERF_EVENT,
VIR_CGROUP_CONTROLLER_SYSTEMD,
VIR_CGROUP_CONTROLLER_LAST
};
......
......@@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
[VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
};
const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct",
......@@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd",
};
const char *links[VIR_CGROUP_CONTROLLER_LAST] = {
......@@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if (virCgroupNewSelf(&cgroup) < 0) {
......@@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = NULL,
[VIR_CGROUP_CONTROLLER_BLKIO] = NULL,
[VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL,
};
const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = {
[VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition",
......@@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) {
......@@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) {
......@@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) {
......@@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED)
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) {
......@@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU
[VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
[VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
[VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123",
};
if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册