From 4e3117ae50efc0fcbd5ce485cd610dfab7f5c625 Mon Sep 17 00:00:00 2001 From: "Daniel P. Berrange" Date: Tue, 22 Feb 2011 17:35:06 +0000 Subject: [PATCH] Make LXC container startup/shutdown/I/O more robust The current LXC I/O controller looks for HUP to detect when a guest has quit. This isn't reliable as during initial bootup it is possible that 'init' will close the console and let mingetty re-open it. The shutdown of containers was also flakey because it only killed the libvirt I/O controller and expected container processes to gracefully follow. Change the I/O controller such that when it see HUP or an I/O error, it uses kill($PID, 0) to see if the process has really quit. Change the container shutdown sequence to use the virCgroupKillPainfully function to ensure every really goes away This change makes the use of the 'cpu', 'devices' and 'memory' cgroups controllers compulsory with LXC * docs/drvlxc.html.in: Document that certain cgroups controllers are now mandatory * src/lxc/lxc_controller.c: Check if PID is still alive before quitting on I/O error/HUP * src/lxc/lxc_driver.c: Use virCgroupKillPainfully --- docs/drvlxc.html.in | 22 ++++++ src/lxc/lxc_controller.c | 42 ++++++++--- src/lxc/lxc_driver.c | 153 +++++++++++++++------------------------ 3 files changed, 113 insertions(+), 104 deletions(-) diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index 35058c4f5b..8c681bd4cf 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -9,6 +9,28 @@ light-weight "application container" which does not have it's own root image. Y start it using

+

Cgroups Requirements

+ +

+The libvirt LXC driver requires that certain cgroups controllers are +mounted on the host OS. The minimum required controllers are 'cpuacct', +'memory' and 'devices', while recommended extra controllers are +'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf & cgconfig +init service used to mount cgroups at host boot time. To manually +mount them use: +

+ +
+ # mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio
+
+ +

+NB, the blkio controller in some kernels will not allow creation of nested +sub-directories which will prevent correct operation of the libvirt LXC +driver. On such kernels, it may be neccessary to unmount the blkio controller. +

+ +

Example config version 1

diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 296b302f12..eec05c7662 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -121,12 +122,10 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         virReportSystemError(-rc,
                              _("Unable to set memory limit for domain %s"),
                              def->name);
-        /* Don't fail if we can't set memory due to lack of kernel support */
-        if (rc != -ENOENT)
-            goto cleanup;
+        goto cleanup;
     }
 
-    if(def->mem.hard_limit) {
+    if (def->mem.hard_limit) {
         rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -136,7 +135,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.soft_limit) {
+    if (def->mem.soft_limit) {
         rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -146,7 +145,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.swap_hard_limit) {
+    if (def->mem.swap_hard_limit) {
         rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -327,6 +326,18 @@ ignorable_epoll_accept_errno(int errnum)
           || errnum == EWOULDBLOCK);
 }
 
+static bool
+lxcPidGone(pid_t container)
+{
+    waitpid(container, NULL, WNOHANG);
+
+    if (kill(container, 0) < 0 &&
+        errno == ESRCH)
+        return true;
+
+    return false;
+}
+
 /**
  * lxcControllerMain
  * @monitor: server socket fd to accept client requests
@@ -344,7 +355,8 @@ ignorable_epoll_accept_errno(int errnum)
 static int lxcControllerMain(int monitor,
                              int client,
                              int appPty,
-                             int contPty)
+                             int contPty,
+                             pid_t container)
 {
     int rc = -1;
     int epollFd;
@@ -450,7 +462,13 @@ static int lxcControllerMain(int monitor,
                         ++numActive;
                     }
                 } else if (epollEvent.events & EPOLLHUP) {
-                    VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+                    if (lxcPidGone(container))
+                        goto cleanup;
+                    curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
+                    if (fdArray[curFdOff].active) {
+                        fdArray[curFdOff].active = 0;
+                        --numActive;
+                    }
                     continue;
                 } else {
                     lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -489,7 +507,9 @@ static int lxcControllerMain(int monitor,
                 --numActive;
                 fdArray[curFdOff].active = 0;
             } else if (-1 == rc) {
-                goto cleanup;
+                if (lxcPidGone(container))
+                    goto cleanup;
+                continue;
             }
 
         }
@@ -587,7 +607,7 @@ lxcControllerRun(virDomainDefPtr def,
     int rc = -1;
     int control[2] = { -1, -1};
     int containerPty = -1;
-    char *containerPtyPath;
+    char *containerPtyPath = NULL;
     pid_t container = -1;
     virDomainFSDefPtr root;
     char *devpts = NULL;
@@ -709,7 +729,7 @@ lxcControllerRun(virDomainDefPtr def,
     if (lxcControllerClearCapabilities() < 0)
         goto cleanup;
 
-    rc = lxcControllerMain(monitor, client, appPty, containerPty);
+    rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
 
 cleanup:
     VIR_FREE(devptmx);
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index f99c99b27e..79b687994e 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -952,36 +952,16 @@ cleanup:
  * @driver: pointer to driver structure
  * @vm: pointer to VM to clean up
  *
- * waitpid() on the container process.  kill and wait the tty process
- * This is called by both lxcDomainDestroy and lxcSigHandler when a
- * container exits.
+ * Cleanout resources associated with the now dead VM
  *
- * Returns 0 on success or -1 in case of error
  */
-static int lxcVmCleanup(lxc_driver_t *driver,
+static void lxcVmCleanup(lxc_driver_t *driver,
                         virDomainObjPtr  vm)
 {
-    int rc = 0;
-    int waitRc;
-    int childStatus = -1;
     virCgroupPtr cgroup;
     int i;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
-    while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
-           errno == EINTR)
-        ; /* empty */
-
-    if ((waitRc != vm->pid) && (errno != ECHILD)) {
-        virReportSystemError(errno,
-                             _("waitpid failed to wait for container %d: %d"),
-                             vm->pid, waitRc);
-        rc = -1;
-    } else if (WIFEXITED(childStatus)) {
-        VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus));
-        rc = -1;
-    }
-
     /* now that we know it's stopped call the hook if present */
     if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
         char *xml = virDomainDefFormat(vm->def, 0);
@@ -1021,8 +1001,6 @@ static int lxcVmCleanup(lxc_driver_t *driver,
         vm->def->id = -1;
         vm->newDef = NULL;
     }
-
-    return rc;
 }
 
 /**
@@ -1181,11 +1159,10 @@ error:
 
 
 static int lxcVmTerminate(lxc_driver_t *driver,
-                          virDomainObjPtr vm,
-                          int signum)
+                          virDomainObjPtr vm)
 {
-    if (signum == 0)
-        signum = SIGINT;
+    virCgroupPtr group = NULL;
+    int rc;
 
     if (vm->pid <= 0) {
         lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -1193,18 +1170,29 @@ static int lxcVmTerminate(lxc_driver_t *driver,
         return -1;
     }
 
-    if (kill(vm->pid, signum) < 0) {
-        if (errno != ESRCH) {
-            virReportSystemError(errno,
-                                 _("Failed to kill pid %d"),
-                                 vm->pid);
-            return -1;
-        }
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+        return -1;
+
+    rc = virCgroupKillPainfully(group);
+    if (rc < 0) {
+        virReportSystemError(-rc, "%s",
+                             _("Failed to kill container PIDs"));
+        rc = -1;
+        goto cleanup;
     }
+    if (rc == 1) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Some container PIDs refused to die"));
+        rc = -1;
+        goto cleanup;
+    }
+    lxcVmCleanup(driver, vm);
 
-    vm->state = VIR_DOMAIN_SHUTDOWN;
+    rc = 0;
 
-    return lxcVmCleanup(driver, vm);
+cleanup:
+    virCgroupFree(&group);
+    return rc;
 }
 
 static void lxcMonitorEvent(int watch,
@@ -1228,7 +1216,7 @@ static void lxcMonitorEvent(int watch,
         goto cleanup;
     }
 
-    if (lxcVmTerminate(driver, vm, SIGINT) < 0) {
+    if (lxcVmTerminate(driver, vm) < 0) {
         virEventRemoveHandle(watch);
     } else {
         event = virDomainEventNewFromObj(vm,
@@ -1473,6 +1461,31 @@ static int lxcVmStart(virConnectPtr conn,
     char **veths = NULL;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
+    if (!lxc_driver->cgroup) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("The 'cpuacct', 'devices' & 'memory' cgroups controllers must be mounted"));
+        return -1;
+    }
+
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_CPUACCT)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'cpuacct' cgroups controller mount"));
+        return -1;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_DEVICES)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'devices' cgroups controller mount"));
+        return -1;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_MEMORY)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'memory' cgroups controller mount"));
+        return -1;
+    }
+
     if ((r = virFileMakePath(driver->logDir)) != 0) {
         virReportSystemError(r,
                              _("Cannot create log directory '%s'"),
@@ -1543,7 +1556,7 @@ static int lxcVmStart(virConnectPtr conn,
              VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
              lxcMonitorEvent,
              vm, NULL)) < 0) {
-        lxcVmTerminate(driver, vm, 0);
+        lxcVmTerminate(driver, vm);
         goto cleanup;
     }
 
@@ -1711,55 +1724,6 @@ cleanup:
     return dom;
 }
 
-/**
- * lxcDomainShutdown:
- * @dom: pointer to domain to shutdown
- *
- * Sends SIGINT to container root process to request it to shutdown
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainShutdown(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainObjPtr vm;
-    virDomainEventPtr event = NULL;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-    if (!vm) {
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(dom->uuid, uuidstr);
-        lxcError(VIR_ERR_NO_DOMAIN,
-                 _("No domain with matching uuid '%s'"), uuidstr);
-        goto cleanup;
-    }
-
-    if (!virDomainObjIsActive(vm)) {
-        lxcError(VIR_ERR_OPERATION_INVALID,
-                 "%s", _("Domain is not running"));
-        goto cleanup;
-    }
-
-    ret = lxcVmTerminate(driver, vm, 0);
-    event = virDomainEventNewFromObj(vm,
-                                     VIR_DOMAIN_EVENT_STOPPED,
-                                     VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
-    if (!vm->persistent) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-    }
-
-cleanup:
-    if (vm)
-        virDomainObjUnlock(vm);
-    if (event)
-        lxcDomainEventQueue(driver, event);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
 
 static int
 lxcDomainEventRegister(virConnectPtr conn,
@@ -1927,7 +1891,7 @@ static int lxcDomainDestroy(virDomainPtr dom)
         goto cleanup;
     }
 
-    ret = lxcVmTerminate(driver, vm, SIGKILL);
+    ret = lxcVmTerminate(driver, vm);
     event = virDomainEventNewFromObj(vm,
                                      VIR_DOMAIN_EVENT_STOPPED,
                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -2056,7 +2020,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
                  VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
                  lxcMonitorEvent,
                  vm, NULL)) < 0) {
-            lxcVmTerminate(driver, vm, 0);
+            lxcVmTerminate(driver, vm);
             goto cleanup;
         }
     } else {
@@ -2123,8 +2087,11 @@ static int lxcStartup(int privileged)
     rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
     if (rc < 0) {
         char buf[1024];
-        VIR_WARN("Unable to create cgroup for driver: %s",
-                 virStrerror(-rc, buf, sizeof(buf)));
+        VIR_DEBUG("Unable to create cgroup for LXC driver: %s",
+                  virStrerror(-rc, buf, sizeof(buf)));
+        /* Don't abort startup. We will explicitly report to
+         * the user when they try to start a VM
+         */
     }
 
     /* Call function to load lxc driver configuration information */
@@ -2844,7 +2811,7 @@ static virDriver lxcDriver = {
     lxcDomainLookupByName, /* domainLookupByName */
     lxcDomainSuspend, /* domainSuspend */
     lxcDomainResume, /* domainResume */
-    lxcDomainShutdown, /* domainShutdown */
+    NULL, /* domainShutdown */
     NULL, /* domainReboot */
     lxcDomainDestroy, /* domainDestroy */
     lxcGetOSType, /* domainGetOSType */
-- 
GitLab