diff --git a/docs/drvlxc.html.in b/docs/drvlxc.html.in index 35058c4f5b657d11979da60f9ebc4ab53c2c327e..8c681bd4cffb2b63b6edfcdde9b38ad44338f261 100644 --- a/docs/drvlxc.html.in +++ b/docs/drvlxc.html.in @@ -9,6 +9,28 @@ light-weight "application container" which does not have it's own root image. Y start it using

+

Cgroups Requirements

+ +

+The libvirt LXC driver requires that certain cgroups controllers are +mounted on the host OS. The minimum required controllers are 'cpuacct', +'memory' and 'devices', while recommended extra controllers are +'cpu', 'freezer' and 'blkio'. The /etc/cgconfig.conf & cgconfig +init service used to mount cgroups at host boot time. To manually +mount them use: +

+ +
+ # mount -t cgroup cgroup /dev/cgroup -o cpuacct,memory,devices,cpu,freezer,blkio
+
+ +

+NB, the blkio controller in some kernels will not allow creation of nested +sub-directories which will prevent correct operation of the libvirt LXC +driver. On such kernels, it may be neccessary to unmount the blkio controller. +

+ +

Example config version 1

diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 296b302f1260af83c9a6844d956d54ac9884a816..eec05c7662c723d5de083d3c68c66f465769c478 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -121,12 +122,10 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         virReportSystemError(-rc,
                              _("Unable to set memory limit for domain %s"),
                              def->name);
-        /* Don't fail if we can't set memory due to lack of kernel support */
-        if (rc != -ENOENT)
-            goto cleanup;
+        goto cleanup;
     }
 
-    if(def->mem.hard_limit) {
+    if (def->mem.hard_limit) {
         rc = virCgroupSetMemoryHardLimit(cgroup, def->mem.hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -136,7 +135,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.soft_limit) {
+    if (def->mem.soft_limit) {
         rc = virCgroupSetMemorySoftLimit(cgroup, def->mem.soft_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -146,7 +145,7 @@ static int lxcSetContainerResources(virDomainDefPtr def)
         }
     }
 
-    if(def->mem.swap_hard_limit) {
+    if (def->mem.swap_hard_limit) {
         rc = virCgroupSetSwapHardLimit(cgroup, def->mem.swap_hard_limit);
         if (rc != 0) {
             virReportSystemError(-rc,
@@ -327,6 +326,18 @@ ignorable_epoll_accept_errno(int errnum)
           || errnum == EWOULDBLOCK);
 }
 
+static bool
+lxcPidGone(pid_t container)
+{
+    waitpid(container, NULL, WNOHANG);
+
+    if (kill(container, 0) < 0 &&
+        errno == ESRCH)
+        return true;
+
+    return false;
+}
+
 /**
  * lxcControllerMain
  * @monitor: server socket fd to accept client requests
@@ -344,7 +355,8 @@ ignorable_epoll_accept_errno(int errnum)
 static int lxcControllerMain(int monitor,
                              int client,
                              int appPty,
-                             int contPty)
+                             int contPty,
+                             pid_t container)
 {
     int rc = -1;
     int epollFd;
@@ -450,7 +462,13 @@ static int lxcControllerMain(int monitor,
                         ++numActive;
                     }
                 } else if (epollEvent.events & EPOLLHUP) {
-                    VIR_DEBUG("EPOLLHUP from fd %d", epollEvent.data.fd);
+                    if (lxcPidGone(container))
+                        goto cleanup;
+                    curFdOff = epollEvent.data.fd == appPty ? 0 : 1;
+                    if (fdArray[curFdOff].active) {
+                        fdArray[curFdOff].active = 0;
+                        --numActive;
+                    }
                     continue;
                 } else {
                     lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -489,7 +507,9 @@ static int lxcControllerMain(int monitor,
                 --numActive;
                 fdArray[curFdOff].active = 0;
             } else if (-1 == rc) {
-                goto cleanup;
+                if (lxcPidGone(container))
+                    goto cleanup;
+                continue;
             }
 
         }
@@ -587,7 +607,7 @@ lxcControllerRun(virDomainDefPtr def,
     int rc = -1;
     int control[2] = { -1, -1};
     int containerPty = -1;
-    char *containerPtyPath;
+    char *containerPtyPath = NULL;
     pid_t container = -1;
     virDomainFSDefPtr root;
     char *devpts = NULL;
@@ -709,7 +729,7 @@ lxcControllerRun(virDomainDefPtr def,
     if (lxcControllerClearCapabilities() < 0)
         goto cleanup;
 
-    rc = lxcControllerMain(monitor, client, appPty, containerPty);
+    rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
 
 cleanup:
     VIR_FREE(devptmx);
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index f99c99b27e09a264f05e1f3ee36db52f9e4e0090..79b687994e04d583ed65000a9e4aebe5951e660b 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -952,36 +952,16 @@ cleanup:
  * @driver: pointer to driver structure
  * @vm: pointer to VM to clean up
  *
- * waitpid() on the container process.  kill and wait the tty process
- * This is called by both lxcDomainDestroy and lxcSigHandler when a
- * container exits.
+ * Cleanout resources associated with the now dead VM
  *
- * Returns 0 on success or -1 in case of error
  */
-static int lxcVmCleanup(lxc_driver_t *driver,
+static void lxcVmCleanup(lxc_driver_t *driver,
                         virDomainObjPtr  vm)
 {
-    int rc = 0;
-    int waitRc;
-    int childStatus = -1;
     virCgroupPtr cgroup;
     int i;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
-    while (((waitRc = waitpid(vm->pid, &childStatus, 0)) == -1) &&
-           errno == EINTR)
-        ; /* empty */
-
-    if ((waitRc != vm->pid) && (errno != ECHILD)) {
-        virReportSystemError(errno,
-                             _("waitpid failed to wait for container %d: %d"),
-                             vm->pid, waitRc);
-        rc = -1;
-    } else if (WIFEXITED(childStatus)) {
-        VIR_DEBUG("container exited with rc: %d", WEXITSTATUS(childStatus));
-        rc = -1;
-    }
-
     /* now that we know it's stopped call the hook if present */
     if (virHookPresent(VIR_HOOK_DRIVER_LXC)) {
         char *xml = virDomainDefFormat(vm->def, 0);
@@ -1021,8 +1001,6 @@ static int lxcVmCleanup(lxc_driver_t *driver,
         vm->def->id = -1;
         vm->newDef = NULL;
     }
-
-    return rc;
 }
 
 /**
@@ -1181,11 +1159,10 @@ error:
 
 
 static int lxcVmTerminate(lxc_driver_t *driver,
-                          virDomainObjPtr vm,
-                          int signum)
+                          virDomainObjPtr vm)
 {
-    if (signum == 0)
-        signum = SIGINT;
+    virCgroupPtr group = NULL;
+    int rc;
 
     if (vm->pid <= 0) {
         lxcError(VIR_ERR_INTERNAL_ERROR,
@@ -1193,18 +1170,29 @@ static int lxcVmTerminate(lxc_driver_t *driver,
         return -1;
     }
 
-    if (kill(vm->pid, signum) < 0) {
-        if (errno != ESRCH) {
-            virReportSystemError(errno,
-                                 _("Failed to kill pid %d"),
-                                 vm->pid);
-            return -1;
-        }
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
+        return -1;
+
+    rc = virCgroupKillPainfully(group);
+    if (rc < 0) {
+        virReportSystemError(-rc, "%s",
+                             _("Failed to kill container PIDs"));
+        rc = -1;
+        goto cleanup;
     }
+    if (rc == 1) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Some container PIDs refused to die"));
+        rc = -1;
+        goto cleanup;
+    }
+    lxcVmCleanup(driver, vm);
 
-    vm->state = VIR_DOMAIN_SHUTDOWN;
+    rc = 0;
 
-    return lxcVmCleanup(driver, vm);
+cleanup:
+    virCgroupFree(&group);
+    return rc;
 }
 
 static void lxcMonitorEvent(int watch,
@@ -1228,7 +1216,7 @@ static void lxcMonitorEvent(int watch,
         goto cleanup;
     }
 
-    if (lxcVmTerminate(driver, vm, SIGINT) < 0) {
+    if (lxcVmTerminate(driver, vm) < 0) {
         virEventRemoveHandle(watch);
     } else {
         event = virDomainEventNewFromObj(vm,
@@ -1473,6 +1461,31 @@ static int lxcVmStart(virConnectPtr conn,
     char **veths = NULL;
     lxcDomainObjPrivatePtr priv = vm->privateData;
 
+    if (!lxc_driver->cgroup) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("The 'cpuacct', 'devices' & 'memory' cgroups controllers must be mounted"));
+        return -1;
+    }
+
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_CPUACCT)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'cpuacct' cgroups controller mount"));
+        return -1;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_DEVICES)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'devices' cgroups controller mount"));
+        return -1;
+    }
+    if (!virCgroupMounted(lxc_driver->cgroup,
+                          VIR_CGROUP_CONTROLLER_MEMORY)) {
+        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                 _("Unable to find 'memory' cgroups controller mount"));
+        return -1;
+    }
+
     if ((r = virFileMakePath(driver->logDir)) != 0) {
         virReportSystemError(r,
                              _("Cannot create log directory '%s'"),
@@ -1543,7 +1556,7 @@ static int lxcVmStart(virConnectPtr conn,
              VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
              lxcMonitorEvent,
              vm, NULL)) < 0) {
-        lxcVmTerminate(driver, vm, 0);
+        lxcVmTerminate(driver, vm);
         goto cleanup;
     }
 
@@ -1711,55 +1724,6 @@ cleanup:
     return dom;
 }
 
-/**
- * lxcDomainShutdown:
- * @dom: pointer to domain to shutdown
- *
- * Sends SIGINT to container root process to request it to shutdown
- *
- * Returns 0 on success or -1 in case of error
- */
-static int lxcDomainShutdown(virDomainPtr dom)
-{
-    lxc_driver_t *driver = dom->conn->privateData;
-    virDomainObjPtr vm;
-    virDomainEventPtr event = NULL;
-    int ret = -1;
-
-    lxcDriverLock(driver);
-    vm = virDomainFindByUUID(&driver->domains, dom->uuid);
-    if (!vm) {
-        char uuidstr[VIR_UUID_STRING_BUFLEN];
-        virUUIDFormat(dom->uuid, uuidstr);
-        lxcError(VIR_ERR_NO_DOMAIN,
-                 _("No domain with matching uuid '%s'"), uuidstr);
-        goto cleanup;
-    }
-
-    if (!virDomainObjIsActive(vm)) {
-        lxcError(VIR_ERR_OPERATION_INVALID,
-                 "%s", _("Domain is not running"));
-        goto cleanup;
-    }
-
-    ret = lxcVmTerminate(driver, vm, 0);
-    event = virDomainEventNewFromObj(vm,
-                                     VIR_DOMAIN_EVENT_STOPPED,
-                                     VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN);
-    if (!vm->persistent) {
-        virDomainRemoveInactive(&driver->domains, vm);
-        vm = NULL;
-    }
-
-cleanup:
-    if (vm)
-        virDomainObjUnlock(vm);
-    if (event)
-        lxcDomainEventQueue(driver, event);
-    lxcDriverUnlock(driver);
-    return ret;
-}
-
 
 static int
 lxcDomainEventRegister(virConnectPtr conn,
@@ -1927,7 +1891,7 @@ static int lxcDomainDestroy(virDomainPtr dom)
         goto cleanup;
     }
 
-    ret = lxcVmTerminate(driver, vm, SIGKILL);
+    ret = lxcVmTerminate(driver, vm);
     event = virDomainEventNewFromObj(vm,
                                      VIR_DOMAIN_EVENT_STOPPED,
                                      VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
@@ -2056,7 +2020,7 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
                  VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
                  lxcMonitorEvent,
                  vm, NULL)) < 0) {
-            lxcVmTerminate(driver, vm, 0);
+            lxcVmTerminate(driver, vm);
             goto cleanup;
         }
     } else {
@@ -2123,8 +2087,11 @@ static int lxcStartup(int privileged)
     rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1);
     if (rc < 0) {
         char buf[1024];
-        VIR_WARN("Unable to create cgroup for driver: %s",
-                 virStrerror(-rc, buf, sizeof(buf)));
+        VIR_DEBUG("Unable to create cgroup for LXC driver: %s",
+                  virStrerror(-rc, buf, sizeof(buf)));
+        /* Don't abort startup. We will explicitly report to
+         * the user when they try to start a VM
+         */
     }
 
     /* Call function to load lxc driver configuration information */
@@ -2844,7 +2811,7 @@ static virDriver lxcDriver = {
     lxcDomainLookupByName, /* domainLookupByName */
     lxcDomainSuspend, /* domainSuspend */
     lxcDomainResume, /* domainResume */
-    lxcDomainShutdown, /* domainShutdown */
+    NULL, /* domainShutdown */
     NULL, /* domainReboot */
     lxcDomainDestroy, /* domainDestroy */
     lxcGetOSType, /* domainGetOSType */