/* * qemu_migration.c: QEMU migration handling * * Copyright (C) 2006-2011 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #include #include "qemu_migration.h" #include "qemu_monitor.h" #include "qemu_domain.h" #include "qemu_process.h" #include "qemu_capabilities.h" #include "qemu_audit.h" #include "qemu_cgroup.h" #include "logging.h" #include "virterror_internal.h" #include "memory.h" #include "util.h" #include "files.h" #include "datatypes.h" #include "fdstream.h" #define VIR_FROM_THIS VIR_FROM_QEMU #define timeval_to_ms(tv) (((tv).tv_sec * 1000ull) + ((tv).tv_usec / 1000)) bool qemuMigrationIsAllowed(virDomainDefPtr def) { if (def->nhostdevs > 0) { qemuReportError(VIR_ERR_OPERATION_INVALID, "%s", _("Domain with assigned host devices cannot be migrated")); return false; } return true; } /** qemuMigrationSetOffline * Pause domain for non-live migration. */ int qemuMigrationSetOffline(struct qemud_driver *driver, virDomainObjPtr vm) { int ret; ret = qemuProcessStopCPUs(driver, vm); if (ret == 0) { virDomainEventPtr event; event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED); if (event) qemuDomainEventQueue(driver, event); } return ret; } int qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm) { int ret = -1; int status; unsigned long long memProcessed; unsigned long long memRemaining; unsigned long long memTotal; qemuDomainObjPrivatePtr priv = vm->privateData; priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; while (priv->jobInfo.type == VIR_DOMAIN_JOB_UNBOUNDED) { /* Poll every 50ms for progress & to allow cancellation */ struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; struct timeval now; int rc; const char *job; switch (priv->jobActive) { case QEMU_JOB_MIGRATION_OUT: job = _("migration job"); break; case QEMU_JOB_SAVE: job = _("domain save job"); break; case QEMU_JOB_DUMP: job = _("domain core dump job"); break; default: job = _("job"); } if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), job, _("guest unexpectedly quit")); goto cleanup; } if (priv->jobSignals & QEMU_JOB_SIGNAL_CANCEL) { priv->jobSignals ^= QEMU_JOB_SIGNAL_CANCEL; VIR_DEBUG0("Cancelling job at client request"); qemuDomainObjEnterMonitorWithDriver(driver, vm); rc = qemuMonitorMigrateCancel(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) { VIR_WARN0("Unable to cancel job"); } } else if (priv->jobSignals & QEMU_JOB_SIGNAL_SUSPEND) { priv->jobSignals ^= QEMU_JOB_SIGNAL_SUSPEND; VIR_DEBUG0("Pausing domain for non-live migration"); if (qemuMigrationSetOffline(driver, vm) < 0) VIR_WARN0("Unable to pause domain"); } else if (priv->jobSignals & QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME) { unsigned long long ms = priv->jobSignalsData.migrateDowntime; priv->jobSignals ^= QEMU_JOB_SIGNAL_MIGRATE_DOWNTIME; priv->jobSignalsData.migrateDowntime = 0; VIR_DEBUG("Setting migration downtime to %llums", ms); qemuDomainObjEnterMonitorWithDriver(driver, vm); rc = qemuMonitorSetMigrationDowntime(priv->mon, ms); qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) VIR_WARN0("Unable to set migration downtime"); } else if (priv->jobSignals & QEMU_JOB_SIGNAL_MIGRATE_SPEED) { unsigned long bandwidth = priv->jobSignalsData.migrateBandwidth; priv->jobSignals ^= QEMU_JOB_SIGNAL_MIGRATE_SPEED; priv->jobSignalsData.migrateBandwidth = 0; VIR_DEBUG("Setting migration bandwidth to %luMbs", bandwidth); qemuDomainObjEnterMonitorWithDriver(driver, vm); rc = qemuMonitorSetMigrationSpeed(priv->mon, bandwidth); qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) VIR_WARN0("Unable to set migration speed"); } /* Repeat check because the job signals might have caused * guest to die */ if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), job, _("guest unexpectedly quit")); goto cleanup; } qemuDomainObjEnterMonitorWithDriver(driver, vm); rc = qemuMonitorGetMigrationStatus(priv->mon, &status, &memProcessed, &memRemaining, &memTotal); qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) { priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; goto cleanup; } if (gettimeofday(&now, NULL) < 0) { priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; virReportSystemError(errno, "%s", _("cannot get time of day")); goto cleanup; } priv->jobInfo.timeElapsed = timeval_to_ms(now) - priv->jobStart; switch (status) { case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: priv->jobInfo.type = VIR_DOMAIN_JOB_NONE; qemuReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), job, _("is not active")); break; case QEMU_MONITOR_MIGRATION_STATUS_ACTIVE: priv->jobInfo.dataTotal = memTotal; priv->jobInfo.dataRemaining = memRemaining; priv->jobInfo.dataProcessed = memProcessed; priv->jobInfo.memTotal = memTotal; priv->jobInfo.memRemaining = memRemaining; priv->jobInfo.memProcessed = memProcessed; break; case QEMU_MONITOR_MIGRATION_STATUS_COMPLETED: priv->jobInfo.type = VIR_DOMAIN_JOB_COMPLETED; ret = 0; break; case QEMU_MONITOR_MIGRATION_STATUS_ERROR: priv->jobInfo.type = VIR_DOMAIN_JOB_FAILED; qemuReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), job, _("unexpectedly failed")); break; case QEMU_MONITOR_MIGRATION_STATUS_CANCELLED: priv->jobInfo.type = VIR_DOMAIN_JOB_CANCELLED; qemuReportError(VIR_ERR_OPERATION_FAILED, _("%s: %s"), job, _("canceled by client")); break; } virDomainObjUnlock(vm); qemuDriverUnlock(driver); nanosleep(&ts, NULL); qemuDriverLock(driver); virDomainObjLock(vm); } cleanup: return ret; } /* Prepare is the first step, and it runs on the destination host. * * This version starts an empty VM listening on a localhost TCP port, and * sets up the corresponding virStream to handle the incoming data. */ int qemuMigrationPrepareTunnel(struct qemud_driver *driver, virConnectPtr dconn, virStreamPtr st, const char *dname, const char *dom_xml) { virDomainDefPtr def = NULL; virDomainObjPtr vm = NULL; virDomainEventPtr event = NULL; int ret = -1; int internalret; int dataFD[2] = { -1, -1 }; virBitmapPtr qemuCaps = NULL; qemuDomainObjPrivatePtr priv = NULL; struct timeval now; if (gettimeofday(&now, NULL) < 0) { virReportSystemError(errno, "%s", _("cannot get time of day")); return -1; } /* Parse the domain XML. */ if (!(def = virDomainDefParseString(driver->caps, dom_xml, VIR_DOMAIN_XML_INACTIVE))) goto cleanup; if (!qemuMigrationIsAllowed(def)) goto cleanup; /* Target domain name, maybe renamed. */ if (dname) { VIR_FREE(def->name); def->name = strdup(dname); if (def->name == NULL) goto cleanup; } if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) goto cleanup; if (!(vm = virDomainAssignDef(driver->caps, &driver->domains, def, true))) { /* virDomainAssignDef already set the error */ goto cleanup; } def = NULL; priv = vm->privateData; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; priv->jobActive = QEMU_JOB_MIGRATION_OUT; /* Domain starts inactive, even if the domain XML had an id field. */ vm->def->id = -1; if (pipe(dataFD) < 0 || virSetCloseExec(dataFD[0]) < 0) { virReportSystemError(errno, "%s", _("cannot create pipe for tunnelled migration")); goto endjob; } /* check that this qemu version supports the interactive exec */ if (qemuCapsExtractVersionInfo(vm->def->emulator, vm->def->os.arch, NULL, &qemuCaps) < 0) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("Cannot determine QEMU argv syntax %s"), vm->def->emulator); goto endjob; } /* Start the QEMU daemon, with the same command-line arguments plus * -incoming stdin (which qemu_command might convert to exec:cat or fd:n) */ internalret = qemuProcessStart(dconn, driver, vm, "stdin", true, dataFD[1], NULL, VIR_VM_OP_MIGRATE_IN_START); if (internalret < 0) { qemuAuditDomainStart(vm, "migrated", false); /* Note that we don't set an error here because qemuProcessStart * should have already done that. */ if (!vm->persistent) { virDomainRemoveInactive(&driver->domains, vm); vm = NULL; } goto endjob; } if (virFDStreamOpen(st, dataFD[0]) < 0) { qemuAuditDomainStart(vm, "migrated", false); qemuProcessStop(driver, vm, 0); if (!vm->persistent) { if (qemuDomainObjEndJob(vm) > 0) virDomainRemoveInactive(&driver->domains, vm); vm = NULL; } virReportSystemError(errno, "%s", _("cannot pass pipe for tunnelled migration")); goto endjob; } qemuAuditDomainStart(vm, "migrated", true); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, VIR_DOMAIN_EVENT_STARTED_MIGRATED); ret = 0; endjob: if (vm && qemuDomainObjEndJob(vm) == 0) vm = NULL; /* We set a fake job active which is held across * API calls until the finish() call. This prevents * any other APIs being invoked while incoming * migration is taking place */ if (vm && virDomainObjIsActive(vm)) { priv->jobActive = QEMU_JOB_MIGRATION_IN; priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; priv->jobStart = timeval_to_ms(now); } cleanup: qemuCapsFree(qemuCaps); virDomainDefFree(def); VIR_FORCE_CLOSE(dataFD[0]); VIR_FORCE_CLOSE(dataFD[1]); if (vm) virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); qemuDriverUnlock(driver); return ret; } int qemuMigrationPrepareDirect(struct qemud_driver *driver, virConnectPtr dconn, const char *uri_in, char **uri_out, const char *dname, const char *dom_xml) { static int port = 0; virDomainDefPtr def = NULL; virDomainObjPtr vm = NULL; int this_port; char *hostname = NULL; char migrateFrom [64]; const char *p; virDomainEventPtr event = NULL; int ret = -1; int internalret; qemuDomainObjPrivatePtr priv = NULL; struct timeval now; if (gettimeofday(&now, NULL) < 0) { virReportSystemError(errno, "%s", _("cannot get time of day")); return -1; } /* The URI passed in may be NULL or a string "tcp://somehostname:port". * * If the URI passed in is NULL then we allocate a port number * from our pool of port numbers and return a URI of * "tcp://ourhostname:port". * * If the URI passed in is not NULL then we try to parse out the * port number and use that (note that the hostname is assumed * to be a correct hostname which refers to the target machine). */ if (uri_in == NULL) { this_port = QEMUD_MIGRATION_FIRST_PORT + port++; if (port == QEMUD_MIGRATION_NUM_PORTS) port = 0; /* Get hostname */ if ((hostname = virGetHostname(NULL)) == NULL) goto cleanup; if (STRPREFIX(hostname, "localhost")) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("hostname on destination resolved to localhost, but migration requires an FQDN")); goto cleanup; } /* XXX this really should have been a properly well-formed * URI, but we can't add in tcp:// now without breaking * compatability with old targets. We at least make the * new targets accept both syntaxes though. */ /* Caller frees */ internalret = virAsprintf(uri_out, "tcp:%s:%d", hostname, this_port); if (internalret < 0) { virReportOOMError(); goto cleanup; } } else { /* Check the URI starts with "tcp:". We will escape the * URI when passing it to the qemu monitor, so bad * characters in hostname part don't matter. */ if (!STRPREFIX (uri_in, "tcp:")) { qemuReportError (VIR_ERR_INVALID_ARG, "%s", _("only tcp URIs are supported for KVM/QEMU migrations")); goto cleanup; } /* Get the port number. */ p = strrchr (uri_in, ':'); if (p == strchr(uri_in, ':')) { /* Generate a port */ this_port = QEMUD_MIGRATION_FIRST_PORT + port++; if (port == QEMUD_MIGRATION_NUM_PORTS) port = 0; /* Caller frees */ if (virAsprintf(uri_out, "%s:%d", uri_in, this_port) < 0) { virReportOOMError(); goto cleanup; } } else { p++; /* definitely has a ':' in it, see above */ this_port = virParseNumber (&p); if (this_port == -1 || p-uri_in != strlen (uri_in)) { qemuReportError(VIR_ERR_INVALID_ARG, "%s", _("URI ended with incorrect ':port'")); goto cleanup; } } } if (*uri_out) VIR_DEBUG("Generated uri_out=%s", *uri_out); /* Parse the domain XML. */ if (!(def = virDomainDefParseString(driver->caps, dom_xml, VIR_DOMAIN_XML_INACTIVE))) goto cleanup; if (!qemuMigrationIsAllowed(def)) goto cleanup; /* Target domain name, maybe renamed. */ if (dname) { VIR_FREE(def->name); def->name = strdup(dname); if (def->name == NULL) goto cleanup; } if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) goto cleanup; if (!(vm = virDomainAssignDef(driver->caps, &driver->domains, def, true))) { /* virDomainAssignDef already set the error */ goto cleanup; } def = NULL; priv = vm->privateData; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; priv->jobActive = QEMU_JOB_MIGRATION_OUT; /* Domain starts inactive, even if the domain XML had an id field. */ vm->def->id = -1; /* Start the QEMU daemon, with the same command-line arguments plus * -incoming tcp:0.0.0.0:port */ snprintf (migrateFrom, sizeof (migrateFrom), "tcp:0.0.0.0:%d", this_port); if (qemuProcessStart(dconn, driver, vm, migrateFrom, true, -1, NULL, VIR_VM_OP_MIGRATE_IN_START) < 0) { qemuAuditDomainStart(vm, "migrated", false); /* Note that we don't set an error here because qemuProcessStart * should have already done that. */ if (!vm->persistent) { if (qemuDomainObjEndJob(vm) > 0) virDomainRemoveInactive(&driver->domains, vm); vm = NULL; } goto endjob; } qemuAuditDomainStart(vm, "migrated", true); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, VIR_DOMAIN_EVENT_STARTED_MIGRATED); ret = 0; endjob: if (vm && qemuDomainObjEndJob(vm) == 0) vm = NULL; /* We set a fake job active which is held across * API calls until the finish() call. This prevents * any other APIs being invoked while incoming * migration is taking place */ if (vm && virDomainObjIsActive(vm)) { priv->jobActive = QEMU_JOB_MIGRATION_IN; priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; priv->jobStart = timeval_to_ms(now); } cleanup: VIR_FREE(hostname); virDomainDefFree(def); if (ret != 0) VIR_FREE(*uri_out); if (vm) virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); return ret; } /* Perform migration using QEMU's native TCP migrate support, * not encrypted obviously */ static int doNativeMigrate(struct qemud_driver *driver, virDomainObjPtr vm, const char *uri, unsigned int flags, const char *dname ATTRIBUTE_UNUSED, unsigned long resource) { int ret = -1; xmlURIPtr uribits = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; /* Issue the migrate command. */ if (STRPREFIX(uri, "tcp:") && !STRPREFIX(uri, "tcp://")) { /* HACK: source host generates bogus URIs, so fix them up */ char *tmpuri; if (virAsprintf(&tmpuri, "tcp://%s", uri + strlen("tcp:")) < 0) { virReportOOMError(); goto cleanup; } uribits = xmlParseURI(tmpuri); VIR_FREE(tmpuri); } else { uribits = xmlParseURI(uri); } if (!uribits) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("cannot parse URI %s"), uri); goto cleanup; } qemuDomainObjEnterMonitorWithDriver(driver, vm); if (resource > 0 && qemuMonitorSetMigrationSpeed(priv->mon, resource) < 0) { qemuDomainObjExitMonitorWithDriver(driver, vm); goto cleanup; } if (flags & VIR_MIGRATE_NON_SHARED_DISK) background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_DISK; if (flags & VIR_MIGRATE_NON_SHARED_INC) background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_INC; if (qemuMonitorMigrateToHost(priv->mon, background_flags, uribits->server, uribits->port) < 0) { qemuDomainObjExitMonitorWithDriver(driver, vm); goto cleanup; } qemuDomainObjExitMonitorWithDriver(driver, vm); if (qemuMigrationWaitForCompletion(driver, vm) < 0) goto cleanup; ret = 0; cleanup: xmlFreeURI(uribits); return ret; } #define TUNNEL_SEND_BUF_SIZE 65536 static int doTunnelSendAll(virStreamPtr st, int sock) { char *buffer; int nbytes = TUNNEL_SEND_BUF_SIZE; if (VIR_ALLOC_N(buffer, TUNNEL_SEND_BUF_SIZE) < 0) { virReportOOMError(); virStreamAbort(st); return -1; } /* XXX should honour the 'resource' parameter here */ for (;;) { nbytes = saferead(sock, buffer, nbytes); if (nbytes < 0) { virReportSystemError(errno, "%s", _("tunnelled migration failed to read from qemu")); virStreamAbort(st); VIR_FREE(buffer); return -1; } else if (nbytes == 0) /* EOF; get out of here */ break; if (virStreamSend(st, buffer, nbytes) < 0) { qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("Failed to write migration data to remote libvirtd")); VIR_FREE(buffer); return -1; } } VIR_FREE(buffer); if (virStreamFinish(st) < 0) /* virStreamFinish set the error for us */ return -1; return 0; } static int doTunnelMigrate(struct qemud_driver *driver, virConnectPtr dconn, virDomainObjPtr vm, const char *dom_xml, const char *uri, unsigned long flags, const char *dname, unsigned long resource) { qemuDomainObjPrivatePtr priv = vm->privateData; int client_sock = -1; int qemu_sock = -1; struct sockaddr_un sa_qemu, sa_client; socklen_t addrlen; virDomainPtr ddomain = NULL; int retval = -1; virStreamPtr st = NULL; char *unixfile = NULL; int internalret; virBitmapPtr qemuCaps = NULL; int status; unsigned long long transferred, remaining, total; unsigned int background_flags = QEMU_MONITOR_MIGRATE_BACKGROUND; /* * The order of operations is important here to avoid touching * the source VM until we are very sure we can successfully * start the migration operation. * * 1. setup local support infrastructure (eg sockets) * 2. setup destination fully * 3. start migration on source */ /* Stage 1. setup local support infrastructure */ if (virAsprintf(&unixfile, "%s/qemu.tunnelmigrate.src.%s", driver->libDir, vm->def->name) < 0) { virReportOOMError(); goto cleanup; } qemu_sock = socket(AF_UNIX, SOCK_STREAM, 0); if (qemu_sock < 0) { virReportSystemError(errno, "%s", _("cannot open tunnelled migration socket")); goto cleanup; } memset(&sa_qemu, 0, sizeof(sa_qemu)); sa_qemu.sun_family = AF_UNIX; if (virStrcpy(sa_qemu.sun_path, unixfile, sizeof(sa_qemu.sun_path)) == NULL) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("Unix socket '%s' too big for destination"), unixfile); goto cleanup; } unlink(unixfile); if (bind(qemu_sock, (struct sockaddr *)&sa_qemu, sizeof(sa_qemu)) < 0) { virReportSystemError(errno, _("Cannot bind to unix socket '%s' for tunnelled migration"), unixfile); goto cleanup; } if (listen(qemu_sock, 1) < 0) { virReportSystemError(errno, _("Cannot listen on unix socket '%s' for tunnelled migration"), unixfile); goto cleanup; } if (chown(unixfile, driver->user, driver->group) < 0) { virReportSystemError(errno, _("Cannot change unix socket '%s' owner"), unixfile); goto cleanup; } /* check that this qemu version supports the unix migration */ if (qemuCapsExtractVersionInfo(vm->def->emulator, vm->def->os.arch, NULL, &qemuCaps) < 0) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("Cannot extract Qemu version from '%s'"), vm->def->emulator); goto cleanup; } if (!qemuCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_UNIX) && !qemuCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_EXEC)) { qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("Source qemu is too old to support tunnelled migration")); goto cleanup; } /* Stage 2. setup destination fully * * Once stage 2 has completed successfully, we *must* call finish * to cleanup the target whether we succeed or fail */ st = virStreamNew(dconn, 0); if (st == NULL) /* virStreamNew only fails on OOM, and it reports the error itself */ goto cleanup; qemuDomainObjEnterRemoteWithDriver(driver, vm); internalret = dconn->driver->domainMigratePrepareTunnel(dconn, st, flags, dname, resource, dom_xml); qemuDomainObjExitRemoteWithDriver(driver, vm); if (internalret < 0) /* domainMigratePrepareTunnel sets the error for us */ goto cleanup; /* the domain may have shutdown or crashed while we had the locks dropped * in qemuDomainObjEnterRemoteWithDriver, so check again */ if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("guest unexpectedly quit")); goto cleanup; } /* 3. start migration on source */ qemuDomainObjEnterMonitorWithDriver(driver, vm); if (flags & VIR_MIGRATE_NON_SHARED_DISK) background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_DISK; if (flags & VIR_MIGRATE_NON_SHARED_INC) background_flags |= QEMU_MONITOR_MIGRATE_NON_SHARED_INC; if (qemuCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_UNIX)) { internalret = qemuMonitorMigrateToUnix(priv->mon, background_flags, unixfile); } else if (qemuCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_EXEC)) { const char *args[] = { "nc", "-U", unixfile, NULL }; internalret = qemuMonitorMigrateToCommand(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, args); } else { internalret = -1; } qemuDomainObjExitMonitorWithDriver(driver, vm); if (internalret < 0) { qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("tunnelled migration monitor command failed")); goto finish; } if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("guest unexpectedly quit")); goto cleanup; } /* From this point onwards we *must* call cancel to abort the * migration on source if anything goes wrong */ /* it is also possible that the migrate didn't fail initially, but * rather failed later on. Check the output of "info migrate" */ qemuDomainObjEnterMonitorWithDriver(driver, vm); if (qemuMonitorGetMigrationStatus(priv->mon, &status, &transferred, &remaining, &total) < 0) { qemuDomainObjExitMonitorWithDriver(driver, vm); goto cancel; } qemuDomainObjExitMonitorWithDriver(driver, vm); if (status == QEMU_MONITOR_MIGRATION_STATUS_ERROR) { qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",_("migrate failed")); goto cancel; } addrlen = sizeof(sa_client); while ((client_sock = accept(qemu_sock, (struct sockaddr *)&sa_client, &addrlen)) < 0) { if (errno == EAGAIN || errno == EINTR) continue; virReportSystemError(errno, "%s", _("tunnelled migration failed to accept from qemu")); goto cancel; } retval = doTunnelSendAll(st, client_sock); cancel: if (retval != 0 && virDomainObjIsActive(vm)) { qemuDomainObjEnterMonitorWithDriver(driver, vm); qemuMonitorMigrateCancel(priv->mon); qemuDomainObjExitMonitorWithDriver(driver, vm); } finish: dname = dname ? dname : vm->def->name; qemuDomainObjEnterRemoteWithDriver(driver, vm); ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, NULL, 0, uri, flags, retval); qemuDomainObjExitRemoteWithDriver(driver, vm); cleanup: VIR_FORCE_CLOSE(client_sock); VIR_FORCE_CLOSE(qemu_sock); qemuCapsFree(qemuCaps); if (ddomain) virUnrefDomain(ddomain); if (unixfile) { unlink(unixfile); VIR_FREE(unixfile); } if (st) /* don't call virStreamFree(), because that resets any pending errors */ virUnrefStream(st); return retval; } /* This is essentially a simplified re-impl of * virDomainMigrateVersion2 from libvirt.c, but running in source * libvirtd context, instead of client app context */ static int doNonTunnelMigrate(struct qemud_driver *driver, virConnectPtr dconn, virDomainObjPtr vm, const char *dom_xml, const char *uri ATTRIBUTE_UNUSED, unsigned long flags, const char *dname, unsigned long resource) { virDomainPtr ddomain = NULL; int retval = -1; char *uri_out = NULL; int rc; qemuDomainObjEnterRemoteWithDriver(driver, vm); /* NB we don't pass 'uri' into this, since that's the libvirtd * URI in this context - so we let dest pick it */ rc = dconn->driver->domainMigratePrepare2(dconn, NULL, /* cookie */ 0, /* cookielen */ NULL, /* uri */ &uri_out, flags, dname, resource, dom_xml); qemuDomainObjExitRemoteWithDriver(driver, vm); if (rc < 0) /* domainMigratePrepare2 sets the error for us */ goto cleanup; /* the domain may have shutdown or crashed while we had the locks dropped * in qemuDomainObjEnterRemoteWithDriver, so check again */ if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("guest unexpectedly quit")); goto cleanup; } if (uri_out == NULL) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("domainMigratePrepare2 did not set uri")); goto cleanup; } if (doNativeMigrate(driver, vm, uri_out, flags, dname, resource) < 0) goto finish; retval = 0; finish: dname = dname ? dname : vm->def->name; qemuDomainObjEnterRemoteWithDriver(driver, vm); ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, NULL, 0, uri_out, flags, retval); qemuDomainObjExitRemoteWithDriver(driver, vm); if (ddomain) virUnrefDomain(ddomain); cleanup: return retval; } static int doPeer2PeerMigrate(struct qemud_driver *driver, virDomainObjPtr vm, const char *uri, unsigned long flags, const char *dname, unsigned long resource) { int ret = -1; virConnectPtr dconn = NULL; char *dom_xml; bool p2p; /* the order of operations is important here; we make sure the * destination side is completely setup before we touch the source */ qemuDomainObjEnterRemoteWithDriver(driver, vm); dconn = virConnectOpen(uri); qemuDomainObjExitRemoteWithDriver(driver, vm); if (dconn == NULL) { qemuReportError(VIR_ERR_OPERATION_FAILED, _("Failed to connect to remote libvirt URI %s"), uri); return -1; } qemuDomainObjEnterRemoteWithDriver(driver, vm); p2p = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_P2P); qemuDomainObjExitRemoteWithDriver(driver, vm); if (!p2p) { qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("Destination libvirt does not support peer-to-peer migration protocol")); goto cleanup; } /* domain may have been stopped while we were talking to remote daemon */ if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("guest unexpectedly quit")); goto cleanup; } dom_xml = qemuDomainFormatXML(driver, vm, VIR_DOMAIN_XML_SECURE | VIR_DOMAIN_XML_UPDATE_CPU); if (!dom_xml) { qemuReportError(VIR_ERR_OPERATION_FAILED, "%s", _("failed to get domain xml")); goto cleanup; } if (flags & VIR_MIGRATE_TUNNELLED) ret = doTunnelMigrate(driver, dconn, vm, dom_xml, uri, flags, dname, resource); else ret = doNonTunnelMigrate(driver, dconn, vm, dom_xml, uri, flags, dname, resource); cleanup: VIR_FREE(dom_xml); /* don't call virConnectClose(), because that resets any pending errors */ qemuDomainObjEnterRemoteWithDriver(driver, vm); virUnrefConnect(dconn); qemuDomainObjExitRemoteWithDriver(driver, vm); return ret; } int qemuMigrationPerform(struct qemud_driver *driver, virConnectPtr conn, virDomainObjPtr vm, const char *uri, unsigned long flags, const char *dname, unsigned long resource) { virDomainEventPtr event = NULL; int ret = -1; int resume = 0; qemuDomainObjPrivatePtr priv = vm->privateData; if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; priv->jobActive = QEMU_JOB_MIGRATION_OUT; if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); goto endjob; } memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); priv->jobInfo.type = VIR_DOMAIN_JOB_UNBOUNDED; resume = vm->state == VIR_DOMAIN_RUNNING; if (!(flags & VIR_MIGRATE_LIVE) && vm->state == VIR_DOMAIN_RUNNING) { if (qemuMigrationSetOffline(driver, vm) < 0) goto endjob; } if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { if (doPeer2PeerMigrate(driver, vm, uri, flags, dname, resource) < 0) /* doPeer2PeerMigrate already set the error, so just get out */ goto endjob; } else { if (doNativeMigrate(driver, vm, uri, flags, dname, resource) < 0) goto endjob; } /* Clean up the source domain. */ qemuProcessStop(driver, vm, 1); qemuAuditDomainStop(vm, "migrated"); resume = 0; event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_MIGRATED); if (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE)) { virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); if (qemuDomainObjEndJob(vm) > 0) virDomainRemoveInactive(&driver->domains, vm); vm = NULL; } ret = 0; endjob: if (resume && vm->state == VIR_DOMAIN_PAUSED) { /* we got here through some sort of failure; start the domain again */ if (qemuProcessStartCPUs(driver, vm, conn) < 0) { /* Hm, we already know we are in error here. We don't want to * overwrite the previous error, though, so we just throw something * to the logs and hope for the best */ VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name); } event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_RESUMED, VIR_DOMAIN_EVENT_RESUMED_MIGRATED); } if (vm && qemuDomainObjEndJob(vm) == 0) vm = NULL; cleanup: if (vm) virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); return ret; } #if WITH_MACVTAP static void qemuMigrationVPAssociatePortProfiles(virDomainDefPtr def) { int i; int last_good_net = -1; virDomainNetDefPtr net; for (i = 0; i < def->nnets; i++) { net = def->nets[i]; if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { if (vpAssociatePortProfileId(net->ifname, net->mac, net->data.direct.linkdev, &net->data.direct.virtPortProfile, def->uuid, VIR_VM_OP_MIGRATE_IN_FINISH) != 0) goto err_exit; } last_good_net = i; } return; err_exit: for (i = 0; i < last_good_net; i++) { net = def->nets[i]; if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { vpDisassociatePortProfileId(net->ifname, net->mac, net->data.direct.linkdev, &net->data.direct.virtPortProfile, VIR_VM_OP_MIGRATE_IN_FINISH); } } } #else /* !WITH_MACVTAP */ static void qemuMigrationVPAssociatePortProfiles(virDomainDefPtr def ATTRIBUTE_UNUSED) { } #endif /* WITH_MACVTAP */ virDomainPtr qemuMigrationFinish(struct qemud_driver *driver, virConnectPtr dconn, virDomainObjPtr vm, unsigned long flags, int retcode) { virDomainPtr dom = NULL; virDomainEventPtr event = NULL; int newVM = 1; qemuDomainObjPrivatePtr priv = NULL; priv = vm->privateData; if (priv->jobActive != QEMU_JOB_MIGRATION_IN) { qemuReportError(VIR_ERR_NO_DOMAIN, _("domain '%s' is not processing incoming migration"), vm->def->name); goto cleanup; } priv->jobActive = QEMU_JOB_NONE; memset(&priv->jobInfo, 0, sizeof(priv->jobInfo)); if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) goto cleanup; /* Did the migration go as planned? If yes, return the domain * object, but if no, clean up the empty qemu process. */ if (retcode == 0) { if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("guest unexpectedly quit")); goto cleanup; } qemuMigrationVPAssociatePortProfiles(vm->def); if (flags & VIR_MIGRATE_PERSIST_DEST) { if (vm->persistent) newVM = 0; vm->persistent = 1; if (virDomainSaveConfig(driver->configDir, vm->def) < 0) { /* Hmpf. Migration was successful, but making it persistent * was not. If we report successful, then when this domain * shuts down, management tools are in for a surprise. On the * other hand, if we report failure, then the management tools * might try to restart the domain on the source side, even * though the domain is actually running on the destination. * Return a NULL dom pointer, and hope that this is a rare * situation and management tools are smart. */ vm = NULL; goto endjob; } event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_DEFINED, newVM ? VIR_DOMAIN_EVENT_DEFINED_ADDED : VIR_DOMAIN_EVENT_DEFINED_UPDATED); if (event) qemuDomainEventQueue(driver, event); event = NULL; } dom = virGetDomain (dconn, vm->def->name, vm->def->uuid); if (!(flags & VIR_MIGRATE_PAUSED)) { /* run 'cont' on the destination, which allows migration on qemu * >= 0.10.6 to work properly. This isn't strictly necessary on * older qemu's, but it also doesn't hurt anything there */ if (qemuProcessStartCPUs(driver, vm, dconn) < 0) { if (virGetLastError() == NULL) qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("resume operation failed")); goto endjob; } } event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_RESUMED, VIR_DOMAIN_EVENT_RESUMED_MIGRATED); if (vm->state == VIR_DOMAIN_PAUSED) { qemuDomainEventQueue(driver, event); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_SUSPENDED, VIR_DOMAIN_EVENT_SUSPENDED_PAUSED); } if (virDomainSaveStatus(driver->caps, driver->stateDir, vm) < 0) { VIR_WARN("Failed to save status on vm %s", vm->def->name); goto endjob; } } else { qemuProcessStop(driver, vm, 1); qemuAuditDomainStop(vm, "failed"); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_FAILED); if (!vm->persistent) { if (qemuDomainObjEndJob(vm) > 0) virDomainRemoveInactive(&driver->domains, vm); vm = NULL; } } endjob: if (vm && qemuDomainObjEndJob(vm) == 0) vm = NULL; cleanup: if (vm) virDomainObjUnlock(vm); if (event) qemuDomainEventQueue(driver, event); return dom; } /* Helper function called while driver lock is held and vm is active. */ int qemuMigrationToFile(struct qemud_driver *driver, virDomainObjPtr vm, virBitmapPtr qemuCaps, int fd, off_t offset, const char *path, const char *compressor, bool is_reg, bool bypassSecurityDriver) { qemuDomainObjPrivatePtr priv = vm->privateData; virCgroupPtr cgroup = NULL; int ret = -1; int rc; bool restoreLabel = false; virCommandPtr cmd = NULL; int pipeFD[2] = { -1, -1 }; if (qemuCaps && qemuCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_FD) && (!compressor || pipe(pipeFD) == 0)) { /* All right! We can use fd migration, which means that qemu * doesn't have to open() the file, so we don't have to futz * around with granting access or revoking it later. */ is_reg = true; bypassSecurityDriver = true; } else { /* Phooey - we have to fall back on exec migration, where qemu * has to popen() the file by name. We might also stumble on * a race present in some qemu versions where it does a wait() * that botches pclose. */ if (!is_reg && qemuCgroupControllerActive(driver, VIR_CGROUP_CONTROLLER_DEVICES)) { if (virCgroupForDomain(driver->cgroup, vm->def->name, &cgroup, 0) != 0) { qemuReportError(VIR_ERR_INTERNAL_ERROR, _("Unable to find cgroup for %s"), vm->def->name); goto cleanup; } rc = virCgroupAllowDevicePath(cgroup, path, VIR_CGROUP_DEVICE_RW); qemuAuditCgroupPath(vm, cgroup, "allow", path, "rw", rc); if (rc < 0) { virReportSystemError(-rc, _("Unable to allow device %s for %s"), path, vm->def->name); goto cleanup; } } if ((!bypassSecurityDriver) && virSecurityManagerSetSavedStateLabel(driver->securityManager, vm, path) < 0) goto cleanup; restoreLabel = true; } qemuDomainObjEnterMonitorWithDriver(driver, vm); if (!compressor) { const char *args[] = { "cat", NULL }; if (qemuCaps && qemuCapsGet(qemuCaps, QEMU_CAPS_MIGRATE_QEMU_FD) && priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX) { rc = qemuMonitorMigrateToFd(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, fd); } else { rc = qemuMonitorMigrateToFile(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, args, path, offset); } } else { const char *prog = compressor; const char *args[] = { prog, "-c", NULL }; if (pipeFD[0] != -1) { cmd = virCommandNewArgs(args); virCommandSetInputFD(cmd, pipeFD[0]); virCommandSetOutputFD(cmd, &fd); if (virSetCloseExec(pipeFD[1]) < 0) { virReportSystemError(errno, "%s", _("Unable to set cloexec flag")); qemuDomainObjExitMonitorWithDriver(driver, vm); goto cleanup; } if (virCommandRunAsync(cmd, NULL) < 0) { qemuDomainObjExitMonitorWithDriver(driver, vm); goto cleanup; } rc = qemuMonitorMigrateToFd(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, pipeFD[1]); if (VIR_CLOSE(pipeFD[0]) < 0 || VIR_CLOSE(pipeFD[1]) < 0) VIR_WARN0("failed to close intermediate pipe"); } else { rc = qemuMonitorMigrateToFile(priv->mon, QEMU_MONITOR_MIGRATE_BACKGROUND, args, path, offset); } } qemuDomainObjExitMonitorWithDriver(driver, vm); if (rc < 0) goto cleanup; rc = qemuMigrationWaitForCompletion(driver, vm); if (rc < 0) goto cleanup; if (cmd && virCommandWait(cmd, NULL) < 0) goto cleanup; ret = 0; cleanup: VIR_FORCE_CLOSE(pipeFD[0]); VIR_FORCE_CLOSE(pipeFD[1]); virCommandFree(cmd); if (restoreLabel && (!bypassSecurityDriver) && virSecurityManagerRestoreSavedStateLabel(driver->securityManager, vm, path) < 0) VIR_WARN("failed to restore save state label on %s", path); if (cgroup != NULL) { rc = virCgroupDenyDevicePath(cgroup, path, VIR_CGROUP_DEVICE_RWM); qemuAuditCgroupPath(vm, cgroup, "deny", path, "rwm", rc); if (rc < 0) VIR_WARN("Unable to deny device %s for %s %d", path, vm->def->name, rc); virCgroupFree(&cgroup); } return ret; }