From 5f998219111327dc4f66586ce8953a6d7ecfb619 Mon Sep 17 00:00:00 2001 From: Jiri Denemark Date: Thu, 28 Jun 2018 11:38:52 +0200 Subject: [PATCH] qemu_migration: Check for active domain after talking to remote daemon Once we called qemuDomainObjEnterRemote to talk to the destination daemon during a peer to peer migration, the vm lock is released and we only hold an async job. If the source domain dies at this point the monitor EOF callback is allowed to do its job and (among other things) clear all private data irrelevant for stopped domain. Thus when we call qemuDomainObjExitRemote, the domain may already be gone and we should avoid touching runtime private data (such as current job info). In other words after acquiring the lock in qemuDomainObjExitRemote, we need to check the domain is still alive. Unless we're doing offline migration. https://bugzilla.redhat.com/show_bug.cgi?id=1589730 Signed-off-by: Jiri Denemark --- src/qemu/qemu_domain.c | 14 +++++++++- src/qemu/qemu_domain.h | 5 ++-- src/qemu/qemu_migration.c | 54 ++++++++++++++++++--------------------- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index afd572fc5e..4c15d5a36a 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7023,11 +7023,23 @@ void qemuDomainObjEnterRemote(virDomainObjPtr obj) virObjectUnlock(obj); } -void qemuDomainObjExitRemote(virDomainObjPtr obj) + +int +qemuDomainObjExitRemote(virDomainObjPtr obj, + bool checkActive) { virObjectLock(obj); VIR_DEBUG("Exited remote (vm=%p name=%s)", obj, obj->def->name); + + if (checkActive && !virDomainObjIsActive(obj)) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain '%s' is not running"), + obj->def->name); + return -1; + } + + return 0; } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 0a9af756b8..30d186a921 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -584,8 +584,9 @@ void qemuDomainObjExitAgent(virDomainObjPtr obj, qemuAgentPtr agent) void qemuDomainObjEnterRemote(virDomainObjPtr obj) ATTRIBUTE_NONNULL(1); -void qemuDomainObjExitRemote(virDomainObjPtr obj) - ATTRIBUTE_NONNULL(1); +int qemuDomainObjExitRemote(virDomainObjPtr obj, + bool checkActive) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; virDomainDefPtr qemuDomainDefCopy(virQEMUDriverPtr driver, virDomainDefPtr src, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index c9aaa38029..435cd174af 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3920,27 +3920,20 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepareTunnel (dconn, st, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepare2 (dconn, &cookie, &cookielen, NULL, &uri_out, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1) goto cleanup; - /* the domain may have shutdown or crashed while we had the locks dropped - * in qemuDomainObjEnterRemote, so check again - */ - if (!virDomainObjIsActive(vm)) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - if (!(flags & VIR_MIGRATE_TUNNELLED) && (uri_out == NULL)) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", @@ -3987,7 +3980,8 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, cookie, cookielen, uri_out ? uri_out : dconnuri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + /* The domain is already gone at this point */ + ignore_value(qemuDomainObjExitRemote(vm, false)); if (cancelled && ddomain) VIR_ERROR(_("finish step ignored that migration was cancelled")); @@ -4052,6 +4046,7 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, int nparams = 0; int maxparams = 0; size_t i; + bool offline = !!(flags & VIR_MIGRATE_OFFLINE); VIR_DEBUG("driver=%p, sconn=%p, dconn=%p, dconnuri=%s, vm=%p, xmlin=%s, " "dname=%s, uri=%s, graphicsuri=%s, listenAddress=%s, " @@ -4145,7 +4140,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, (dconn, st, cookiein, cookieinlen, &cookieout, &cookieoutlen, destflags, dname, bandwidth, dom_xml); } - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); if (useParams) { @@ -4157,13 +4153,14 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, (dconn, cookiein, cookieinlen, &cookieout, &cookieoutlen, uri, &uri_out, destflags, dname, bandwidth, dom_xml); } - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1) goto cleanup; - if (flags & VIR_MIGRATE_OFFLINE) { + if (offline) { VIR_DEBUG("Offline migration, skipping Perform phase"); VIR_FREE(cookieout); cookieoutlen = 0; @@ -4253,7 +4250,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish3Params (dconn, params, nparams, cookiein, cookieinlen, &cookieout, &cookieoutlen, destflags, cancelled); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } } else { dname = dname ? dname : vm->def->name; @@ -4261,7 +4259,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish3 (dconn, dname, cookiein, cookieinlen, &cookieout, &cookieoutlen, dconnuri, uri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } if (cancelled) { @@ -4399,6 +4398,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, virConnectPtr dconn = NULL; bool p2p; virErrorPtr orig_err = NULL; + bool offline = !!(flags & VIR_MIGRATE_OFFLINE); bool dstOffline = false; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); bool useParams; @@ -4439,7 +4439,9 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); dconn = virConnectOpenAuth(dconnuri, &virConnectAuthConfig, 0); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; + if (dconn == NULL) { virReportError(VIR_ERR_OPERATION_FAILED, _("Failed to connect to remote libvirt URI %s: %s"), @@ -4468,10 +4470,11 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, VIR_DRV_FEATURE_MIGRATION_V3); useParams = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_PARAMS); - if (flags & VIR_MIGRATE_OFFLINE) + if (offline) dstOffline = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_OFFLINE); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; if (!p2p) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", @@ -4488,20 +4491,13 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, goto cleanup; } - if (flags & VIR_MIGRATE_OFFLINE && !dstOffline) { + if (offline && !dstOffline) { virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", _("offline migration is not supported by " "the destination host")); goto cleanup; } - /* domain may have been stopped while we were talking to remote daemon */ - if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - /* Change protection is only required on the source side (us), and * only for v3 migration when begin and perform are separate jobs. * But peer-2-peer is already a single job, and we still want to @@ -4526,7 +4522,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); virConnectUnregisterCloseCallback(dconn, qemuMigrationSrcConnectionClosed); virObjectUnref(dconn); - qemuDomainObjExitRemote(vm); + ignore_value(qemuDomainObjExitRemote(vm, false)); if (orig_err) { virSetError(orig_err); virFreeError(orig_err); -- GitLab