From 440a1aa508f7abec635a035158e9b37e179f2db2 Mon Sep 17 00:00:00 2001
From: Martin Kletzander
Date: Thu, 6 Feb 2014 14:30:59 +0100
Subject: [PATCH] qemu: keep pre-migration domain state after failed migration

A couple of code paths shared the same code, which can be moved out
into a function; in one of those places, qemuMigrationConfirmPhase(),
the domain was resumed even if it wasn't running before the migration
started.

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1057407

Signed-off-by: Martin Kletzander
---
 src/qemu/qemu_domain.h    |   3 +-
 src/qemu/qemu_migration.c | 112 +++++++++++++++++++++-----------------
 2 files changed, 63 insertions(+), 52 deletions(-)

diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 6a92351a2d..84624f9ac9 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -1,7 +1,7 @@
 /*
  * qemu_domain.h: QEMU domain private state
  *
- * Copyright (C) 2006-2013 Red Hat, Inc.
+ * Copyright (C) 2006-2014 Red Hat, Inc.
  * Copyright (C) 2006 Daniel P. Berrange
  *
  * This library is free software; you can redistribute it and/or
@@ -161,6 +161,7 @@ struct _qemuDomainObjPrivate {
     char *origname;
     int nbdPort; /* Port used for migration with NBD */
     unsigned short migrationPort;
+    int preMigrationState;
 
     virChrdevsPtr devs;
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 407fb7040e..331e18acf1 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1075,6 +1075,53 @@ error:
     return NULL;
 }
 
+static void
+qemuMigrationStoreDomainState(virDomainObjPtr vm)
+{
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    priv->preMigrationState = virDomainObjGetState(vm, NULL);
+
+    VIR_DEBUG("Storing pre-migration state=%d domain=%p",
+              priv->preMigrationState, vm);
+}
+
+/* Returns true if the domain was resumed, false otherwise */
+static bool
+qemuMigrationRestoreDomainState(virConnectPtr conn, virDomainObjPtr vm)
+{
+    virQEMUDriverPtr driver = conn->privateData;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
+    int state = virDomainObjGetState(vm, NULL);
+    bool ret = false;
+
+    VIR_DEBUG("driver=%p, vm=%p, pre-mig-state=%d, state=%d",
+              driver, vm, priv->preMigrationState, state);
+
+    if (state == VIR_DOMAIN_PAUSED &&
+        priv->preMigrationState == VIR_DOMAIN_RUNNING) {
+        /* This is basically the only restore possibility that's safe
+         * and we should attempt to do */
+
+        VIR_DEBUG("Restoring pre-migration state due to migration error");
+
+        /* we got here through some sort of failure; start the domain again */
+        if (qemuProcessStartCPUs(driver, vm, conn,
+                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
+                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
+            /* Hm, we already know we are in error here. We don't want to
+             * overwrite the previous error, though, so we just throw something
+             * to the logs and hope for the best */
+            VIR_ERROR(_("Failed to resume guest %s after failure"), vm->def->name);
+            goto cleanup;
+        }
+        ret = true;
+    }
+
+ cleanup:
+    priv->preMigrationState = VIR_DOMAIN_NOSTATE;
+    return ret;
+}
+
 /**
  * qemuMigrationStartNBDServer:
  * @driver: qemu driver
@@ -2075,6 +2122,8 @@ qemuMigrationBegin(virConnectPtr conn,
         asyncJob = QEMU_ASYNC_JOB_NONE;
     }
 
+    qemuMigrationStoreDomainState(vm);
+
     if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) {
         virReportError(VIR_ERR_OPERATION_INVALID,
                        "%s", _("domain is not running"));
@@ -2744,22 +2793,12 @@ qemuMigrationConfirmPhase(virQEMUDriverPtr driver,
         /* cancel any outstanding NBD jobs */
         qemuMigrationCancelDriveMirror(mig, driver, vm);
 
-        /* run 'cont' on the destination, which allows migration on qemu
-         * >= 0.10.6 to work properly. This isn't strictly necessary on
-         * older qemu's, but it also doesn't hurt anything there
-         */
-        if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATED,
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
-            if (virGetLastError() == NULL)
-                virReportError(VIR_ERR_INTERNAL_ERROR,
-                               "%s", _("resume operation failed"));
-            goto cleanup;
+        if (qemuMigrationRestoreDomainState(conn, vm)) {
+            event = virDomainEventLifecycleNewFromObj(vm,
+                                                      VIR_DOMAIN_EVENT_RESUMED,
+                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
         }
 
-        event = virDomainEventLifecycleNewFromObj(vm,
-                                                  VIR_DOMAIN_EVENT_RESUMED,
-                                                  VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
         if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm) < 0) {
             VIR_WARN("Failed to save status on vm %s", vm->def->name);
             goto cleanup;
@@ -4065,7 +4104,6 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
 {
     virObjectEventPtr event = NULL;
     int ret = -1;
-    int resume = 0;
     virErrorPtr orig_err = NULL;
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
     bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR);
@@ -4085,7 +4123,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
     if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def))
         goto endjob;
 
-    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
+    qemuMigrationStoreDomainState(vm);
 
     if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) {
         ret = doPeer2PeerMigrate(driver, conn, vm, xmlin,
@@ -4112,25 +4150,12 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver,
                                                   VIR_DOMAIN_EVENT_STOPPED,
                                                   VIR_DOMAIN_EVENT_STOPPED_MIGRATED);
     }
-    resume = 0;
 
  endjob:
     if (ret < 0)
         orig_err = virSaveLastError();
 
-    if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
-        /* we got here through some sort of failure; start the domain again */
-        if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
-            /* Hm, we already know we are in error here. We don't want to
-             * overwrite the previous error, though, so we just throw something
-             * to the logs and hope for the best
-             */
-            VIR_ERROR(_("Failed to resume guest %s after failure"),
-                      vm->def->name);
-        }
-
+    if (qemuMigrationRestoreDomainState(conn, vm)) {
         event = virDomainEventLifecycleNewFromObj(vm,
                                                   VIR_DOMAIN_EVENT_RESUMED,
                                                   VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
@@ -4179,7 +4204,6 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
 {
     virObjectEventPtr event = NULL;
     int ret = -1;
-    bool resume;
     bool hasrefs;
 
     /* If we didn't start the job in the begin phase, start it now. */
@@ -4194,32 +4218,18 @@ qemuMigrationPerformPhase(virQEMUDriverPtr driver,
     virCloseCallbacksUnset(driver->closeCallbacks, vm, qemuMigrationCleanup);
 
-    resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING;
     ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen,
                           cookieout, cookieoutlen,
                           flags, resource, NULL, graphicsuri);
 
-    if (ret < 0 && resume &&
-        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
-        /* we got here through some sort of failure; start the domain again */
-        if (qemuProcessStartCPUs(driver, vm, conn,
-                                 VIR_DOMAIN_RUNNING_MIGRATION_CANCELED,
-                                 QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) {
-            /* Hm, we already know we are in error here. We don't want to
-             * overwrite the previous error, though, so we just throw something
-             * to the logs and hope for the best
-             */
-            VIR_ERROR(_("Failed to resume guest %s after failure"),
-                      vm->def->name);
+    if (ret < 0) {
+        if (qemuMigrationRestoreDomainState(conn, vm)) {
+            event = virDomainEventLifecycleNewFromObj(vm,
+                                                      VIR_DOMAIN_EVENT_RESUMED,
+                                                      VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
         }
-
-        event = virDomainEventLifecycleNewFromObj(vm,
-                                                  VIR_DOMAIN_EVENT_RESUMED,
-                                                  VIR_DOMAIN_EVENT_RESUMED_MIGRATED);
-    }
-
-    if (ret < 0)
         goto endjob;
+    }
 
     qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE);
-- 
GitLab
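
For readers who want to see the refactored pattern in isolation, below is a minimal, self-contained C sketch of what qemuMigrationStoreDomainState()/qemuMigrationRestoreDomainState() do: record the domain state before migration starts, and resume only when the domain is paused now but was running beforehand. All types and helpers in the sketch (domain_t, store_domain_state, restore_domain_state, start_cpus) are simplified stand-ins invented for illustration; they are not libvirt APIs.

/* Minimal sketch of the store/restore pattern from this patch.
 * Every type and helper here is a hypothetical stand-in; the real code
 * works on virDomainObjPtr and calls qemuProcessStartCPUs(). */
#include <stdbool.h>
#include <stdio.h>

typedef enum {
    DOM_NOSTATE,
    DOM_RUNNING,
    DOM_PAUSED,
} domain_state_t;

typedef struct {
    const char *name;
    domain_state_t state;          /* current state */
    domain_state_t pre_migration;  /* state recorded before migration */
} domain_t;

/* Stand-in for resuming vCPUs; modeled as possibly failing, like
 * qemuProcessStartCPUs() in the real code. */
static bool start_cpus(domain_t *dom)
{
    dom->state = DOM_RUNNING;
    return true;
}

/* Analogue of qemuMigrationStoreDomainState(): remember the state
 * before migration touches the domain. */
static void store_domain_state(domain_t *dom)
{
    dom->pre_migration = dom->state;
}

/* Analogue of qemuMigrationRestoreDomainState(): resume only if the
 * domain is paused now but was running before migration started.
 * Returns true if the domain was resumed. */
static bool restore_domain_state(domain_t *dom)
{
    bool resumed = false;

    if (dom->state == DOM_PAUSED && dom->pre_migration == DOM_RUNNING) {
        if (!start_cpus(dom)) {
            fprintf(stderr, "Failed to resume guest %s after failure\n",
                    dom->name);
            goto cleanup;
        }
        resumed = true;
    }

 cleanup:
    /* Always reset the recorded state so a stale value cannot leak
     * into the next migration attempt. */
    dom->pre_migration = DOM_NOSTATE;
    return resumed;
}

int main(void)
{
    domain_t dom = { .name = "guest1", .state = DOM_RUNNING,
                     .pre_migration = DOM_NOSTATE };

    store_domain_state(&dom);   /* before migration begins */
    dom.state = DOM_PAUSED;     /* migration paused the guest, then failed */

    if (restore_domain_state(&dom))
        printf("%s resumed after failed migration\n", dom.name);

    /* A domain that was already paused before migration stays paused,
     * which is the behavior this patch fixes in qemuMigrationConfirmPhase(). */
    dom.state = DOM_PAUSED;
    store_domain_state(&dom);
    printf("resumed again? %s\n", restore_domain_state(&dom) ? "yes" : "no");

    return 0;
}

Note that restore_domain_state() always clears the recorded state before returning, mirroring how the patch resets priv->preMigrationState to VIR_DOMAIN_NOSTATE in its cleanup path, so a stale value cannot influence a later migration attempt.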