提交 f0c8e1cb 编写于 作者: C Chris Lalancette

Fix a deadlock in bi-directional p2p concurrent migration.

If you try to execute two concurrent p2p migrations,
one from A->B and one from B->A, the two libvirtd daemons will deadlock
trying to perform the migrations.  The reason for this is
that in p2p migration, the libvirtd's are responsible for
making the RPC Prepare, Migrate, and Finish calls.  However,
they are currently holding the driver lock while doing so,
which basically guarantees deadlock in this scenario.

This patch fixes the situation by adding
qemuDomainObjEnterRemoteWithDriver and
qemuDomainObjExitRemoteWithDriver helper methods.  The Enter
helper takes an additional object reference, then drops both the
domain object lock and the driver lock.  The Exit helper takes
both the driver and domain object locks, then drops the
reference.  Adding calls to these Enter and Exit helpers
around remote calls in the various migration methods
seems to fix the problem for me in testing.

This should make the situation safe. The additional domain
object reference ensures that the domain object won't disappear
while this operation is happening.  The BeginJob that is called
inside of qemudDomainMigratePerform ensures that we can't execute a
second migrate (or shutdown, or save, etc) job while the
migration is active.  Finally, the additional check on the state
of the vm after we reacquire the locks ensures that we can't
be surprised by an external event (domain crash, etc).
Signed-off-by: Chris Lalancette <clalance@redhat.com>
上级 963a5b7a
...@@ -531,6 +531,21 @@ static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virD ...@@ -531,6 +531,21 @@ static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virD
} }
} }
/*
 * Prepare to make a remote (RPC) call on behalf of a domain without
 * holding any local locks.
 *
 * Takes an extra reference on 'obj', then releases the domain object
 * lock followed by the driver lock.  The extra reference keeps 'obj'
 * from disappearing while the locks are dropped.
 *
 * NOTE(review): the unlock calls imply the caller holds both the driver
 * lock and the domain object lock on entry -- confirm at each call site.
 *
 * Must be paired with qemuDomainObjExitRemoteWithDriver().
 */
static void qemuDomainObjEnterRemoteWithDriver(struct qemud_driver *driver,
virDomainObjPtr obj)
{
/* Pin the object first so it stays valid once the locks are released */
virDomainObjRef(obj);
virDomainObjUnlock(obj);
qemuDriverUnlock(driver);
}
/*
 * Counterpart of qemuDomainObjEnterRemoteWithDriver(), called once the
 * remote call has returned.
 *
 * Acquires the driver lock, then the domain object lock, and finally
 * drops the reference on 'obj'.
 *
 * The domain's state may have changed while the locks were released;
 * the migration callers in this file re-check virDomainObjIsActive()
 * after this returns.
 */
static void qemuDomainObjExitRemoteWithDriver(struct qemud_driver *driver,
virDomainObjPtr obj)
{
/* Driver lock is taken before the domain object lock */
qemuDriverLock(driver);
virDomainObjLock(obj);
virDomainObjUnref(obj);
}
static int qemuCgroupControllerActive(struct qemud_driver *driver, static int qemuCgroupControllerActive(struct qemud_driver *driver,
int controller) int controller)
...@@ -10797,14 +10812,25 @@ static int doTunnelMigrate(virDomainPtr dom, ...@@ -10797,14 +10812,25 @@ static int doTunnelMigrate(virDomainPtr dom,
/* virStreamNew only fails on OOM, and it reports the error itself */ /* virStreamNew only fails on OOM, and it reports the error itself */
goto cleanup; goto cleanup;
qemuDomainObjEnterRemoteWithDriver(driver, vm);
internalret = dconn->driver->domainMigratePrepareTunnel(dconn, st, internalret = dconn->driver->domainMigratePrepareTunnel(dconn, st,
flags, dname, flags, dname,
resource, dom_xml); resource, dom_xml);
qemuDomainObjExitRemoteWithDriver(driver, vm);
if (internalret < 0) if (internalret < 0)
/* domainMigratePrepareTunnel sets the error for us */ /* domainMigratePrepareTunnel sets the error for us */
goto cleanup; goto cleanup;
/* the domain may have shutdown or crashed while we had the locks dropped
* in qemuDomainObjEnterRemoteWithDriver, so check again
*/
if (!virDomainObjIsActive(vm)) {
qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("guest unexpectedly quit"));
goto cleanup;
}
/* 3. start migration on source */ /* 3. start migration on source */
qemuDomainObjEnterMonitorWithDriver(driver, vm); qemuDomainObjEnterMonitorWithDriver(driver, vm);
if (flags & VIR_MIGRATE_NON_SHARED_DISK) if (flags & VIR_MIGRATE_NON_SHARED_DISK)
...@@ -10877,8 +10903,10 @@ cancel: ...@@ -10877,8 +10903,10 @@ cancel:
finish: finish:
dname = dname ? dname : dom->name; dname = dname ? dname : dom->name;
qemuDomainObjEnterRemoteWithDriver(driver, vm);
ddomain = dconn->driver->domainMigrateFinish2 ddomain = dconn->driver->domainMigrateFinish2
(dconn, dname, NULL, 0, uri, flags, retval); (dconn, dname, NULL, 0, uri, flags, retval);
qemuDomainObjExitRemoteWithDriver(driver, vm);
cleanup: cleanup:
if (client_sock != -1) if (client_sock != -1)
...@@ -10917,19 +10945,32 @@ static int doNonTunnelMigrate(virDomainPtr dom, ...@@ -10917,19 +10945,32 @@ static int doNonTunnelMigrate(virDomainPtr dom,
virDomainPtr ddomain = NULL; virDomainPtr ddomain = NULL;
int retval = -1; int retval = -1;
char *uri_out = NULL; char *uri_out = NULL;
int rc;
qemuDomainObjEnterRemoteWithDriver(driver, vm);
/* NB we don't pass 'uri' into this, since that's the libvirtd /* NB we don't pass 'uri' into this, since that's the libvirtd
* URI in this context - so we let dest pick it */ * URI in this context - so we let dest pick it */
if (dconn->driver->domainMigratePrepare2(dconn, rc = dconn->driver->domainMigratePrepare2(dconn,
NULL, /* cookie */ NULL, /* cookie */
0, /* cookielen */ 0, /* cookielen */
NULL, /* uri */ NULL, /* uri */
&uri_out, &uri_out,
flags, dname, flags, dname,
resource, dom_xml) < 0) resource, dom_xml);
qemuDomainObjExitRemoteWithDriver(driver, vm);
if (rc < 0)
/* domainMigratePrepare2 sets the error for us */ /* domainMigratePrepare2 sets the error for us */
goto cleanup; goto cleanup;
/* the domain may have shutdown or crashed while we had the locks dropped
* in qemuDomainObjEnterRemoteWithDriver, so check again
*/
if (!virDomainObjIsActive(vm)) {
qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("guest unexpectedly quit"));
goto cleanup;
}
if (uri_out == NULL) { if (uri_out == NULL) {
qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("domainMigratePrepare2 did not set uri")); _("domainMigratePrepare2 did not set uri"));
...@@ -10943,8 +10984,10 @@ static int doNonTunnelMigrate(virDomainPtr dom, ...@@ -10943,8 +10984,10 @@ static int doNonTunnelMigrate(virDomainPtr dom,
finish: finish:
dname = dname ? dname : dom->name; dname = dname ? dname : dom->name;
qemuDomainObjEnterRemoteWithDriver(driver, vm);
ddomain = dconn->driver->domainMigrateFinish2 ddomain = dconn->driver->domainMigrateFinish2
(dconn, dname, NULL, 0, uri_out, flags, retval); (dconn, dname, NULL, 0, uri_out, flags, retval);
qemuDomainObjExitRemoteWithDriver(driver, vm);
if (ddomain) if (ddomain)
virUnrefDomain(ddomain); virUnrefDomain(ddomain);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册