提交 b79f25e8 编写于 作者: J Jiri Denemark

qemu: Track the API which started the current job

This is very helpful when we want to log and report why we could not
acquire a state change lock. Reporting what job keeps it locked helps
with understanding the issue. Moreover, after calling
virDomainGetControlInfo, it's possible to tell whether libvirt is just
stuck somewhere within the API (or it just forgot to cleanup the job) or
whether libvirt is waiting for QEMU to reply.

The error message will look like the following:

    # virsh resume cd
    error: Failed to resume domain cd
    error: Timed out during operation: cannot acquire state change lock
    (held by remoteDispatchDomainSuspend)

https://bugzilla.redhat.com/show_bug.cgi?id=853839Signed-off-by: NJiri Denemark <jdenemar@redhat.com>
上级 667cce7b
......@@ -153,6 +153,7 @@ qemuDomainObjResetJob(qemuDomainObjPrivatePtr priv)
job->active = QEMU_JOB_NONE;
job->owner = 0;
job->ownerAPI = NULL;
}
static void
......@@ -162,6 +163,7 @@ qemuDomainObjResetAsyncJob(qemuDomainObjPrivatePtr priv)
job->asyncJob = QEMU_ASYNC_JOB_NONE;
job->asyncOwner = 0;
job->asyncOwnerAPI = NULL;
job->phase = 0;
job->mask = QEMU_JOB_DEFAULT_MASK;
job->dump_memory_only = false;
......@@ -1330,6 +1332,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
unsigned long long then;
bool nested = job == QEMU_JOB_ASYNC_NESTED;
virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
const char *blocker = NULL;
int ret = -1;
VIR_DEBUG("Starting %s: %s (async=%s vm=%p name=%s)",
......@@ -1378,6 +1381,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
obj, obj->def->name);
priv->job.active = job;
priv->job.owner = virThreadSelfID();
priv->job.ownerAPI = virThreadJobGet();
} else {
VIR_DEBUG("Started async job: %s (vm=%p name=%s)",
qemuDomainAsyncJobTypeToString(asyncJob),
......@@ -1387,6 +1391,7 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
goto cleanup;
priv->job.asyncJob = asyncJob;
priv->job.asyncOwner = virThreadSelfID();
priv->job.asyncOwnerAPI = virThreadJobGet();
priv->job.current->started = now;
}
......@@ -1397,29 +1402,47 @@ qemuDomainObjBeginJobInternal(virQEMUDriverPtr driver,
return 0;
error:
VIR_WARN("Cannot start job (%s, %s) for domain %s;"
" current job is (%s, %s) owned by (%llu, %llu)",
VIR_WARN("Cannot start job (%s, %s) for domain %s; "
"current job is (%s, %s) owned by (%llu %s, %llu %s)",
qemuDomainJobTypeToString(job),
qemuDomainAsyncJobTypeToString(asyncJob),
obj->def->name,
qemuDomainJobTypeToString(priv->job.active),
qemuDomainAsyncJobTypeToString(priv->job.asyncJob),
priv->job.owner, priv->job.asyncOwner);
priv->job.owner, NULLSTR(priv->job.ownerAPI),
priv->job.asyncOwner, NULLSTR(priv->job.asyncOwnerAPI));
if (nested || qemuDomainNestedJobAllowed(priv, job))
blocker = priv->job.ownerAPI;
else
blocker = priv->job.asyncOwnerAPI;
ret = -1;
if (errno == ETIMEDOUT) {
virReportError(VIR_ERR_OPERATION_TIMEOUT,
"%s", _("cannot acquire state change lock"));
if (blocker) {
virReportError(VIR_ERR_OPERATION_TIMEOUT,
_("cannot acquire state change lock (held by %s)"),
blocker);
} else {
virReportError(VIR_ERR_OPERATION_TIMEOUT, "%s",
_("cannot acquire state change lock"));
}
ret = -2;
} else if (cfg->maxQueuedJobs &&
priv->jobs_queued > cfg->maxQueuedJobs) {
virReportError(VIR_ERR_OPERATION_FAILED,
"%s", _("cannot acquire state change lock "
"due to max_queued limit"));
if (blocker) {
virReportError(VIR_ERR_OPERATION_FAILED,
_("cannot acquire state change lock (held by %s) "
"due to max_queued limit"),
blocker);
} else {
virReportError(VIR_ERR_OPERATION_FAILED, "%s",
_("cannot acquire state change lock "
"due to max_queued limit"));
}
ret = -2;
} else {
virReportSystemError(errno,
"%s", _("cannot acquire job mutex"));
virReportSystemError(errno, "%s", _("cannot acquire job mutex"));
}
cleanup:
......
......@@ -117,10 +117,12 @@ struct qemuDomainJobObj {
virCond cond; /* Use to coordinate jobs */
qemuDomainJob active; /* Currently running job */
unsigned long long owner; /* Thread id which set current job */
const char *ownerAPI; /* The API which owns the job */
virCond asyncCond; /* Use to coordinate with async jobs */
qemuDomainAsyncJob asyncJob; /* Currently active async job */
unsigned long long asyncOwner; /* Thread which set current async job */
const char *asyncOwnerAPI; /* The API which owns the async job */
int phase; /* Job phase (mainly for migrations) */
unsigned long long mask; /* Jobs allowed during async job */
bool dump_memory_only; /* use dump-guest-memory to do dump */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册