提交 4454a9ef 编写于 作者: D Daniel P. Berrange 提交者: Daniel Veillard

Don't kill QEMU process when a monitor I/O parsing error occurs

Currently whenever there is any failure with parsing the monitor,
this is treated in the same way as end-of-file (i.e. QEMU quit).
The domain is terminated, if not already dead.

With this change, failures in parsing the monitor stream do not
result in the death of QEMU. The guest continues running unchanged,
but all further use of the monitor will be disabled.

The VMM_FAILURE event will be emitted, and the management application
can decide when to kill/restart the guest to regain control.

* src/qemu/qemu_monitor.c, src/qemu/qemu_monitor.h: Run a
  different callback for monitor EOF vs error conditions.
* src/qemu/qemu_process.c: Emit VMM_FAILURE event when monitor
  fails
上级 a6135ec1
...@@ -517,7 +517,8 @@ static void qemuMonitorUpdateWatch(qemuMonitorPtr mon) ...@@ -517,7 +517,8 @@ static void qemuMonitorUpdateWatch(qemuMonitorPtr mon)
static void static void
qemuMonitorIO(int watch, int fd, int events, void *opaque) { qemuMonitorIO(int watch, int fd, int events, void *opaque) {
qemuMonitorPtr mon = opaque; qemuMonitorPtr mon = opaque;
int quit = 0, failed = 0; bool error = false;
bool eof = false;
/* lock access to the monitor and protect fd */ /* lock access to the monitor and protect fd */
qemuMonitorLock(mon); qemuMonitorLock(mon);
...@@ -528,27 +529,27 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) { ...@@ -528,27 +529,27 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
if (mon->fd != fd || mon->watch != watch) { if (mon->fd != fd || mon->watch != watch) {
VIR_ERROR(_("event from unexpected fd %d!=%d / watch %d!=%d"), mon->fd, fd, mon->watch, watch); VIR_ERROR(_("event from unexpected fd %d!=%d / watch %d!=%d"), mon->fd, fd, mon->watch, watch);
failed = 1; error = true;
} else { } else {
if (!mon->lastErrno && if (!mon->lastErrno &&
events & VIR_EVENT_HANDLE_WRITABLE) { events & VIR_EVENT_HANDLE_WRITABLE) {
int done = qemuMonitorIOWrite(mon); int done = qemuMonitorIOWrite(mon);
if (done < 0) if (done < 0)
failed = 1; error = 1;
events &= ~VIR_EVENT_HANDLE_WRITABLE; events &= ~VIR_EVENT_HANDLE_WRITABLE;
} }
if (!mon->lastErrno && if (!mon->lastErrno &&
events & VIR_EVENT_HANDLE_READABLE) { events & VIR_EVENT_HANDLE_READABLE) {
int got = qemuMonitorIORead(mon); int got = qemuMonitorIORead(mon);
if (got < 0) if (got < 0)
failed = 1; error = true;
/* Ignore hangup/error events if we read some data, to /* Ignore hangup/error events if we read some data, to
* give time for that data to be consumed */ * give time for that data to be consumed */
if (got > 0) { if (got > 0) {
events = 0; events = 0;
if (qemuMonitorIOProcess(mon) < 0) if (qemuMonitorIOProcess(mon) < 0)
failed = 1; error = true;
} else } else
events &= ~VIR_EVENT_HANDLE_READABLE; events &= ~VIR_EVENT_HANDLE_READABLE;
} }
...@@ -572,36 +573,44 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) { ...@@ -572,36 +573,44 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) {
mon->msg->lastErrno = EIO; mon->msg->lastErrno = EIO;
virCondSignal(&mon->notify); virCondSignal(&mon->notify);
} }
quit = 1; eof = 1;
} else if (events) { } else if (events) {
VIR_ERROR(_("unhandled fd event %d for monitor fd %d"), VIR_ERROR(_("unhandled fd event %d for monitor fd %d"),
events, mon->fd); events, mon->fd);
failed = 1; error = 1;
} }
} }
if (eof || error)
mon->lastErrno = EIO;
qemuMonitorUpdateWatch(mon);
/* We have to unlock to avoid deadlock against command thread, /* We have to unlock to avoid deadlock against command thread,
* but is this safe ? I think it is, because the callback * but is this safe ? I think it is, because the callback
* will try to acquire the virDomainObjPtr mutex next */ * will try to acquire the virDomainObjPtr mutex next */
if (failed || quit) { if (eof) {
void (*eofNotify)(qemuMonitorPtr, virDomainObjPtr, int) void (*eofNotify)(qemuMonitorPtr, virDomainObjPtr)
= mon->cb->eofNotify; = mon->cb->eofNotify;
virDomainObjPtr vm = mon->vm; virDomainObjPtr vm = mon->vm;
/* If qemu quited unexpectedly, and we may try to send monitor /* Make sure anyone waiting wakes up now */
* command later. But we have no chance to wake up it. So set virCondSignal(&mon->notify);
* mon->lastErrno to EIO, and check it before sending monitor if (qemuMonitorUnref(mon) > 0)
* command. qemuMonitorUnlock(mon);
*/ VIR_DEBUG("Triggering EOF callback");
if (!mon->lastErrno) (eofNotify)(mon, vm);
mon->lastErrno = EIO; } else if (error) {
void (*errorNotify)(qemuMonitorPtr, virDomainObjPtr)
= mon->cb->errorNotify;
virDomainObjPtr vm = mon->vm;
/* Make sure anyone waiting wakes up now */ /* Make sure anyone waiting wakes up now */
virCondSignal(&mon->notify); virCondSignal(&mon->notify);
if (qemuMonitorUnref(mon) > 0) if (qemuMonitorUnref(mon) > 0)
qemuMonitorUnlock(mon); qemuMonitorUnlock(mon);
VIR_DEBUG("Triggering EOF callback error? %d", failed); VIR_DEBUG("Triggering error callback");
(eofNotify)(mon, vm, failed); (errorNotify)(mon, vm);
} else { } else {
if (qemuMonitorUnref(mon) > 0) if (qemuMonitorUnref(mon) > 0)
qemuMonitorUnlock(mon); qemuMonitorUnlock(mon);
......
...@@ -67,9 +67,10 @@ struct _qemuMonitorCallbacks { ...@@ -67,9 +67,10 @@ struct _qemuMonitorCallbacks {
virDomainObjPtr vm); virDomainObjPtr vm);
void (*eofNotify)(qemuMonitorPtr mon, void (*eofNotify)(qemuMonitorPtr mon,
virDomainObjPtr vm, virDomainObjPtr vm);
int withError); void (*errorNotify)(qemuMonitorPtr mon,
/* XXX we'd really like to avoid virCOnnectPtr here virDomainObjPtr vm);
/* XXX we'd really like to avoid virConnectPtr here
* It is required so the callback can find the active * It is required so the callback can find the active
* secret driver. Need to change this to work like the * secret driver. Need to change this to work like the
* security drivers do, to avoid this * security drivers do, to avoid this
......
...@@ -100,12 +100,14 @@ extern struct qemud_driver *qemu_driver; ...@@ -100,12 +100,14 @@ extern struct qemud_driver *qemu_driver;
*/ */
static void static void
qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virDomainObjPtr vm, virDomainObjPtr vm)
int hasError)
{ {
struct qemud_driver *driver = qemu_driver; struct qemud_driver *driver = qemu_driver;
virDomainEventPtr event = NULL; virDomainEventPtr event = NULL;
qemuDomainObjPrivatePtr priv; qemuDomainObjPrivatePtr priv;
int eventReason = VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN;
int stopReason = VIR_DOMAIN_SHUTOFF_SHUTDOWN;
const char *auditReason = "shutdown";
VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name); VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
...@@ -120,32 +122,54 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, ...@@ -120,32 +122,54 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
} }
priv = vm->privateData; priv = vm->privateData;
if (!hasError && priv->monJSON && !priv->gotShutdown) { if (priv->monJSON && !priv->gotShutdown) {
VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; " VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; "
"assuming the domain crashed", vm->def->name); "assuming the domain crashed", vm->def->name);
hasError = 1; eventReason = VIR_DOMAIN_EVENT_STOPPED_FAILED;
stopReason = VIR_DOMAIN_SHUTOFF_FAILED;
auditReason = "failed";
} }
event = virDomainEventNewFromObj(vm, event = virDomainEventNewFromObj(vm,
VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED,
hasError ? eventReason);
VIR_DOMAIN_EVENT_STOPPED_FAILED : qemuProcessStop(driver, vm, 0, stopReason);
VIR_DOMAIN_EVENT_STOPPED_SHUTDOWN); qemuAuditDomainStop(vm, auditReason);
qemuProcessStop(driver, vm, 0,
hasError ?
VIR_DOMAIN_SHUTOFF_CRASHED :
VIR_DOMAIN_SHUTOFF_SHUTDOWN);
qemuAuditDomainStop(vm, hasError ? "failed" : "shutdown");
if (!vm->persistent) if (!vm->persistent)
virDomainRemoveInactive(&driver->domains, vm); virDomainRemoveInactive(&driver->domains, vm);
else else
virDomainObjUnlock(vm); virDomainObjUnlock(vm);
if (event) { if (event)
qemuDomainEventQueue(driver, event); qemuDomainEventQueue(driver, event);
} qemuDriverUnlock(driver);
}
/*
* This is invoked when there is some kind of error
* parsing data to/from the monitor. The VM can continue
* to run, but no further monitor commands will be
* allowed
*/
static void
qemuProcessHandleMonitorError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
virDomainObjPtr vm)
{
struct qemud_driver *driver = qemu_driver;
virDomainEventPtr event = NULL;
VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);
qemuDriverLock(driver);
virDomainObjLock(vm);
event = virDomainEventControlErrorNewFromObj(vm);
if (event)
qemuDomainEventQueue(driver, event);
virDomainObjUnlock(vm);
qemuDriverUnlock(driver); qemuDriverUnlock(driver);
} }
...@@ -626,6 +650,7 @@ static void qemuProcessHandleMonitorDestroy(qemuMonitorPtr mon, ...@@ -626,6 +650,7 @@ static void qemuProcessHandleMonitorDestroy(qemuMonitorPtr mon,
static qemuMonitorCallbacks monitorCallbacks = { static qemuMonitorCallbacks monitorCallbacks = {
.destroy = qemuProcessHandleMonitorDestroy, .destroy = qemuProcessHandleMonitorDestroy,
.eofNotify = qemuProcessHandleMonitorEOF, .eofNotify = qemuProcessHandleMonitorEOF,
.errorNotify = qemuProcessHandleMonitorError,
.diskSecretLookup = qemuProcessFindVolumeQcowPassphrase, .diskSecretLookup = qemuProcessFindVolumeQcowPassphrase,
.domainShutdown = qemuProcessHandleShutdown, .domainShutdown = qemuProcessHandleShutdown,
.domainStop = qemuProcessHandleStop, .domainStop = qemuProcessHandleStop,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册