You need to sign in or sign up before continuing.
qemu_process.c 190.3 KB
Newer Older
1
/*
2
 * qemu_process.c: QEMU process management
3
 *
4
 * Copyright (C) 2006-2016 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15 16
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library.  If not, see
O
Osier Yang 已提交
18
 * <http://www.gnu.org/licenses/>.
19 20 21 22 23 24 25 26 27
 *
 */

#include <config.h>

#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
R
Roman Bogorodskiy 已提交
28 29 30 31 32 33
#if defined(__linux__)
# include <linux/capability.h>
#elif defined(__FreeBSD__)
# include <sys/param.h>
# include <sys/cpuset.h>
#endif
34 35

#include "qemu_process.h"
36
#include "qemu_processpriv.h"
37
#include "qemu_alias.h"
38
#include "qemu_domain.h"
39
#include "qemu_domain_address.h"
40 41 42 43 44 45
#include "qemu_cgroup.h"
#include "qemu_capabilities.h"
#include "qemu_monitor.h"
#include "qemu_command.h"
#include "qemu_hostdev.h"
#include "qemu_hotplug.h"
46
#include "qemu_migration.h"
47
#include "qemu_interface.h"
48

49
#include "cpu/cpu.h"
50
#include "datatypes.h"
51
#include "virlog.h"
52
#include "virerror.h"
53
#include "viralloc.h"
54
#include "virhook.h"
E
Eric Blake 已提交
55
#include "virfile.h"
56
#include "virpidfile.h"
57
#include "nodeinfo.h"
58
#include "domain_audit.h"
59
#include "domain_nwfilter.h"
60
#include "locking/domain_lock.h"
61
#include "network/bridge_driver.h"
62
#include "viruuid.h"
63
#include "virprocess.h"
64
#include "virtime.h"
A
Ansis Atteka 已提交
65
#include "virnetdevtap.h"
66
#include "virnetdevopenvswitch.h"
67
#include "virnetdevmidonet.h"
68
#include "virbitmap.h"
69
#include "viratomic.h"
70
#include "virnuma.h"
71
#include "virstring.h"
72
#include "virhostdev.h"
73
#include "storage/storage_driver.h"
74
#include "configmake.h"
75
#include "nwfilter_conf.h"
76
#include "netdev_bandwidth_conf.h"
77 78 79

#define VIR_FROM_THIS VIR_FROM_QEMU

80 81
VIR_LOG_INIT("qemu.qemu_process");

82
/**
83
 * qemuProcessRemoveDomainStatus
84 85 86 87 88 89
 *
 * remove all state files of a domain from statedir
 *
 * Returns 0 on success
 */
static int
90
qemuProcessRemoveDomainStatus(virQEMUDriverPtr driver,
91 92 93 94
                              virDomainObjPtr vm)
{
    char ebuf[1024];
    char *file = NULL;
95
    qemuDomainObjPrivatePtr priv = vm->privateData;
96 97
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    int ret = -1;
98

99
    if (virAsprintf(&file, "%s/%s.xml", cfg->stateDir, vm->def->name) < 0)
100
        goto cleanup;
101 102 103 104 105 106

    if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
        VIR_WARN("Failed to remove domain XML for %s: %s",
                 vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));
    VIR_FREE(file);

107 108 109
    if (priv->pidfile &&
        unlink(priv->pidfile) < 0 &&
        errno != ENOENT)
110 111 112
        VIR_WARN("Failed to remove PID file for %s: %s",
                 vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));

113
    ret = 0;
114
 cleanup:
115 116
    virObjectUnref(cfg);
    return ret;
117 118 119 120
}


/* XXX figure out how to remove this */
121
extern virQEMUDriverPtr qemu_driver;
122

D
Daniel P. Berrange 已提交
123 124 125 126 127 128 129
/*
 * This is a callback registered with a qemuAgentPtr instance,
 * and to be invoked when the agent console hits an end of file
 * condition, or error, thus indicating VM shutdown should be
 * performed
 */
static void
130
qemuProcessHandleAgentEOF(qemuAgentPtr agent,
D
Daniel P. Berrange 已提交
131 132 133 134 135 136
                          virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv;

    VIR_DEBUG("Received EOF from agent on %p '%s'", vm, vm->def->name);

137
    virObjectLock(vm);
D
Daniel P. Berrange 已提交
138 139

    priv = vm->privateData;
140 141 142 143 144 145 146 147 148 149 150

    if (!priv->agent) {
        VIR_DEBUG("Agent freed already");
        goto unlock;
    }

    if (priv->beingDestroyed) {
        VIR_DEBUG("Domain is being destroyed, agent EOF is expected");
        goto unlock;
    }

151
    qemuAgentClose(agent);
152
    priv->agent = NULL;
D
Daniel P. Berrange 已提交
153

154
    virObjectUnlock(vm);
155 156
    return;

157
 unlock:
158 159
    virObjectUnlock(vm);
    return;
D
Daniel P. Berrange 已提交
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
}


/*
 * This is invoked when there is some kind of error
 * parsing data to/from the agent. The VM can continue
 * to run, but no further agent commands will be
 * allowed
 *
 * The handler only records the failure by setting priv->agentError.
 */
static void
qemuProcessHandleAgentError(qemuAgentPtr agent ATTRIBUTE_UNUSED,
                            virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    /* Log only once the lock is held: vm->def is dereferenced here */
    VIR_DEBUG("Received error from agent on %p '%s'", vm, vm->def->name);

    priv = vm->privateData;
    priv->agentError = true;

    virObjectUnlock(vm);
}

/*
 * Agent 'destroy' callback: logs the call and drops one reference on
 * the domain object (presumably the one taken in qemuConnectAgent
 * before the agent is opened - confirm against qemuAgentOpen).
 */
static void
qemuProcessHandleAgentDestroy(qemuAgentPtr agent,
                              virDomainObjPtr vm)
{
    VIR_DEBUG("Received destroy agent=%p vm=%p", agent, vm);

    virObjectUnref(vm);
}


/* Callback table passed to qemuAgentOpen() by qemuConnectAgent() */
static qemuAgentCallbacks agentCallbacks = {
    .eofNotify = qemuProcessHandleAgentEOF,
    .errorNotify = qemuProcessHandleAgentError,
    .destroy = qemuProcessHandleAgentDestroy,
};


202
int
203
qemuConnectAgent(virQEMUDriverPtr driver, virDomainObjPtr vm)
D
Daniel P. Berrange 已提交
204 205 206 207
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret = -1;
    qemuAgentPtr agent = NULL;
208
    virDomainChrDefPtr config = qemuFindAgentConfig(vm->def);
D
Daniel P. Berrange 已提交
209 210 211 212

    if (!config)
        return 0;

213 214 215 216 217 218 219 220 221
    if (priv->agent)
        return 0;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VSERPORT_CHANGE) &&
        config->state != VIR_DOMAIN_CHR_DEVICE_STATE_CONNECTED) {
        VIR_DEBUG("Deferring connecting to guest agent");
        return 0;
    }

D
Daniel P. Berrange 已提交
222 223 224 225 226 227 228 229 230
    if (virSecurityManagerSetDaemonSocketLabel(driver->securityManager,
                                               vm->def) < 0) {
        VIR_ERROR(_("Failed to set security context for agent for %s"),
                  vm->def->name);
        goto cleanup;
    }

    /* Hold an extra reference because we can't allow 'vm' to be
     * deleted while the agent is active */
231
    virObjectRef(vm);
D
Daniel P. Berrange 已提交
232 233

    ignore_value(virTimeMillisNow(&priv->agentStart));
234
    virObjectUnlock(vm);
D
Daniel P. Berrange 已提交
235 236

    agent = qemuAgentOpen(vm,
237
                          &config->source,
D
Daniel P. Berrange 已提交
238 239
                          &agentCallbacks);

240
    virObjectLock(vm);
D
Daniel P. Berrange 已提交
241 242
    priv->agentStart = 0;

243 244 245 246 247 248 249 250 251 252 253
    if (agent == NULL)
        virObjectUnref(vm);

    if (!virDomainObjIsActive(vm)) {
        qemuAgentClose(agent);
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("guest crashed while connecting to the guest agent"));
        ret = -2;
        goto cleanup;
    }

D
Daniel P. Berrange 已提交
254 255 256 257
    if (virSecurityManagerClearSocketLabel(driver->securityManager,
                                           vm->def) < 0) {
        VIR_ERROR(_("Failed to clear security context for agent for %s"),
                  vm->def->name);
258
        qemuAgentClose(agent);
D
Daniel P. Berrange 已提交
259 260 261 262 263 264 265 266 267 268 269 270 271
        goto cleanup;
    }


    priv->agent = agent;

    if (priv->agent == NULL) {
        VIR_INFO("Failed to connect agent for %s", vm->def->name);
        goto cleanup;
    }

    ret = 0;

272
 cleanup:
D
Daniel P. Berrange 已提交
273 274 275 276
    return ret;
}


277
/*
278
 * This is a callback registered with a qemuMonitorPtr instance,
279 280 281 282 283 284
 * and to be invoked when the monitor console hits an end of file
 * condition, or error, thus indicating VM shutdown should be
 * performed
 */
static void
qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
285 286
                            virDomainObjPtr vm,
                            void *opaque)
287
{
288
    virQEMUDriverPtr driver = opaque;
289
    qemuDomainObjPrivatePtr priv;
290
    struct qemuProcessEvent *processEvent;
291

292
    virObjectLock(vm);
293

294
    VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
295

296
    priv = vm->privateData;
297 298
    if (priv->beingDestroyed) {
        VIR_DEBUG("Domain is being destroyed, EOF is expected");
299
        goto cleanup;
300 301
    }

302
    if (VIR_ALLOC(processEvent) < 0)
303
        goto cleanup;
304

305 306
    processEvent->eventType = QEMU_PROCESS_EVENT_MONITOR_EOF;
    processEvent->vm = vm;
307

308 309 310 311 312
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
        ignore_value(virObjectUnref(vm));
        VIR_FREE(processEvent);
        goto cleanup;
313
    }
314

315 316 317 318
    /* We don't want this EOF handler to be called over and over while the
     * thread is waiting for a job.
     */
    qemuMonitorUnregister(mon);
319

320
 cleanup:
321
    virObjectUnlock(vm);
322 323 324 325 326 327 328 329 330 331 332
}


/*
 * This is invoked when there is some kind of error
 * parsing data to/from the monitor. The VM can continue
 * to run, but no further monitor commands will be
 * allowed
 */
static void
qemuProcessHandleMonitorError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
333 334
                              virDomainObjPtr vm,
                              void *opaque)
335
{
336
    virQEMUDriverPtr driver = opaque;
337
    virObjectEventPtr event = NULL;
338 339 340

    VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);

341
    virObjectLock(vm);
342

343
    ((qemuDomainObjPrivatePtr) vm->privateData)->monError = true;
344
    event = virDomainEventControlErrorNewFromObj(vm);
345
    qemuDomainEventQueue(driver, event);
346

347
    virObjectUnlock(vm);
348 349 350
}


351
virDomainDiskDefPtr
352 353 354
qemuProcessFindDomainDiskByAlias(virDomainObjPtr vm,
                                 const char *alias)
{
355
    size_t i;
356 357 358 359 360 361 362 363 364 365 366 367

    if (STRPREFIX(alias, QEMU_DRIVE_HOST_PREFIX))
        alias += strlen(QEMU_DRIVE_HOST_PREFIX);

    for (i = 0; i < vm->def->ndisks; i++) {
        virDomainDiskDefPtr disk;

        disk = vm->def->disks[i];
        if (disk->info.alias != NULL && STREQ(disk->info.alias, alias))
            return disk;
    }

368 369 370
    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("no disk found with alias %s"),
                   alias);
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386
    return NULL;
}

/*
 * qemuProcessGetVolumeQcowPassphrase:
 * @conn: connection whose secret driver is used for the lookup
 * @disk: disk whose source carries the encryption information
 * @secretRet: filled with a newly allocated, NUL-terminated passphrase
 * @secretLen: filled with the passphrase length (terminator excluded)
 *
 * Fetch the qcow passphrase of @disk from the secret driver. The
 * encryption must be of format VIR_STORAGE_ENCRYPTION_FORMAT_QCOW with
 * exactly one secret of passphrase type, and the secret value must not
 * contain a NUL byte. The raw secret buffer is zeroed before it is
 * freed on every path that obtained it.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessGetVolumeQcowPassphrase(virConnectPtr conn,
                                   virDomainDiskDefPtr disk,
                                   char **secretRet,
                                   size_t *secretLen)
{
    virStorageEncryptionPtr enc = disk->src->encryption;
    virSecretPtr secret;
    unsigned char *data = NULL;
    char *passphrase;
    size_t size = 0;
    int ret = -1;

    if (!enc) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("disk %s does not have any encryption information"),
                       disk->src->path);
        return -1;
    }

    if (!conn) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("cannot find secrets without a connection"));
        return -1;
    }

    if (!conn->secretDriver ||
        !conn->secretDriver->secretLookupByUUID ||
        !conn->secretDriver->secretGetValue) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("secret storage not supported"));
        return -1;
    }

    if (enc->format != VIR_STORAGE_ENCRYPTION_FORMAT_QCOW ||
        enc->nsecrets != 1 ||
        enc->secrets[0]->type !=
        VIR_STORAGE_ENCRYPTION_SECRET_TYPE_PASSPHRASE) {
        virReportError(VIR_ERR_XML_ERROR,
                       _("invalid <encryption> for volume %s"),
                       virDomainDiskGetSource(disk));
        return -1;
    }

    secret = conn->secretDriver->secretLookupByUUID(conn,
                                                    enc->secrets[0]->uuid);
    if (!secret)
        return -1;

    data = conn->secretDriver->secretGetValue(secret, &size, 0,
                                              VIR_SECRET_GET_VALUE_INTERNAL_CALL);
    virObjectUnref(secret);
    if (!data)
        return -1;

    /* From here on every exit goes through 'cleanup' so that the raw
     * secret is wiped before being released */
    if (memchr(data, '\0', size) != NULL) {
        virReportError(VIR_ERR_XML_ERROR,
                       _("format='qcow' passphrase for %s must not contain a "
                         "'\\0'"), virDomainDiskGetSource(disk));
        goto cleanup;
    }

    if (VIR_ALLOC_N(passphrase, size + 1) < 0)
        goto cleanup;

    memcpy(passphrase, data, size);
    passphrase[size] = '\0';

    *secretRet = passphrase;
    *secretLen = size;
    ret = 0;

 cleanup:
    memset(data, 0, size);
    VIR_FREE(data);
    return ret;
}

static int
qemuProcessFindVolumeQcowPassphrase(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                    virConnectPtr conn,
                                    virDomainObjPtr vm,
                                    const char *path,
                                    char **secretRet,
464 465
                                    size_t *secretLen,
                                    void *opaque ATTRIBUTE_UNUSED)
466 467 468 469
{
    virDomainDiskDefPtr disk;
    int ret = -1;

470
    virObjectLock(vm);
471 472 473 474
    if (!(disk = virDomainDiskByName(vm->def, path, true))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("no disk found with path %s"),
                       path);
475
        goto cleanup;
476
    }
477 478 479

    ret = qemuProcessGetVolumeQcowPassphrase(conn, disk, secretRet, secretLen);

480
 cleanup:
481
    virObjectUnlock(vm);
482 483 484 485 486 487
    return ret;
}


static int
qemuProcessHandleReset(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
488 489
                       virDomainObjPtr vm,
                       void *opaque)
490
{
491
    virQEMUDriverPtr driver = opaque;
492
    virObjectEventPtr event;
493
    qemuDomainObjPrivatePtr priv;
494
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
495

496
    virObjectLock(vm);
497

498
    event = virDomainEventRebootNewFromObj(vm);
499 500 501
    priv = vm->privateData;
    if (priv->agent)
        qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_RESET);
502

503
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
504
        VIR_WARN("Failed to save status on vm %s", vm->def->name);
505

506
    virObjectUnlock(vm);
507

508
    qemuDomainEventQueue(driver, event);
509

510
    virObjectUnref(cfg);
511 512 513 514
    return 0;
}


515 516 517 518 519 520 521 522 523 524 525
/*
 * Since we have the '-no-shutdown' flag set, the
 * QEMU process will currently have guest OS shutdown
 * and the CPUS stopped. To fake the reboot, we thus
 * want todo a reset of the virtual hardware, followed
 * by restart of the CPUs. This should result in the
 * guest OS booting up again
 */
static void
qemuProcessFakeReboot(void *opaque)
{
526
    virQEMUDriverPtr driver = qemu_driver;
527 528
    virDomainObjPtr vm = opaque;
    qemuDomainObjPrivatePtr priv = vm->privateData;
529
    virObjectEventPtr event = NULL;
530
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
531
    virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
532
    int ret = -1, rc;
533

534
    VIR_DEBUG("vm=%p", vm);
535
    virObjectLock(vm);
536
    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
537 538 539
        goto cleanup;

    if (!virDomainObjIsActive(vm)) {
540 541
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("guest unexpectedly quit"));
542 543 544
        goto endjob;
    }

545
    qemuDomainObjEnterMonitor(driver, vm);
546 547 548
    rc = qemuMonitorSystemReset(priv->mon);

    if (qemuDomainObjExitMonitor(driver, vm) < 0)
549 550
        goto endjob;

551
    if (rc < 0)
552 553
        goto endjob;

554 555 556
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_CRASHED)
        reason = VIR_DOMAIN_RUNNING_CRASHED;

557
    if (qemuProcessStartCPUs(driver, vm, NULL,
558
                             reason,
559
                             QEMU_ASYNC_JOB_NONE) < 0) {
560
        if (virGetLastError() == NULL)
561 562
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("resume operation failed"));
563 564
        goto endjob;
    }
565
    priv->gotShutdown = false;
566
    event = virDomainEventLifecycleNewFromObj(vm,
567 568 569
                                     VIR_DOMAIN_EVENT_RESUMED,
                                     VIR_DOMAIN_EVENT_RESUMED_UNPAUSED);

570
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
571 572 573 574
        VIR_WARN("Unable to save status on vm %s after state change",
                 vm->def->name);
    }

575 576
    ret = 0;

577
 endjob:
578
    qemuDomainObjEndJob(driver, vm);
579

580
 cleanup:
581 582
    if (ret == -1)
        ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE));
M
Michal Privoznik 已提交
583
    virDomainObjEndAPI(&vm);
584
    qemuDomainEventQueue(driver, event);
585
    virObjectUnref(cfg);
586 587 588
}


589
void
590
qemuProcessShutdownOrReboot(virQEMUDriverPtr driver,
591
                            virDomainObjPtr vm)
592
{
593 594 595
    qemuDomainObjPrivatePtr priv = vm->privateData;

    if (priv->fakeReboot) {
596
        qemuDomainSetFakeReboot(driver, vm, false);
597
        virObjectRef(vm);
598 599 600 601 602
        virThread th;
        if (virThreadCreate(&th,
                            false,
                            qemuProcessFakeReboot,
                            vm) < 0) {
603
            VIR_ERROR(_("Failed to create reboot thread, killing domain"));
604
            ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
605
            virObjectUnref(vm);
606 607
        }
    } else {
608
        ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
609
    }
610
}
611

612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632

/*
 * Generic monitor event callback: wraps the raw event (@eventName,
 * timestamp @seconds/@micros and @details) together with the domain's
 * id, name and UUID into a QEMU monitor event and queues it.
 * @opaque is the virQEMUDriverPtr. Always returns 0.
 */
static int
qemuProcessHandleEvent(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                       virDomainObjPtr vm,
                       const char *eventName,
                       long long seconds,
                       unsigned int micros,
                       const char *details,
                       void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr monitorEvent = NULL;

    VIR_DEBUG("vm=%p", vm);

    /* The domain definition is only read while the lock is held */
    virObjectLock(vm);
    monitorEvent = virDomainQemuMonitorEventNew(vm->def->id,
                                                vm->def->name,
                                                vm->def->uuid,
                                                eventName,
                                                seconds,
                                                micros,
                                                details);
    virObjectUnlock(vm);

    qemuDomainEventQueue(driver, monitorEvent);
    return 0;
}


639 640
static int
qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
641 642
                          virDomainObjPtr vm,
                          void *opaque)
643
{
644
    virQEMUDriverPtr driver = opaque;
645
    qemuDomainObjPrivatePtr priv;
646
    virObjectEventPtr event = NULL;
647
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
648

649 650
    VIR_DEBUG("vm=%p", vm);

651
    virObjectLock(vm);
652 653 654 655 656 657

    priv = vm->privateData;
    if (priv->gotShutdown) {
        VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s",
                  vm->def->name);
        goto unlock;
658 659 660 661
    } else if (!virDomainObjIsActive(vm)) {
        VIR_DEBUG("Ignoring SHUTDOWN event from inactive domain %s",
                  vm->def->name);
        goto unlock;
662 663 664 665 666 667 668 669
    }
    priv->gotShutdown = true;

    VIR_DEBUG("Transitioned guest %s to shutdown state",
              vm->def->name);
    virDomainObjSetState(vm,
                         VIR_DOMAIN_SHUTDOWN,
                         VIR_DOMAIN_SHUTDOWN_UNKNOWN);
670
    event = virDomainEventLifecycleNewFromObj(vm,
671 672 673
                                     VIR_DOMAIN_EVENT_SHUTDOWN,
                                     VIR_DOMAIN_EVENT_SHUTDOWN_FINISHED);

674
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
675 676 677 678
        VIR_WARN("Unable to save status on vm %s after state change",
                 vm->def->name);
    }

679 680 681
    if (priv->agent)
        qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SHUTDOWN);

682 683
    qemuProcessShutdownOrReboot(driver, vm);

684
 unlock:
685
    virObjectUnlock(vm);
686
    qemuDomainEventQueue(driver, event);
687
    virObjectUnref(cfg);
688

689 690 691 692 693 694
    return 0;
}


static int
qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
695 696
                      virDomainObjPtr vm,
                      void *opaque)
697
{
698
    virQEMUDriverPtr driver = opaque;
699
    virObjectEventPtr event = NULL;
700
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
701

702
    virObjectLock(vm);
J
Jiri Denemark 已提交
703
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
704
        qemuDomainObjPrivatePtr priv = vm->privateData;
705

706
        if (priv->gotShutdown) {
707 708
            VIR_DEBUG("Ignoring STOP event after SHUTDOWN");
            goto unlock;
709 710
        }

711 712
        VIR_DEBUG("Transitioned guest %s to paused state",
                  vm->def->name);
713

714 715 716
        if (priv->job.current)
            ignore_value(virTimeMillisNow(&priv->job.current->stopped));

717
        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_UNKNOWN);
718
        event = virDomainEventLifecycleNewFromObj(vm,
719 720 721
                                         VIR_DOMAIN_EVENT_SUSPENDED,
                                         VIR_DOMAIN_EVENT_SUSPENDED_PAUSED);

722 723 724 725 726
        VIR_FREE(priv->lockState);
        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
            VIR_WARN("Unable to release lease on %s", vm->def->name);
        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

727
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
728 729 730
            VIR_WARN("Unable to save status on vm %s after state change",
                     vm->def->name);
        }
731
    }
732

733
 unlock:
734
    virObjectUnlock(vm);
735
    qemuDomainEventQueue(driver, event);
736
    virObjectUnref(cfg);
737 738 739 740 741

    return 0;
}


742 743
static int
qemuProcessHandleResume(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
744 745
                        virDomainObjPtr vm,
                        void *opaque)
746
{
747
    virQEMUDriverPtr driver = opaque;
748
    virObjectEventPtr event = NULL;
749
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
750

751
    virObjectLock(vm);
752 753 754 755 756 757 758 759 760 761 762 763 764
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
        qemuDomainObjPrivatePtr priv = vm->privateData;

        if (priv->gotShutdown) {
            VIR_DEBUG("Ignoring RESUME event after SHUTDOWN");
            goto unlock;
        }

        VIR_DEBUG("Transitioned guest %s out of paused into resumed state",
                  vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                                 VIR_DOMAIN_RUNNING_UNPAUSED);
765
        event = virDomainEventLifecycleNewFromObj(vm,
766 767 768 769
                                         VIR_DOMAIN_EVENT_RESUMED,
                                         VIR_DOMAIN_EVENT_RESUMED_UNPAUSED);

        VIR_DEBUG("Using lock state '%s' on resume event", NULLSTR(priv->lockState));
770
        if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
771 772 773 774 775 776 777 778 779
                                       vm, priv->lockState) < 0) {
            /* Don't free priv->lockState on error, because we need
             * to make sure we have state still present if the user
             * tries to resume again
             */
            goto unlock;
        }
        VIR_FREE(priv->lockState);

780
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
781 782 783 784 785
            VIR_WARN("Unable to save status on vm %s after state change",
                     vm->def->name);
        }
    }

786
 unlock:
787
    virObjectUnlock(vm);
788
    qemuDomainEventQueue(driver, event);
789
    virObjectUnref(cfg);
790 791 792
    return 0;
}

793 794 795
/*
 * Monitor callback for an RTC change. For domains with a variable
 * clock offset the reported @offset is rebased on adjustment0, stored
 * as the new adjustment and the status is saved. An RTC-change event
 * carrying the (possibly rebased) offset is queued in every case.
 * @opaque is the virQEMUDriverPtr. Always returns 0.
 */
static int
qemuProcessHandleRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                           virDomainObjPtr vm,
                           long long offset,
                           void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    virObjectEventPtr rtcEvent = NULL;

    virObjectLock(vm);

    if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) {
        /* when a basedate is manually given on the qemu commandline
         * rather than simply "-rtc base=utc", the offset sent by qemu
         * in this event is *not* the new offset from UTC, but is
         * instead the new offset from the *original basedate* +
         * uptime. For example, if the original offset was 3600 and
         * the guest clock has been advanced by 10 seconds, qemu will
         * send "10" in the event - this means that the new offset
         * from UTC is 3610, *not* 10. If the guest clock is advanced
         * by another 10 seconds, qemu will now send "20" - i.e. each
         * event is the sum of the most recent change and all previous
         * changes since the domain was started. Fortunately, we have
         * saved the initial offset in "adjustment0", so to arrive at
         * the proper new "adjustment", we just add the most recent
         * offset to adjustment0.
         */
        offset += vm->def->clock.data.variable.adjustment0;
        vm->def->clock.data.variable.adjustment = offset;

        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir,
                                vm, driver->caps) < 0)
            VIR_WARN("unable to save domain status with RTC change");
    }

    rtcEvent = virDomainEventRTCChangeNewFromObj(vm, offset);

    virObjectUnlock(vm);

    qemuDomainEventQueue(driver, rtcEvent);
    virObjectUnref(cfg);
    return 0;
}


/*
 * Monitor callback for a watchdog event with the given @action.
 *
 * A watchdog event is always queued. If @action is
 * VIR_DOMAIN_EVENT_WATCHDOG_PAUSE and the domain is running, the domain
 * is moved to the paused state, a suspended lifecycle event is created,
 * the lock manager leases are paused and the status is saved. If the
 * domain's watchdog device is configured with the dump action, a
 * QEMU_PROCESS_EVENT_WATCHDOG job holding a reference on @vm is sent to
 * the driver's worker pool.
 *
 * @opaque is the virQEMUDriverPtr. Always returns 0.
 */
static int
qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                          virDomainObjPtr vm,
                          int action,
                          void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr watchdogEvent = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);
    watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action);

    if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE &&
        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        qemuDomainObjPrivatePtr priv = vm->privateData;
        VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG);
        lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
                                                  VIR_DOMAIN_EVENT_SUSPENDED,
                                                  VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);

        /* Discard any stale lock state before capturing the new one */
        VIR_FREE(priv->lockState);
        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
            VIR_WARN("Unable to release lease on %s", vm->def->name);
        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
            VIR_WARN("Unable to save status on vm %s after watchdog event",
                     vm->def->name);
        }
    }

    /* The event is driven by the QEMU process, so do not assume that the
     * definition actually carries a watchdog device.
     */
    if (vm->def->watchdog &&
        vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) {
        struct qemuProcessEvent *processEvent;
        if (VIR_ALLOC(processEvent) == 0) {
            processEvent->eventType = QEMU_PROCESS_EVENT_WATCHDOG;
            processEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP;
            processEvent->vm = vm;
            /* Hold an extra reference because we can't allow 'vm' to be
             * deleted before handling watchdog event is finished.
             */
            virObjectRef(vm);
            if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
                /* If that was the last reference the object is gone and
                 * must not be unlocked below */
                if (!virObjectUnref(vm))
                    vm = NULL;
                VIR_FREE(processEvent);
            }
        }
    }

    if (vm)
        virObjectUnlock(vm);
    qemuDomainEventQueue(driver, watchdogEvent);
    qemuDomainEventQueue(driver, lifecycleEvent);

    virObjectUnref(cfg);
    return 0;
}


/**
 * qemuProcessHandleIOError:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @diskAlias: alias of the disk that reported the I/O error
 * @action: action reported with the error (compared against
 *          VIR_DOMAIN_EVENT_IO_ERROR_PAUSE below)
 * @reason: reason string passed on to the "reason" variant of the event
 * @opaque: the QEMU driver
 *
 * Registered as the .domainIOError monitor callback. Both I/O error event
 * variants are always created and queued. When the action is a pause and
 * the domain is still marked running, the domain is additionally switched
 * to paused/IOERROR, the lock manager is asked to pause the domain's
 * leases, and the status is saved.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                         virDomainObjPtr vm,
                         const char *diskAlias,
                         int action,
                         const char *reason,
                         void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr plainEvent = NULL;
    virObjectEventPtr reasonEvent = NULL;
    virObjectEventPtr pausedEvent = NULL;
    /* A disk that cannot be resolved is reported with empty strings. */
    const char *srcPath = "";
    const char *devAlias = "";
    virDomainDiskDefPtr disk;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);

    if ((disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias))) {
        srcPath = virDomainDiskGetSource(disk);
        devAlias = disk->info.alias;
    }

    plainEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action);
    reasonEvent = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias,
                                                        action, reason);

    if (action == VIR_DOMAIN_EVENT_IO_ERROR_PAUSE &&
        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        qemuDomainObjPrivatePtr priv = vm->privateData;

        VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name);

        /* Wake up anybody waiting on the domain condition for this error. */
        if (priv->signalIOError)
            virDomainObjBroadcast(vm);

        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR);
        pausedEvent =
            virDomainEventLifecycleNewFromObj(vm,
                                              VIR_DOMAIN_EVENT_SUSPENDED,
                                              VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);

        /* Drop any stale lock state before capturing the current one. */
        VIR_FREE(priv->lockState);
        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
            VIR_WARN("Unable to release lease on %s", vm->def->name);
        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
            VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);
    }

    virObjectUnlock(vm);

    /* Events are queued only after the domain lock has been dropped. */
    qemuDomainEventQueue(driver, plainEvent);
    qemuDomainEventQueue(driver, reasonEvent);
    qemuDomainEventQueue(driver, pausedEvent);
    virObjectUnref(cfg);
    return 0;
}

962 963 964 965 966
/**
 * qemuProcessHandleBlockJob:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @diskAlias: alias of the disk the block job runs on
 * @type: block job type reported by the event
 * @status: block job status reported by the event
 * @opaque: the QEMU driver
 *
 * Registered as the .domainBlockJob monitor callback. If the disk has
 * blockJobSync set, the type and status are stored in the disk private
 * data and waiters on the domain are woken up. Otherwise a
 * QEMU_PROCESS_EVENT_BLOCK_JOB job carrying a copy of the alias is sent
 * to the driver's worker pool. Unknown aliases and allocation failures
 * are dropped silently.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                          virDomainObjPtr vm,
                          const char *diskAlias,
                          int type,
                          int status,
                          void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    struct qemuProcessEvent *job = NULL;
    qemuDomainDiskPrivatePtr diskPriv;
    virDomainDiskDefPtr disk;
    char *aliasCopy = NULL;

    virObjectLock(vm);

    VIR_DEBUG("Block job for device %s (domain: %p,%s) type %d status %d",
              diskAlias, vm, vm->def->name, type, status);

    disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias);
    if (!disk)
        goto cleanup;
    diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);

    if (diskPriv->blockJobSync) {
        /* A synchronous API is waiting for this event: hand it the data
         * and wake it up. */
        diskPriv->blockJobType = type;
        diskPriv->blockJobStatus = status;
        virDomainObjBroadcast(vm);
        goto cleanup;
    }

    /* Nobody is waiting synchronously; process the update in a worker. */
    if (VIR_ALLOC(job) < 0)
        goto cleanup;

    job->eventType = QEMU_PROCESS_EVENT_BLOCK_JOB;
    if (VIR_STRDUP(aliasCopy, diskAlias) < 0)
        goto error;
    job->data = aliasCopy;
    job->vm = vm;
    job->action = type;
    job->status = status;

    /* The queued job holds its own reference on the domain. */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, job) < 0) {
        ignore_value(virObjectUnref(vm));
        goto error;
    }

 cleanup:
    virObjectUnlock(vm);
    return 0;

 error:
    /* Only reached with a job that was allocated but never queued. */
    VIR_FREE(job->data);
    VIR_FREE(job);
    goto cleanup;
}
1019

1020

1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
static int
qemuProcessHandleGraphics(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                          virDomainObjPtr vm,
                          int phase,
                          int localFamily,
                          const char *localNode,
                          const char *localService,
                          int remoteFamily,
                          const char *remoteNode,
                          const char *remoteService,
                          const char *authScheme,
                          const char *x509dname,
1033 1034
                          const char *saslUsername,
                          void *opaque)
1035
{
1036
    virQEMUDriverPtr driver = opaque;
1037
    virObjectEventPtr event;
1038 1039 1040
    virDomainEventGraphicsAddressPtr localAddr = NULL;
    virDomainEventGraphicsAddressPtr remoteAddr = NULL;
    virDomainEventGraphicsSubjectPtr subject = NULL;
1041
    size_t i;
1042 1043

    if (VIR_ALLOC(localAddr) < 0)
1044
        goto error;
1045
    localAddr->family = localFamily;
1046 1047 1048
    if (VIR_STRDUP(localAddr->service, localService) < 0 ||
        VIR_STRDUP(localAddr->node, localNode) < 0)
        goto error;
1049 1050

    if (VIR_ALLOC(remoteAddr) < 0)
1051
        goto error;
1052
    remoteAddr->family = remoteFamily;
1053 1054 1055
    if (VIR_STRDUP(remoteAddr->service, remoteService) < 0 ||
        VIR_STRDUP(remoteAddr->node, remoteNode) < 0)
        goto error;
1056 1057

    if (VIR_ALLOC(subject) < 0)
1058
        goto error;
1059 1060
    if (x509dname) {
        if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0)
1061
            goto error;
1062
        subject->nidentity++;
1063 1064 1065
        if (VIR_STRDUP(subject->identities[subject->nidentity-1].type, "x509dname") < 0 ||
            VIR_STRDUP(subject->identities[subject->nidentity-1].name, x509dname) < 0)
            goto error;
1066 1067 1068
    }
    if (saslUsername) {
        if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0)
1069
            goto error;
1070
        subject->nidentity++;
1071 1072 1073
        if (VIR_STRDUP(subject->identities[subject->nidentity-1].type, "saslUsername") < 0 ||
            VIR_STRDUP(subject->identities[subject->nidentity-1].name, saslUsername) < 0)
            goto error;
1074 1075
    }

1076
    virObjectLock(vm);
1077
    event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject);
1078
    virObjectUnlock(vm);
1079

1080
    qemuDomainEventQueue(driver, event);
1081 1082 1083

    return 0;

1084
 error:
1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
    if (localAddr) {
        VIR_FREE(localAddr->service);
        VIR_FREE(localAddr->node);
        VIR_FREE(localAddr);
    }
    if (remoteAddr) {
        VIR_FREE(remoteAddr->service);
        VIR_FREE(remoteAddr->node);
        VIR_FREE(remoteAddr);
    }
    if (subject) {
1096
        for (i = 0; i < subject->nidentity; i++) {
1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
            VIR_FREE(subject->identities[i].type);
            VIR_FREE(subject->identities[i].name);
        }
        VIR_FREE(subject->identities);
        VIR_FREE(subject);
    }

    return -1;
}

1107 1108 1109 1110
/**
 * qemuProcessHandleTrayChange:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @devAlias: alias of the disk whose tray moved
 * @reason: tray change reason (VIR_DOMAIN_EVENT_TRAY_CHANGE_*)
 * @opaque: the QEMU driver
 *
 * Registered as the .domainTrayChange monitor callback. For a known disk
 * this creates the tray-change event, records the new tray status in the
 * disk definition, saves the domain status and wakes up waiters on the
 * domain. Events for unknown aliases are ignored.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleTrayChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                            virDomainObjPtr vm,
                            const char *devAlias,
                            int reason,
                            void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr trayEvent = NULL;
    virDomainDiskDefPtr disk;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);

    if ((disk = qemuProcessFindDomainDiskByAlias(vm, devAlias))) {
        trayEvent = virDomainEventTrayChangeNewFromObj(vm, devAlias, reason);

        /* Mirror the new tray position in the disk definition; any other
         * reason leaves the recorded status untouched. */
        switch (reason) {
        case VIR_DOMAIN_EVENT_TRAY_CHANGE_OPEN:
            disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
            break;
        case VIR_DOMAIN_EVENT_TRAY_CHANGE_CLOSE:
            disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
            break;
        default:
            break;
        }

        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
            VIR_WARN("Unable to save status on vm %s after tray moved event",
                     vm->def->name);
        }

        virDomainObjBroadcast(vm);
    }

    virObjectUnlock(vm);

    qemuDomainEventQueue(driver, trayEvent);
    virObjectUnref(cfg);
    return 0;
}

O
Osier Yang 已提交
1146 1147
static int
qemuProcessHandlePMWakeup(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1148 1149
                          virDomainObjPtr vm,
                          void *opaque)
O
Osier Yang 已提交
1150
{
1151
    virQEMUDriverPtr driver = opaque;
1152 1153
    virObjectEventPtr event = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
1154
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
O
Osier Yang 已提交
1155

1156
    virObjectLock(vm);
O
Osier Yang 已提交
1157 1158
    event = virDomainEventPMWakeupNewFromObj(vm);

1159 1160 1161 1162 1163 1164 1165 1166 1167
    /* Don't set domain status back to running if it wasn't paused
     * from guest side, otherwise it can just cause confusion.
     */
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PMSUSPENDED) {
        VIR_DEBUG("Transitioned guest %s from pmsuspended to running "
                  "state due to QMP wakeup event", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                             VIR_DOMAIN_RUNNING_WAKEUP);
1168
        lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
1169 1170 1171
                                                  VIR_DOMAIN_EVENT_STARTED,
                                                  VIR_DOMAIN_EVENT_STARTED_WAKEUP);

1172
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
1173 1174 1175 1176 1177
            VIR_WARN("Unable to save status on vm %s after wakeup event",
                     vm->def->name);
        }
    }

1178
    virObjectUnlock(vm);
1179 1180
    qemuDomainEventQueue(driver, event);
    qemuDomainEventQueue(driver, lifecycleEvent);
1181
    virObjectUnref(cfg);
O
Osier Yang 已提交
1182 1183
    return 0;
}
1184

O
Osier Yang 已提交
1185 1186
static int
qemuProcessHandlePMSuspend(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1187 1188
                           virDomainObjPtr vm,
                           void *opaque)
O
Osier Yang 已提交
1189
{
1190
    virQEMUDriverPtr driver = opaque;
1191 1192
    virObjectEventPtr event = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
1193
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
O
Osier Yang 已提交
1194

1195
    virObjectLock(vm);
O
Osier Yang 已提交
1196 1197
    event = virDomainEventPMSuspendNewFromObj(vm);

1198
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1199
        qemuDomainObjPrivatePtr priv = vm->privateData;
1200 1201 1202 1203 1204
        VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
                  "QMP suspend event", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
                             VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
J
Jiri Denemark 已提交
1205
        lifecycleEvent =
1206
            virDomainEventLifecycleNewFromObj(vm,
J
Jiri Denemark 已提交
1207 1208
                                     VIR_DOMAIN_EVENT_PMSUSPENDED,
                                     VIR_DOMAIN_EVENT_PMSUSPENDED_MEMORY);
1209

1210
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
1211 1212 1213
            VIR_WARN("Unable to save status on vm %s after suspend event",
                     vm->def->name);
        }
1214 1215 1216

        if (priv->agent)
            qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1217 1218
    }

1219
    virObjectUnlock(vm);
O
Osier Yang 已提交
1220

1221 1222
    qemuDomainEventQueue(driver, event);
    qemuDomainEventQueue(driver, lifecycleEvent);
1223
    virObjectUnref(cfg);
O
Osier Yang 已提交
1224 1225 1226
    return 0;
}

1227 1228 1229
/**
 * qemuProcessHandleBalloonChange:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @actual: new balloon value reported by the event
 * @opaque: the QEMU driver
 *
 * Registered as the .domainBalloonChange monitor callback. Creates the
 * balloon-change event, stores @actual as the domain's current balloon
 * value and saves the domain status. A failure to save the status is
 * only logged.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleBalloonChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               unsigned long long actual,
                               void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr event = NULL;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);
    event = virDomainEventBalloonChangeNewFromObj(vm, actual);

    /* Both values are unsigned long long, so they must be printed with
     * %llu; %lld would misreport values above LLONG_MAX. */
    VIR_DEBUG("Updating balloon from %llu to %llu kb",
              vm->def->mem.cur_balloon, actual);
    vm->def->mem.cur_balloon = actual;

    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
        VIR_WARN("unable to save domain status with balloon change");

    virObjectUnlock(vm);

    qemuDomainEventQueue(driver, event);
    virObjectUnref(cfg);
    return 0;
}

1254 1255
static int
qemuProcessHandlePMSuspendDisk(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1256 1257
                               virDomainObjPtr vm,
                               void *opaque)
1258
{
1259
    virQEMUDriverPtr driver = opaque;
1260 1261
    virObjectEventPtr event = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
1262
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
1263

1264
    virObjectLock(vm);
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274
    event = virDomainEventPMSuspendDiskNewFromObj(vm);

    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        qemuDomainObjPrivatePtr priv = vm->privateData;
        VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
                  "QMP suspend_disk event", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
                             VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
        lifecycleEvent =
1275
            virDomainEventLifecycleNewFromObj(vm,
1276 1277 1278
                                     VIR_DOMAIN_EVENT_PMSUSPENDED,
                                     VIR_DOMAIN_EVENT_PMSUSPENDED_DISK);

1279
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
1280 1281 1282 1283 1284 1285 1286 1287
            VIR_WARN("Unable to save status on vm %s after suspend event",
                     vm->def->name);
        }

        if (priv->agent)
            qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
    }

1288
    virObjectUnlock(vm);
1289

1290 1291
    qemuDomainEventQueue(driver, event);
    qemuDomainEventQueue(driver, lifecycleEvent);
1292 1293
    virObjectUnref(cfg);

1294 1295 1296
    return 0;
}

1297

1298 1299
static int
qemuProcessHandleGuestPanic(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1300 1301
                            virDomainObjPtr vm,
                            void *opaque)
1302
{
1303
    virQEMUDriverPtr driver = opaque;
1304 1305 1306
    struct qemuProcessEvent *processEvent;

    virObjectLock(vm);
1307
    if (VIR_ALLOC(processEvent) < 0)
1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322
        goto cleanup;

    processEvent->eventType = QEMU_PROCESS_EVENT_GUESTPANIC;
    processEvent->action = vm->def->onCrash;
    processEvent->vm = vm;
    /* Hold an extra reference because we can't allow 'vm' to be
     * deleted before handling guest panic event is finished.
     */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
        if (!virObjectUnref(vm))
            vm = NULL;
        VIR_FREE(processEvent);
    }

1323
 cleanup:
1324
    if (vm)
1325
        virObjectUnlock(vm);
1326 1327 1328 1329 1330

    return 0;
}


1331
int
1332 1333
qemuProcessHandleDeviceDeleted(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
1334 1335
                               const char *devAlias,
                               void *opaque)
1336
{
1337
    virQEMUDriverPtr driver = opaque;
1338 1339
    struct qemuProcessEvent *processEvent = NULL;
    char *data;
1340 1341 1342 1343 1344 1345

    virObjectLock(vm);

    VIR_DEBUG("Device %s removed from domain %p %s",
              devAlias, vm, vm->def->name);

1346 1347
    if (qemuDomainSignalDeviceRemoval(vm, devAlias))
        goto cleanup;
1348

1349 1350
    if (VIR_ALLOC(processEvent) < 0)
        goto error;
1351

1352 1353 1354 1355 1356
    processEvent->eventType = QEMU_PROCESS_EVENT_DEVICE_DELETED;
    if (VIR_STRDUP(data, devAlias) < 0)
        goto error;
    processEvent->data = data;
    processEvent->vm = vm;
1357

1358 1359 1360 1361 1362
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
        ignore_value(virObjectUnref(vm));
        goto error;
    }
1363

1364
 cleanup:
1365 1366
    virObjectUnlock(vm);
    return 0;
1367 1368 1369 1370 1371
 error:
    if (processEvent)
        VIR_FREE(processEvent->data);
    VIR_FREE(processEvent);
    goto cleanup;
1372 1373 1374
}


1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415
/**
 * qemuProcessHandleNicRxFilterChanged:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @devAlias: alias of the device whose RX filter changed
 * @opaque: the QEMU driver
 *
 * Registered as the .domainNicRxFilterChanged monitor callback. Sends a
 * QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED job carrying a copy of the
 * alias to the driver's worker pool. Allocation or queueing failures are
 * dropped silently.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleNicRxFilterChanged(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                    virDomainObjPtr vm,
                                    const char *devAlias,
                                    void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    struct qemuProcessEvent *job = NULL;
    char *aliasCopy;

    virObjectLock(vm);

    VIR_DEBUG("Device %s RX Filter changed in domain %p %s",
              devAlias, vm, vm->def->name);

    if (VIR_ALLOC(job) < 0)
        goto cleanup;

    job->eventType = QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED;
    if (VIR_STRDUP(aliasCopy, devAlias) < 0)
        goto error;
    job->data = aliasCopy;
    job->vm = vm;

    /* The queued job holds its own reference on the domain. */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, job) < 0) {
        ignore_value(virObjectUnref(vm));
        goto error;
    }

 cleanup:
    virObjectUnlock(vm);
    return 0;

 error:
    /* Only reached with a job that was allocated but never queued. */
    VIR_FREE(job->data);
    VIR_FREE(job);
    goto cleanup;
}


1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458
/**
 * qemuProcessHandleSerialChanged:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @devAlias: alias of the serial device whose state changed
 * @connected: new state reported by the event
 * @opaque: the QEMU driver
 *
 * Registered as the .domainSerialChange monitor callback. Sends a
 * QEMU_PROCESS_EVENT_SERIAL_CHANGED job to the driver's worker pool; the
 * job carries a copy of the alias as data and @connected as its action.
 * Allocation or queueing failures are dropped silently.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleSerialChanged(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               const char *devAlias,
                               bool connected,
                               void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    struct qemuProcessEvent *job = NULL;
    char *aliasCopy;

    virObjectLock(vm);

    VIR_DEBUG("Serial port %s state changed to '%d' in domain %p %s",
              devAlias, connected, vm, vm->def->name);

    if (VIR_ALLOC(job) < 0)
        goto cleanup;

    job->eventType = QEMU_PROCESS_EVENT_SERIAL_CHANGED;
    if (VIR_STRDUP(aliasCopy, devAlias) < 0)
        goto error;
    job->data = aliasCopy;
    job->action = connected;
    job->vm = vm;

    /* The queued job holds its own reference on the domain. */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, job) < 0) {
        ignore_value(virObjectUnref(vm));
        goto error;
    }

 cleanup:
    virObjectUnlock(vm);
    return 0;

 error:
    /* Only reached with a job that was allocated but never queued. */
    VIR_FREE(job->data);
    VIR_FREE(job);
    goto cleanup;
}


1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477
/**
 * qemuProcessHandleSpiceMigrated:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @opaque: unused
 *
 * Registered as the .domainSpiceMigrated monitor callback. While an
 * outgoing migration async job is active, marks the job as having seen
 * the SPICE migration completion and wakes up waiters on the domain.
 * Outside of such a job the event is only logged.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleSpiceMigrated(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               void *opaque ATTRIBUTE_UNUSED)
{
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    VIR_DEBUG("Spice migration completed for domain %p %s",
              vm, vm->def->name);

    priv = vm->privateData;
    if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) {
        priv->job.spiceMigrated = true;
        virDomainObjBroadcast(vm);
    } else {
        VIR_DEBUG("got SPICE_MIGRATE_COMPLETED event without a migration job");
    }

    virObjectUnlock(vm);
    return 0;
}


1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500
/**
 * qemuProcessHandleMigrationStatus:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @status: new migration status reported by the event
 * @opaque: unused
 *
 * Registered as the .domainMigrationStatus monitor callback. While any
 * async job is active, stores @status in the current job's statistics
 * and wakes up waiters on the domain. Without an async job the event is
 * only logged.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                 virDomainObjPtr vm,
                                 int status,
                                 void *opaque ATTRIBUTE_UNUSED)
{
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    VIR_DEBUG("Migration of domain %p %s changed state to %s",
              vm, vm->def->name,
              qemuMonitorMigrationStatusTypeToString(status));

    priv = vm->privateData;
    if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE) {
        priv->job.current->stats.status = status;
        virDomainObjBroadcast(vm);
    } else {
        VIR_DEBUG("got MIGRATION event without a migration job");
    }

    virObjectUnlock(vm);
    return 0;
}


1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543
/**
 * qemuProcessHandleMigrationPass:
 * @mon: monitor that delivered the event (unused)
 * @vm: domain object the event refers to
 * @pass: migration iteration number reported by the event
 * @opaque: the QEMU driver
 *
 * Registered as the .domainMigrationPass monitor callback. While any
 * async job is active, creates a migration-iteration event for @pass and
 * queues it on the driver. Without an async job the event is only logged.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleMigrationPass(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               int pass,
                               void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    VIR_DEBUG("Migrating domain %p %s, iteration %d",
              vm, vm->def->name, pass);

    priv = vm->privateData;
    if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE) {
        virObjectEventPtr iterationEvent;

        /* Created and queued while the domain is still locked. */
        iterationEvent = virDomainEventMigrationIterationNewFromObj(vm, pass);
        qemuDomainEventQueue(driver, iterationEvent);
    } else {
        VIR_DEBUG("got MIGRATION_PASS event without a migration job");
    }

    virObjectUnlock(vm);
    return 0;
}


1544 1545
static qemuMonitorCallbacks monitorCallbacks = {
    .eofNotify = qemuProcessHandleMonitorEOF,
1546
    .errorNotify = qemuProcessHandleMonitorError,
1547
    .diskSecretLookup = qemuProcessFindVolumeQcowPassphrase,
1548
    .domainEvent = qemuProcessHandleEvent,
1549 1550
    .domainShutdown = qemuProcessHandleShutdown,
    .domainStop = qemuProcessHandleStop,
1551
    .domainResume = qemuProcessHandleResume,
1552 1553 1554 1555 1556
    .domainReset = qemuProcessHandleReset,
    .domainRTCChange = qemuProcessHandleRTCChange,
    .domainWatchdog = qemuProcessHandleWatchdog,
    .domainIOError = qemuProcessHandleIOError,
    .domainGraphics = qemuProcessHandleGraphics,
1557
    .domainBlockJob = qemuProcessHandleBlockJob,
1558
    .domainTrayChange = qemuProcessHandleTrayChange,
O
Osier Yang 已提交
1559
    .domainPMWakeup = qemuProcessHandlePMWakeup,
O
Osier Yang 已提交
1560
    .domainPMSuspend = qemuProcessHandlePMSuspend,
1561
    .domainBalloonChange = qemuProcessHandleBalloonChange,
1562
    .domainPMSuspendDisk = qemuProcessHandlePMSuspendDisk,
1563
    .domainGuestPanic = qemuProcessHandleGuestPanic,
1564
    .domainDeviceDeleted = qemuProcessHandleDeviceDeleted,
1565
    .domainNicRxFilterChanged = qemuProcessHandleNicRxFilterChanged,
1566
    .domainSerialChange = qemuProcessHandleSerialChanged,
1567
    .domainSpiceMigrated = qemuProcessHandleSpiceMigrated,
1568
    .domainMigrationStatus = qemuProcessHandleMigrationStatus,
1569
    .domainMigrationPass = qemuProcessHandleMigrationPass,
1570 1571
};

1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584
/* Defined further down; declared here because qemuConnectMonitor()
 * registers it before its definition. */
static void
qemuProcessMonitorReportLogError(qemuMonitorPtr mon,
                                 const char *msg,
                                 void *opaque);


/*
 * Destructor passed to qemuMonitorSetDomainLog(): releases the log
 * context that was handed over as @opaque.
 */
static void
qemuProcessMonitorLogFree(void *opaque)
{
    qemuDomainLogContextFree(opaque);
}

1585
static int
1586
qemuConnectMonitor(virQEMUDriverPtr driver, virDomainObjPtr vm, int asyncJob,
1587
                   qemuDomainLogContextPtr logCtxt)
1588 1589 1590
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret = -1;
1591
    qemuMonitorPtr mon = NULL;
1592

1593
    if (virSecurityManagerSetDaemonSocketLabel(driver->securityManager,
1594
                                               vm->def) < 0) {
1595 1596
        VIR_ERROR(_("Failed to set security context for monitor for %s"),
                  vm->def->name);
1597
        return -1;
1598 1599 1600
    }

    /* Hold an extra reference because we can't allow 'vm' to be
M
Michal Privoznik 已提交
1601
     * deleted unitl the monitor gets its own reference. */
1602
    virObjectRef(vm);
1603

1604
    ignore_value(virTimeMillisNow(&priv->monStart));
1605
    virObjectUnlock(vm);
1606 1607 1608 1609

    mon = qemuMonitorOpen(vm,
                          priv->monConfig,
                          priv->monJSON,
1610 1611
                          &monitorCallbacks,
                          driver);
1612

1613 1614 1615 1616 1617 1618 1619
    if (mon && logCtxt) {
        qemuDomainLogContextRef(logCtxt);
        qemuMonitorSetDomainLog(mon,
                                qemuProcessMonitorReportLogError,
                                logCtxt,
                                qemuProcessMonitorLogFree);
    }
1620

1621
    virObjectLock(vm);
M
Michal Privoznik 已提交
1622
    virObjectUnref(vm);
1623
    priv->monStart = 0;
1624

M
Michal Privoznik 已提交
1625
    if (!virDomainObjIsActive(vm)) {
1626
        qemuMonitorClose(mon);
1627
        mon = NULL;
1628 1629 1630
    }
    priv->mon = mon;

1631
    if (virSecurityManagerClearSocketLabel(driver->securityManager, vm->def) < 0) {
1632 1633
        VIR_ERROR(_("Failed to clear security context for monitor for %s"),
                  vm->def->name);
1634
        return -1;
1635 1636 1637 1638
    }

    if (priv->mon == NULL) {
        VIR_INFO("Failed to connect monitor for %s", vm->def->name);
1639
        return -1;
1640 1641 1642
    }


1643
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
1644
        return -1;
1645

1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659
    if (qemuMonitorSetCapabilities(priv->mon) < 0)
        goto cleanup;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MONITOR_JSON) &&
        virQEMUCapsProbeQMP(priv->qemuCaps, priv->mon) < 0)
        goto cleanup;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT) &&
        qemuMonitorSetMigrationCapability(priv->mon,
                                          QEMU_MONITOR_MIGRATION_CAPS_EVENTS,
                                          true) < 0) {
        VIR_DEBUG("Cannot enable migration events; clearing capability");
        virQEMUCapsClear(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT);
    }
1660

1661 1662 1663 1664 1665
    ret = 0;

 cleanup:
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
1666 1667 1668
    return ret;
}

1669 1670 1671

/**
 * qemuProcessReadLog: Read log file of a qemu VM
1672
 * @logCtxt: the domain log context
1673
 * @msg: pointer to buffer to store the read messages in
1674 1675
 *
 * Reads log of a qemu VM. Skips messages not produced by qemu or irrelevant
1676
 * messages. Returns returns 0 on success or -1 on error
1677
 */
1678
static int
1679
qemuProcessReadLog(qemuDomainLogContextPtr logCtxt, char **msg)
1680
{
1681 1682
    char *buf;
    ssize_t got;
1683
    char *eol;
1684
    char *filter_next;
1685

1686
    if ((got = qemuDomainLogContextRead(logCtxt, &buf)) < 0)
1687
        return -1;
1688

1689 1690 1691 1692 1693 1694 1695
    /* Filter out debug messages from intermediate libvirt process */
    filter_next = buf;
    while ((eol = strchr(filter_next, '\n'))) {
        *eol = '\0';
        if (virLogProbablyLogMessage(filter_next) ||
            STRPREFIX(filter_next, "char device redirected to")) {
            size_t skip = (eol + 1) - filter_next;
1696
            memmove(filter_next, eol + 1, buf + got - eol);
1697 1698 1699 1700
            got -= skip;
        } else {
            filter_next = eol + 1;
            *eol = '\n';
1701 1702
        }
    }
1703
    filter_next = NULL; /* silence false coverity warning */
1704

1705 1706
    if (got > 0 &&
        buf[got - 1] == '\n') {
1707 1708
        buf[got - 1] = '\0';
        got--;
1709
    }
1710
    ignore_value(VIR_REALLOC_N_QUIET(buf, got + 1));
1711 1712 1713
    *msg = buf;
    return 0;
}
1714 1715


1716 1717
/*
 * Replace the current libvirt error with an internal error whose message
 * is "@msgprefix: <filtered qemu log>".
 *
 * Returns 0 once the error has been reported, -1 if the log could not be
 * read (in which case the previous error is left untouched).
 */
static int
qemuProcessReportLogError(qemuDomainLogContextPtr logCtxt,
                          const char *msgprefix)
{
    char *logText = NULL;
    int rc = qemuProcessReadLog(logCtxt, &logText);

    if (rc < 0)
        return -1;

    virResetLastError();
    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("%s: %s"), msgprefix, logText);
    VIR_FREE(logText);
    return 0;
}


1733 1734 1735 1736 1737 1738 1739 1740 1741 1742
/*
 * Monitor log-error hook installed via qemuMonitorSetDomainLog(): @opaque
 * is the log context, @msg becomes the prefix of the reported error. The
 * result of qemuProcessReportLogError() is not propagated.
 */
static void
qemuProcessMonitorReportLogError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                 const char *msg,
                                 void *opaque)
{
    qemuProcessReportLogError(opaque, msg);
}


1743
static int
1744 1745 1746
qemuProcessLookupPTYs(virDomainDefPtr def,
                      virQEMUCapsPtr qemuCaps,
                      virDomainChrDefPtr *devices,
1747
                      int count,
1748
                      virHashTablePtr info)
1749
{
1750
    size_t i;
1751

1752
    for (i = 0; i < count; i++) {
1753
        virDomainChrDefPtr chr = devices[i];
1754 1755
        bool chardevfmt = virQEMUCapsSupportsChardev(def, qemuCaps, chr);

1756
        if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) {
C
Cole Robinson 已提交
1757
            char id[32];
1758
            qemuMonitorChardevInfoPtr entry;
1759

C
Cole Robinson 已提交
1760
            if (snprintf(id, sizeof(id), "%s%s",
1761
                         chardevfmt ? "char" : "",
1762 1763 1764 1765
                         chr->info.alias) >= sizeof(id)) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("failed to format device alias "
                                 "for PTY retrieval"));
1766
                return -1;
1767
            }
1768

1769 1770
            entry = virHashLookup(info, id);
            if (!entry || !entry->ptyPath) {
1771 1772 1773 1774
                if (chr->source.data.file.path == NULL) {
                    /* neither the log output nor 'info chardev' had a
                     * pty path for this chardev, report an error
                     */
1775 1776
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   _("no assigned pty for device %s"), id);
1777 1778 1779 1780 1781 1782 1783 1784 1785 1786
                    return -1;
                } else {
                    /* 'info chardev' had no pty path for this chardev,
                     * but the log output had, so we're fine
                     */
                    continue;
                }
            }

            VIR_FREE(chr->source.data.file.path);
1787
            if (VIR_STRDUP(chr->source.data.file.path, entry->ptyPath) < 0)
1788 1789 1790 1791 1792 1793 1794
                return -1;
        }
    }

    return 0;
}

1795 1796
static int
qemuProcessFindCharDevicePTYsMonitor(virDomainObjPtr vm,
1797
                                     virQEMUCapsPtr qemuCaps,
1798
                                     virHashTablePtr info)
1799
{
1800
    size_t i = 0;
C
Cole Robinson 已提交
1801

1802 1803
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->serials, vm->def->nserials,
1804
                              info) < 0)
1805 1806
        return -1;

1807 1808
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->parallels, vm->def->nparallels,
1809
                              info) < 0)
1810
        return -1;
1811

1812 1813
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->channels, vm->def->nchannels,
1814
                              info) < 0)
1815
        return -1;
1816 1817 1818 1819
    /* For historical reasons, console[0] can be just an alias
     * for serial[0]. That's why we need to update it as well. */
    if (vm->def->nconsoles) {
        virDomainChrDefPtr chr = vm->def->consoles[0];
1820

1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831
        if (vm->def->nserials &&
            chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CONSOLE &&
            chr->targetType == VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_SERIAL) {
            /* yes, the first console is just an alias for serials[0] */
            i = 1;
            if (virDomainChrSourceDefCopy(&chr->source,
                                          &((vm->def->serials[0])->source)) < 0)
                return -1;
        }
    }

1832 1833
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->consoles + i, vm->def->nconsoles - i,
1834
                              info) < 0)
1835
        return -1;
1836 1837 1838 1839 1840

    return 0;
}


1841
static int
1842 1843 1844 1845
qemuProcessRefreshChannelVirtioState(virQEMUDriverPtr driver,
                                     virDomainObjPtr vm,
                                     virHashTablePtr info,
                                     int booted)
1846 1847
{
    size_t i;
1848
    int agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL;
1849
    qemuMonitorChardevInfoPtr entry;
1850
    virObjectEventPtr event = NULL;
1851 1852
    char id[32];

1853 1854 1855
    if (booted)
        agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED;

1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871
    for (i = 0; i < vm->def->nchannels; i++) {
        virDomainChrDefPtr chr = vm->def->channels[i];
        if (chr->targetType == VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_VIRTIO) {
            if (snprintf(id, sizeof(id), "char%s",
                         chr->info.alias) >= sizeof(id)) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("failed to format device alias "
                                 "for PTY retrieval"));
                return -1;
            }

            /* port state not reported */
            if (!(entry = virHashLookup(info, id)) ||
                !entry->state)
                continue;

1872 1873 1874 1875 1876 1877
            if (entry->state != VIR_DOMAIN_CHR_DEVICE_STATE_DEFAULT &&
                STREQ_NULLABLE(chr->target.name, "org.qemu.guest_agent.0") &&
                (event = virDomainEventAgentLifecycleNewFromObj(vm, entry->state,
                                                                agentReason)))
                qemuDomainEventQueue(driver, event);

1878 1879 1880 1881 1882 1883 1884 1885
            chr->state = entry->state;
        }
    }

    return 0;
}


1886 1887 1888
/**
 * qemuRefreshVirtioChannelState:
 * @driver: qemu driver
 * @vm: domain object
 *
 * Queries the chardev information through the monitor and passes it to
 * qemuProcessRefreshChannelVirtioState() (with booted == false).
 *
 * Returns the result of the refresh, or a negative value if the monitor
 * query or leaving the monitor failed.
 */
int
qemuRefreshVirtioChannelState(virQEMUDriverPtr driver,
                              virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virHashTablePtr chardevs = NULL;
    int rc;

    qemuDomainObjEnterMonitor(driver, vm);
    rc = qemuMonitorGetChardevInfo(priv->mon, &chardevs);
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        rc = -1;

    if (rc >= 0)
        rc = qemuProcessRefreshChannelVirtioState(driver, vm, chardevs, false);

    virHashFree(chardevs);
    return rc;
}


1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941
/**
 * qemuProcessRefreshBalloonState:
 * @driver: qemu driver
 * @vm: domain object
 * @asyncJob: async job passed to qemuDomainObjEnterMonitorAsync()
 *
 * Updates vm->def->mem.cur_balloon.  With a balloon device present the
 * value is read from the monitor; otherwise it is set to the value of
 * virDomainDefGetMemoryActual().
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessRefreshBalloonState(virQEMUDriverPtr driver,
                               virDomainObjPtr vm,
                               int asyncJob)
{
    unsigned long long balloon;
    int rc;

    if (vm->def->memballoon &&
        vm->def->memballoon->model != VIR_DOMAIN_MEMBALLOON_MODEL_NONE) {
        if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
            return -1;

        rc = qemuMonitorGetBalloonInfo(qemuDomainGetMonitor(vm), &balloon);
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            rc = -1;

        if (rc < 0)
            return -1;

        vm->def->mem.cur_balloon = balloon;
    } else {
        /* if no ballooning is available, the current size equals to the
         * current full memory size */
        vm->def->mem.cur_balloon = virDomainDefGetMemoryActual(vm->def);
    }

    return 0;
}


1942
static int
1943
qemuProcessWaitForMonitor(virQEMUDriverPtr driver,
C
Cole Robinson 已提交
1944
                          virDomainObjPtr vm,
1945
                          int asyncJob,
1946
                          virQEMUCapsPtr qemuCaps,
1947
                          qemuDomainLogContextPtr logCtxt)
1948 1949
{
    int ret = -1;
1950
    virHashTablePtr info = NULL;
1951
    qemuDomainObjPrivatePtr priv;
1952 1953

    VIR_DEBUG("Connect monitor to %p '%s'", vm, vm->def->name);
1954
    if (qemuConnectMonitor(driver, vm, asyncJob, logCtxt) < 0)
1955 1956 1957 1958 1959 1960
        goto cleanup;

    /* Try to get the pty path mappings again via the monitor. This is much more
     * reliable if it's available.
     * Note that the monitor itself can be on a pty, so we still need to try the
     * log output method. */
1961
    priv = vm->privateData;
1962 1963
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;
1964 1965
    ret = qemuMonitorGetChardevInfo(priv->mon, &info);
    VIR_DEBUG("qemuMonitorGetChardevInfo returned %i", ret);
1966 1967 1968
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;

1969 1970 1971 1972 1973
    if (ret == 0) {
        if ((ret = qemuProcessFindCharDevicePTYsMonitor(vm, qemuCaps,
                                                        info)) < 0)
            goto cleanup;

1974 1975
        if ((ret = qemuProcessRefreshChannelVirtioState(driver, vm, info,
                                                        true)) < 0)
1976 1977
            goto cleanup;
    }
1978

1979
 cleanup:
1980
    virHashFree(info);
1981

1982 1983
    if (logCtxt && kill(vm->pid, 0) == -1 && errno == ESRCH) {
        qemuProcessReportLogError(logCtxt,
1984
                                  _("process exited while connecting to monitor"));
1985 1986 1987 1988 1989 1990
        ret = -1;
    }

    return ret;
}

1991

1992 1993 1994 1995 1996 1997
/**
 * qemuProcessDetectIOThreadPIDs:
 * @driver: qemu driver
 * @vm: domain object
 * @asyncJob: async job passed to qemuDomainObjEnterMonitorAsync()
 *
 * Without QEMU_CAPS_OBJECT_IOTHREAD every iothread id definition of the
 * domain is dropped.  Otherwise the IOThread list is fetched from the
 * monitor and each reported thread_id is stored in the matching iothread
 * id definition.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessDetectIOThreadPIDs(virQEMUDriverPtr driver,
                              virDomainObjPtr vm,
                              int asyncJob)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    qemuMonitorIOThreadInfoPtr *iothreads = NULL;
    int niothreads = 0;
    int ret = -1;
    size_t i;

    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
        /* At one time a domain could define iothreadids and still start,
         * failing the capability check only when a disk was added.  The
         * iothreads and [n]iothreadids were left untouched, so other code
         * assumed the ->thread_id value was usable for thread_id based
         * adjustments (e.g. pinning, scheduling), which would succeed but
         * execute on the calling thread.
         */
        if (!vm->def->niothreadids)
            return 0;

        for (i = 0; i < vm->def->niothreadids; i++) {
            virDomainIOThreadIDDefPtr stale = vm->def->iothreadids[i];

            /* Only ids that were not autofilled get a VIR_INFO about
             * their removal. */
            if (!stale->autofill)
                VIR_INFO("IOThreads not supported, remove iothread id '%u'",
                         stale->iothread_id);
            virDomainIOThreadIDDefFree(stale);
        }

        /* Remove any trace */
        VIR_FREE(vm->def->iothreadids);
        vm->def->niothreadids = 0;
        vm->def->iothreads = 0;
        return 0;
    }

    /* Get the list of IOThreads from qemu */
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;
    niothreads = qemuMonitorGetIOThreads(priv->mon, &iothreads);
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        goto cleanup;
    if (niothreads < 0)
        goto cleanup;

    if (niothreads != vm->def->niothreadids) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("got wrong number of IOThread pids from QEMU monitor. "
                         "got %d, wanted %zu"),
                       niothreads, vm->def->niothreadids);
        goto cleanup;
    }

    /* with zero iothreads the loop body never runs and we succeed */
    for (i = 0; i < niothreads; i++) {
        virDomainIOThreadIDDefPtr iothrid;

        iothrid = virDomainIOThreadIDFind(vm->def, iothreads[i]->iothread_id);
        if (!iothrid) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("iothread %d not found"),
                           iothreads[i]->iothread_id);
            goto cleanup;
        }
        iothrid->thread_id = iothreads[i]->thread_id;
    }

    ret = 0;

 cleanup:
    if (iothreads) {
        for (i = 0; i < niothreads; i++)
            VIR_FREE(iothreads[i]);
        VIR_FREE(iothreads);
    }
    return ret;
}

2077 2078 2079 2080 2081

/*
 * To be run between fork/exec of QEMU only
 */
static int
2082
qemuProcessInitCpuAffinity(virDomainObjPtr vm)
2083 2084 2085 2086
{
    int ret = -1;
    virBitmapPtr cpumap = NULL;
    virBitmapPtr cpumapToSet = NULL;
2087
    qemuDomainObjPrivatePtr priv = vm->privateData;
2088

2089 2090 2091 2092 2093 2094
    if (!vm->pid) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot setup CPU affinity until process is started"));
        return -1;
    }

2095 2096
    if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
        VIR_DEBUG("Set CPU affinity with advisory nodeset from numad");
2097
        cpumapToSet = priv->autoCpuset;
2098
    } else {
2099
        VIR_DEBUG("Set CPU affinity with specified cpuset");
O
Osier Yang 已提交
2100
        if (vm->def->cpumask) {
H
Hu Tao 已提交
2101
            cpumapToSet = vm->def->cpumask;
O
Osier Yang 已提交
2102 2103 2104 2105 2106
        } else {
            /* You may think this is redundant, but we can't assume libvirtd
             * itself is running on all pCPUs, so we need to explicitly set
             * the spawned QEMU instance to all pCPUs if no map is given in
             * its config file */
2107 2108 2109 2110
            int hostcpus;

            /* setaffinity fails if you set bits for CPUs which
             * aren't present, so we have to limit ourselves */
2111
            if ((hostcpus = nodeGetCPUCount(NULL)) < 0)
2112 2113 2114 2115 2116 2117 2118 2119
                goto cleanup;

            if (hostcpus > QEMUD_CPUMASK_LEN)
                hostcpus = QEMUD_CPUMASK_LEN;

            if (!(cpumap = virBitmapNew(hostcpus)))
                goto cleanup;

2120
            virBitmapSetAll(cpumap);
2121 2122

            cpumapToSet = cpumap;
O
Osier Yang 已提交
2123
        }
2124 2125
    }

2126
    if (virProcessSetAffinity(vm->pid, cpumapToSet) < 0)
2127
        goto cleanup;
2128

2129 2130
    ret = 0;

2131
 cleanup:
2132
    virBitmapFree(cpumap);
2133
    return ret;
2134 2135
}

2136 2137
/* set link states to down on interfaces at qemu start */
static int
2138 2139 2140
qemuProcessSetLinkStates(virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
                         qemuDomainAsyncJob asyncJob)
2141 2142 2143
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
2144
    size_t i;
2145 2146 2147 2148 2149
    int ret = -1;
    int rv;

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;
2150 2151 2152

    for (i = 0; i < def->nnets; i++) {
        if (def->nets[i]->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN) {
2153 2154 2155
            if (!def->nets[i]->info.alias) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("missing alias for network device"));
2156
                goto cleanup;
2157 2158
            }

2159 2160
            VIR_DEBUG("Setting link state: %s", def->nets[i]->info.alias);

2161
            if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NETDEV)) {
2162
                virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
2163
                               _("Setting of link state is not supported by this qemu"));
2164
                goto cleanup;
2165 2166
            }

2167 2168 2169 2170
            rv = qemuMonitorSetLink(priv->mon,
                                    def->nets[i]->info.alias,
                                    VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN);
            if (rv < 0) {
2171
                virReportError(VIR_ERR_OPERATION_FAILED,
2172 2173 2174
                               _("Couldn't set link state on interface: %s"),
                               def->nets[i]->info.alias);
                goto cleanup;
2175 2176 2177 2178
            }
        }
    }

2179 2180 2181 2182 2183
    ret = 0;

 cleanup:
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
2184 2185 2186
    return ret;
}

2187

2188
/* Set CPU affinities for emulator threads. */
2189
static int
2190
qemuProcessSetEmulatorAffinity(virDomainObjPtr vm)
2191
{
2192
    virBitmapPtr cpumask;
2193 2194 2195
    virDomainDefPtr def = vm->def;
    int ret = -1;

O
Osier Yang 已提交
2196
    if (def->cputune.emulatorpin)
2197
        cpumask = def->cputune.emulatorpin;
O
Osier Yang 已提交
2198
    else if (def->cpumask)
2199
        cpumask = def->cpumask;
O
Osier Yang 已提交
2200 2201
    else
        return 0;
2202

2203
    ret = virProcessSetAffinity(vm->pid, cpumask);
2204 2205 2206
    return ret;
}

2207

2208 2209
static int
qemuProcessInitPasswords(virConnectPtr conn,
2210
                         virQEMUDriverPtr driver,
2211 2212
                         virDomainObjPtr vm,
                         int asyncJob)
2213 2214 2215
{
    int ret = 0;
    qemuDomainObjPrivatePtr priv = vm->privateData;
2216
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
2217
    size_t i;
2218 2219
    char *alias = NULL;
    char *secret = NULL;
2220

2221
    for (i = 0; i < vm->def->ngraphics; ++i) {
2222 2223
        virDomainGraphicsDefPtr graphics = vm->def->graphics[i];
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
2224 2225
            ret = qemuDomainChangeGraphicsPasswords(driver, vm,
                                                    VIR_DOMAIN_GRAPHICS_TYPE_VNC,
2226
                                                    &graphics->data.vnc.auth,
2227 2228
                                                    cfg->vncPassword,
                                                    asyncJob);
2229
        } else if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
2230 2231
            ret = qemuDomainChangeGraphicsPasswords(driver, vm,
                                                    VIR_DOMAIN_GRAPHICS_TYPE_SPICE,
2232
                                                    &graphics->data.spice.auth,
2233 2234
                                                    cfg->spicePassword,
                                                    asyncJob);
2235 2236
        }

2237 2238 2239
        if (ret < 0)
            goto cleanup;
    }
2240

2241
    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
2242
        for (i = 0; i < vm->def->ndisks; i++) {
2243 2244
            size_t secretLen;

2245
            if (!vm->def->disks[i]->src->encryption ||
2246
                !virDomainDiskGetSource(vm->def->disks[i]))
2247 2248
                continue;

2249
            VIR_FREE(secret);
2250 2251 2252 2253 2254
            if (qemuProcessGetVolumeQcowPassphrase(conn,
                                                   vm->def->disks[i],
                                                   &secret, &secretLen) < 0)
                goto cleanup;

2255 2256 2257 2258
            VIR_FREE(alias);
            if (VIR_STRDUP(alias, vm->def->disks[i]->info.alias) < 0)
                goto cleanup;
            if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
2259
                goto cleanup;
E
Eric Blake 已提交
2260
            ret = qemuMonitorSetDrivePassphrase(priv->mon, alias, secret);
2261 2262
            if (qemuDomainObjExitMonitor(driver, vm) < 0)
                ret = -1;
2263 2264 2265 2266 2267
            if (ret < 0)
                goto cleanup;
        }
    }

2268
 cleanup:
2269 2270
    VIR_FREE(alias);
    VIR_FREE(secret);
2271
    virObjectUnref(cfg);
2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314
    return ret;
}


/* PCI vendor IDs, matched against the vendor field of the addresses
 * passed to qemuProcessAssignNextPCIAddress(). */
#define QEMU_PCI_VENDOR_INTEL     0x8086
#define QEMU_PCI_VENDOR_LSI_LOGIC 0x1000
#define QEMU_PCI_VENDOR_REDHAT    0x1af4
#define QEMU_PCI_VENDOR_CIRRUS    0x1013
#define QEMU_PCI_VENDOR_REALTEK   0x10ec
#define QEMU_PCI_VENDOR_AMD       0x1022
#define QEMU_PCI_VENDOR_ENSONIQ   0x1274
#define QEMU_PCI_VENDOR_VMWARE    0x15ad
#define QEMU_PCI_VENDOR_QEMU      0x1234

/* PCI product ID of the virtio disk */
#define QEMU_PCI_PRODUCT_DISK_VIRTIO 0x1001

/* PCI product ID of the virtio memory balloon */
#define QEMU_PCI_PRODUCT_BALLOON_VIRTIO 0x1002

/* PCI product IDs of the supported NIC models */
#define QEMU_PCI_PRODUCT_NIC_NE2K     0x8029
#define QEMU_PCI_PRODUCT_NIC_PCNET    0x2000
#define QEMU_PCI_PRODUCT_NIC_RTL8139  0x8139
#define QEMU_PCI_PRODUCT_NIC_E1000    0x100E
#define QEMU_PCI_PRODUCT_NIC_VIRTIO   0x1000

/* PCI product IDs of the supported video adapters */
#define QEMU_PCI_PRODUCT_VGA_CIRRUS 0x00b8
#define QEMU_PCI_PRODUCT_VGA_VMWARE 0x0405
#define QEMU_PCI_PRODUCT_VGA_STDVGA 0x1111

/* PCI product IDs of the supported sound devices */
#define QEMU_PCI_PRODUCT_AUDIO_AC97    0x2415
#define QEMU_PCI_PRODUCT_AUDIO_ES1370  0x5000

/* PCI product IDs of the supported disk controllers */
#define QEMU_PCI_PRODUCT_CONTROLLER_PIIX 0x7010
#define QEMU_PCI_PRODUCT_CONTROLLER_LSI  0x0012

/* PCI product ID of the watchdog.
 * NOTE(review): the macro is spelled "I63000ESB" while the matching
 * enum value is VIR_DOMAIN_WATCHDOG_MODEL_I6300ESB; the spelling is
 * kept because the macro is referenced by name. */
#define QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB 0x25ab

/**
 * qemuProcessAssignNextPCIAddress:
 * @info: device info that receives the address
 * @vendor: PCI vendor ID to look for
 * @product: PCI product ID to look for
 * @addrs: array of PCI addresses to search
 * @naddrs: number of entries in @addrs
 *
 * Finds the first entry of @addrs matching @vendor and @product, blanks
 * its IDs so it cannot match again and stores its address in @info.  An
 * @info without address type becomes a PCI one; an @info with a non-PCI
 * address type keeps its address untouched.
 *
 * Returns 0 if a matching entry was found, -1 otherwise.
 */
static int
qemuProcessAssignNextPCIAddress(virDomainDeviceInfo *info,
                                int vendor,
                                int product,
                                qemuMonitorPCIAddress *addrs,
                                int naddrs)
{
    size_t idx;

    VIR_DEBUG("Look for %x:%x out of %d", vendor, product, naddrs);

    for (idx = 0; idx < naddrs; idx++) {
        qemuMonitorPCIAddress *cand = &addrs[idx];

        VIR_DEBUG("Maybe %x:%x", cand->vendor, cand->product);
        if (cand->vendor != vendor || cand->product != product)
            continue;

        VIR_DEBUG("Match %zu", idx);

        /* Blank it out so this device isn't matched again */
        cand->vendor = 0;
        cand->product = 0;

        if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_NONE)
            info->type = VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI;

        if (info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_PCI) {
            info->addr.pci.domain = cand->addr.domain;
            info->addr.pci.bus = cand->addr.bus;
            info->addr.pci.slot = cand->addr.slot;
            info->addr.pci.function = cand->addr.function;
        }

        return 0;
    }

    return -1;
}

/**
 * qemuProcessGetPCIDiskVendorProduct:
 * @def: disk definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * Only disks on the virtio bus are known.
 *
 * Returns 0 if the IDs were filled in, -1 for any other bus.
 */
static int
qemuProcessGetPCIDiskVendorProduct(virDomainDiskDefPtr def,
                                   unsigned *vendor,
                                   unsigned *product)
{
    if (def->bus != VIR_DOMAIN_DISK_BUS_VIRTIO)
        return -1;

    *vendor = QEMU_PCI_VENDOR_REDHAT;
    *product = QEMU_PCI_PRODUCT_DISK_VIRTIO;
    return 0;
}

/**
 * qemuProcessGetPCINetVendorProduct:
 * @def: network interface definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * Maps the NIC model name to its PCI vendor/product pair.
 *
 * Returns 0 if the IDs were filled in, -1 if the model is missing or
 * not one of the known models.
 */
static int
qemuProcessGetPCINetVendorProduct(virDomainNetDefPtr def,
                                  unsigned *vendor,
                                  unsigned *product)
{
    static const struct {
        const char *model;
        unsigned vendor;
        unsigned product;
    } known[] = {
        { "ne2k_pci", QEMU_PCI_VENDOR_REALTEK, QEMU_PCI_PRODUCT_NIC_NE2K },
        { "pcnet",    QEMU_PCI_VENDOR_AMD,     QEMU_PCI_PRODUCT_NIC_PCNET },
        { "rtl8139",  QEMU_PCI_VENDOR_REALTEK, QEMU_PCI_PRODUCT_NIC_RTL8139 },
        { "e1000",    QEMU_PCI_VENDOR_INTEL,   QEMU_PCI_PRODUCT_NIC_E1000 },
        { "virtio",   QEMU_PCI_VENDOR_REDHAT,  QEMU_PCI_PRODUCT_NIC_VIRTIO },
    };
    size_t n;

    if (!def->model)
        return -1;

    for (n = 0; n < sizeof(known) / sizeof(known[0]); n++) {
        if (STREQ(def->model, known[n].model)) {
            *vendor = known[n].vendor;
            *product = known[n].product;
            return 0;
        }
    }

    VIR_INFO("Unexpected NIC model %s, cannot get PCI address",
             def->model);
    return -1;
}

/**
 * qemuProcessGetPCIControllerVendorProduct:
 * @def: controller definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * SCSI and IDE controllers are known.  FDC controllers are rejected
 * silently, every other type is rejected with a VIR_INFO message.
 *
 * Returns 0 if the IDs were filled in, -1 otherwise.
 */
static int
qemuProcessGetPCIControllerVendorProduct(virDomainControllerDefPtr def,
                                         unsigned *vendor,
                                         unsigned *product)
{
    if (def->type == VIR_DOMAIN_CONTROLLER_TYPE_SCSI) {
        *vendor = QEMU_PCI_VENDOR_LSI_LOGIC;
        *product = QEMU_PCI_PRODUCT_CONTROLLER_LSI;
        return 0;
    }

    if (def->type == VIR_DOMAIN_CONTROLLER_TYPE_IDE) {
        *vendor = QEMU_PCI_VENDOR_INTEL;
        *product = QEMU_PCI_PRODUCT_CONTROLLER_PIIX;
        return 0;
    }

    /* XXX for FDC we could put in the ISA bridge address, but
       that's not technically the FDC's address */
    if (def->type != VIR_DOMAIN_CONTROLLER_TYPE_FDC) {
        VIR_INFO("Unexpected controller type %s, cannot get PCI address",
                 virDomainControllerTypeToString(def->type));
    }

    return -1;
}

/**
 * qemuProcessGetPCIVideoVendorProduct:
 * @def: video device definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * Cirrus, VGA and VMVGA adapters are known.
 *
 * Returns 0 if the IDs were filled in, -1 for any other type.
 */
static int
qemuProcessGetPCIVideoVendorProduct(virDomainVideoDefPtr def,
                                    unsigned *vendor,
                                    unsigned *product)
{
    if (def->type == VIR_DOMAIN_VIDEO_TYPE_CIRRUS) {
        *vendor = QEMU_PCI_VENDOR_CIRRUS;
        *product = QEMU_PCI_PRODUCT_VGA_CIRRUS;
    } else if (def->type == VIR_DOMAIN_VIDEO_TYPE_VGA) {
        *vendor = QEMU_PCI_VENDOR_QEMU;
        *product = QEMU_PCI_PRODUCT_VGA_STDVGA;
    } else if (def->type == VIR_DOMAIN_VIDEO_TYPE_VMVGA) {
        *vendor = QEMU_PCI_VENDOR_VMWARE;
        *product = QEMU_PCI_PRODUCT_VGA_VMWARE;
    } else {
        return -1;
    }

    return 0;
}

/**
 * qemuProcessGetPCISoundVendorProduct:
 * @def: sound device definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * ES1370 and AC97 models are known.
 *
 * Returns 0 if the IDs were filled in, -1 for any other model.
 */
static int
qemuProcessGetPCISoundVendorProduct(virDomainSoundDefPtr def,
                                    unsigned *vendor,
                                    unsigned *product)
{
    if (def->model == VIR_DOMAIN_SOUND_MODEL_ES1370) {
        *vendor = QEMU_PCI_VENDOR_ENSONIQ;
        *product = QEMU_PCI_PRODUCT_AUDIO_ES1370;
        return 0;
    }

    if (def->model == VIR_DOMAIN_SOUND_MODEL_AC97) {
        *vendor = QEMU_PCI_VENDOR_INTEL;
        *product = QEMU_PCI_PRODUCT_AUDIO_AC97;
        return 0;
    }

    return -1;
}

/**
 * qemuProcessGetPCIWatchdogVendorProduct:
 * @def: watchdog definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * Only the i6300esb model is known.
 *
 * Returns 0 if the IDs were filled in, -1 for any other model.
 */
static int
qemuProcessGetPCIWatchdogVendorProduct(virDomainWatchdogDefPtr def,
                                       unsigned *vendor,
                                       unsigned *product)
{
    if (def->model != VIR_DOMAIN_WATCHDOG_MODEL_I6300ESB)
        return -1;

    *vendor = QEMU_PCI_VENDOR_INTEL;
    *product = QEMU_PCI_PRODUCT_WATCHDOG_I63000ESB;
    return 0;
}


/**
 * qemuProcessGetPCIMemballoonVendorProduct:
 * @def: memory balloon definition
 * @vendor: filled with the PCI vendor ID
 * @product: filled with the PCI product ID
 *
 * Only the virtio model is known.
 *
 * Returns 0 if the IDs were filled in, -1 for any other model.
 */
static int
qemuProcessGetPCIMemballoonVendorProduct(virDomainMemballoonDefPtr def,
                                         unsigned *vendor,
                                         unsigned *product)
{
    if (def->model != VIR_DOMAIN_MEMBALLOON_MODEL_VIRTIO)
        return -1;

    *vendor = QEMU_PCI_VENDOR_REDHAT;
    *product = QEMU_PCI_PRODUCT_BALLOON_VIRTIO;
    return 0;
}


/*
 * qemuProcessDetectPCIAddresses:
 * @vm: domain object whose devices receive addresses
 * @addrs: PCI addresses to hand out
 * @naddrs: number of entries in @addrs
 *
 * Walks disks, NICs, controllers, video adapters, sound devices, the
 * watchdog and the memory balloon, in that order, and assigns to each
 * device with a known vendor/product pair the next matching entry of
 * @addrs.  Devices without a known pair are skipped.
 *
 * This entire method assumes that PCI devices in 'info pci' match the
 * ordering of devices specified on the command line with respect to
 * devices of matching vendor+product.
 *
 * XXXX this might not be a valid assumption if we assign
 * some static addrs on CLI. Have to check that...
 *
 * Returns 0 on success, -1 if no address was found for some device.
 */
static int
qemuProcessDetectPCIAddresses(virDomainObjPtr vm,
                              qemuMonitorPCIAddress *addrs,
                              int naddrs)
{
    virDomainDefPtr def = vm->def;
    unsigned int vendor = 0;
    unsigned int product = 0;
    size_t n;

    /* XXX should all these vendor/product IDs be kept in the
     * actual device data structure instead ?
     */

    for (n = 0; n < def->ndisks; n++) {
        virDomainDiskDefPtr disk = def->disks[n];

        if (qemuProcessGetPCIDiskVendorProduct(disk, &vendor, &product) < 0)
            continue;

        if (qemuProcessAssignNextPCIAddress(&disk->info, vendor, product,
                                            addrs, naddrs) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("cannot find PCI address for VirtIO disk %s"),
                           disk->dst);
            return -1;
        }
    }

    for (n = 0; n < def->nnets; n++) {
        virDomainNetDefPtr net = def->nets[n];

        if (qemuProcessGetPCINetVendorProduct(net, &vendor, &product) < 0)
            continue;

        if (qemuProcessAssignNextPCIAddress(&net->info, vendor, product,
                                            addrs, naddrs) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("cannot find PCI address for %s NIC"),
                           net->model);
            return -1;
        }
    }

    for (n = 0; n < def->ncontrollers; n++) {
        virDomainControllerDefPtr cont = def->controllers[n];

        if (qemuProcessGetPCIControllerVendorProduct(cont, &vendor, &product) < 0)
            continue;

        if (qemuProcessAssignNextPCIAddress(&cont->info, vendor, product,
                                            addrs, naddrs) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("cannot find PCI address for controller %s"),
                           virDomainControllerTypeToString(cont->type));
            return -1;
        }
    }

    for (n = 0; n < def->nvideos; n++) {
        virDomainVideoDefPtr video = def->videos[n];

        if (qemuProcessGetPCIVideoVendorProduct(video, &vendor, &product) < 0)
            continue;

        if (qemuProcessAssignNextPCIAddress(&video->info, vendor, product,
                                            addrs, naddrs) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("cannot find PCI address for video adapter %s"),
                           virDomainVideoTypeToString(video->type));
            return -1;
        }
    }

    for (n = 0; n < def->nsounds; n++) {
        virDomainSoundDefPtr sound = def->sounds[n];

        if (qemuProcessGetPCISoundVendorProduct(sound, &vendor, &product) < 0)
            continue;

        if (qemuProcessAssignNextPCIAddress(&sound->info, vendor, product,
                                            addrs, naddrs) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("cannot find PCI address for sound adapter %s"),
                           virDomainSoundModelTypeToString(sound->model));
            return -1;
        }
    }

    if (def->watchdog &&
        qemuProcessGetPCIWatchdogVendorProduct(def->watchdog,
                                               &vendor, &product) == 0 &&
        qemuProcessAssignNextPCIAddress(&def->watchdog->info,
                                        vendor, product,
                                        addrs, naddrs) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("cannot find PCI address for watchdog %s"),
                       virDomainWatchdogModelTypeToString(def->watchdog->model));
        return -1;
    }

    if (def->memballoon &&
        qemuProcessGetPCIMemballoonVendorProduct(def->memballoon,
                                                 &vendor, &product) == 0 &&
        qemuProcessAssignNextPCIAddress(&def->memballoon->info,
                                        vendor, product,
                                        addrs, naddrs) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("cannot find PCI address for balloon %s"),
                       virDomainMemballoonModelTypeToString(def->memballoon->model));
        return -1;
    }

    /* XXX console (virtio) */


    /* ... and now things we don't have in our xml */

    /* XXX USB controller ? */

    /* XXX what about other PCI devices (ie bridges) */

    return 0;
}

static int
2644
qemuProcessInitPCIAddresses(virQEMUDriverPtr driver,
2645 2646
                            virDomainObjPtr vm,
                            int asyncJob)
2647 2648 2649
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int naddrs;
2650
    int ret = -1;
2651 2652
    qemuMonitorPCIAddress *addrs = NULL;

2653 2654
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;
2655 2656
    naddrs = qemuMonitorGetAllPCIAddresses(priv->mon,
                                           &addrs);
2657 2658
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        goto cleanup;
2659

2660 2661
    if (naddrs > 0)
        ret = qemuProcessDetectPCIAddresses(vm, addrs, naddrs);
2662

2663
 cleanup:
2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692
    VIR_FREE(addrs);

    return ret;
}


/*
 * Per-chardev callback: for a file-backed character device, make sure the
 * backing file exists by opening it with O_CREAT and closing it again.
 * Devices of any other source type are left alone.
 *
 * @def and @opaque are unused.
 *
 * Returns 0 on success (or when there is nothing to do), -1 with a system
 * error reported if the file could not be opened/created.
 */
static int
qemuProcessPrepareChardevDevice(virDomainDefPtr def ATTRIBUTE_UNUSED,
                                virDomainChrDefPtr dev,
                                void *opaque ATTRIBUTE_UNUSED)
{
    const char *path;
    int fd;

    if (dev->source.type != VIR_DOMAIN_CHR_TYPE_FILE)
        return 0;

    path = dev->source.data.file.path;

    /* New files are created readable and writable by the owner only. */
    fd = open(path, O_CREAT | O_APPEND, S_IRUSR|S_IWUSR);
    if (fd < 0) {
        virReportSystemError(errno,
                             _("Unable to pre-create chardev file '%s'"),
                             path);
        return -1;
    }

    VIR_FORCE_CLOSE(fd);

    return 0;
}


2693 2694 2695 2696 2697 2698
/*
 * Per-chardev callback: remove the socket file of a listening UNIX-socket
 * character device.  Any other device is ignored.
 *
 * @def and @opaque are unused.  The result of unlink() is not checked.
 *
 * Always returns 0.
 */
static int
qemuProcessCleanupChardevDevice(virDomainDefPtr def ATTRIBUTE_UNUSED,
                                virDomainChrDefPtr dev,
                                void *opaque ATTRIBUTE_UNUSED)
{
    if (dev->source.type != VIR_DOMAIN_CHR_TYPE_UNIX)
        return 0;

    if (!dev->source.data.nix.listen || !dev->source.data.nix.path)
        return 0;

    unlink(dev->source.data.nix.path);

    return 0;
}


2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771
/**
 * Loads and updates the video memory size of the domain's video devices
 * according to the QEMU process, because QEMU will silently adjust the values
 * that we pass to it on the command line.  The updated values are then stored
 * in the status XML.
 *
 * We fail if for some reason the values cannot be loaded from QEMU, because
 * it is mandatory to get the correct video memory size into the status XML in
 * order not to break migration.
 *
 * Returns the result of virDomainSaveStatus() on success, -1 if the monitor
 * could not be entered/left or a monitor query failed.
 */
static int
qemuProcessUpdateVideoRamSize(virQEMUDriverPtr driver,
                              virDomainObjPtr vm,
                              int asyncJob)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = NULL;
    ssize_t i;
    int ret;

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;

    for (i = 0; i < vm->def->nvideos; i++) {
        virDomainVideoDefPtr video = vm->def->videos[i];
        /* Name handed to the monitor query; stays NULL when the device type
         * is not handled or QEMU lacks the matching capability. */
        const char *devName = NULL;

        switch (video->type) {
        case VIR_DOMAIN_VIDEO_TYPE_VGA:
            if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VGA_VGAMEM))
                devName = "VGA";
            break;

        case VIR_DOMAIN_VIDEO_TYPE_QXL:
            /* The first video device uses a different name and capability
             * than any further QXL device. */
            if (i == 0) {
                if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGA_VGAMEM))
                    devName = "qxl-vga";
            } else {
                if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM))
                    devName = "qxl";
            }
            break;

        case VIR_DOMAIN_VIDEO_TYPE_VMVGA:
            if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VMWARE_SVGA_VGAMEM))
                devName = "vmware-svga";
            break;

        case VIR_DOMAIN_VIDEO_TYPE_CIRRUS:
        case VIR_DOMAIN_VIDEO_TYPE_XEN:
        case VIR_DOMAIN_VIDEO_TYPE_VBOX:
        case VIR_DOMAIN_VIDEO_TYPE_LAST:
            break;
        }

        if (devName &&
            qemuMonitorUpdateVideoMemorySize(priv->mon, video, devName) < 0) {
            /* Already failing; the exit result cannot change the outcome. */
            ignore_value(qemuDomainObjExitMonitor(driver, vm));
            return -1;
        }
    }

    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        return -1;

    cfg = virQEMUDriverGetConfig(driver);
    ret = virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps);
    virObjectUnref(cfg);

    return ret;
}


2787 2788 2789
struct qemuProcessHookData {
    virConnectPtr conn;
    virDomainObjPtr vm;
2790
    virQEMUDriverPtr driver;
2791
    virQEMUDriverConfigPtr cfg;
2792 2793 2794 2795 2796
};

static int qemuProcessHook(void *data)
{
    struct qemuProcessHookData *h = data;
2797
    qemuDomainObjPrivatePtr priv = h->vm->privateData;
2798
    int ret = -1;
2799
    int fd;
2800 2801 2802
    virBitmapPtr nodeset = NULL;
    virDomainNumatuneMemMode mode;

2803 2804 2805 2806 2807
    /* This method cannot use any mutexes, which are not
     * protected across fork()
     */

    virSecurityManagerPostFork(h->driver->securityManager);
2808 2809 2810 2811 2812

    /* Some later calls want pid present */
    h->vm->pid = getpid();

    VIR_DEBUG("Obtaining domain lock");
2813 2814 2815 2816 2817 2818 2819
    /*
     * Since we're going to leak the returned FD to QEMU,
     * we need to make sure it gets a sensible label.
     * This mildly sucks, because there could be other
     * sockets the lock driver opens that we don't want
     * labelled. So far we're ok though.
     */
2820
    if (virSecurityManagerSetSocketLabel(h->driver->securityManager, h->vm->def) < 0)
2821
        goto cleanup;
2822
    if (virDomainLockProcessStart(h->driver->lockManager,
2823
                                  h->cfg->uri,
2824
                                  h->vm,
J
Ján Tomko 已提交
2825
                                  /* QEMU is always paused initially */
2826 2827
                                  true,
                                  &fd) < 0)
2828
        goto cleanup;
2829
    if (virSecurityManagerClearSocketLabel(h->driver->securityManager, h->vm->def) < 0)
2830
        goto cleanup;
2831

2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842
    if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) {
        if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
            h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) &&
            virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
            /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
             * there's no way for us to change it. Rely on cgroups (if available
             * and enabled in the config) rather than virNuma*. */
            VIR_DEBUG("Relying on CGroups for memory binding");
        } else {
            nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa,
                                                  priv->autoNodeset, -1);
2843

2844 2845 2846
            if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
                goto cleanup;
        }
2847
    }
2848

2849 2850
    ret = 0;

2851
 cleanup:
2852
    virObjectUnref(h->cfg);
2853 2854
    VIR_DEBUG("Hook complete ret=%d", ret);
    return ret;
2855 2856 2857
}

int
2858
qemuProcessPrepareMonitorChr(virQEMUDriverConfigPtr cfg,
2859 2860 2861 2862 2863 2864
                             virDomainChrSourceDefPtr monConfig,
                             const char *vm)
{
    monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX;
    monConfig->data.nix.listen = true;

2865
    if (virAsprintf(&monConfig->data.nix.path, "%s/domain-%s/monitor.sock",
2866 2867 2868
                    cfg->libDir, vm) < 0)
        return -1;
    return 0;
2869 2870 2871
}


2872
/*
2873 2874
 * Precondition: vm must be locked, and a job must be active.
 * This method will call {Enter,Exit}Monitor
2875
 */
E
Eric Blake 已提交
2876
int
2877
qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
2878
                     virConnectPtr conn, virDomainRunningReason reason,
2879
                     qemuDomainAsyncJob asyncJob)
2880
{
2881
    int ret = -1;
2882
    qemuDomainObjPrivatePtr priv = vm->privateData;
2883
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
2884

2885
    /* Bring up netdevs before starting CPUs */
2886
    if (qemuInterfaceStartDevices(vm->def) < 0)
2887 2888
       goto cleanup;

2889
    VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState));
2890
    if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
2891
                                   vm, priv->lockState) < 0) {
2892 2893 2894 2895
        /* Don't free priv->lockState on error, because we need
         * to make sure we have state still present if the user
         * tries to resume again
         */
2896
        goto cleanup;
2897 2898 2899
    }
    VIR_FREE(priv->lockState);

2900 2901
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto release;
J
Jiri Denemark 已提交
2902

2903
    ret = qemuMonitorStartCPUs(priv->mon, conn);
2904 2905
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
2906 2907 2908 2909 2910

    if (ret < 0)
        goto release;

    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
2911

2912
 cleanup:
2913
    virObjectUnref(cfg);
2914
    return ret;
2915 2916 2917 2918 2919 2920

 release:
    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
        VIR_WARN("Unable to release lease on %s", vm->def->name);
    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
    goto cleanup;
2921 2922 2923
}


2924 2925
int qemuProcessStopCPUs(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
2926
                        virDomainPausedReason reason,
2927
                        qemuDomainAsyncJob asyncJob)
2928
{
2929
    int ret = -1;
2930 2931
    qemuDomainObjPrivatePtr priv = vm->privateData;

2932
    VIR_FREE(priv->lockState);
J
Jiri Denemark 已提交
2933

2934 2935
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;
J
Jiri Denemark 已提交
2936

2937
    ret = qemuMonitorStopCPUs(priv->mon);
2938 2939
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
2940 2941 2942 2943

    if (ret < 0)
        goto cleanup;

2944 2945 2946
    /* de-activate netdevs after stopping CPUs */
    ignore_value(qemuInterfaceStopDevices(vm->def));

2947 2948 2949
    if (priv->job.current)
        ignore_value(virTimeMillisNow(&priv->job.current->stopped));

2950 2951 2952 2953
    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
        VIR_WARN("Unable to release lease on %s", vm->def->name);
    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
J
Jiri Denemark 已提交
2954

2955
 cleanup:
2956 2957 2958 2959 2960
    return ret;
}



2961 2962 2963
/*
 * Walk all network interfaces of @def: reserve the device name of every
 * interface whose actual type is "direct", and call
 * networkNotifyActualDevice() for each interface.
 *
 * Returns 0 on success, -1 as soon as networkNotifyActualDevice() fails.
 */
static int
qemuProcessNotifyNets(virDomainDefPtr def)
{
    size_t idx;

    for (idx = 0; idx < def->nnets; idx++) {
        virDomainNetDefPtr iface = def->nets[idx];
        bool direct = virDomainNetGetActualType(iface) == VIR_DOMAIN_NET_TYPE_DIRECT;

        /* keep others from trying to use the macvtap device name, but
         * don't return error if this happens, since that causes the
         * domain to be unceremoniously killed, which would be *very*
         * impolite.
         */
        if (direct)
            ignore_value(virNetDevMacVLanReserveName(iface->ifname, false));

        if (networkNotifyActualDevice(def, iface) < 0)
            return -1;
    }

    return 0;
}

2982
static int
2983
qemuProcessFiltersInstantiate(virDomainDefPtr def)
2984
{
2985
    size_t i;
2986

2987
    for (i = 0; i < def->nnets; i++) {
2988 2989
        virDomainNetDefPtr net = def->nets[i];
        if ((net->filter) && (net->ifname)) {
2990
            if (virDomainConfNWFilterInstantiate(def->uuid, net) < 0)
J
Ján Tomko 已提交
2991
                return 1;
2992 2993 2994
        }
    }

J
Ján Tomko 已提交
2995
    return 0;
2996 2997
}

2998
static int
2999
qemuProcessUpdateState(virQEMUDriverPtr driver, virDomainObjPtr vm)
3000 3001 3002
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainState state;
3003
    virDomainPausedReason reason;
3004
    virDomainState newState = VIR_DOMAIN_NOSTATE;
3005
    int oldReason;
3006
    int newReason;
3007
    bool running;
3008
    char *msg = NULL;
3009 3010
    int ret;

3011
    qemuDomainObjEnterMonitor(driver, vm);
3012
    ret = qemuMonitorGetStatus(priv->mon, &running, &reason);
3013 3014
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        return -1;
3015

3016
    if (ret < 0)
3017 3018
        return -1;

3019
    state = virDomainObjGetState(vm, &oldReason);
3020

3021 3022 3023 3024 3025 3026 3027 3028
    if (running &&
        (state == VIR_DOMAIN_SHUTOFF ||
         (state == VIR_DOMAIN_PAUSED &&
          oldReason == VIR_DOMAIN_PAUSED_STARTING_UP))) {
        newState = VIR_DOMAIN_RUNNING;
        newReason = VIR_DOMAIN_RUNNING_BOOTED;
        ignore_value(VIR_STRDUP_QUIET(msg, "finished booting"));
    } else if (state == VIR_DOMAIN_PAUSED && running) {
3029 3030
        newState = VIR_DOMAIN_RUNNING;
        newReason = VIR_DOMAIN_RUNNING_UNPAUSED;
3031
        ignore_value(VIR_STRDUP_QUIET(msg, "was unpaused"));
3032
    } else if (state == VIR_DOMAIN_RUNNING && !running) {
3033 3034 3035
        if (reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) {
            newState = VIR_DOMAIN_SHUTDOWN;
            newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN;
3036
            ignore_value(VIR_STRDUP_QUIET(msg, "shutdown"));
3037
        } else if (reason == VIR_DOMAIN_PAUSED_CRASHED) {
3038 3039 3040
            newState = VIR_DOMAIN_CRASHED;
            newReason = VIR_DOMAIN_CRASHED_PANICKED;
            ignore_value(VIR_STRDUP_QUIET(msg, "crashed"));
3041 3042 3043
        } else {
            newState = VIR_DOMAIN_PAUSED;
            newReason = reason;
S
Stefan Berger 已提交
3044 3045
            ignore_value(virAsprintf(&msg, "was paused (%s)",
                                 virDomainPausedReasonTypeToString(reason)));
3046 3047 3048 3049 3050 3051 3052
        }
    }

    if (newState != VIR_DOMAIN_NOSTATE) {
        VIR_DEBUG("Domain %s %s while its monitor was disconnected;"
                  " changing state to %s (%s)",
                  vm->def->name,
3053
                  NULLSTR(msg),
3054 3055 3056 3057
                  virDomainStateTypeToString(newState),
                  virDomainStateReasonToString(newState, newReason));
        VIR_FREE(msg);
        virDomainObjSetState(vm, newState, newReason);
3058 3059 3060 3061 3062
    }

    return 0;
}

3063
static int
3064
qemuProcessRecoverMigration(virQEMUDriverPtr driver,
3065 3066
                            virDomainObjPtr vm,
                            virConnectPtr conn,
3067 3068
                            qemuDomainAsyncJob job,
                            qemuMigrationJobPhase phase,
3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094
                            virDomainState state,
                            int reason)
{
    if (job == QEMU_ASYNC_JOB_MIGRATION_IN) {
        switch (phase) {
        case QEMU_MIGRATION_PHASE_NONE:
        case QEMU_MIGRATION_PHASE_PERFORM2:
        case QEMU_MIGRATION_PHASE_BEGIN3:
        case QEMU_MIGRATION_PHASE_PERFORM3:
        case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
        case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
        case QEMU_MIGRATION_PHASE_CONFIRM3:
        case QEMU_MIGRATION_PHASE_LAST:
            break;

        case QEMU_MIGRATION_PHASE_PREPARE:
            VIR_DEBUG("Killing unfinished incoming migration for domain %s",
                      vm->def->name);
            return -1;

        case QEMU_MIGRATION_PHASE_FINISH2:
            /* source domain is already killed so let's just resume the domain
             * and hope we are all set */
            VIR_DEBUG("Incoming migration finished, resuming domain %s",
                      vm->def->name);
            if (qemuProcessStartCPUs(driver, vm, conn,
3095 3096
                                     VIR_DOMAIN_RUNNING_UNPAUSED,
                                     QEMU_ASYNC_JOB_NONE) < 0) {
3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124
                VIR_WARN("Could not resume domain %s", vm->def->name);
            }
            break;

        case QEMU_MIGRATION_PHASE_FINISH3:
            /* migration finished, we started resuming the domain but didn't
             * confirm success or failure yet; killing it seems safest */
            VIR_DEBUG("Killing migrated domain %s", vm->def->name);
            return -1;
        }
    } else if (job == QEMU_ASYNC_JOB_MIGRATION_OUT) {
        switch (phase) {
        case QEMU_MIGRATION_PHASE_NONE:
        case QEMU_MIGRATION_PHASE_PREPARE:
        case QEMU_MIGRATION_PHASE_FINISH2:
        case QEMU_MIGRATION_PHASE_FINISH3:
        case QEMU_MIGRATION_PHASE_LAST:
            break;

        case QEMU_MIGRATION_PHASE_BEGIN3:
            /* nothing happen so far, just forget we were about to migrate the
             * domain */
            break;

        case QEMU_MIGRATION_PHASE_PERFORM2:
        case QEMU_MIGRATION_PHASE_PERFORM3:
            /* migration is still in progress, let's cancel it and resume the
             * domain */
3125
            if (qemuMigrationCancel(driver, vm) < 0)
3126
                return -1;
3127 3128 3129 3130 3131 3132
            /* resume the domain but only if it was paused as a result of
             * migration */
            if (state == VIR_DOMAIN_PAUSED &&
                (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
                 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
                if (qemuProcessStartCPUs(driver, vm, conn,
3133 3134
                                         VIR_DOMAIN_RUNNING_UNPAUSED,
                                         QEMU_ASYNC_JOB_NONE) < 0) {
3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153
                    VIR_WARN("Could not resume domain %s", vm->def->name);
                }
            }
            break;

        case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
            /* migration finished but we didn't have a chance to get the result
             * of Finish3 step; third party needs to check what to do next
             */
            break;

        case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
            /* Finish3 failed, we need to resume the domain */
            VIR_DEBUG("Resuming domain %s after failed migration",
                      vm->def->name);
            if (state == VIR_DOMAIN_PAUSED &&
                (reason == VIR_DOMAIN_PAUSED_MIGRATION ||
                 reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
                if (qemuProcessStartCPUs(driver, vm, conn,
3154 3155
                                         VIR_DOMAIN_RUNNING_UNPAUSED,
                                         QEMU_ASYNC_JOB_NONE) < 0) {
3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169
                    VIR_WARN("Could not resume domain %s", vm->def->name);
                }
            }
            break;

        case QEMU_MIGRATION_PHASE_CONFIRM3:
            /* migration completed, we need to kill the domain here */
            return -1;
        }
    }

    return 0;
}

3170
static int
3171
qemuProcessRecoverJob(virQEMUDriverPtr driver,
3172 3173 3174 3175
                      virDomainObjPtr vm,
                      virConnectPtr conn,
                      const struct qemuDomainJobObj *job)
{
3176
    qemuDomainObjPrivatePtr priv = vm->privateData;
3177 3178 3179 3180 3181 3182 3183 3184
    virDomainState state;
    int reason;

    state = virDomainObjGetState(vm, &reason);

    switch (job->asyncJob) {
    case QEMU_ASYNC_JOB_MIGRATION_OUT:
    case QEMU_ASYNC_JOB_MIGRATION_IN:
3185 3186 3187
        if (qemuProcessRecoverMigration(driver, vm, conn, job->asyncJob,
                                        job->phase, state, reason) < 0)
            return -1;
3188 3189 3190 3191
        break;

    case QEMU_ASYNC_JOB_SAVE:
    case QEMU_ASYNC_JOB_DUMP:
3192
    case QEMU_ASYNC_JOB_SNAPSHOT:
3193
        qemuDomainObjEnterMonitor(driver, vm);
3194
        ignore_value(qemuMonitorMigrateCancel(priv->mon));
3195 3196
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            return -1;
3197
        /* resume the domain but only if it was paused as a result of
3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213
         * running a migration-to-file operation.  Although we are
         * recovering an async job, this function is run at startup
         * and must resume things using sync monitor connections.  */
         if (state == VIR_DOMAIN_PAUSED &&
             ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
               reason == VIR_DOMAIN_PAUSED_DUMP) ||
              (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
               reason == VIR_DOMAIN_PAUSED_SAVE) ||
              (job->asyncJob == QEMU_ASYNC_JOB_SNAPSHOT &&
               reason == VIR_DOMAIN_PAUSED_SNAPSHOT) ||
              reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
             if (qemuProcessStartCPUs(driver, vm, conn,
                                      VIR_DOMAIN_RUNNING_UNPAUSED,
                                      QEMU_ASYNC_JOB_NONE) < 0) {
                 VIR_WARN("Could not resume domain '%s' after migration to file",
                          vm->def->name);
3214 3215 3216 3217
            }
        }
        break;

3218 3219 3220 3221
    case QEMU_ASYNC_JOB_START:
        /* Already handled in VIR_DOMAIN_PAUSED_STARTING_UP check. */
        break;

3222 3223 3224 3225 3226 3227 3228 3229
    case QEMU_ASYNC_JOB_NONE:
    case QEMU_ASYNC_JOB_LAST:
        break;
    }

    if (!virDomainObjIsActive(vm))
        return -1;

3230 3231 3232 3233
    /* In case any special handling is added for job type that has been ignored
     * before, QEMU_DOMAIN_TRACK_JOBS (from qemu_domain.h) needs to be updated
     * for the job to be properly tracked in domain state XML.
     */
3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253
    switch (job->active) {
    case QEMU_JOB_QUERY:
        /* harmless */
        break;

    case QEMU_JOB_DESTROY:
        VIR_DEBUG("Domain %s should have already been destroyed",
                  vm->def->name);
        return -1;

    case QEMU_JOB_SUSPEND:
        /* mostly harmless */
        break;

    case QEMU_JOB_MODIFY:
        /* XXX depending on the command we may be in an inconsistent state and
         * we should probably fall back to "monitor error" state and refuse to
         */
        break;

3254
    case QEMU_JOB_MIGRATION_OP:
3255
    case QEMU_JOB_ABORT:
3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266
    case QEMU_JOB_ASYNC:
    case QEMU_JOB_ASYNC_NESTED:
        /* async job was already handled above */
    case QEMU_JOB_NONE:
    case QEMU_JOB_LAST:
        break;
    }

    return 0;
}

3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281
/*
 * Refresh priv->qemuDevices and clean up devices that disappeared: every
 * name that was in the previous list but is missing from the refreshed one,
 * and that can still be found in the domain definition, is passed to
 * qemuDomainRemoveDevice().
 *
 * Does nothing when QEMU lacks QEMU_CAPS_DEVICE_DEL_EVENT.
 *
 * The previous list is freed on every path past the capability check.
 *
 * Returns 0 on success, -1 if refreshing the list or removing a device
 * failed.
 */
static int
qemuProcessUpdateDevices(virQEMUDriverPtr driver,
                         virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDeviceDef dev;
    char **previous;
    char **cur;
    int ret = -1;

    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_DEL_EVENT))
        return 0;

    /* Detach the old list so the refresh stores a new one in its place */
    previous = priv->qemuDevices;
    priv->qemuDevices = NULL;

    if (qemuDomainUpdateDeviceList(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
        goto cleanup;

    for (cur = previous; cur && *cur; cur++) {
        /* Still known to QEMU: nothing to clean up */
        if (virStringArrayHasString(priv->qemuDevices, *cur))
            continue;

        /* Not present in the domain definition: nothing to remove */
        if (virDomainDefFindDevice(vm->def, *cur, &dev, false) != 0)
            continue;

        if (qemuDomainRemoveDevice(driver, vm, &dev) < 0)
            goto cleanup;
    }

    ret = 0;

 cleanup:
    virStringFreeList(previous);
    return ret;
}

3302 3303
struct qemuProcessReconnectData {
    virConnectPtr conn;
3304
    virQEMUDriverPtr driver;
3305
    virDomainObjPtr obj;
3306 3307 3308 3309
};
/*
 * Open an existing VM's monitor, re-detect VCPU threads
 * and re-reserve the security labels in use
S
Stefan Berger 已提交
3310 3311 3312 3313
 *
 * We own the virConnectPtr we are passed here - whoever started
 * this thread function has increased the reference counter to it
 * so that we now have to close it.
3314
 *
3315
 * This function also inherits a locked and ref'd domain object.
3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327
 *
 * This function needs to:
 * 1. Enter job
 * 1. just before monitor reconnect do lightweight MonitorEnter
 *    (increase VM refcount and unlock VM)
 * 2. reconnect to monitor
 * 3. do lightweight MonitorExit (lock VM)
 * 4. continue reconnect process
 * 5. EndJob
 *
 * We can't do normal MonitorEnter & MonitorExit because these two lock the
 * monitor lock, which does not exists in this early phase.
3328 3329
 */
static void
3330
qemuProcessReconnect(void *opaque)
3331 3332
{
    struct qemuProcessReconnectData *data = opaque;
3333
    virQEMUDriverPtr driver = data->driver;
3334
    virDomainObjPtr obj = data->obj;
3335 3336
    qemuDomainObjPrivatePtr priv;
    virConnectPtr conn = data->conn;
3337
    struct qemuDomainJobObj oldjob;
3338 3339
    int state;
    int reason;
3340
    virQEMUDriverConfigPtr cfg;
3341
    size_t i;
3342
    int ret;
3343
    unsigned int stopFlags = 0;
3344
    bool jobStarted = false;
3345

3346 3347
    VIR_FREE(data);

3348
    qemuDomainObjRestoreJob(obj, &oldjob);
3349 3350
    if (oldjob.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
3351

3352 3353 3354
    cfg = virQEMUDriverGetConfig(driver);
    priv = obj->privateData;

3355 3356 3357 3358
    if (qemuDomainObjBeginJob(driver, obj, QEMU_JOB_MODIFY) < 0)
        goto error;
    jobStarted = true;

3359 3360 3361
    /* XXX If we ever gonna change pid file pattern, come up with
     * some intelligence here to deal with old paths. */
    if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, obj->def->name)))
3362
        goto error;
3363 3364

    virNWFilterReadLockFilterUpdates();
3365 3366 3367 3368

    VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);

    /* XXX check PID liveliness & EXE path */
3369
    if (qemuConnectMonitor(driver, obj, QEMU_ASYNC_JOB_NONE, NULL) < 0)
3370 3371
        goto error;

3372
    if (qemuHostdevUpdateActiveDomainDevices(driver, obj->def) < 0)
3373 3374
        goto error;

3375
    if (qemuConnectCgroup(driver, obj) < 0)
3376 3377
        goto error;

3378
    /* XXX: Need to change as long as lock is introduced for
3379
     * qemu_driver->sharedDevices.
3380 3381
     */
    for (i = 0; i < obj->def->ndisks; i++) {
3382 3383
        virDomainDeviceDef dev;

3384
        if (virStorageTranslateDiskSourcePool(conn, obj->def->disks[i]) < 0)
3385
            goto error;
3386

3387 3388 3389 3390 3391 3392
        /* XXX we should be able to restore all data from XML in the future.
         * This should be the only place that calls qemuDomainDetermineDiskChain
         * with @report_broken == false to guarantee best-effort domain
         * reconnect */
        if (qemuDomainDetermineDiskChain(driver, obj, obj->def->disks[i],
                                         true, false) < 0)
3393 3394
            goto error;

3395 3396 3397
        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = obj->def->disks[i];
        if (qemuAddSharedDevice(driver, &dev, obj->def->name) < 0)
3398 3399 3400
            goto error;
    }

3401 3402 3403
    if (qemuProcessUpdateState(driver, obj) < 0)
        goto error;

3404
    state = virDomainObjGetState(obj, &reason);
3405 3406 3407
    if (state == VIR_DOMAIN_SHUTOFF ||
        (state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_STARTING_UP)) {
3408 3409 3410 3411 3412
        VIR_DEBUG("Domain '%s' wasn't fully started yet, killing it",
                  obj->def->name);
        goto error;
    }

3413 3414 3415
    /* If upgrading from old libvirtd we won't have found any
     * caps in the domain status, so re-query them
     */
3416 3417
    if (!priv->qemuCaps &&
        !(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
3418 3419
                                                      obj->def->emulator,
                                                      obj->def->os.machine)))
3420 3421
        goto error;

3422
    /* In case the domain shutdown while we were not running,
3423
     * we need to finish the shutdown process. And we need to do it after
3424
     * we have virQEMUCaps filled in.
3425
     */
3426 3427 3428 3429 3430 3431
    if (state == VIR_DOMAIN_SHUTDOWN ||
        (state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN)) {
        VIR_DEBUG("Finishing shutdown sequence for domain %s",
                  obj->def->name);
        qemuProcessShutdownOrReboot(driver, obj);
3432
        goto cleanup;
3433 3434
    }

3435 3436
    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE))
        if ((qemuDomainAssignAddresses(obj->def, priv->qemuCaps, obj)) < 0)
3437
            goto error;
3438

3439 3440 3441 3442 3443 3444
    /* if domain requests security driver we haven't loaded, report error, but
     * do not kill the domain
     */
    ignore_value(virSecurityManagerCheckAllLabel(driver->securityManager,
                                                 obj->def));

3445
    if (virSecurityManagerReserveLabel(driver->securityManager, obj->def, obj->pid) < 0)
3446 3447
        goto error;

3448 3449 3450
    if (qemuProcessNotifyNets(obj->def) < 0)
        goto error;

3451
    if (qemuProcessFiltersInstantiate(obj->def))
3452 3453
        goto error;

3454
    if (qemuDomainCheckEjectableMedia(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
3455 3456
        goto error;

3457
    if (qemuRefreshVirtioChannelState(driver, obj) < 0)
3458 3459
        goto error;

3460 3461 3462
    if (qemuProcessRefreshBalloonState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

3463 3464 3465
    if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
        goto error;

3466 3467 3468
    if (qemuProcessUpdateDevices(driver, obj) < 0)
        goto error;

3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479
    /* Failure to connect to agent shouldn't be fatal */
    if ((ret = qemuConnectAgent(driver, obj)) < 0) {
        if (ret == -2)
            goto error;

        VIR_WARN("Cannot connect to QEMU guest agent for %s",
                 obj->def->name);
        virResetLastError();
        priv->agentError = true;
    }

3480
    /* update domain state XML with possibly updated state in virDomainObj */
3481
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, obj, driver->caps) < 0)
3482 3483
        goto error;

3484 3485
    /* Run an hook to allow admins to do some magic */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
3486
        char *xml = qemuDomainDefFormatXML(driver, obj->def, 0);
3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, obj->def->name,
                              VIR_HOOK_QEMU_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
            goto error;
    }

3501
    if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
3502 3503
        driver->inhibitCallback(true, driver->inhibitOpaque);

3504 3505 3506 3507 3508 3509 3510 3511 3512 3513
 cleanup:
    if (jobStarted)
        qemuDomainObjEndJob(driver, obj);
    if (!virDomainObjIsActive(obj))
        qemuDomainRemoveInactive(driver, obj);
    virDomainObjEndAPI(&obj);
    virObjectUnref(conn);
    virObjectUnref(cfg);
    virNWFilterUnlockFilterUpdates();
    return;
3514

3515
 error:
3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529
    if (virDomainObjIsActive(obj)) {
        /* We can't get the monitor back, so must kill the VM
         * to remove danger of it ending up running twice if
         * user tries to start it again later
         */
        if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NO_SHUTDOWN)) {
            /* If we couldn't get the monitor and qemu supports
             * no-shutdown, we can safely say that the domain
             * crashed ... */
            state = VIR_DOMAIN_SHUTOFF_CRASHED;
        } else {
            /* ... but if it doesn't we can't say what the state
             * really is and FAILED means "failed to start" */
            state = VIR_DOMAIN_SHUTOFF_UNKNOWN;
3530
        }
3531 3532 3533 3534 3535
        /* If BeginJob failed, we jumped here without a job, let's hope another
         * thread didn't have a chance to start playing with the domain yet
         * (it's all we can do anyway).
         */
        qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
3536
    }
3537
    goto cleanup;
3538 3539
}

3540 3541
/*
 * Per-domain callback used when reconnecting to running domains.
 *
 * @obj: domain object to reconnect to
 * @opaque: struct qemuProcessReconnectData shared by all invocations
 *
 * Domains with no pid are skipped. For any other domain a private copy
 * of the shared data is handed to a new qemuProcessReconnect thread.
 *
 * Returns 0 on success or when the domain is skipped, -1 on allocation
 * or thread creation failure.
 */
static int
qemuProcessReconnectHelper(virDomainObjPtr obj,
                           void *opaque)
{
    struct qemuProcessReconnectData *tmpl = opaque;
    struct qemuProcessReconnectData *job;
    virThread worker;

    /* If the VM was inactive, we don't need to reconnect */
    if (obj->pid == 0)
        return 0;

    if (VIR_ALLOC(job) < 0)
        return -1;

    /* Private copy of the shared arguments, bound to this domain. */
    *job = *tmpl;
    job->obj = obj;

    /* this lock and reference will be eventually transferred to the thread
     * that handles the reconnect */
    virObjectLock(obj);
    virObjectRef(obj);

    /* Since we close the connection later on, we have to make sure that the
     * threads we start see a valid connection throughout their lifetime. We
     * simply increase the reference counter here.
     */
    virObjectRef(job->conn);

    if (virThreadCreate(&worker, false, qemuProcessReconnect, job) >= 0)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                   _("Could not create thread. QEMU initialization "
                     "might be incomplete"));

    /* We can't spawn a thread and thus connect to monitor. Kill qemu.
     * It's safe to call qemuProcessStop without a job here since there
     * is no thread that could be doing anything else with the same domain
     * object.
     */
    qemuProcessStop(tmpl->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
                    QEMU_ASYNC_JOB_NONE, 0);
    qemuDomainRemoveInactive(tmpl->driver, obj);

    /* Undo the lock/reference taken above and drop the per-thread data. */
    virDomainObjEndAPI(&obj);
    virObjectUnref(job->conn);
    VIR_FREE(job);
    return -1;
}

/**
 * qemuProcessReconnectAll
 *
 * Try to re-open the resources for live VMs that we care
 * about.
 */
void
3598
qemuProcessReconnectAll(virConnectPtr conn, virQEMUDriverPtr driver)
3599
{
3600
    struct qemuProcessReconnectData data = {.conn = conn, .driver = driver};
3601
    virDomainObjListForEach(driver->domains, qemuProcessReconnectHelper, &data);
3602 3603
}

3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626
/*
 * Acquire the ports a VNC graphics device still needs.
 *
 * Nothing is done when a socket is configured. Otherwise a port is
 * acquired from driver->remotePorts if autoport is set, and a websocket
 * port from driver->webSocketPorts if websocket is -1.
 *
 * Returns 0 on success, -1 if a port could not be acquired.
 */
static int
qemuProcessVNCAllocatePorts(virQEMUDriverPtr driver,
                            virDomainGraphicsDefPtr graphics)
{
    unsigned short acquired;

    if (graphics->data.vnc.socket)
        return 0;

    if (graphics->data.vnc.autoport) {
        if (virPortAllocatorAcquire(driver->remotePorts, &acquired) < 0)
            return -1;

        graphics->data.vnc.port = acquired;
    }

    if (graphics->data.vnc.websocket == -1) {
        if (virPortAllocatorAcquire(driver->webSocketPorts, &acquired) < 0)
            return -1;

        graphics->data.vnc.websocket = acquired;
    }

    return 0;
}
3627

3628
int
3629 3630
qemuProcessSPICEAllocatePorts(virQEMUDriverPtr driver,
                              virQEMUDriverConfigPtr cfg,
3631 3632
                              virDomainGraphicsDefPtr graphics,
                              bool allocate)
3633 3634 3635
{
    unsigned short port = 0;
    unsigned short tlsPort;
3636
    size_t i;
3637 3638 3639 3640 3641 3642 3643
    int defaultMode = graphics->data.spice.defaultMode;

    bool needTLSPort = false;
    bool needPort = false;

    if (graphics->data.spice.autoport) {
        /* check if tlsPort or port need allocation */
3644
        for (i = 0; i < VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_LAST; i++) {
3645 3646 3647 3648 3649 3650 3651 3652 3653 3654
            switch (graphics->data.spice.channels[i]) {
            case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
                needTLSPort = true;
                break;

            case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
                needPort = true;
                break;

            case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
3655
                /* default mode will be used */
3656 3657 3658
                break;
            }
        }
3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673
        switch (defaultMode) {
        case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
            needTLSPort = true;
            break;

        case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
            needPort = true;
            break;

        case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
            if (cfg->spiceTLS)
                needTLSPort = true;
            needPort = true;
            break;
        }
3674 3675
    }

3676 3677 3678 3679 3680 3681 3682 3683 3684 3685
    if (!allocate) {
        if (needPort || graphics->data.spice.port == -1)
            graphics->data.spice.port = 5901;

        if (needTLSPort || graphics->data.spice.tlsPort == -1)
            graphics->data.spice.tlsPort = 5902;

        return 0;
    }

3686
    if (needPort || graphics->data.spice.port == -1) {
3687
        if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
3688
            goto error;
3689 3690 3691 3692

        graphics->data.spice.port = port;
    }

3693 3694
    if (needTLSPort || graphics->data.spice.tlsPort == -1) {
        if (!cfg->spiceTLS) {
3695 3696 3697 3698
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("Auto allocation of spice TLS port requested "
                             "but spice TLS is disabled in qemu.conf"));
            goto error;
3699
        }
3700 3701 3702 3703 3704

        if (virPortAllocatorAcquire(driver->remotePorts, &tlsPort) < 0)
            goto error;

        graphics->data.spice.tlsPort = tlsPort;
3705 3706
    }

3707
    return 0;
3708

3709
 error:
3710
    virPortAllocatorRelease(driver->remotePorts, port);
3711
    return -1;
3712 3713 3714
}


3715 3716 3717
/*
 * Check the vCPU counts of @def.
 *
 * Fails if the domain has zero vCPUs, or if its maximum vCPU count
 * exceeds the non-zero limit reported for the machine type by @qemuCaps.
 *
 * Returns 0 if the counts are acceptable, -1 (with an error reported)
 * otherwise.
 */
static int
qemuValidateCpuCount(virDomainDefPtr def,
                     virQEMUCapsPtr qemuCaps)
{
    unsigned int machineLimit;

    machineLimit = virQEMUCapsGetMachineMaxCpus(qemuCaps, def->os.machine);

    if (virDomainDefGetVcpus(def) == 0) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Domain requires at least 1 vCPU"));
        return -1;
    }

    /* A limit of 0 is treated as "no limit". */
    if (machineLimit == 0)
        return 0;

    if (virDomainDefGetVcpusMax(def) > machineLimit) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Maximum CPUs greater than specified machine type limit"));
        return -1;
    }

    return 0;
}

3736 3737

static bool
3738 3739 3740
qemuProcessVerifyGuestCPU(virQEMUDriverPtr driver,
                          virDomainObjPtr vm,
                          int asyncJob)
3741 3742 3743 3744 3745
{
    virDomainDefPtr def = vm->def;
    virArch arch = def->os.arch;
    virCPUDataPtr guestcpu = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
3746
    int rc;
3747
    bool ret = false;
J
Ján Tomko 已提交
3748
    size_t i;
3749

3750 3751 3752 3753 3754
    /* no features are passed to QEMU with -cpu host
     * so it makes no sense to verify them */
    if (def->cpu && def->cpu->mode == VIR_CPU_MODE_HOST_PASSTHROUGH)
        return true;

3755 3756 3757
    switch (arch) {
    case VIR_ARCH_I686:
    case VIR_ARCH_X86_64:
3758 3759
        if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
            return false;
3760
        rc = qemuMonitorGetGuestCPU(priv->mon, arch, &guestcpu);
3761 3762
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            return false;
3763

3764 3765 3766 3767
        if (rc < 0) {
            if (rc == -2)
                break;

3768
            goto cleanup;
3769
        }
3770

J
Ján Tomko 已提交
3771
        if (def->features[VIR_DOMAIN_FEATURE_PVSPINLOCK] == VIR_TRISTATE_SWITCH_ON) {
3772 3773 3774 3775 3776 3777
            if (!cpuHasFeature(guestcpu, VIR_CPU_x86_KVM_PV_UNHALT)) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                               _("host doesn't support paravirtual spinlocks"));
                goto cleanup;
            }
        }
J
Ján Tomko 已提交
3778

3779
        for (i = 0; def->cpu && i < def->cpu->nfeatures; i++) {
J
Ján Tomko 已提交
3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791
            virCPUFeatureDefPtr feature = &def->cpu->features[i];

            if (feature->policy != VIR_CPU_FEATURE_REQUIRE)
                continue;

            if (STREQ(feature->name, "invtsc") &&
                !cpuHasFeature(guestcpu, feature->name)) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                               _("host doesn't support invariant TSC"));
                goto cleanup;
            }
        }
3792 3793 3794 3795 3796 3797 3798 3799
        break;

    default:
        break;
    }

    ret = true;

3800
 cleanup:
3801 3802 3803 3804 3805
    cpuDataFree(guestcpu);
    return ret;
}


3806 3807
static int
qemuPrepareNVRAM(virQEMUDriverConfigPtr cfg,
3808
                 virCapsPtr caps,
3809
                 virDomainObjPtr vm,
3810 3811 3812 3813 3814
                 bool migrated)
{
    int ret = -1;
    int srcFD = -1;
    int dstFD = -1;
3815
    virDomainLoaderDefPtr loader = vm->def->os.loader;
3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840
    bool generated = false;
    bool created = false;

    /* Unless domain has RO loader of pflash type, we have
     * nothing to do here.  If the loader is RW then it's not
     * using split code and vars feature, so no nvram file needs
     * to be created. */
    if (!loader || loader->type != VIR_DOMAIN_LOADER_TYPE_PFLASH ||
        loader->readonly != VIR_TRISTATE_SWITCH_ON)
        return 0;

    /* If the nvram path is configured already, there's nothing
     * we need to do. Unless we are starting the destination side
     * of migration in which case nvram is configured in the
     * domain XML but the file doesn't exist yet. Moreover, after
     * the migration is completed, qemu will invoke a
     * synchronization write into the nvram file so we don't have
     * to take care about transmitting the real data on the other
     * side. */
    if (loader->nvram && !migrated)
        return 0;

    /* Autogenerate nvram path if needed.*/
    if (!loader->nvram) {
        if (virAsprintf(&loader->nvram,
3841 3842
                        "%s/%s_VARS.fd",
                        cfg->nvramDir, vm->def->name) < 0)
3843 3844 3845
            goto cleanup;

        generated = true;
3846

3847
        if (vm->persistent &&
3848
            virDomainSaveConfig(cfg->configDir, caps, vm->def) < 0)
3849
            goto cleanup;
3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939
    }

    if (!virFileExists(loader->nvram)) {
        const char *master_nvram_path = loader->templt;
        ssize_t r;

        if (!loader->templt) {
            size_t i;
            for (i = 0; i < cfg->nloader; i++) {
                if (STREQ(cfg->loader[i], loader->path)) {
                    master_nvram_path = cfg->nvram[i];
                    break;
                }
            }
        }

        if (!master_nvram_path) {
            virReportError(VIR_ERR_OPERATION_FAILED,
                           _("unable to find any master var store for "
                             "loader: %s"), loader->path);
            goto cleanup;
        }

        if ((srcFD = virFileOpenAs(master_nvram_path, O_RDONLY,
                                   0, -1, -1, 0)) < 0) {
            virReportSystemError(-srcFD,
                                 _("Failed to open file '%s'"),
                                 master_nvram_path);
            goto cleanup;
        }
        if ((dstFD = virFileOpenAs(loader->nvram,
                                   O_WRONLY | O_CREAT | O_EXCL,
                                   S_IRUSR | S_IWUSR,
                                   cfg->user, cfg->group, 0)) < 0) {
            virReportSystemError(-dstFD,
                                 _("Failed to create file '%s'"),
                                 loader->nvram);
            goto cleanup;
        }
        created = true;

        do {
            char buf[1024];

            if ((r = saferead(srcFD, buf, sizeof(buf))) < 0) {
                virReportSystemError(errno,
                                     _("Unable to read from file '%s'"),
                                     master_nvram_path);
                goto cleanup;
            }

            if (safewrite(dstFD, buf, r) < 0) {
                virReportSystemError(errno,
                                     _("Unable to write to file '%s'"),
                                     loader->nvram);
                goto cleanup;
            }
        } while (r);

        if (VIR_CLOSE(srcFD) < 0) {
            virReportSystemError(errno,
                                 _("Unable to close file '%s'"),
                                 master_nvram_path);
            goto cleanup;
        }
        if (VIR_CLOSE(dstFD) < 0) {
            virReportSystemError(errno,
                                 _("Unable to close file '%s'"),
                                 loader->nvram);
            goto cleanup;
        }
    }

    ret = 0;
 cleanup:
    /* We successfully generated the nvram path, but failed to
     * copy the file content. Roll back. */
    if (ret < 0) {
        if (created)
            unlink(loader->nvram);
        if (generated)
            VIR_FREE(loader->nvram);
    }

    VIR_FORCE_CLOSE(srcFD);
    VIR_FORCE_CLOSE(dstFD);
    return ret;
}


3940 3941 3942
static void
qemuLogOperation(virDomainObjPtr vm,
                 const char *msg,
3943 3944
                 virCommandPtr cmd,
                 qemuDomainLogContextPtr logCtxt)
3945 3946 3947 3948 3949
{
    char *timestamp;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int qemuVersion = virQEMUCapsGetVersion(priv->qemuCaps);
    const char *package = virQEMUCapsGetPackage(priv->qemuCaps);
3950
    char *hostname = virGetHostname();
3951 3952

    if ((timestamp = virTimeStringNow()) == NULL)
3953
        goto cleanup;
3954

3955 3956
    if (qemuDomainLogContextWrite(logCtxt,
                                  "%s: %s %s, qemu version: %d.%d.%d%s, hostname: %s\n",
3957 3958 3959 3960
                                  timestamp, msg, VIR_LOG_VERSION_STRING,
                                  (qemuVersion / 1000000) % 1000,
                                  (qemuVersion / 1000) % 1000,
                                  qemuVersion % 1000,
3961 3962
                                  package ? package : "",
                                  hostname ? hostname : "") < 0)
3963
        goto cleanup;
3964

3965 3966 3967 3968 3969
    if (cmd) {
        char *args = virCommandToString(cmd);
        qemuDomainLogContextWrite(logCtxt, "%s\n", args);
        VIR_FREE(args);
    }
3970 3971

 cleanup:
3972
    VIR_FREE(hostname);
3973 3974 3975
    VIR_FREE(timestamp);
}

3976 3977 3978 3979 3980 3981 3982

/*
 * Free a qemuProcessIncomingDef together with the strings it owns
 * (address, launchURI, deferredURI). A NULL @inc is accepted.
 */
void
qemuProcessIncomingDefFree(qemuProcessIncomingDefPtr inc)
{
    if (inc) {
        VIR_FREE(inc->address);
        VIR_FREE(inc->launchURI);
        VIR_FREE(inc->deferredURI);
        VIR_FREE(inc);
    }
}


/*
 * Allocate and fill a qemuProcessIncomingDef for an incoming migration.
 *
 * The listen address is duplicated and the launch URI is built from
 * @migrateFrom and @fd. When QEMU_CAPS_INCOMING_DEFER is available the
 * real URI is kept in deferredURI and launchURI becomes "defer".
 *
 * This function does not copy @path, the caller is responsible for keeping
 * the @path pointer valid during the lifetime of the allocated
 * qemuProcessIncomingDef structure.
 *
 * Returns the new structure, or NULL on error.
 */
qemuProcessIncomingDefPtr
qemuProcessIncomingDefNew(virQEMUCapsPtr qemuCaps,
                          const char *listenAddress,
                          const char *migrateFrom,
                          int fd,
                          const char *path)
{
    qemuProcessIncomingDefPtr incoming = NULL;

    if (qemuMigrationCheckIncoming(qemuCaps, migrateFrom) < 0)
        return NULL;

    if (VIR_ALLOC(incoming) < 0)
        return NULL;

    if (VIR_STRDUP(incoming->address, listenAddress) < 0)
        goto error;

    if (!(incoming->launchURI = qemuMigrationIncomingURI(migrateFrom, fd)))
        goto error;

    if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_INCOMING_DEFER)) {
        /* Keep the real URI aside and launch with "defer" instead. */
        incoming->deferredURI = incoming->launchURI;
        if (VIR_STRDUP(incoming->launchURI, "defer") < 0)
            goto error;
    }

    incoming->fd = fd;
    incoming->path = path;

    return incoming;

 error:
    qemuProcessIncomingDefFree(incoming);
    return NULL;
}


4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063
/*
 * Begin a QEMU_ASYNC_JOB_START async job on @vm.
 *
 * The caller must finish the job with qemuProcessEndJob and, between the
 * two calls, pass QEMU_ASYNC_JOB_START as the @asyncJob argument to any
 * function that takes one.
 *
 * Returns 0 on success, -1 if the async job could not be started.
 */
int
qemuProcessBeginJob(virQEMUDriverPtr driver,
                    virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv;

    if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_START) < 0)
        return -1;

    qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);

    priv = vm->privateData;
    priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED;

    return 0;
}


/*
 * End the async job on @vm; counterpart of qemuProcessBeginJob.
 */
void
qemuProcessEndJob(virQEMUDriverPtr driver, virDomainObjPtr vm)
{
    qemuDomainObjEndAsyncJob(driver, vm);
}


4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086
/*
 * Run the QEMU hook for operation @op / sub-operation @subop, passing
 * the formatted domain XML to it.
 *
 * Returns 0 when no QEMU hook is present, -1 if the XML could not be
 * formatted, otherwise the return value of virHookCall.
 */
static int
qemuProcessStartHook(virQEMUDriverPtr driver,
                     virDomainObjPtr vm,
                     virHookQemuOpType op,
                     virHookSubopType subop)
{
    char *domXML;
    int hookRet;

    if (!virHookPresent(VIR_HOOK_DRIVER_QEMU))
        return 0;

    domXML = qemuDomainDefFormatXML(driver, vm->def, 0);
    if (!domXML)
        return -1;

    hookRet = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, op, subop,
                          NULL, domXML, NULL);

    VIR_FREE(domXML);
    return hookRet;
}


4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164
/*
 * Prepare ports and listen addresses for VNC and SPICE graphics devices.
 *
 * A first pass marks explicitly configured (non-autoport) ports as used
 * in driver->remotePorts. A second pass allocates the remaining ports
 * and, for devices without any listen element, adds one of type address
 * taken from the driver configuration (vncListen / spiceListen).
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupGraphics(virQEMUDriverPtr driver,
                         virDomainObjPtr vm)
{
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    size_t idx;
    int ret = -1;

    /* Pass 1: reserve the ports the user picked explicitly. */
    for (idx = 0; idx < vm->def->ngraphics; ++idx) {
        virDomainGraphicsDefPtr gfx = vm->def->graphics[idx];

        if (gfx->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
            if (gfx->data.vnc.autoport)
                continue;

            if (virPortAllocatorSetUsed(driver->remotePorts,
                                        gfx->data.vnc.port,
                                        true) < 0)
                goto cleanup;
            gfx->data.vnc.portReserved = true;
            continue;
        }

        if (gfx->type != VIR_DOMAIN_GRAPHICS_TYPE_SPICE ||
            gfx->data.spice.autoport)
            continue;

        if (gfx->data.spice.port > 0) {
            if (virPortAllocatorSetUsed(driver->remotePorts,
                                        gfx->data.spice.port,
                                        true) < 0)
                goto cleanup;
            gfx->data.spice.portReserved = true;
        }

        if (gfx->data.spice.tlsPort > 0) {
            if (virPortAllocatorSetUsed(driver->remotePorts,
                                        gfx->data.spice.tlsPort,
                                        true) < 0)
                goto cleanup;
            gfx->data.spice.tlsPortReserved = true;
        }
    }

    /* Pass 2: allocate remaining ports and default the listen address. */
    for (idx = 0; idx < vm->def->ngraphics; ++idx) {
        virDomainGraphicsDefPtr gfx = vm->def->graphics[idx];
        bool isVNC = gfx->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC;
        bool isSPICE = gfx->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE;

        if (isVNC) {
            if (qemuProcessVNCAllocatePorts(driver, gfx) < 0)
                goto cleanup;
        } else if (isSPICE) {
            if (qemuProcessSPICEAllocatePorts(driver, cfg, gfx, true) < 0)
                goto cleanup;
        } else {
            continue;
        }

        if (gfx->nListens > 1) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("QEMU does not support multiple listen "
                             "addresses for one graphics device."));
            goto cleanup;
        }

        if (gfx->nListens != 0)
            continue;

        if (VIR_EXPAND_N(gfx->listens, gfx->nListens, 1) < 0)
            goto cleanup;

        gfx->listens[0].type = VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS;
        if (VIR_STRDUP(gfx->listens[0].address,
                       isVNC ? cfg->vncListen : cfg->spiceListen) < 0) {
            /* Drop the element that was just added. */
            VIR_SHRINK_N(gfx->listens, gfx->nListens, 1);
            goto cleanup;
        }
        gfx->listens[0].fromConfig = true;
    }

    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    return ret;
}


4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223
/*
 * Register the domain's disks as shared devices, apply their
 * unpriv_sgio setting, and allow CAP_SYS_RAWIO for the QEMU command
 * when any disk or SCSI host device requests raw I/O.
 *
 * @driver: QEMU driver
 * @vm: domain object
 * @cmd: command to grant the capability to (only used when
 *       CAP_SYS_RAWIO is defined)
 *
 * Returns 0 on success, -1 on error. Where CAP_SYS_RAWIO is not defined,
 * a raw I/O request is reported as unsupported.
 */
static int
qemuProcessSetupRawIO(virQEMUDriverPtr driver,
                      virDomainObjPtr vm,
                      virCommandPtr cmd ATTRIBUTE_UNUSED)
{
    bool rawio = false;
    size_t i;
    int ret = -1;

    /* in case a certain disk is desirous of CAP_SYS_RAWIO, add this */
    for (i = 0; i < vm->def->ndisks; i++) {
        virDomainDeviceDef dev;
        virDomainDiskDefPtr disk = vm->def->disks[i];

        if (disk->rawio == VIR_TRISTATE_BOOL_YES) {
            rawio = true;
#ifndef CAP_SYS_RAWIO
            /* Unsupported on this platform; the error is raised below. */
            break;
#endif
        }

        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = disk;
        if (qemuAddSharedDevice(driver, &dev, vm->def->name) < 0)
            goto cleanup;

        if (qemuSetUnprivSGIO(&dev) < 0)
            goto cleanup;
    }

    /* If rawio not already set, check hostdevs as well */
    if (!rawio) {
        for (i = 0; i < vm->def->nhostdevs; i++) {
            virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i];
            virDomainHostdevSubsysSCSIPtr scsisrc;

            /* The rawio field only exists in the SCSI member of the
             * subsys union; reading it for any other hostdev kind would
             * interpret unrelated data. */
            if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
                hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI)
                continue;

            scsisrc = &hostdev->source.subsys.u.scsi;
            if (scsisrc->rawio == VIR_TRISTATE_BOOL_YES) {
                rawio = true;
                break;
            }
        }
    }

    ret = 0;

 cleanup:
    if (rawio) {
#ifdef CAP_SYS_RAWIO
        if (ret == 0)
            virCommandAllowCap(cmd, CAP_SYS_RAWIO);
#else
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Raw I/O is not supported on this platform"));
        ret = -1;
#endif
    }
    return ret;
}


4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256
/*
 * Apply the memory balloon settings of @vm through the monitor.
 *
 * @driver: QEMU driver
 * @vm: domain object
 * @asyncJob: async job the monitor is entered under
 *
 * Does nothing when the domain has no balloon device or its model is
 * "none". Otherwise the stats period (if non-zero) and the current
 * balloon size are sent to the monitor; a failure to set the stats
 * period is ignored.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupBalloon(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
                        qemuDomainAsyncJob asyncJob)
{
    unsigned long long balloon = vm->def->mem.cur_balloon;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int period;
    int ret = -1;

    if (!vm->def->memballoon ||
        vm->def->memballoon->model == VIR_DOMAIN_MEMBALLOON_MODEL_NONE)
        return 0;

    period = vm->def->memballoon->period;

    /* The monitor was not entered on failure, so it must not be exited:
     * return directly instead of going through the cleanup label. */
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;

    if (period)
        qemuMonitorSetMemoryStatsPeriod(priv->mon, period);
    if (qemuMonitorSetBalloon(priv->mon, balloon) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
    return ret;
}


J
Jiri Denemark 已提交
4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284
/*
 * Create the directory "<parentDir>/domain-<name>" with mode 0750 and
 * set the domain's security label on it.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessMakeDir(virQEMUDriverPtr driver,
                   virDomainObjPtr vm,
                   const char *parentDir)
{
    char *domainDir = NULL;
    int ret = -1;

    if (virAsprintf(&domainDir, "%s/domain-%s", parentDir, vm->def->name) < 0)
        goto cleanup;

    if (virFileMakePathWithMode(domainDir, 0750) < 0) {
        virReportSystemError(errno, _("Cannot create directory '%s'"), domainDir);
        goto cleanup;
    }

    if (virSecurityManagerDomainSetDirLabel(driver->securityManager,
                                            vm->def, domainDir) >= 0)
        ret = 0;

 cleanup:
    VIR_FREE(domainDir);
    return ret;
}


4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310
/**
 * qemuProcessStartValidate:
 * @def: domain definition
 * @qemuCaps: emulator capabilities
 * @migration: restoration of existing state
 * @snapshot: when true (like @migration), the duplicate disk info check
 *            is skipped
 *
 * This function aggregates checks independent from host state done prior to
 * start of a VM.
 *
 * Returns 0 if all checks passed, -1 otherwise.
 */
int
qemuProcessStartValidate(virDomainDefPtr def,
                         virQEMUCapsPtr qemuCaps,
                         bool migration,
                         bool snapshot)
{
    if (qemuValidateCpuCount(def, qemuCaps) < 0)
        return -1;

    if (migration || snapshot)
        return 0;

    if (virDomainDefCheckDuplicateDiskInfo(def) < 0)
        return -1;

    return 0;
}


J
Jiri Denemark 已提交
4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321
/**
 * qemuProcessInit:
 *
 * Prepares the domain up to the point when priv->qemuCaps is initialized. The
 * function calls qemuProcessStop when needed.
 *
 * Returns 0 on success, -1 on error.
 */
int
qemuProcessInit(virQEMUDriverPtr driver,
                virDomainObjPtr vm,
4322
                qemuDomainAsyncJob asyncJob,
4323 4324
                bool migration,
                bool snap)
J
Jiri Denemark 已提交
4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343
{
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    virCapsPtr caps = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int stopFlags;
    int ret = -1;

    VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
              vm, vm->def->name, vm->def->id, migration);

    VIR_DEBUG("Beginning VM startup process");

    if (virDomainObjIsActive(vm)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("VM is already active"));
        goto cleanup;
    }

    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
4344 4345 4346 4347 4348 4349 4350 4351
        goto cleanup;

    VIR_DEBUG("Determining emulator version");
    virObjectUnref(priv->qemuCaps);
    if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
                                                      vm->def->emulator,
                                                      vm->def->os.machine)))
        goto cleanup;
J
Jiri Denemark 已提交
4352

4353 4354 4355
    if (qemuProcessStartValidate(vm->def, priv->qemuCaps, migration, snap) < 0)
        goto cleanup;

J
Jiri Denemark 已提交
4356 4357 4358 4359
    /* Some things, paths, ... are generated here and we want them to persist.
     * Fill them in prior to setting the domain def as transient. */
    VIR_DEBUG("Generating paths");

4360
    if (qemuPrepareNVRAM(cfg, caps, vm, migration) < 0)
J
Jiri Denemark 已提交
4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394
        goto stop;

    /* Do this upfront, so any part of the startup process can add
     * runtime state to vm->def that won't be persisted. This let's us
     * report implicit runtime defaults in the XML, like vnc listen/socket
     */
    VIR_DEBUG("Setting current domain def as transient");
    if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm, true) < 0)
        goto stop;

    vm->def->id = qemuDriverAllocateID(driver);
    qemuDomainSetFakeReboot(driver, vm, false);
    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_STARTING_UP);

    if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
        driver->inhibitCallback(true, driver->inhibitOpaque);

    /* Run an early hook to set-up missing devices */
    if (qemuProcessStartHook(driver, vm,
                             VIR_HOOK_QEMU_OP_PREPARE,
                             VIR_HOOK_SUBOP_BEGIN) < 0)
        goto stop;

    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    virObjectUnref(caps);
    return ret;

 stop:
    stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
    if (migration)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
4395
    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
J
Jiri Denemark 已提交
4396 4397 4398 4399
    goto cleanup;
}


4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452
/**
 * qemuProcessNetworkPrepareDevices
 * @def: domain definition whose network interfaces are prepared
 *
 * Allocates the actual device for every interface of @def. Interfaces of
 * type network whose actual type turns out to be hostdev additionally get
 * their hostdev inserted into the hostdevs array of @def.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessNetworkPrepareDevices(virDomainDefPtr def)
{
    size_t idx;

    for (idx = 0; idx < def->nnets; idx++) {
        virDomainNetDefPtr net = def->nets[idx];
        virDomainHostdevDefPtr hostdev;
        virDomainHostdevSubsysPCIPtr pcisrc;

        /* If appropriate, grab a physical device from the configured
         * network's pool of devices, or resolve bridge device name
         * to the one defined in the network definition.
         */
        if (networkAllocateActualDevice(def, net) < 0)
            return -1;

        if (virDomainNetGetActualType(net) != VIR_DOMAIN_NET_TYPE_HOSTDEV ||
            net->type != VIR_DOMAIN_NET_TYPE_NETWORK)
            continue;

        /* Each type='hostdev' network device must also have a
         * corresponding entry in the hostdevs array. For netdevs
         * that are hardcoded as type='hostdev', this is already
         * done by the parser, but for those allocated from a
         * network / determined at runtime, we need to do it
         * separately.
         */
        hostdev = virDomainNetGetActualHostdev(net);
        pcisrc = &hostdev->source.subsys.u.pci;

        if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("PCI device %04x:%02x:%02x.%x "
                             "allocated from network %s is already "
                             "in use by domain %s"),
                           pcisrc->addr.domain, pcisrc->addr.bus,
                           pcisrc->addr.slot, pcisrc->addr.function,
                           net->data.network.name, def->name);
            return -1;
        }

        if (virDomainHostdevInsert(def, hostdev) < 0)
            return -1;
    }

    return 0;
}


4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595
/**
 * qemuProcessSetupVcpu:
 * @vm: domain object
 * @vcpuid: id of the vCPU whose settings are applied
 *
 * Applies the resource settings of a single vCPU thread: a per-vCPU cgroup
 * (bandwidth, cpuset mems and cpus) when the cpu or cpuset controller is
 * available, the CPU affinity of the thread, and its scheduler policy and
 * priority. The vCPU is expected to be online and its thread id to have
 * been detected before this function is called.
 *
 * Returns 0 on success, -1 on error.
 */
int
qemuProcessSetupVcpu(virDomainObjPtr vm,
                     unsigned int vcpuid)
{
    pid_t vcpupid = qemuDomainGetVcpuPid(vm, vcpuid);
    virDomainVcpuInfoPtr vcpu = virDomainDefGetVcpu(vm->def, vcpuid);
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned long long period = vm->def->cputune.period;
    long long quota = vm->def->cputune.quota;
    virDomainNumatuneMemMode numaMode;
    virCgroupPtr vcpuGroup = NULL;
    virBitmapPtr pinning;
    char *nodeset = NULL;
    int ret = -1;

    /* A per-vCPU cgroup is created only if at least one of the cpu and
     * cpuset controllers is available. */
    if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
        virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {

        /* The memory nodeset is formatted only for strict numatune mode. */
        if (virDomainNumatuneGetMode(vm->def->numa, -1, &numaMode) == 0 &&
            numaMode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
            if (virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                                    priv->autoNodeset,
                                                    &nodeset, -1) < 0)
                goto cleanup;
        }

        if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_VCPU, vcpuid,
                               true, &vcpuGroup) < 0)
            goto cleanup;

        if ((period || quota) &&
            qemuSetupCgroupVcpuBW(vcpuGroup, period, quota) < 0)
            goto cleanup;
    }

    /* Pick the cpumask: the vCPU's own one wins, then the automatic
     * placement set, then the domain-wide mask. */
    pinning = vcpu->cpumask;
    if (!pinning) {
        if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
            pinning = priv->autoCpuset;
        else
            pinning = vm->def->cpumask;
    }

    /* Configure the cgroup before moving the thread into it. */
    if (vcpuGroup) {
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
            if (nodeset && virCgroupSetCpusetMems(vcpuGroup, nodeset) < 0)
                goto cleanup;

            if (pinning && qemuSetupCgroupCpusetCpus(vcpuGroup, pinning) < 0)
                goto cleanup;
        }

        /* move the thread for vcpu to sub dir */
        if (virCgroupAddTask(vcpuGroup, vcpupid) < 0)
            goto cleanup;
    }

    /* setup legacy affinity */
    if (pinning && virProcessSetAffinity(vcpupid, pinning) < 0)
        goto cleanup;

    /* set scheduler type and priority */
    if (vcpu->sched.policy != VIR_PROC_POLICY_NONE &&
        virProcessSetScheduler(vcpupid, vcpu->sched.policy,
                               vcpu->sched.priority) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_FREE(nodeset);
    if (vcpuGroup) {
        /* On failure remove the cgroup again before dropping the handle. */
        if (ret < 0)
            virCgroupRemove(vcpuGroup);
        virCgroupFree(&vcpuGroup);
    }

    return ret;
}


/**
 * qemuProcessSetupVcpus:
 * @vm: domain object
 *
 * Runs qemuProcessSetupVcpu() for every online vCPU of @vm. If
 * qemuDomainHasVcpuPids() reports false, nothing is set up; the function
 * then only checks that no online vCPU has a cpumask differing from the
 * domain-wide one.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupVcpus(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
    size_t idx;

    if ((vm->def->cputune.period || vm->def->cputune.quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    if (qemuDomainHasVcpuPids(vm)) {
        for (idx = 0; idx < maxvcpus; idx++) {
            virDomainVcpuInfoPtr vcpu = virDomainDefGetVcpu(vm->def, idx);

            if (vcpu->online && qemuProcessSetupVcpu(vm, idx) < 0)
                return -1;
        }

        return 0;
    }

    /* If any CPU has custom affinity that differs from the
     * VM default affinity, we must reject it */
    for (idx = 0; idx < maxvcpus; idx++) {
        virDomainVcpuInfoPtr vcpu = virDomainDefGetVcpu(vm->def, idx);

        if (!vcpu->online || !vcpu->cpumask)
            continue;

        if (!virBitmapEqual(vm->def->cpumask, vcpu->cpumask)) {
            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                           _("cpu affinity is not supported"));
            return -1;
        }
    }

    return 0;
}


4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706
/**
 * qemuProcessSetupIOThread:
 * @vm: domain object
 * @iothread: definition of the IOThread whose settings are applied
 *
 * Applies the resource settings of a single IOThread: a per-IOThread cgroup
 * (bandwidth, cpuset mems and cpus) when the cpu or cpuset controller is
 * available, the CPU affinity of the thread, and its scheduler policy and
 * priority. The IOThread is expected to be online and its thread id to have
 * been detected before this function is called.
 *
 * Returns 0 on success, -1 on error.
 */
int
qemuProcessSetupIOThread(virDomainObjPtr vm,
                         virDomainIOThreadIDDefPtr iothread)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned long long period = vm->def->cputune.period;
    long long quota = vm->def->cputune.quota;
    virDomainNumatuneMemMode numaMode;
    virCgroupPtr iothreadGroup = NULL;
    virBitmapPtr pinning = NULL;
    char *nodeset = NULL;
    int ret = -1;

    /* Bandwidth tuning cannot be applied without the cpu controller. */
    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    /* A per-IOThread cgroup is created only if at least one of the cpu and
     * cpuset controllers is available. */
    if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
        virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {

        /* The memory nodeset is formatted only for strict numatune mode. */
        if (virDomainNumatuneGetMode(vm->def->numa, -1, &numaMode) == 0 &&
            numaMode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
            if (virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                                    priv->autoNodeset,
                                                    &nodeset, -1) < 0)
                goto cleanup;
        }

        if (virCgroupNewThread(priv->cgroup, VIR_CGROUP_THREAD_IOTHREAD,
                               iothread->iothread_id,
                               true, &iothreadGroup) < 0)
            goto cleanup;
    }

    /* Pick the cpumask: the IOThread's own one wins, then the automatic
     * placement set, then the domain-wide mask. */
    pinning = iothread->cpumask;
    if (!pinning) {
        if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
            pinning = priv->autoCpuset;
        else
            pinning = vm->def->cpumask;
    }

    if ((period || quota) &&
        qemuSetupCgroupVcpuBW(iothreadGroup, period, quota) < 0)
        goto cleanup;

    /* Configure the cgroup before moving the thread into it. */
    if (iothreadGroup) {
        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
            if (nodeset && virCgroupSetCpusetMems(iothreadGroup, nodeset) < 0)
                goto cleanup;

            if (pinning && qemuSetupCgroupCpusetCpus(iothreadGroup, pinning) < 0)
                goto cleanup;
        }

        if (virCgroupAddTask(iothreadGroup, iothread->thread_id) < 0)
            goto cleanup;
    }

    /* Thread affinity is set whether or not a cgroup was created. */
    if (pinning && virProcessSetAffinity(iothread->thread_id, pinning) < 0)
        goto cleanup;

    if (iothread->sched.policy != VIR_PROC_POLICY_NONE) {
        if (virProcessSetScheduler(iothread->thread_id, iothread->sched.policy,
                                   iothread->sched.priority) < 0)
            goto cleanup;
    }

    ret = 0;

 cleanup:
    if (iothreadGroup) {
        /* On failure remove the cgroup again before dropping the handle. */
        if (ret < 0)
            virCgroupRemove(iothreadGroup);
        virCgroupFree(&iothreadGroup);
    }

    VIR_FREE(nodeset);
    return ret;
}


/**
 * qemuProcessSetupIOThreads:
 * @vm: domain object
 *
 * Runs qemuProcessSetupIOThread() for every entry of the domain's
 * iothreadids array, stopping at the first failure.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupIOThreads(virDomainObjPtr vm)
{
    size_t idx;

    for (idx = 0; idx < vm->def->niothreadids; idx++) {
        if (qemuProcessSetupIOThread(vm, vm->def->iothreadids[idx]) < 0)
            return -1;
    }

    return 0;
}


J
Jiri Denemark 已提交
4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728
/**
 * qemuProcessLaunch:
 *
 * Launch a new QEMU process with stopped virtual CPUs.
 *
 * The caller is supposed to call qemuProcessStop with appropriate
 * flags in case of failure.
 *
 * Returns 0 on success,
 *        -1 on error which happened before devices were labeled and thus
 *           there is no need to restore them,
 *        -2 on error requesting security labels to be restored.
 */
int
qemuProcessLaunch(virConnectPtr conn,
                  virQEMUDriverPtr driver,
                  virDomainObjPtr vm,
                  qemuDomainAsyncJob asyncJob,
                  qemuProcessIncomingDefPtr incoming,
                  virDomainSnapshotObjPtr snapshot,
                  virNetDevVPortProfileOp vmop,
                  unsigned int flags)
4729
{
4730
    int ret = -1;
4731
    int rv;
4732
    int logfile = -1;
4733
    qemuDomainLogContextPtr logCtxt = NULL;
4734 4735 4736
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virCommandPtr cmd = NULL;
    struct qemuProcessHookData hookData;
4737
    size_t i;
4738
    char *nodeset = NULL;
J
Jiri Denemark 已提交
4739
    virQEMUDriverConfigPtr cfg;
4740
    virCapsPtr caps = NULL;
4741
    unsigned int hostdev_flags = 0;
4742 4743
    size_t nnicindexes = 0;
    int *nicindexes = NULL;
4744
    bool check_shmem = false;
4745

J
Jiri Denemark 已提交
4746 4747 4748 4749 4750 4751 4752 4753 4754 4755
    VIR_DEBUG("vm=%p name=%s id=%d asyncJob=%d "
              "incoming.launchURI=%s incoming.deferredURI=%s "
              "incoming.fd=%d incoming.path=%s "
              "snapshot=%p vmop=%d flags=0x%x",
              vm, vm->def->name, vm->def->id, asyncJob,
              NULLSTR(incoming ? incoming->launchURI : NULL),
              NULLSTR(incoming ? incoming->deferredURI : NULL),
              incoming ? incoming->fd : -1,
              NULLSTR(incoming ? incoming->path : NULL),
              snapshot, vmop, flags);
4756

4757 4758 4759 4760
    /* Okay, these are just internal flags,
     * but doesn't hurt to check */
    virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
                  VIR_QEMU_PROCESS_START_PAUSED |
4761
                  VIR_QEMU_PROCESS_START_AUTODESTROY, -1);
4762

4763 4764
    cfg = virQEMUDriverGetConfig(driver);

4765 4766 4767
    hookData.conn = conn;
    hookData.vm = vm;
    hookData.driver = driver;
4768 4769
    /* We don't increase cfg's reference counter here. */
    hookData.cfg = cfg;
4770

4771
    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
J
Jiri Denemark 已提交
4772
        goto cleanup;
4773

4774 4775 4776 4777 4778
    /* network devices must be "prepared" before hostdevs, because
     * setting up a network device might create a new hostdev that
     * will need to be setup.
     */
    VIR_DEBUG("Preparing network devices");
4779
    if (qemuProcessNetworkPrepareDevices(vm->def) < 0)
J
Jiri Denemark 已提交
4780
        goto cleanup;
4781

4782
    /* Must be run before security labelling */
4783
    VIR_DEBUG("Preparing host devices");
4784 4785
    if (!cfg->relaxedACS)
        hostdev_flags |= VIR_HOSTDEV_STRICT_ACS_CHECK;
J
Jiri Denemark 已提交
4786
    if (!incoming)
4787
        hostdev_flags |= VIR_HOSTDEV_COLD_BOOT;
4788 4789
    if (qemuHostdevPrepareDomainDevices(driver, vm->def, priv->qemuCaps,
                                        hostdev_flags) < 0)
J
Jiri Denemark 已提交
4790
        goto cleanup;
4791

4792
    VIR_DEBUG("Preparing chr devices");
4793 4794 4795 4796
    if (virDomainChrDefForeach(vm->def,
                               true,
                               qemuProcessPrepareChardevDevice,
                               NULL) < 0)
J
Jiri Denemark 已提交
4797
        goto cleanup;
4798

4799 4800
    VIR_DEBUG("Checking domain and device security labels");
    if (virSecurityManagerCheckAllLabel(driver->securityManager, vm->def) < 0)
J
Jiri Denemark 已提交
4801
        goto cleanup;
4802

4803 4804
    /* If you are using a SecurityDriver with dynamic labelling,
       then generate a security label for isolation */
4805
    VIR_DEBUG("Generating domain security label (if required)");
4806
    if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0) {
4807
        virDomainAuditSecurityLabel(vm, false);
J
Jiri Denemark 已提交
4808
        goto cleanup;
4809
    }
4810
    virDomainAuditSecurityLabel(vm, true);
4811

4812
    if (vm->def->mem.nhugepages) {
4813 4814 4815 4816
        for (i = 0; i < cfg->nhugetlbfs; i++) {
            char *hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]);

            if (!hugepagePath)
J
Jiri Denemark 已提交
4817
                goto cleanup;
4818 4819 4820 4821 4822 4823

            if (virSecurityManagerSetHugepages(driver->securityManager,
                                               vm->def, hugepagePath) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               "%s", _("Unable to set huge path in security driver"));
                VIR_FREE(hugepagePath);
J
Jiri Denemark 已提交
4824
                goto cleanup;
4825 4826
            }
            VIR_FREE(hugepagePath);
4827 4828 4829
        }
    }

4830 4831
    /* Ensure no historical cgroup for this VM is lying around bogus
     * settings */
4832
    VIR_DEBUG("Ensuring no historical cgroup is lying around");
4833
    qemuRemoveCgroup(vm);
4834

4835 4836
    VIR_DEBUG("Setting up ports for graphics");
    if (qemuProcessSetupGraphics(driver, vm) < 0)
J
Jiri Denemark 已提交
4837
        goto cleanup;
4838

4839
    if (virFileMakePath(cfg->logDir) < 0) {
4840 4841
        virReportSystemError(errno,
                             _("cannot create log directory %s"),
4842
                             cfg->logDir);
J
Jiri Denemark 已提交
4843
        goto cleanup;
4844 4845
    }

4846
    VIR_DEBUG("Creating domain log file");
4847 4848
    if (!(logCtxt = qemuDomainLogContextNew(driver, vm,
                                            QEMU_DOMAIN_LOG_CONTEXT_MODE_START)))
J
Jiri Denemark 已提交
4849
        goto cleanup;
4850
    logfile = qemuDomainLogContextGetWriteFD(logCtxt);
4851

4852 4853
    if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
        VIR_DEBUG("Checking for KVM availability");
4854
        if (!virFileExists("/dev/kvm")) {
4855 4856 4857 4858
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("Domain requires KVM, but it is not available. "
                             "Check that virtualization is enabled in the host BIOS, "
                             "and host configuration is setup to load the kvm modules."));
J
Jiri Denemark 已提交
4859
            goto cleanup;
4860 4861 4862
        }
    }

4863
    if (qemuAssignDeviceAliases(vm->def, priv->qemuCaps) < 0)
J
Jiri Denemark 已提交
4864
        goto cleanup;
4865

4866 4867 4868
    /* Get the advisory nodeset from numad if 'placement' of
     * either <vcpu> or <numatune> is 'auto'.
     */
4869
    if (virDomainDefNeedsPlacementAdvice(vm->def)) {
4870
        nodeset = virNumaGetAutoPlacementAdvice(virDomainDefGetVcpus(vm->def),
4871
                                                virDomainDefGetMemoryActual(vm->def));
4872
        if (!nodeset)
J
Jiri Denemark 已提交
4873
            goto cleanup;
4874 4875 4876

        VIR_DEBUG("Nodeset returned from numad: %s", nodeset);

4877 4878
        if (virBitmapParse(nodeset, 0, &priv->autoNodeset,
                           VIR_DOMAIN_CPUMASK_LEN) < 0)
J
Jiri Denemark 已提交
4879
            goto cleanup;
4880 4881 4882

        if (!(priv->autoCpuset = virCapabilitiesGetCpusForNodemask(caps,
                                                                   priv->autoNodeset)))
J
Jiri Denemark 已提交
4883
            goto cleanup;
4884 4885
    }

4886 4887 4888 4889
    /* "volume" type disk's source must be translated before
     * cgroup and security setting.
     */
    for (i = 0; i < vm->def->ndisks; i++) {
4890
        if (virStorageTranslateDiskSourcePool(conn, vm->def->disks[i]) < 0)
J
Jiri Denemark 已提交
4891
            goto cleanup;
4892 4893
    }

4894 4895
    if (qemuDomainCheckDiskPresence(driver, vm,
                                    flags & VIR_QEMU_PROCESS_START_COLD) < 0)
J
Jiri Denemark 已提交
4896
        goto cleanup;
4897

4898 4899 4900 4901
    if (vm->def->mem.min_guarantee) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Parameter 'min_guarantee' "
                         "not supported by QEMU."));
J
Jiri Denemark 已提交
4902
        goto cleanup;
4903 4904
    }

4905
    if (VIR_ALLOC(priv->monConfig) < 0)
J
Jiri Denemark 已提交
4906
        goto cleanup;
4907

4908
    VIR_DEBUG("Preparing monitor state");
4909
    if (qemuProcessPrepareMonitorChr(cfg, priv->monConfig, vm->def->name) < 0)
J
Jiri Denemark 已提交
4910
        goto cleanup;
4911

E
Eric Blake 已提交
4912
    priv->monJSON = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MONITOR_JSON);
4913 4914
    priv->monError = false;
    priv->monStart = 0;
4915 4916
    priv->gotShutdown = false;

4917
    VIR_FREE(priv->pidfile);
4918
    if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, vm->def->name))) {
4919 4920
        virReportSystemError(errno,
                             "%s", _("Failed to build pidfile path."));
J
Jiri Denemark 已提交
4921
        goto cleanup;
4922 4923
    }

4924 4925
    if (unlink(priv->pidfile) < 0 &&
        errno != ENOENT) {
4926
        virReportSystemError(errno,
4927 4928
                             _("Cannot remove stale PID file %s"),
                             priv->pidfile);
J
Jiri Denemark 已提交
4929
        goto cleanup;
4930 4931 4932 4933 4934 4935
    }

    /*
     * Normally PCI addresses are assigned in the virDomainCreate
     * or virDomainDefine methods. We might still need to assign
     * some here to cope with the question of upgrades. Regardless
M
Martin Kletzander 已提交
4936
     * we also need to populate the PCI address set cache for later
4937 4938
     * use in hotplug
     */
4939
    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
4940
        VIR_DEBUG("Assigning domain PCI addresses");
4941
        if ((qemuDomainAssignAddresses(vm->def, priv->qemuCaps, vm)) < 0)
J
Jiri Denemark 已提交
4942
            goto cleanup;
4943 4944
    }

4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975
    VIR_DEBUG("Checking for any possible (non-fatal) issues");

    /*
     * For vhost-user to work, the domain has to have some type of
     * shared memory configured.  We're not the proper ones to judge
     * whether shared hugepages or shm are enough and will be in the
     * future, so we'll just warn in case neither is configured.
     * Moreover failing would give the false illusion that libvirt is
     * really checking that everything works before running the domain
     * and not only we are unable to do that, but it's also not our
     * aim to do so.
     */
    for (i = 0; i < vm->def->nnets; i++) {
        if (virDomainNetGetActualType(vm->def->nets[i]) ==
                                      VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
            check_shmem = true;
            break;
        }
    }

    if (check_shmem) {
        bool shmem = vm->def->nshmems;

        /*
         * This check is by no means complete.  We merely check
         * whether there are *some* hugepages enabled and *some* NUMA
         * nodes with shared memory access.
         */
        if (!shmem && vm->def->mem.nhugepages) {
            for (i = 0; i < virDomainNumaGetNodeCount(vm->def->numa); i++) {
                if (virDomainNumaGetNodeMemoryAccessMode(vm->def->numa, i) ==
4976
                    VIR_NUMA_MEM_ACCESS_SHARED) {
4977
                    shmem = true;
4978 4979
                    break;
                }
4980 4981 4982 4983 4984 4985 4986 4987 4988
            }
        }

        if (!shmem) {
            VIR_WARN("Detected vhost-user interface without any shared memory, "
                     "the interface might not be operational");
        }
    }

4989
    VIR_DEBUG("Building emulator command line");
4990
    if (!(cmd = qemuBuildCommandLine(conn, driver, vm->def, priv->monConfig,
E
Eric Blake 已提交
4991
                                     priv->monJSON, priv->qemuCaps,
4992 4993
                                     incoming ? incoming->launchURI : NULL,
                                     snapshot, vmop,
4994
                                     &buildCommandLineCallbacks, false,
4995
                                     qemuCheckFips(),
4996
                                     priv->autoNodeset,
4997
                                     &nnicindexes, &nicindexes)))
J
Jiri Denemark 已提交
4998
        goto cleanup;
4999

5000 5001
    if (incoming && incoming->fd != -1)
        virCommandPassFD(cmd, incoming->fd, 0);
5002 5003 5004 5005 5006

    /*
     * Create all per-domain directories in order to make sure domain
     * with any possible seclabels can access it.
     */
J
Jiri Denemark 已提交
5007 5008
    if (qemuProcessMakeDir(driver, vm, cfg->libDir) < 0 ||
        qemuProcessMakeDir(driver, vm, cfg->channelTargetDir) < 0)
J
Jiri Denemark 已提交
5009
        goto cleanup;
5010

5011
    /* now that we know it is about to start call the hook if present */
5012 5013 5014
    if (qemuProcessStartHook(driver, vm,
                             VIR_HOOK_QEMU_OP_START,
                             VIR_HOOK_SUBOP_BEGIN) < 0)
J
Jiri Denemark 已提交
5015
        goto cleanup;
5016

5017
    qemuLogOperation(vm, "starting up", cmd, logCtxt);
5018

5019
    qemuDomainObjCheckTaint(driver, vm, logCtxt);
5020

5021
    qemuDomainLogContextMarkPosition(logCtxt);
5022 5023

    VIR_DEBUG("Clear emulator capabilities: %d",
5024 5025
              cfg->clearEmulatorCapabilities);
    if (cfg->clearEmulatorCapabilities)
5026 5027
        virCommandClearCaps(cmd);

5028 5029
    VIR_DEBUG("Setting up raw IO");
    if (qemuProcessSetupRawIO(driver, vm, cmd) < 0)
J
Jiri Denemark 已提交
5030
        goto cleanup;
5031

5032
    virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
5033 5034
    virCommandSetMaxProcesses(cmd, cfg->maxProcesses);
    virCommandSetMaxFiles(cmd, cfg->maxFiles);
5035
    virCommandSetUmask(cmd, 0x002);
5036

5037 5038
    VIR_DEBUG("Setting up security labelling");
    if (virSecurityManagerSetChildProcessLabel(driver->securityManager,
5039
                                               vm->def, cmd) < 0)
J
Jiri Denemark 已提交
5040
        goto cleanup;
5041

5042 5043 5044
    virCommandSetOutputFD(cmd, &logfile);
    virCommandSetErrorFD(cmd, &logfile);
    virCommandNonblockingFDs(cmd);
5045
    virCommandSetPidFile(cmd, priv->pidfile);
5046
    virCommandDaemonize(cmd);
5047
    virCommandRequireHandshake(cmd);
5048

5049
    if (virSecurityManagerPreFork(driver->securityManager) < 0)
J
Jiri Denemark 已提交
5050
        goto cleanup;
5051
    rv = virCommandRun(cmd, NULL);
5052
    virSecurityManagerPostFork(driver->securityManager);
5053

E
Eric Blake 已提交
5054
    /* wait for qemu process to show up */
5055
    if (rv == 0) {
5056
        if (virPidFileReadPath(priv->pidfile, &vm->pid) < 0) {
5057 5058
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Domain %s didn't show up"), vm->def->name);
5059
            rv = -1;
5060
        }
5061 5062 5063 5064 5065
        VIR_DEBUG("QEMU vm=%p name=%s running with pid=%llu",
                  vm, vm->def->name, (unsigned long long)vm->pid);
    } else {
        VIR_DEBUG("QEMU vm=%p name=%s failed to spawn",
                  vm, vm->def->name);
5066 5067
    }

5068
    VIR_DEBUG("Writing early domain status to disk");
5069
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
J
Jiri Denemark 已提交
5070
        goto cleanup;
5071

5072 5073
    VIR_DEBUG("Waiting for handshake from child");
    if (virCommandHandshakeWait(cmd) < 0) {
5074
        /* Read errors from child that occurred between fork and exec. */
5075 5076
        qemuProcessReportLogError(logCtxt,
                                  _("Process exited prior to exec"));
J
Jiri Denemark 已提交
5077
        goto cleanup;
5078 5079
    }

5080
    VIR_DEBUG("Setting up domain cgroup (if required)");
5081
    if (qemuSetupCgroup(driver, vm, nnicindexes, nicindexes) < 0)
J
Jiri Denemark 已提交
5082
        goto cleanup;
5083

5084 5085 5086 5087 5088 5089
    /* This must be done after cgroup placement to avoid resetting CPU
     * affinity */
    if (!vm->def->cputune.emulatorpin &&
        qemuProcessInitCpuAffinity(vm) < 0)
        goto cleanup;

5090 5091
    VIR_DEBUG("Setting domain security labels");
    if (virSecurityManagerSetAllLabel(driver->securityManager,
J
Jiri Denemark 已提交
5092 5093 5094
                                      vm->def,
                                      incoming ? incoming->path : NULL) < 0)
        goto cleanup;
5095

5096
    /* Security manager labeled all devices, therefore
J
Jiri Denemark 已提交
5097 5098 5099 5100
     * if any operation from now on fails, we need to ask the caller to
     * restore labels.
     */
    ret = -2;
5101

J
Jiri Denemark 已提交
5102
    if (incoming && incoming->fd != -1) {
5103 5104 5105 5106 5107 5108 5109
        /* if there's an fd to migrate from, and it's a pipe, put the
         * proper security label on it
         */
        struct stat stdin_sb;

        VIR_DEBUG("setting security label on pipe used for migration");

J
Jiri Denemark 已提交
5110
        if (fstat(incoming->fd, &stdin_sb) < 0) {
5111
            virReportSystemError(errno,
J
Jiri Denemark 已提交
5112 5113
                                 _("cannot stat fd %d"), incoming->fd);
            goto cleanup;
5114 5115
        }
        if (S_ISFIFO(stdin_sb.st_mode) &&
J
Jiri Denemark 已提交
5116 5117 5118
            virSecurityManagerSetImageFDLabel(driver->securityManager,
                                              vm->def, incoming->fd) < 0)
            goto cleanup;
5119 5120 5121
    }

    VIR_DEBUG("Labelling done, completing handshake to child");
5122
    if (virCommandHandshakeNotify(cmd) < 0)
J
Jiri Denemark 已提交
5123
        goto cleanup;
5124 5125
    VIR_DEBUG("Handshake complete, child running");

5126
    if (rv == -1) /* The VM failed to start; tear filters before taps */
5127 5128
        virDomainConfVMNWFilterTeardown(vm);

5129
    if (rv == -1) /* The VM failed to start */
J
Jiri Denemark 已提交
5130
        goto cleanup;
5131

5132
    VIR_DEBUG("Setting cgroup for emulator (if required)");
5133
    if (qemuSetupCgroupForEmulator(vm) < 0)
J
Jiri Denemark 已提交
5134
        goto cleanup;
5135 5136 5137

    VIR_DEBUG("Setting affinity of emulator threads");
    if (qemuProcessSetEmulatorAffinity(vm) < 0)
J
Jiri Denemark 已提交
5138
        goto cleanup;
5139

5140
    VIR_DEBUG("Waiting for monitor to show up");
5141
    if (qemuProcessWaitForMonitor(driver, vm, asyncJob, priv->qemuCaps, logCtxt) < 0)
J
Jiri Denemark 已提交
5142
        goto cleanup;
5143

D
Daniel P. Berrange 已提交
5144
    /* Failure to connect to agent shouldn't be fatal */
5145 5146
    if ((rv = qemuConnectAgent(driver, vm)) < 0) {
        if (rv == -2)
J
Jiri Denemark 已提交
5147
            goto cleanup;
5148

D
Daniel P. Berrange 已提交
5149 5150 5151 5152 5153 5154
        VIR_WARN("Cannot connect to QEMU guest agent for %s",
                 vm->def->name);
        virResetLastError();
        priv->agentError = true;
    }

5155
    VIR_DEBUG("Detecting if required emulator features are present");
5156
    if (!qemuProcessVerifyGuestCPU(driver, vm, asyncJob))
J
Jiri Denemark 已提交
5157
        goto cleanup;
5158

5159
    VIR_DEBUG("Setting up post-init cgroup restrictions");
5160
    if (qemuSetupCpusetMems(vm) < 0)
J
Jiri Denemark 已提交
5161
        goto cleanup;
5162

5163
    VIR_DEBUG("Detecting VCPU PIDs");
5164
    if (qemuDomainDetectVcpuPids(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5165
        goto cleanup;
5166

5167 5168
    VIR_DEBUG("Detecting IOThread PIDs");
    if (qemuProcessDetectIOThreadPIDs(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5169
        goto cleanup;
5170

5171 5172
    VIR_DEBUG("Setting vCPU tuning/settings");
    if (qemuProcessSetupVcpus(vm) < 0)
J
Jiri Denemark 已提交
5173
        goto cleanup;
5174

5175 5176
    VIR_DEBUG("Setting IOThread tuning/settings");
    if (qemuProcessSetupIOThreads(vm) < 0)
J
Jiri Denemark 已提交
5177
        goto cleanup;
5178

5179
    VIR_DEBUG("Setting any required VM passwords");
5180
    if (qemuProcessInitPasswords(conn, driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5181
        goto cleanup;
5182 5183 5184

    /* If we have -device, then addresses are assigned explicitly.
     * If not, then we have to detect dynamic ones here */
5185
    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
5186
        VIR_DEBUG("Determining domain device PCI addresses");
5187
        if (qemuProcessInitPCIAddresses(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5188
            goto cleanup;
5189 5190
    }

5191 5192 5193 5194
    /* set default link states */
    /* qemu doesn't support setting this on the command line, so
     * enter the monitor */
    VIR_DEBUG("Setting network link states");
5195
    if (qemuProcessSetLinkStates(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5196
        goto cleanup;
5197

5198
    VIR_DEBUG("Fetching list of active devices");
5199
    if (qemuDomainUpdateDeviceList(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5200
        goto cleanup;
5201

5202 5203
    VIR_DEBUG("Updating info of memory devices");
    if (qemuDomainUpdateMemoryDeviceInfo(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5204
        goto cleanup;
5205

5206
    VIR_DEBUG("Setting initial memory amount");
5207
    if (qemuProcessSetupBalloon(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5208
        goto cleanup;
5209

5210
    /* Since CPUs were not started yet, the balloon could not return the memory
5211 5212
     * to the host and thus cur_balloon needs to be updated so that GetXMLdesc
     * and friends return the correct size in case they can't grab the job */
5213
    if (!incoming && !snapshot &&
5214
        qemuProcessRefreshBalloonState(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5215
        goto cleanup;
5216

5217 5218
    VIR_DEBUG("Detecting actual memory size for video device");
    if (qemuProcessUpdateVideoRamSize(driver, vm, asyncJob) < 0)
J
Jiri Denemark 已提交
5219 5220 5221 5222 5223 5224 5225 5226 5227 5228
        goto cleanup;

    if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY &&
        qemuProcessAutoDestroyAdd(driver, vm, conn) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virCommandFree(cmd);
5229
    qemuDomainLogContextFree(logCtxt);
J
Jiri Denemark 已提交
5230 5231 5232 5233 5234 5235 5236 5237
    virObjectUnref(cfg);
    virObjectUnref(caps);
    VIR_FREE(nicindexes);
    VIR_FREE(nodeset);
    return ret;
}


5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268
/**
 * qemuProcessFinishStartup:
 * @conn: connection passed through to qemuProcessStartCPUs
 * @driver: qemu driver
 * @vm: domain object
 * @asyncJob: async job passed through to qemuProcessStartCPUs
 * @startCPUs: whether the guest CPUs should be started
 * @pausedReason: reason recorded when the domain is left paused
 *
 * Finish starting a new domain: either start its CPUs or mark it as
 * paused, save the domain status and run the "started" hook.
 *
 * Returns 0 on success, -1 on failure.
 */
int
qemuProcessFinishStartup(virConnectPtr conn,
                         virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
                         qemuDomainAsyncJob asyncJob,
                         bool startCPUs,
                         virDomainPausedReason pausedReason)
{
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    int ret = -1;

    if (!startCPUs) {
        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, pausedReason);
    } else {
        int started;

        VIR_DEBUG("Starting domain CPUs");
        started = qemuProcessStartCPUs(driver, vm, conn,
                                       VIR_DOMAIN_RUNNING_BOOTED,
                                       asyncJob);
        if (started < 0) {
            /* Make sure the caller always gets some error reported */
            if (virGetLastError() == NULL) {
                virReportError(VIR_ERR_OPERATION_FAILED, "%s",
                               _("resume operation failed"));
            }
            goto cleanup;
        }
    }

    VIR_DEBUG("Writing domain status to disk");
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir,
                            vm, driver->caps) < 0) {
        goto cleanup;
    }

    if (qemuProcessStartHook(driver, vm, VIR_HOOK_QEMU_OP_STARTED,
                             VIR_HOOK_SUBOP_BEGIN) < 0) {
        goto cleanup;
    }

    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    return ret;
}


J
Jiri Denemark 已提交
5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315
/**
 * qemuProcessStart:
 *
 * Start a domain: initialize the process state, optionally prepare the
 * incoming migration definition (when @migrateFrom is set), launch the
 * QEMU process, run a deferred incoming migration if there is one, and
 * finish the startup. On any failure after initialization the domain is
 * stopped again via qemuProcessStop.
 *
 * Returns 0 on success, -1 on failure.
 */
int
qemuProcessStart(virConnectPtr conn,
                 virQEMUDriverPtr driver,
                 virDomainObjPtr vm,
                 qemuDomainAsyncJob asyncJob,
                 const char *migrateFrom,
                 int migrateFd,
                 const char *migratePath,
                 virDomainSnapshotObjPtr snapshot,
                 virNetDevVPortProfileOp vmop,
                 unsigned int flags)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    qemuProcessIncomingDefPtr incoming = NULL;
    unsigned int stopFlags;
    bool relabel = false;
    bool startCPUs;
    virDomainPausedReason pausedReason;
    int launched;
    int ret = -1;

    VIR_DEBUG("conn=%p driver=%p vm=%p name=%s id=%d asyncJob=%s "
              "migrateFrom=%s migrateFd=%d migratePath=%s "
              "snapshot=%p vmop=%d flags=0x%x",
              conn, driver, vm, vm->def->name, vm->def->id,
              qemuDomainAsyncJobTypeToString(asyncJob),
              NULLSTR(migrateFrom), migrateFd, NULLSTR(migratePath),
              snapshot, vmop, flags);

    virCheckFlagsGoto(VIR_QEMU_PROCESS_START_COLD |
                      VIR_QEMU_PROCESS_START_PAUSED |
                      VIR_QEMU_PROCESS_START_AUTODESTROY, cleanup);

    if (qemuProcessInit(driver, vm, asyncJob,
                        migrateFrom != NULL, snapshot != NULL) < 0) {
        goto cleanup;
    }

    if (migrateFrom) {
        incoming = qemuProcessIncomingDefNew(priv->qemuCaps, NULL, migrateFrom,
                                             migrateFd, migratePath);
        if (incoming == NULL)
            goto stop;
    }

    launched = qemuProcessLaunch(conn, driver, vm, asyncJob, incoming,
                                 snapshot, vmop, flags);
    if (launched < 0) {
        /* Only a plain -1 failure asks for labels to be restored on stop */
        relabel = (launched == -1);
        goto stop;
    }
    relabel = true;

    if (incoming && incoming->deferredURI) {
        if (qemuMigrationRunIncoming(driver, vm, incoming->deferredURI,
                                     asyncJob) < 0) {
            goto stop;
        }
    }

    startCPUs = !(flags & VIR_QEMU_PROCESS_START_PAUSED);
    if (incoming)
        pausedReason = VIR_DOMAIN_PAUSED_MIGRATION;
    else
        pausedReason = VIR_DOMAIN_PAUSED_USER;

    if (qemuProcessFinishStartup(conn, driver, vm, asyncJob,
                                 startCPUs, pausedReason) < 0) {
        goto stop;
    }

    /* Keep watching qemu log for errors during incoming migration, otherwise
     * unset reporting errors from qemu log. */
    if (incoming == NULL)
        qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);

    ret = 0;

 cleanup:
    qemuProcessIncomingDefFree(incoming);
    return ret;

 stop:
    stopFlags = migrateFrom ? VIR_QEMU_PROCESS_STOP_MIGRATED : 0;
    if (!relabel)
        stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
    if (priv->mon)
        qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
    goto cleanup;
}


5370
int
5371
qemuProcessKill(virDomainObjPtr vm, unsigned int flags)
5372
{
5373
    int ret;
5374

5375 5376 5377
    VIR_DEBUG("vm=%p name=%s pid=%llu flags=%x",
              vm, vm->def->name,
              (unsigned long long)vm->pid, flags);
5378

5379 5380 5381 5382 5383
    if (!(flags & VIR_QEMU_PROCESS_KILL_NOCHECK)) {
        if (!virDomainObjIsActive(vm)) {
            VIR_DEBUG("VM '%s' not active", vm->def->name);
            return 0;
        }
5384 5385
    }

5386
    if (flags & VIR_QEMU_PROCESS_KILL_NOWAIT) {
5387 5388 5389 5390 5391
        virProcessKill(vm->pid,
                       (flags & VIR_QEMU_PROCESS_KILL_FORCE) ?
                       SIGKILL : SIGTERM);
        return 0;
    }
5392

5393 5394
    ret = virProcessKillPainfully(vm->pid,
                                  !!(flags & VIR_QEMU_PROCESS_KILL_FORCE));
5395

5396
    return ret;
5397 5398 5399
}


5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438
/**
 * qemuProcessBeginStopJob:
 * @driver: qemu driver
 * @vm: domain object
 * @job: job to start once the domain has been killed
 * @forceKill: whether VIR_QEMU_PROCESS_KILL_FORCE is passed to
 *             qemuProcessKill
 *
 * Stop all current jobs by killing the domain and start a new one for
 * qemuProcessStop.
 *
 * Returns 0 on success, -1 if killing the domain or starting the job
 * failed.
 */
int
qemuProcessBeginStopJob(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
                        qemuDomainJob job,
                        bool forceKill)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned int killFlags = 0;
    int ret = -1;

    if (forceKill)
        killFlags = VIR_QEMU_PROCESS_KILL_FORCE;

    /* We need to prevent monitor EOF callback from doing our work (and
     * sending misleading events) while the vm is unlocked inside
     * BeginJob/ProcessKill API
     */
    priv->beingDestroyed = true;

    if (qemuProcessKill(vm, killFlags) >= 0) {
        /* Wake up anything waiting on domain condition */
        virDomainObjBroadcast(vm);

        if (qemuDomainObjBeginJob(driver, vm, job) >= 0)
            ret = 0;
    }

    priv->beingDestroyed = false;
    return ret;
}


5439
void qemuProcessStop(virQEMUDriverPtr driver,
5440
                     virDomainObjPtr vm,
5441
                     virDomainShutoffReason reason,
5442
                     qemuDomainAsyncJob asyncJob,
5443
                     unsigned int flags)
5444 5445 5446 5447 5448 5449
{
    int ret;
    int retries = 0;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virErrorPtr orig_err;
    virDomainDefPtr def;
A
Ansis Atteka 已提交
5450
    virNetDevVPortProfilePtr vport = NULL;
5451
    size_t i;
5452
    char *timestamp;
5453
    char *tmppath = NULL;
5454
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
5455
    qemuDomainLogContextPtr logCtxt = NULL;
5456

5457 5458
    VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%llu, "
              "reason=%s, asyncJob=%s, flags=%x",
5459
              vm, vm->def->name, vm->def->id,
5460 5461 5462 5463
              (unsigned long long)vm->pid,
              virDomainShutoffReasonTypeToString(reason),
              qemuDomainAsyncJobTypeToString(asyncJob),
              flags);
5464

5465 5466 5467 5468
    /* This method is routinely used in clean up paths. Disable error
     * reporting so we don't squash a legit error. */
    orig_err = virSaveLastError();

5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483
    if (asyncJob != QEMU_ASYNC_JOB_NONE) {
        if (qemuDomainObjBeginNestedJob(driver, vm, asyncJob) < 0)
            goto cleanup;
    } else if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE &&
               priv->job.asyncOwner == virThreadSelfID() &&
               priv->job.active != QEMU_JOB_ASYNC_NESTED) {
        VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
                 qemuDomainAsyncJobTypeToString(asyncJob));
    }

    if (!virDomainObjIsActive(vm)) {
        VIR_DEBUG("VM '%s' not active", vm->def->name);
        goto endjob;
    }

5484 5485
    vm->def->id = -1;

5486
    if (virAtomicIntDecAndTest(&driver->nactive) && driver->inhibitCallback)
5487 5488
        driver->inhibitCallback(false, driver->inhibitOpaque);

5489 5490
    /* Wake up anything waiting on domain condition */
    virDomainObjBroadcast(vm);
5491

5492 5493
    if ((logCtxt = qemuDomainLogContextNew(driver, vm,
                                           QEMU_DOMAIN_LOG_CONTEXT_MODE_STOP))) {
5494
        if ((timestamp = virTimeStringNow()) != NULL) {
5495
            qemuDomainLogContextWrite(logCtxt, "%s: shutting down\n", timestamp);
5496 5497
            VIR_FREE(timestamp);
        }
5498
        qemuDomainLogContextFree(logCtxt);
5499 5500
    }

5501 5502 5503
    /* Clear network bandwidth */
    virDomainClearNetBandwidth(vm);

5504 5505
    virDomainConfVMNWFilterTeardown(vm);

5506
    if (cfg->macFilter) {
5507
        def = vm->def;
5508
        for (i = 0; i < def->nnets; i++) {
5509 5510 5511
            virDomainNetDefPtr net = def->nets[i];
            if (net->ifname == NULL)
                continue;
5512 5513 5514
            ignore_value(ebtablesRemoveForwardAllowIn(driver->ebtables,
                                                      net->ifname,
                                                      &net->mac));
5515 5516 5517
        }
    }

5518
    virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
5519
    priv->nbdPort = 0;
5520

D
Daniel P. Berrange 已提交
5521 5522 5523 5524 5525 5526
    if (priv->agent) {
        qemuAgentClose(priv->agent);
        priv->agent = NULL;
        priv->agentError = false;
    }

5527
    if (priv->mon) {
5528
        qemuMonitorClose(priv->mon);
5529 5530
        priv->mon = NULL;
    }
5531 5532 5533 5534 5535 5536 5537 5538

    if (priv->monConfig) {
        if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
            unlink(priv->monConfig->data.nix.path);
        virDomainChrSourceDefFree(priv->monConfig);
        priv->monConfig = NULL;
    }

5539 5540
    ignore_value(virAsprintf(&tmppath, "%s/domain-%s",
                             cfg->libDir, vm->def->name));
5541
    virFileDeleteTree(tmppath);
5542 5543 5544 5545
    VIR_FREE(tmppath);

    ignore_value(virAsprintf(&tmppath, "%s/domain-%s",
                             cfg->channelTargetDir, vm->def->name));
5546
    virFileDeleteTree(tmppath);
5547 5548
    VIR_FREE(tmppath);

5549 5550 5551 5552 5553 5554
    ignore_value(virDomainChrDefForeach(vm->def,
                                        false,
                                        qemuProcessCleanupChardevDevice,
                                        NULL));


5555
    /* shut it off for sure */
5556 5557 5558
    ignore_value(qemuProcessKill(vm,
                                 VIR_QEMU_PROCESS_KILL_FORCE|
                                 VIR_QEMU_PROCESS_KILL_NOCHECK));
5559

5560 5561
    qemuDomainCleanupRun(driver, vm);

5562
    /* Stop autodestroy in case guest is restarted */
5563
    qemuProcessAutoDestroyRemove(driver, vm);
5564

5565 5566
    /* now that we know it's stopped call the hook if present */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
5567
        char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
5568 5569

        /* we can't stop the operation even if the script raised an error */
5570 5571 5572
        ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
                                 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
                                 NULL, xml, NULL));
5573 5574 5575
        VIR_FREE(xml);
    }

5576 5577 5578 5579
    /* Reset Security Labels unless caller don't want us to */
    if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
        virSecurityManagerRestoreAllLabel(driver->securityManager,
                                          vm->def,
5580
                                          !!(flags & VIR_QEMU_PROCESS_STOP_MIGRATED));
5581
    virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
5582

5583
    for (i = 0; i < vm->def->ndisks; i++) {
5584
        virDomainDeviceDef dev;
5585
        virDomainDiskDefPtr disk = vm->def->disks[i];
5586 5587 5588 5589

        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = disk;
        ignore_value(qemuRemoveSharedDevice(driver, &dev, vm->def->name));
5590 5591
    }

5592
    /* Clear out dynamically assigned labels */
5593
    for (i = 0; i < vm->def->nseclabels; i++) {
5594
        if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC)
5595 5596
            VIR_FREE(vm->def->seclabels[i]->label);
        VIR_FREE(vm->def->seclabels[i]->imagelabel);
5597 5598
    }

5599 5600 5601
    virStringFreeList(priv->qemuDevices);
    priv->qemuDevices = NULL;

5602 5603 5604
    virDomainDefClearDeviceAliases(vm->def);
    if (!priv->persistentAddrs) {
        virDomainDefClearPCIAddresses(vm->def);
5605
        virDomainPCIAddressSetFree(priv->pciaddrs);
5606
        priv->pciaddrs = NULL;
5607
        virDomainDefClearCCWAddresses(vm->def);
J
Ján Tomko 已提交
5608
        virDomainCCWAddressSetFree(priv->ccwaddrs);
5609
        priv->ccwaddrs = NULL;
5610 5611
        virDomainVirtioSerialAddrSetFree(priv->vioserialaddrs);
        priv->vioserialaddrs = NULL;
5612 5613
    }

5614
    qemuHostdevReAttachDomainDevices(driver, vm->def);
5615 5616 5617 5618

    def = vm->def;
    for (i = 0; i < def->nnets; i++) {
        virDomainNetDefPtr net = def->nets[i];
5619 5620 5621 5622
        vport = virDomainNetGetActualVirtPortProfile(net);

        switch (virDomainNetGetActualType(net)) {
        case VIR_DOMAIN_NET_TYPE_DIRECT:
5623
            ignore_value(virNetDevMacVLanDeleteWithVPortProfile(
5624
                             net->ifname, &net->mac,
5625 5626
                             virDomainNetGetActualDirectDev(net),
                             virDomainNetGetActualDirectMode(net),
5627
                             virDomainNetGetActualVirtPortProfile(net),
5628
                             cfg->stateDir));
5629 5630 5631 5632 5633
            break;
        case VIR_DOMAIN_NET_TYPE_BRIDGE:
        case VIR_DOMAIN_NET_TYPE_NETWORK:
#ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP
            if (!(vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH))
5634
                ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
5635 5636
#endif
            break;
5637
        }
5638 5639 5640
        /* release the physical device (or any other resources used by
         * this interface in the network driver
         */
5641 5642 5643 5644 5645 5646 5647 5648 5649
        if (vport) {
            if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_MIDONET) {
                ignore_value(virNetDevMidonetUnbindPort(vport));
            } else if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
                ignore_value(virNetDevOpenvswitchRemovePort(
                                 virDomainNetGetActualBridgeName(net),
                                 net->ifname));
            }
        }
A
Ansis Atteka 已提交
5650

5651 5652
        /* kick the device out of the hostdev list too */
        virDomainNetRemoveHostdev(def, net);
5653
        networkReleaseActualDevice(vm->def, net);
5654
    }
5655

5656
 retry:
5657
    if ((ret = qemuRemoveCgroup(vm)) < 0) {
5658 5659 5660 5661 5662 5663 5664
        if (ret == -EBUSY && (retries++ < 5)) {
            usleep(200*1000);
            goto retry;
        }
        VIR_WARN("Failed to remove cgroup for %s",
                 vm->def->name);
    }
5665
    virCgroupFree(&priv->cgroup);
5666 5667 5668

    qemuProcessRemoveDomainStatus(driver, vm);

5669 5670
    /* Remove VNC and Spice ports from port reservation bitmap, but only if
       they were reserved by the driver (autoport=yes)
5671
    */
5672
    for (i = 0; i < vm->def->ngraphics; ++i) {
5673
        virDomainGraphicsDefPtr graphics = vm->def->graphics[i];
5674 5675
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
            if (graphics->data.vnc.autoport) {
5676 5677
                virPortAllocatorRelease(driver->remotePorts,
                                        graphics->data.vnc.port);
5678
            } else if (graphics->data.vnc.portReserved) {
5679 5680 5681 5682 5683
                virPortAllocatorSetUsed(driver->remotePorts,
                                        graphics->data.spice.port,
                                        false);
                graphics->data.vnc.portReserved = false;
            }
5684 5685
            virPortAllocatorRelease(driver->webSocketPorts,
                                    graphics->data.vnc.websocket);
5686
        }
5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
            if (graphics->data.spice.autoport) {
                virPortAllocatorRelease(driver->remotePorts,
                                        graphics->data.spice.port);
                virPortAllocatorRelease(driver->remotePorts,
                                        graphics->data.spice.tlsPort);
            } else {
                if (graphics->data.spice.portReserved) {
                    virPortAllocatorSetUsed(driver->remotePorts,
                                            graphics->data.spice.port,
                                            false);
                    graphics->data.spice.portReserved = false;
                }

                if (graphics->data.spice.tlsPortReserved) {
                    virPortAllocatorSetUsed(driver->remotePorts,
                                            graphics->data.spice.tlsPort,
                                            false);
                    graphics->data.spice.tlsPortReserved = false;
                }
            }
5708
        }
5709 5710
    }

5711
    vm->taint = 0;
5712
    vm->pid = -1;
J
Jiri Denemark 已提交
5713
    virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
5714 5715
    VIR_FREE(priv->vcpupids);
    priv->nvcpupids = 0;
5716 5717
    for (i = 0; i < vm->def->niothreadids; i++)
        vm->def->iothreadids[i]->thread_id = 0;
5718 5719
    virObjectUnref(priv->qemuCaps);
    priv->qemuCaps = NULL;
5720
    VIR_FREE(priv->pidfile);
5721

5722
    /* The "release" hook cleans up additional resources */
5723
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
5724
        char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
5725 5726 5727

        /* we can't stop the operation even if the script raised an error */
        virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
5728 5729
                    VIR_HOOK_QEMU_OP_RELEASE, VIR_HOOK_SUBOP_END,
                    NULL, xml, NULL);
5730 5731 5732
        VIR_FREE(xml);
    }

5733 5734 5735 5736 5737 5738 5739
    if (vm->newDef) {
        virDomainDefFree(vm->def);
        vm->def = vm->newDef;
        vm->def->id = -1;
        vm->newDef = NULL;
    }

5740 5741 5742 5743 5744
 endjob:
    if (asyncJob != QEMU_ASYNC_JOB_NONE)
        qemuDomainObjEndJob(driver, vm);

 cleanup:
5745 5746 5747 5748
    if (orig_err) {
        virSetError(orig_err);
        virFreeError(orig_err);
    }
5749
    virObjectUnref(cfg);
5750
}
5751 5752


5753
int qemuProcessAttach(virConnectPtr conn ATTRIBUTE_UNUSED,
5754
                      virQEMUDriverPtr driver,
5755
                      virDomainObjPtr vm,
5756
                      pid_t pid,
5757 5758 5759 5760
                      const char *pidfile,
                      virDomainChrSourceDefPtr monConfig,
                      bool monJSON)
{
5761
    size_t i;
5762
    qemuDomainLogContextPtr logCtxt = NULL;
5763 5764 5765
    char *timestamp;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    bool running = true;
5766
    virDomainPausedReason reason;
5767
    virSecurityLabelPtr seclabel = NULL;
5768
    virSecurityLabelDefPtr seclabeldef = NULL;
5769
    bool seclabelgen = false;
5770 5771
    virSecurityManagerPtr* sec_managers = NULL;
    const char *model;
5772
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
5773
    virCapsPtr caps = NULL;
5774
    bool active = false;
5775
    int ret;
5776 5777 5778 5779

    VIR_DEBUG("Beginning VM attach process");

    if (virDomainObjIsActive(vm)) {
5780 5781
        virReportError(VIR_ERR_OPERATION_INVALID,
                       "%s", _("VM is already active"));
5782
        virObjectUnref(cfg);
5783 5784 5785
        return -1;
    }

5786
    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
5787
        goto error;
5788

5789 5790 5791 5792 5793
    /* Do this upfront, so any part of the startup process can add
     * runtime state to vm->def that won't be persisted. This let's us
     * report implicit runtime defaults in the XML, like vnc listen/socket
     */
    VIR_DEBUG("Setting current domain def as transient");
5794
    if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm, true) < 0)
5795
        goto error;
5796

5797
    vm->def->id = qemuDriverAllocateID(driver);
5798

5799
    if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
5800
        driver->inhibitCallback(true, driver->inhibitOpaque);
5801
    active = true;
5802

5803
    if (virFileMakePath(cfg->logDir) < 0) {
5804 5805
        virReportSystemError(errno,
                             _("cannot create log directory %s"),
5806
                             cfg->logDir);
5807
        goto error;
5808 5809 5810
    }

    VIR_FREE(priv->pidfile);
5811
    if (VIR_STRDUP(priv->pidfile, pidfile) < 0)
5812
        goto error;
5813

5814 5815
    vm->pid = pid;

5816
    VIR_DEBUG("Detect security driver config");
5817
    sec_managers = virSecurityManagerGetNested(driver->securityManager);
5818 5819
    if (sec_managers == NULL)
        goto error;
5820 5821

    for (i = 0; sec_managers[i]; i++) {
5822
        seclabelgen = false;
5823 5824
        model = virSecurityManagerGetModel(sec_managers[i]);
        seclabeldef = virDomainDefGetSecurityLabelDef(vm->def, model);
5825
        if (seclabeldef == NULL) {
5826
            if (!(seclabeldef = virSecurityLabelDefNew(model)))
5827 5828 5829
                goto error;
            seclabelgen = true;
        }
5830 5831
        seclabeldef->type = VIR_DOMAIN_SECLABEL_STATIC;
        if (VIR_ALLOC(seclabel) < 0)
5832
            goto error;
5833
        if (virSecurityManagerGetProcessLabel(sec_managers[i],
5834
                                              vm->def, vm->pid, seclabel) < 0)
5835
            goto error;
5836

5837
        if (VIR_STRDUP(seclabeldef->model, model) < 0)
5838
            goto error;
5839

5840
        if (VIR_STRDUP(seclabeldef->label, seclabel->label) < 0)
5841
            goto error;
5842
        VIR_FREE(seclabel);
5843 5844 5845 5846 5847 5848

        if (seclabelgen) {
            if (VIR_APPEND_ELEMENT(vm->def->seclabels, vm->def->nseclabels, seclabeldef) < 0)
                goto error;
            seclabelgen = false;
        }
5849
    }
5850

5851 5852
    if (virSecurityManagerCheckAllLabel(driver->securityManager, vm->def) < 0)
        goto error;
5853 5854 5855
    if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0)
        goto error;

5856
    VIR_DEBUG("Creating domain log file");
5857 5858
    if (!(logCtxt = qemuDomainLogContextNew(driver, vm,
                                            QEMU_DOMAIN_LOG_CONTEXT_MODE_ATTACH)))
5859
        goto error;
5860 5861

    VIR_DEBUG("Determining emulator version");
5862 5863
    virObjectUnref(priv->qemuCaps);
    if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
5864 5865
                                                      vm->def->emulator,
                                                      vm->def->os.machine)))
5866
        goto error;
5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878

    VIR_DEBUG("Preparing monitor state");
    priv->monConfig = monConfig;
    monConfig = NULL;
    priv->monJSON = monJSON;

    priv->gotShutdown = false;

    /*
     * Normally PCI addresses are assigned in the virDomainCreate
     * or virDomainDefine methods. We might still need to assign
     * some here to cope with the question of upgrades. Regardless
M
Martin Kletzander 已提交
5879
     * we also need to populate the PCI address set cache for later
5880 5881
     * use in hotplug
     */
5882
    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
5883
        VIR_DEBUG("Assigning domain PCI addresses");
5884
        if ((qemuDomainAssignAddresses(vm->def, priv->qemuCaps, vm)) < 0)
5885
            goto error;
5886 5887
    }

5888
    if ((timestamp = virTimeStringNow()) == NULL)
5889
        goto error;
5890

5891
    qemuDomainLogContextWrite(logCtxt, "%s: attaching\n", timestamp);
5892
    VIR_FREE(timestamp);
5893

5894
    qemuDomainObjTaint(driver, vm, VIR_DOMAIN_TAINT_EXTERNAL_LAUNCH, logCtxt);
5895 5896

    VIR_DEBUG("Waiting for monitor to show up");
5897
    if (qemuProcessWaitForMonitor(driver, vm, QEMU_ASYNC_JOB_NONE, priv->qemuCaps, NULL) < 0)
5898
        goto error;
5899

D
Daniel P. Berrange 已提交
5900
    /* Failure to connect to agent shouldn't be fatal */
5901 5902 5903 5904
    if ((ret = qemuConnectAgent(driver, vm)) < 0) {
        if (ret == -2)
            goto error;

D
Daniel P. Berrange 已提交
5905 5906 5907 5908 5909 5910
        VIR_WARN("Cannot connect to QEMU guest agent for %s",
                 vm->def->name);
        virResetLastError();
        priv->agentError = true;
    }

5911
    VIR_DEBUG("Detecting VCPU PIDs");
5912
    if (qemuDomainDetectVcpuPids(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
5913 5914 5915 5916
        goto error;

    VIR_DEBUG("Detecting IOThread PIDs");
    if (qemuProcessDetectIOThreadPIDs(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
5917
        goto error;
5918 5919 5920

    /* If we have -device, then addresses are assigned explicitly.
     * If not, then we have to detect dynamic ones here */
5921
    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE)) {
5922
        VIR_DEBUG("Determining domain device PCI addresses");
5923
        if (qemuProcessInitPCIAddresses(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
5924
            goto error;
5925 5926 5927
    }

    VIR_DEBUG("Getting initial memory amount");
5928
    qemuDomainObjEnterMonitor(driver, vm);
5929 5930 5931 5932 5933 5934 5935
    if (qemuMonitorGetBalloonInfo(priv->mon, &vm->def->mem.cur_balloon) < 0)
        goto exit_monitor;
    if (qemuMonitorGetStatus(priv->mon, &running, &reason) < 0)
        goto exit_monitor;
    if (qemuMonitorGetVirtType(priv->mon, &vm->def->virtType) < 0)
        goto exit_monitor;
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
5936
        goto error;
5937

5938
    if (running) {
5939 5940
        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                             VIR_DOMAIN_RUNNING_UNPAUSED);
5941 5942 5943
        if (vm->def->memballoon &&
            vm->def->memballoon->model == VIR_DOMAIN_MEMBALLOON_MODEL_VIRTIO &&
            vm->def->memballoon->period) {
5944 5945 5946
            qemuDomainObjEnterMonitor(driver, vm);
            qemuMonitorSetMemoryStatsPeriod(priv->mon,
                                            vm->def->memballoon->period);
5947 5948
            if (qemuDomainObjExitMonitor(driver, vm) < 0)
                goto error;
5949 5950
        }
    } else {
5951
        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
5952
    }
5953 5954

    VIR_DEBUG("Writing domain status to disk");
5955
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
5956
        goto error;
5957

5958 5959
    /* Run an hook to allow admins to do some magic */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
5960
        char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
                              VIR_HOOK_QEMU_OP_ATTACH, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
5972
            goto error;
5973 5974
    }

5975
    qemuDomainLogContextFree(logCtxt);
5976
    VIR_FREE(seclabel);
5977
    VIR_FREE(sec_managers);
5978
    virObjectUnref(cfg);
5979
    virObjectUnref(caps);
5980 5981 5982

    return 0;

5983 5984
 exit_monitor:
    ignore_value(qemuDomainObjExitMonitor(driver, vm));
5985
 error:
5986 5987 5988 5989 5990 5991
    /* We jump here if we failed to attach to the VM for any reason.
     * Leave the domain running, but pretend we never attempted to
     * attach to it.  */
    if (active && virAtomicIntDecAndTest(&driver->nactive) &&
        driver->inhibitCallback)
        driver->inhibitCallback(false, driver->inhibitOpaque);
5992
    qemuDomainLogContextFree(logCtxt);
5993
    VIR_FREE(seclabel);
5994
    VIR_FREE(sec_managers);
5995 5996
    if (seclabelgen)
        virSecurityLabelDefFree(seclabeldef);
5997
    virDomainChrSourceDefFree(monConfig);
5998
    virObjectUnref(cfg);
5999
    virObjectUnref(caps);
6000 6001 6002 6003
    return -1;
}


6004
static virDomainObjPtr
6005 6006 6007
qemuProcessAutoDestroy(virDomainObjPtr dom,
                       virConnectPtr conn,
                       void *opaque)
6008
{
6009
    virQEMUDriverPtr driver = opaque;
6010
    qemuDomainObjPrivatePtr priv = dom->privateData;
6011
    virObjectEventPtr event = NULL;
6012
    unsigned int stopFlags = 0;
6013

6014
    VIR_DEBUG("vm=%s, conn=%p", dom->def->name, conn);
6015

6016 6017
    virObjectRef(dom);

6018 6019 6020
    if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;

6021 6022
    if (priv->job.asyncJob) {
        VIR_DEBUG("vm=%s has long-term job active, cancelling",
6023
                  dom->def->name);
6024
        qemuDomainObjDiscardAsyncJob(driver, dom);
6025 6026 6027
    }

    VIR_DEBUG("Killing domain");
6028

6029 6030 6031 6032 6033
    if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
        goto cleanup;

    qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
                    QEMU_ASYNC_JOB_NONE, stopFlags);
6034

6035
    virDomainAuditStop(dom, "destroyed");
6036
    event = virDomainEventLifecycleNewFromObj(dom,
6037 6038
                                     VIR_DOMAIN_EVENT_STOPPED,
                                     VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
6039

6040 6041
    qemuDomainObjEndJob(driver, dom);

6042
    qemuDomainRemoveInactive(driver, dom);
6043

6044
    qemuDomainEventQueue(driver, event);
6045

6046
 cleanup:
6047
    virDomainObjEndAPI(&dom);
6048
    return dom;
6049 6050
}

6051
int qemuProcessAutoDestroyAdd(virQEMUDriverPtr driver,
6052 6053 6054
                              virDomainObjPtr vm,
                              virConnectPtr conn)
{
6055
    VIR_DEBUG("vm=%s, conn=%p", vm->def->name, conn);
6056 6057
    return virCloseCallbacksSet(driver->closeCallbacks, vm, conn,
                                qemuProcessAutoDestroy);
6058 6059
}

6060
int qemuProcessAutoDestroyRemove(virQEMUDriverPtr driver,
6061 6062
                                 virDomainObjPtr vm)
{
6063
    int ret;
6064
    VIR_DEBUG("vm=%s", vm->def->name);
6065 6066 6067
    ret = virCloseCallbacksUnset(driver->closeCallbacks, vm,
                                 qemuProcessAutoDestroy);
    return ret;
6068
}
6069

6070
bool qemuProcessAutoDestroyActive(virQEMUDriverPtr driver,
6071 6072
                                  virDomainObjPtr vm)
{
6073
    virCloseCallback cb;
6074
    VIR_DEBUG("vm=%s", vm->def->name);
6075
    cb = virCloseCallbacksGet(driver->closeCallbacks, vm, NULL);
6076
    return cb == qemuProcessAutoDestroy;
6077
}