/*
 * qemu_process.c: QEMU process management
 *
 * Copyright (C) 2006-2016 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 */

#include <config.h>

#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/stat.h>
#if defined(__linux__)
# include <linux/capability.h>
#elif defined(__FreeBSD__)
# include <sys/param.h>
# include <sys/cpuset.h>
#endif

#include "qemu_process.h"
#include "qemu_processpriv.h"
#include "qemu_alias.h"
#include "qemu_domain.h"
#include "qemu_domain_address.h"
#include "qemu_cgroup.h"
#include "qemu_capabilities.h"
#include "qemu_monitor.h"
#include "qemu_command.h"
#include "qemu_hostdev.h"
#include "qemu_hotplug.h"
#include "qemu_migration.h"
#include "qemu_interface.h"

#include "cpu/cpu.h"
#include "datatypes.h"
#include "virlog.h"
#include "virerror.h"
#include "viralloc.h"
#include "virhook.h"
#include "virfile.h"
#include "virpidfile.h"
#include "virhostcpu.h"
#include "domain_audit.h"
#include "domain_nwfilter.h"
#include "locking/domain_lock.h"
#include "network/bridge_driver.h"
#include "viruuid.h"
#include "virprocess.h"
#include "virtime.h"
#include "virnetdevtap.h"
#include "virnetdevopenvswitch.h"
#include "virnetdevmidonet.h"
#include "virbitmap.h"
#include "viratomic.h"
#include "virnuma.h"
#include "virstring.h"
#include "virhostdev.h"
#include "secret_util.h"
#include "storage/storage_driver.h"
#include "configmake.h"
#include "nwfilter_conf.h"
#include "netdev_bandwidth_conf.h"

#define VIR_FROM_THIS VIR_FROM_QEMU

VIR_LOG_INIT("qemu.qemu_process");

83
/**
84
 * qemuProcessRemoveDomainStatus
85 86 87 88 89 90
 *
 * remove all state files of a domain from statedir
 *
 * Returns 0 on success
 */
static int
91
qemuProcessRemoveDomainStatus(virQEMUDriverPtr driver,
92 93 94 95
                              virDomainObjPtr vm)
{
    char ebuf[1024];
    char *file = NULL;
96
    qemuDomainObjPrivatePtr priv = vm->privateData;
97 98
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    int ret = -1;
99

100
    if (virAsprintf(&file, "%s/%s.xml", cfg->stateDir, vm->def->name) < 0)
101
        goto cleanup;
102 103 104 105 106 107

    if (unlink(file) < 0 && errno != ENOENT && errno != ENOTDIR)
        VIR_WARN("Failed to remove domain XML for %s: %s",
                 vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));
    VIR_FREE(file);

108 109 110
    if (priv->pidfile &&
        unlink(priv->pidfile) < 0 &&
        errno != ENOENT)
111 112 113
        VIR_WARN("Failed to remove PID file for %s: %s",
                 vm->def->name, virStrerror(errno, ebuf, sizeof(ebuf)));

114
    ret = 0;
115
 cleanup:
116 117
    virObjectUnref(cfg);
    return ret;
118 119 120 121
}


/* XXX figure out how to remove this */
122
extern virQEMUDriverPtr qemu_driver;
123

D
Daniel P. Berrange 已提交
124 125 126 127 128 129 130
/*
 * This is a callback registered with a qemuAgentPtr instance,
 * and to be invoked when the agent console hits an end of file
 * condition, or error, thus indicating VM shutdown should be
 * performed
 */
static void
131
qemuProcessHandleAgentEOF(qemuAgentPtr agent,
D
Daniel P. Berrange 已提交
132 133 134 135 136 137
                          virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv;

    VIR_DEBUG("Received EOF from agent on %p '%s'", vm, vm->def->name);

138
    virObjectLock(vm);
D
Daniel P. Berrange 已提交
139 140

    priv = vm->privateData;
141 142 143 144 145 146 147 148 149 150 151

    if (!priv->agent) {
        VIR_DEBUG("Agent freed already");
        goto unlock;
    }

    if (priv->beingDestroyed) {
        VIR_DEBUG("Domain is being destroyed, agent EOF is expected");
        goto unlock;
    }

152
    qemuAgentClose(agent);
153
    priv->agent = NULL;
D
Daniel P. Berrange 已提交
154

155
    virObjectUnlock(vm);
156 157
    return;

158
 unlock:
159 160
    virObjectUnlock(vm);
    return;
D
Daniel P. Berrange 已提交
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
}


/*
 * This is invoked when there is some kind of error
 * parsing data to/from the agent. The VM can continue
 * to run, but no further agent commands will be
 * allowed. The handler only records the condition by
 * setting 'agentError' in the domain's private data.
 */
static void
qemuProcessHandleAgentError(qemuAgentPtr agent ATTRIBUTE_UNUSED,
                            virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv;

    VIR_DEBUG("Received error from agent on %p '%s'", vm, vm->def->name);

    virObjectLock(vm);

    priv = vm->privateData;

    priv->agentError = true;

    virObjectUnlock(vm);
}

/*
 * Agent 'destroy' callback: drops one reference on 'vm'.
 * qemuConnectAgent() takes an extra reference on the domain
 * before opening the agent; presumably this is where that
 * reference is given back.
 */
static void
qemuProcessHandleAgentDestroy(qemuAgentPtr agent,
                              virDomainObjPtr vm)
{
    VIR_DEBUG("Received destroy agent=%p vm=%p", agent, vm);

    virObjectUnref(vm);
}


/* Callback table handed to qemuAgentOpen() by qemuConnectAgent() */
static qemuAgentCallbacks agentCallbacks = {
    .eofNotify = qemuProcessHandleAgentEOF,
    .errorNotify = qemuProcessHandleAgentError,
    .destroy = qemuProcessHandleAgentDestroy,
};


203
int
204
qemuConnectAgent(virQEMUDriverPtr driver, virDomainObjPtr vm)
D
Daniel P. Berrange 已提交
205 206 207 208
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret = -1;
    qemuAgentPtr agent = NULL;
209
    virDomainChrDefPtr config = qemuFindAgentConfig(vm->def);
D
Daniel P. Berrange 已提交
210 211 212 213

    if (!config)
        return 0;

214 215 216 217 218 219 220 221 222
    if (priv->agent)
        return 0;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VSERPORT_CHANGE) &&
        config->state != VIR_DOMAIN_CHR_DEVICE_STATE_CONNECTED) {
        VIR_DEBUG("Deferring connecting to guest agent");
        return 0;
    }

D
Daniel P. Berrange 已提交
223 224 225 226 227 228 229 230 231
    if (virSecurityManagerSetDaemonSocketLabel(driver->securityManager,
                                               vm->def) < 0) {
        VIR_ERROR(_("Failed to set security context for agent for %s"),
                  vm->def->name);
        goto cleanup;
    }

    /* Hold an extra reference because we can't allow 'vm' to be
     * deleted while the agent is active */
232
    virObjectRef(vm);
D
Daniel P. Berrange 已提交
233 234

    ignore_value(virTimeMillisNow(&priv->agentStart));
235
    virObjectUnlock(vm);
D
Daniel P. Berrange 已提交
236 237

    agent = qemuAgentOpen(vm,
238
                          &config->source,
D
Daniel P. Berrange 已提交
239 240
                          &agentCallbacks);

241
    virObjectLock(vm);
D
Daniel P. Berrange 已提交
242 243
    priv->agentStart = 0;

244 245 246 247 248 249 250 251 252 253 254
    if (agent == NULL)
        virObjectUnref(vm);

    if (!virDomainObjIsActive(vm)) {
        qemuAgentClose(agent);
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("guest crashed while connecting to the guest agent"));
        ret = -2;
        goto cleanup;
    }

D
Daniel P. Berrange 已提交
255 256 257 258
    if (virSecurityManagerClearSocketLabel(driver->securityManager,
                                           vm->def) < 0) {
        VIR_ERROR(_("Failed to clear security context for agent for %s"),
                  vm->def->name);
259
        qemuAgentClose(agent);
D
Daniel P. Berrange 已提交
260 261 262 263 264 265 266 267 268 269 270 271 272
        goto cleanup;
    }


    priv->agent = agent;

    if (priv->agent == NULL) {
        VIR_INFO("Failed to connect agent for %s", vm->def->name);
        goto cleanup;
    }

    ret = 0;

273
 cleanup:
D
Daniel P. Berrange 已提交
274 275 276 277
    return ret;
}


278
/*
279
 * This is a callback registered with a qemuMonitorPtr instance,
280 281 282 283 284 285
 * and to be invoked when the monitor console hits an end of file
 * condition, or error, thus indicating VM shutdown should be
 * performed
 */
static void
qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
286 287
                            virDomainObjPtr vm,
                            void *opaque)
288
{
289
    virQEMUDriverPtr driver = opaque;
290
    qemuDomainObjPrivatePtr priv;
291
    struct qemuProcessEvent *processEvent;
292

293
    virObjectLock(vm);
294

295
    VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name);
296

297
    priv = vm->privateData;
298 299
    if (priv->beingDestroyed) {
        VIR_DEBUG("Domain is being destroyed, EOF is expected");
300
        goto cleanup;
301 302
    }

303
    if (VIR_ALLOC(processEvent) < 0)
304
        goto cleanup;
305

306 307
    processEvent->eventType = QEMU_PROCESS_EVENT_MONITOR_EOF;
    processEvent->vm = vm;
308

309 310 311 312 313
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
        ignore_value(virObjectUnref(vm));
        VIR_FREE(processEvent);
        goto cleanup;
314
    }
315

316 317 318 319
    /* We don't want this EOF handler to be called over and over while the
     * thread is waiting for a job.
     */
    qemuMonitorUnregister(mon);
320

321
 cleanup:
322
    virObjectUnlock(vm);
323 324 325 326 327 328 329 330 331 332 333
}


/*
 * This is invoked when there is some kind of error
 * parsing data to/from the monitor. The VM can continue
 * to run, but no further monitor commands will be
 * allowed
 */
static void
qemuProcessHandleMonitorError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
334 335
                              virDomainObjPtr vm,
                              void *opaque)
336
{
337
    virQEMUDriverPtr driver = opaque;
338
    virObjectEventPtr event = NULL;
339 340 341

    VIR_DEBUG("Received error on %p '%s'", vm, vm->def->name);

342
    virObjectLock(vm);
343

344
    ((qemuDomainObjPrivatePtr) vm->privateData)->monError = true;
345
    event = virDomainEventControlErrorNewFromObj(vm);
346
    qemuDomainEventQueue(driver, event);
347

348
    virObjectUnlock(vm);
349 350 351
}


352
virDomainDiskDefPtr
353 354 355
qemuProcessFindDomainDiskByAlias(virDomainObjPtr vm,
                                 const char *alias)
{
356
    size_t i;
357

358
    alias = qemuAliasDiskDriveSkipPrefix(alias);
359 360 361 362 363 364 365 366 367

    for (i = 0; i < vm->def->ndisks; i++) {
        virDomainDiskDefPtr disk;

        disk = vm->def->disks[i];
        if (disk->info.alias != NULL && STREQ(disk->info.alias, alias))
            return disk;
    }

368 369 370
    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("no disk found with alias %s"),
                   alias);
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
    return NULL;
}

/**
 * qemuProcessGetVolumeQcowPassphrase:
 * @conn: connection used to look up the secret
 * @disk: disk whose source carries the encryption information
 * @secretRet: filled with a newly allocated, NUL-terminated copy of
 *             the passphrase on success
 * @secretLen: filled with the passphrase length, not counting the
 *             terminating NUL
 *
 * Fetch the qcow passphrase of @disk. The disk must use
 * VIR_STORAGE_ENCRYPTION_FORMAT_QCOW with exactly one secret of type
 * passphrase, and the secret value must not contain a NUL byte. The
 * intermediate buffer holding the secret is zeroed before it is freed.
 *
 * Returns 0 on success, -1 on failure with an error reported for the
 * checks performed here (@secretRet and @secretLen are left untouched
 * on failure).
 */
static int
qemuProcessGetVolumeQcowPassphrase(virConnectPtr conn,
                                   virDomainDiskDefPtr disk,
                                   char **secretRet,
                                   size_t *secretLen)
{
    char *passphrase = NULL;
    unsigned char *data = NULL;
    size_t size = 0;
    int ret = -1;
    virStorageEncryptionPtr enc;

    if (!disk->src->encryption) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("disk %s does not have any encryption information"),
                       disk->src->path);
        return -1;
    }
    enc = disk->src->encryption;

    if (!conn) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("cannot find secrets without a connection"));
        goto cleanup;
    }

    if (conn->secretDriver == NULL ||
        conn->secretDriver->secretLookupByUUID == NULL ||
        conn->secretDriver->secretGetValue == NULL) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("secret storage not supported"));
        goto cleanup;
    }

    if (enc->format != VIR_STORAGE_ENCRYPTION_FORMAT_QCOW ||
        enc->nsecrets != 1 ||
        enc->secrets[0]->type !=
        VIR_STORAGE_ENCRYPTION_SECRET_TYPE_PASSPHRASE) {
        virReportError(VIR_ERR_XML_ERROR,
                       _("invalid <encryption> for volume %s"),
                       virDomainDiskGetSource(disk));
        goto cleanup;
    }

    if (virSecretGetSecretString(conn, &enc->secrets[0]->seclookupdef,
                                 VIR_SECRET_USAGE_TYPE_VOLUME,
                                 &data, &size) < 0)
        goto cleanup;

    /* The passphrase is handed on as a C string, so an embedded NUL
     * would silently truncate it */
    if (memchr(data, '\0', size) != NULL) {
        memset(data, 0, size);
        VIR_FREE(data);
        virReportError(VIR_ERR_XML_ERROR,
                       _("format='qcow' passphrase for %s must not contain a "
                         "'\\0'"), virDomainDiskGetSource(disk));
        goto cleanup;
    }

    if (VIR_ALLOC_N(passphrase, size + 1) < 0) {
        memset(data, 0, size);
        VIR_FREE(data);
        goto cleanup;
    }
    memcpy(passphrase, data, size);
    passphrase[size] = '\0';

    /* Scrub the raw secret before releasing it */
    memset(data, 0, size);
    VIR_FREE(data);

    *secretRet = passphrase;
    *secretLen = size;

    ret = 0;

 cleanup:
    return ret;
}

/*
 * Monitor callback: look up the disk of 'vm' identified by 'path'
 * and fetch its qcow passphrase through
 * qemuProcessGetVolumeQcowPassphrase(). The domain is locked for
 * the duration of the lookup.
 *
 * Returns 0 on success with 'secretRet' / 'secretLen' filled in,
 * -1 on failure (including when no disk matches 'path').
 */
static int
qemuProcessFindVolumeQcowPassphrase(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                    virConnectPtr conn,
                                    virDomainObjPtr vm,
                                    const char *path,
                                    char **secretRet,
                                    size_t *secretLen,
                                    void *opaque ATTRIBUTE_UNUSED)
{
    virDomainDiskDefPtr disk;
    int ret = -1;

    virObjectLock(vm);
    if (!(disk = virDomainDiskByName(vm->def, path, true))) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("no disk found with path %s"),
                       path);
        goto cleanup;
    }

    ret = qemuProcessGetVolumeQcowPassphrase(conn, disk, secretRet, secretLen);

 cleanup:
    virObjectUnlock(vm);
    return ret;
}


static int
qemuProcessHandleReset(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
482 483
                       virDomainObjPtr vm,
                       void *opaque)
484
{
485
    virQEMUDriverPtr driver = opaque;
486
    virObjectEventPtr event;
487
    qemuDomainObjPrivatePtr priv;
488
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
489

490
    virObjectLock(vm);
491

492
    event = virDomainEventRebootNewFromObj(vm);
493 494 495
    priv = vm->privateData;
    if (priv->agent)
        qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_RESET);
496

497
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
498
        VIR_WARN("Failed to save status on vm %s", vm->def->name);
499

500
    virObjectUnlock(vm);
501

502
    qemuDomainEventQueue(driver, event);
503

504
    virObjectUnref(cfg);
505 506 507 508
    return 0;
}


509 510 511 512 513 514 515 516 517 518 519
/*
 * Since we have the '-no-shutdown' flag set, the
 * QEMU process will currently have guest OS shutdown
 * and the CPUS stopped. To fake the reboot, we thus
 * want todo a reset of the virtual hardware, followed
 * by restart of the CPUs. This should result in the
 * guest OS booting up again
 */
static void
qemuProcessFakeReboot(void *opaque)
{
520
    virQEMUDriverPtr driver = qemu_driver;
521 522
    virDomainObjPtr vm = opaque;
    qemuDomainObjPrivatePtr priv = vm->privateData;
523
    virObjectEventPtr event = NULL;
524
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
525
    virDomainRunningReason reason = VIR_DOMAIN_RUNNING_BOOTED;
526
    int ret = -1, rc;
527

528
    VIR_DEBUG("vm=%p", vm);
529
    virObjectLock(vm);
530
    if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0)
531 532 533
        goto cleanup;

    if (!virDomainObjIsActive(vm)) {
534 535
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("guest unexpectedly quit"));
536 537 538
        goto endjob;
    }

539
    qemuDomainObjEnterMonitor(driver, vm);
540 541 542
    rc = qemuMonitorSystemReset(priv->mon);

    if (qemuDomainObjExitMonitor(driver, vm) < 0)
543 544
        goto endjob;

545
    if (rc < 0)
546 547
        goto endjob;

548 549 550
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_CRASHED)
        reason = VIR_DOMAIN_RUNNING_CRASHED;

551
    if (qemuProcessStartCPUs(driver, vm, NULL,
552
                             reason,
553
                             QEMU_ASYNC_JOB_NONE) < 0) {
554
        if (virGetLastError() == NULL)
555 556
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("resume operation failed"));
557 558
        goto endjob;
    }
559
    priv->gotShutdown = false;
560
    event = virDomainEventLifecycleNewFromObj(vm,
561 562 563
                                     VIR_DOMAIN_EVENT_RESUMED,
                                     VIR_DOMAIN_EVENT_RESUMED_UNPAUSED);

564
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
565 566 567 568
        VIR_WARN("Unable to save status on vm %s after state change",
                 vm->def->name);
    }

569 570
    ret = 0;

571
 endjob:
572
    qemuDomainObjEndJob(driver, vm);
573

574
 cleanup:
575 576
    if (ret == -1)
        ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_FORCE));
M
Michal Privoznik 已提交
577
    virDomainObjEndAPI(&vm);
578
    qemuDomainEventQueue(driver, event);
579
    virObjectUnref(cfg);
580 581 582
}


583
void
584
qemuProcessShutdownOrReboot(virQEMUDriverPtr driver,
585
                            virDomainObjPtr vm)
586
{
587 588 589
    qemuDomainObjPrivatePtr priv = vm->privateData;

    if (priv->fakeReboot) {
590
        qemuDomainSetFakeReboot(driver, vm, false);
591
        virObjectRef(vm);
592 593 594 595 596
        virThread th;
        if (virThreadCreate(&th,
                            false,
                            qemuProcessFakeReboot,
                            vm) < 0) {
597
            VIR_ERROR(_("Failed to create reboot thread, killing domain"));
598
            ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
599
            virObjectUnref(vm);
600 601
        }
    } else {
602
        ignore_value(qemuProcessKill(vm, VIR_QEMU_PROCESS_KILL_NOWAIT));
603
    }
604
}
605

606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626

static int
qemuProcessHandleEvent(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                       virDomainObjPtr vm,
                       const char *eventName,
                       long long seconds,
                       unsigned int micros,
                       const char *details,
                       void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr event = NULL;

    VIR_DEBUG("vm=%p", vm);

    virObjectLock(vm);
    event = virDomainQemuMonitorEventNew(vm->def->id, vm->def->name,
                                         vm->def->uuid, eventName,
                                         seconds, micros, details);

    virObjectUnlock(vm);
627
    qemuDomainEventQueue(driver, event);
628 629 630 631 632

    return 0;
}


633 634
static int
qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
635 636
                          virDomainObjPtr vm,
                          void *opaque)
637
{
638
    virQEMUDriverPtr driver = opaque;
639
    qemuDomainObjPrivatePtr priv;
640
    virObjectEventPtr event = NULL;
641
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
642

643 644
    VIR_DEBUG("vm=%p", vm);

645
    virObjectLock(vm);
646 647 648 649 650 651

    priv = vm->privateData;
    if (priv->gotShutdown) {
        VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s",
                  vm->def->name);
        goto unlock;
652 653 654 655
    } else if (!virDomainObjIsActive(vm)) {
        VIR_DEBUG("Ignoring SHUTDOWN event from inactive domain %s",
                  vm->def->name);
        goto unlock;
656 657 658 659 660 661 662 663
    }
    priv->gotShutdown = true;

    VIR_DEBUG("Transitioned guest %s to shutdown state",
              vm->def->name);
    virDomainObjSetState(vm,
                         VIR_DOMAIN_SHUTDOWN,
                         VIR_DOMAIN_SHUTDOWN_UNKNOWN);
664
    event = virDomainEventLifecycleNewFromObj(vm,
665 666 667
                                     VIR_DOMAIN_EVENT_SHUTDOWN,
                                     VIR_DOMAIN_EVENT_SHUTDOWN_FINISHED);

668
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
669 670 671 672
        VIR_WARN("Unable to save status on vm %s after state change",
                 vm->def->name);
    }

673 674 675
    if (priv->agent)
        qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SHUTDOWN);

676 677
    qemuProcessShutdownOrReboot(driver, vm);

678
 unlock:
679
    virObjectUnlock(vm);
680
    qemuDomainEventQueue(driver, event);
681
    virObjectUnref(cfg);
682

683 684 685 686 687 688
    return 0;
}


static int
qemuProcessHandleStop(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
689 690
                      virDomainObjPtr vm,
                      void *opaque)
691
{
692
    virQEMUDriverPtr driver = opaque;
693
    virObjectEventPtr event = NULL;
694 695
    virDomainPausedReason reason = VIR_DOMAIN_PAUSED_UNKNOWN;
    virDomainEventSuspendedDetailType detail = VIR_DOMAIN_EVENT_SUSPENDED_PAUSED;
696
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
697

698
    virObjectLock(vm);
J
Jiri Denemark 已提交
699
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
700
        qemuDomainObjPrivatePtr priv = vm->privateData;
701

702
        if (priv->gotShutdown) {
703 704
            VIR_DEBUG("Ignoring STOP event after SHUTDOWN");
            goto unlock;
705 706
        }

707
        if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) {
708 709 710 711 712 713 714 715
            if (priv->job.current->stats.status ==
                        QEMU_MONITOR_MIGRATION_STATUS_POSTCOPY) {
                reason = VIR_DOMAIN_PAUSED_POSTCOPY;
                detail = VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY;
            } else {
                reason = VIR_DOMAIN_PAUSED_MIGRATION;
                detail = VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED;
            }
716 717 718 719
        }

        VIR_DEBUG("Transitioned guest %s to paused state, reason %s",
                  vm->def->name, virDomainPausedReasonTypeToString(reason));
720

721 722 723
        if (priv->job.current)
            ignore_value(virTimeMillisNow(&priv->job.current->stopped));

724 725 726 727
        if (priv->signalStop)
            virDomainObjBroadcast(vm);

        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
728
        event = virDomainEventLifecycleNewFromObj(vm,
729 730
                                                  VIR_DOMAIN_EVENT_SUSPENDED,
                                                  detail);
731

732 733 734 735 736
        VIR_FREE(priv->lockState);
        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
            VIR_WARN("Unable to release lease on %s", vm->def->name);
        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

737
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
738 739 740
            VIR_WARN("Unable to save status on vm %s after state change",
                     vm->def->name);
        }
741
    }
742

743
 unlock:
744
    virObjectUnlock(vm);
745
    qemuDomainEventQueue(driver, event);
746
    virObjectUnref(cfg);
747 748 749 750 751

    return 0;
}


752 753
static int
qemuProcessHandleResume(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
754 755
                        virDomainObjPtr vm,
                        void *opaque)
756
{
757
    virQEMUDriverPtr driver = opaque;
758
    virObjectEventPtr event = NULL;
759
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
760

761
    virObjectLock(vm);
762 763 764 765 766 767 768 769 770 771 772 773 774
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) {
        qemuDomainObjPrivatePtr priv = vm->privateData;

        if (priv->gotShutdown) {
            VIR_DEBUG("Ignoring RESUME event after SHUTDOWN");
            goto unlock;
        }

        VIR_DEBUG("Transitioned guest %s out of paused into resumed state",
                  vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                                 VIR_DOMAIN_RUNNING_UNPAUSED);
775
        event = virDomainEventLifecycleNewFromObj(vm,
776 777 778 779
                                         VIR_DOMAIN_EVENT_RESUMED,
                                         VIR_DOMAIN_EVENT_RESUMED_UNPAUSED);

        VIR_DEBUG("Using lock state '%s' on resume event", NULLSTR(priv->lockState));
780
        if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
781 782 783 784 785 786 787 788 789
                                       vm, priv->lockState) < 0) {
            /* Don't free priv->lockState on error, because we need
             * to make sure we have state still present if the user
             * tries to resume again
             */
            goto unlock;
        }
        VIR_FREE(priv->lockState);

790
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
791 792 793 794 795
            VIR_WARN("Unable to save status on vm %s after state change",
                     vm->def->name);
        }
    }

796
 unlock:
797
    virObjectUnlock(vm);
798
    qemuDomainEventQueue(driver, event);
799
    virObjectUnref(cfg);
800 801 802
    return 0;
}

803 804 805
static int
qemuProcessHandleRTCChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                           virDomainObjPtr vm,
806 807
                           long long offset,
                           void *opaque)
808
{
809
    virQEMUDriverPtr driver = opaque;
810
    virObjectEventPtr event = NULL;
811
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
812

813
    virObjectLock(vm);
814

815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
    if (vm->def->clock.offset == VIR_DOMAIN_CLOCK_OFFSET_VARIABLE) {
        /* when a basedate is manually given on the qemu commandline
         * rather than simply "-rtc base=utc", the offset sent by qemu
         * in this event is *not* the new offset from UTC, but is
         * instead the new offset from the *original basedate* +
         * uptime. For example, if the original offset was 3600 and
         * the guest clock has been advanced by 10 seconds, qemu will
         * send "10" in the event - this means that the new offset
         * from UTC is 3610, *not* 10. If the guest clock is advanced
         * by another 10 seconds, qemu will now send "20" - i.e. each
         * event is the sum of the most recent change and all previous
         * changes since the domain was started. Fortunately, we have
         * saved the initial offset in "adjustment0", so to arrive at
         * the proper new "adjustment", we just add the most recent
         * offset to adjustment0.
         */
        offset += vm->def->clock.data.variable.adjustment0;
832
        vm->def->clock.data.variable.adjustment = offset;
833

834
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
835 836 837 838
           VIR_WARN("unable to save domain status with RTC change");
    }

    event = virDomainEventRTCChangeNewFromObj(vm, offset);
839

840
    virObjectUnlock(vm);
841

842
    qemuDomainEventQueue(driver, event);
843
    virObjectUnref(cfg);
844 845 846 847 848 849 850
    return 0;
}


/*
 * Monitor callback for a watchdog event. A watchdog event carrying
 * 'action' is always created. If the action is "pause" and the
 * domain is RUNNING, the domain is additionally moved to the PAUSED
 * state (reason: watchdog), a SUSPENDED lifecycle event is created,
 * the lock manager leases are paused and the domain status is saved.
 * If the configured watchdog action is "dump", a
 * QEMU_PROCESS_EVENT_WATCHDOG job is queued on the driver's worker
 * pool with its own reference on 'vm'.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleWatchdog(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                          virDomainObjPtr vm,
                          int action,
                          void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr watchdogEvent = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);
    watchdogEvent = virDomainEventWatchdogNewFromObj(vm, action);

    if (action == VIR_DOMAIN_EVENT_WATCHDOG_PAUSE &&
        virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        qemuDomainObjPrivatePtr priv = vm->privateData;
        VIR_DEBUG("Transitioned guest %s to paused state due to watchdog", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_WATCHDOG);
        lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
                                                  VIR_DOMAIN_EVENT_SUSPENDED,
                                                  VIR_DOMAIN_EVENT_SUSPENDED_WATCHDOG);

        VIR_FREE(priv->lockState);
        if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
            VIR_WARN("Unable to release lease on %s", vm->def->name);
        VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
            VIR_WARN("Unable to save status on vm %s after watchdog event",
                     vm->def->name);
        }
    }

    /* NOTE(review): vm->def->watchdog is dereferenced unchecked;
     * presumably a watchdog event implies a configured watchdog
     * device - confirm */
    if (vm->def->watchdog->action == VIR_DOMAIN_WATCHDOG_ACTION_DUMP) {
        struct qemuProcessEvent *processEvent;
        if (VIR_ALLOC(processEvent) == 0) {
            processEvent->eventType = QEMU_PROCESS_EVENT_WATCHDOG;
            processEvent->action = VIR_DOMAIN_WATCHDOG_ACTION_DUMP;
            processEvent->vm = vm;
            /* Hold an extra reference because we can't allow 'vm' to be
             * deleted before handling watchdog event is finished.
             */
            virObjectRef(vm);
            if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
                /* If dropping the reference disposed of the object,
                 * it must not be unlocked below */
                if (!virObjectUnref(vm))
                    vm = NULL;
                VIR_FREE(processEvent);
            }
        }
    }

    if (vm)
        virObjectUnlock(vm);
    qemuDomainEventQueue(driver, watchdogEvent);
    qemuDomainEventQueue(driver, lifecycleEvent);

    virObjectUnref(cfg);
    return 0;
}


/*
 * Monitor callback for block I/O error notifications.
 *
 * Two IO error events are always queued: one without and one with @reason.
 * If @action is VIR_DOMAIN_EVENT_IO_ERROR_PAUSE and the domain is still
 * recorded as running, the domain state is switched to paused, a lifecycle
 * event is queued, the lock manager lease is released and the status file
 * is rewritten.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleIOError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                         virDomainObjPtr vm,
                         const char *diskAlias,
                         int action,
                         const char *reason,
                         void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    virObjectEventPtr plainEvent = NULL;
    virObjectEventPtr reasonEvent = NULL;
    virObjectEventPtr pausedEvent = NULL;
    /* A disk that cannot be found is reported with empty strings. */
    const char *srcPath = "";
    const char *devAlias = "";
    virDomainDiskDefPtr disk;
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    if ((disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias))) {
        srcPath = virDomainDiskGetSource(disk);
        devAlias = disk->info.alias;
    }

    plainEvent = virDomainEventIOErrorNewFromObj(vm, srcPath, devAlias, action);
    reasonEvent = virDomainEventIOErrorReasonNewFromObj(vm, srcPath, devAlias, action, reason);

    /* Only a pause request on a running domain changes the domain state. */
    if (action != VIR_DOMAIN_EVENT_IO_ERROR_PAUSE ||
        virDomainObjGetState(vm, NULL) != VIR_DOMAIN_RUNNING)
        goto unlock;

    priv = vm->privateData;
    VIR_DEBUG("Transitioned guest %s to paused state due to IO error", vm->def->name);

    /* Wake up anybody waiting on the domain condition for an IO error. */
    if (priv->signalIOError)
        virDomainObjBroadcast(vm);

    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_IOERROR);
    pausedEvent = virDomainEventLifecycleNewFromObj(vm,
                                                    VIR_DOMAIN_EVENT_SUSPENDED,
                                                    VIR_DOMAIN_EVENT_SUSPENDED_IOERROR);

    /* Drop any stale lock state before asking the lock manager for a new one. */
    VIR_FREE(priv->lockState);
    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
        VIR_WARN("Unable to release lease on %s", vm->def->name);
    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));

    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
        VIR_WARN("Unable to save status on vm %s after IO error", vm->def->name);

 unlock:
    virObjectUnlock(vm);

    /* Events are queued only after the domain lock has been released. */
    qemuDomainEventQueue(driver, plainEvent);
    qemuDomainEventQueue(driver, reasonEvent);
    qemuDomainEventQueue(driver, pausedEvent);
    virObjectUnref(cfg);
    return 0;
}

972 973 974 975 976
/*
 * Monitor callback for block job notifications.
 *
 * When the disk identified by @diskAlias has a synchronous API waiting
 * (blockJobSync is set), @type and @status are stored in the disk private
 * data and waiters on the domain are woken up.  Otherwise the update is
 * packaged into a qemuProcessEvent and handed to the driver's worker pool.
 *
 * Always returns 0; failures simply drop the notification.
 */
static int
qemuProcessHandleBlockJob(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                          virDomainObjPtr vm,
                          const char *diskAlias,
                          int type,
                          int status,
                          void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    struct qemuProcessEvent *job = NULL;
    virDomainDiskDefPtr disk;
    qemuDomainDiskPrivatePtr diskPriv;
    char *aliasCopy = NULL;

    virObjectLock(vm);

    VIR_DEBUG("Block job for device %s (domain: %p,%s) type %d status %d",
              diskAlias, vm, vm->def->name, type, status);

    disk = qemuProcessFindDomainDiskByAlias(vm, diskAlias);
    if (!disk)
        goto cleanup;
    diskPriv = QEMU_DOMAIN_DISK_PRIVATE(disk);

    if (diskPriv->blockJobSync) {
        /* A synchronous API is waiting for this event: hand it over directly. */
        diskPriv->blockJobType = type;
        diskPriv->blockJobStatus = status;
        virDomainObjBroadcast(vm);
        goto cleanup;
    }

    /* Nobody is waiting synchronously, so process the update in a worker. */
    if (VIR_ALLOC(job) < 0)
        goto cleanup;

    if (VIR_STRDUP(aliasCopy, diskAlias) < 0) {
        VIR_FREE(job);
        goto cleanup;
    }

    job->eventType = QEMU_PROCESS_EVENT_BLOCK_JOB;
    job->data = aliasCopy;
    job->vm = vm;
    job->action = type;
    job->status = status;

    /* The queued job carries its own reference on the domain object. */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, job) < 0) {
        ignore_value(virObjectUnref(vm));
        VIR_FREE(job->data);
        VIR_FREE(job);
    }

 cleanup:
    virObjectUnlock(vm);
    return 0;
}
1029

1030

1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
static int
qemuProcessHandleGraphics(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                          virDomainObjPtr vm,
                          int phase,
                          int localFamily,
                          const char *localNode,
                          const char *localService,
                          int remoteFamily,
                          const char *remoteNode,
                          const char *remoteService,
                          const char *authScheme,
                          const char *x509dname,
1043 1044
                          const char *saslUsername,
                          void *opaque)
1045
{
1046
    virQEMUDriverPtr driver = opaque;
1047
    virObjectEventPtr event;
1048 1049 1050
    virDomainEventGraphicsAddressPtr localAddr = NULL;
    virDomainEventGraphicsAddressPtr remoteAddr = NULL;
    virDomainEventGraphicsSubjectPtr subject = NULL;
1051
    size_t i;
1052 1053

    if (VIR_ALLOC(localAddr) < 0)
1054
        goto error;
1055
    localAddr->family = localFamily;
1056 1057 1058
    if (VIR_STRDUP(localAddr->service, localService) < 0 ||
        VIR_STRDUP(localAddr->node, localNode) < 0)
        goto error;
1059 1060

    if (VIR_ALLOC(remoteAddr) < 0)
1061
        goto error;
1062
    remoteAddr->family = remoteFamily;
1063 1064 1065
    if (VIR_STRDUP(remoteAddr->service, remoteService) < 0 ||
        VIR_STRDUP(remoteAddr->node, remoteNode) < 0)
        goto error;
1066 1067

    if (VIR_ALLOC(subject) < 0)
1068
        goto error;
1069 1070
    if (x509dname) {
        if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0)
1071
            goto error;
1072
        subject->nidentity++;
1073 1074 1075
        if (VIR_STRDUP(subject->identities[subject->nidentity-1].type, "x509dname") < 0 ||
            VIR_STRDUP(subject->identities[subject->nidentity-1].name, x509dname) < 0)
            goto error;
1076 1077 1078
    }
    if (saslUsername) {
        if (VIR_REALLOC_N(subject->identities, subject->nidentity+1) < 0)
1079
            goto error;
1080
        subject->nidentity++;
1081 1082 1083
        if (VIR_STRDUP(subject->identities[subject->nidentity-1].type, "saslUsername") < 0 ||
            VIR_STRDUP(subject->identities[subject->nidentity-1].name, saslUsername) < 0)
            goto error;
1084 1085
    }

1086
    virObjectLock(vm);
1087
    event = virDomainEventGraphicsNewFromObj(vm, phase, localAddr, remoteAddr, authScheme, subject);
1088
    virObjectUnlock(vm);
1089

1090
    qemuDomainEventQueue(driver, event);
1091 1092 1093

    return 0;

1094
 error:
1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
    if (localAddr) {
        VIR_FREE(localAddr->service);
        VIR_FREE(localAddr->node);
        VIR_FREE(localAddr);
    }
    if (remoteAddr) {
        VIR_FREE(remoteAddr->service);
        VIR_FREE(remoteAddr->node);
        VIR_FREE(remoteAddr);
    }
    if (subject) {
1106
        for (i = 0; i < subject->nidentity; i++) {
1107 1108 1109 1110 1111 1112 1113 1114 1115 1116
            VIR_FREE(subject->identities[i].type);
            VIR_FREE(subject->identities[i].name);
        }
        VIR_FREE(subject->identities);
        VIR_FREE(subject);
    }

    return -1;
}

1117 1118 1119 1120
/*
 * Monitor callback for removable media tray open/close notifications.
 *
 * If @devAlias names a known disk, a tray change event is created, the
 * cached tray status of the disk is updated according to @reason, the
 * domain status is saved and waiters on the domain are woken up.
 * Unknown aliases are ignored.
 *
 * Always returns 0.
 */
static int
qemuProcessHandleTrayChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                            virDomainObjPtr vm,
                            const char *devAlias,
                            int reason,
                            void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr trayEvent = NULL;
    virDomainDiskDefPtr disk;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);

    if (!(disk = qemuProcessFindDomainDiskByAlias(vm, devAlias)))
        goto unlock;

    trayEvent = virDomainEventTrayChangeNewFromObj(vm, devAlias, reason);

    /* Mirror the new tray position in the disk definition. */
    switch (reason) {
    case VIR_DOMAIN_EVENT_TRAY_CHANGE_OPEN:
        disk->tray_status = VIR_DOMAIN_DISK_TRAY_OPEN;
        break;
    case VIR_DOMAIN_EVENT_TRAY_CHANGE_CLOSE:
        disk->tray_status = VIR_DOMAIN_DISK_TRAY_CLOSED;
        break;
    default:
        /* Any other reason leaves the recorded status untouched. */
        break;
    }

    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
        VIR_WARN("Unable to save status on vm %s after tray moved event",
                 vm->def->name);
    }

    virDomainObjBroadcast(vm);

 unlock:
    virObjectUnlock(vm);
    qemuDomainEventQueue(driver, trayEvent);
    virObjectUnref(cfg);
    return 0;
}

O
Osier Yang 已提交
1156 1157
static int
qemuProcessHandlePMWakeup(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1158 1159
                          virDomainObjPtr vm,
                          void *opaque)
O
Osier Yang 已提交
1160
{
1161
    virQEMUDriverPtr driver = opaque;
1162 1163
    virObjectEventPtr event = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
1164
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
O
Osier Yang 已提交
1165

1166
    virObjectLock(vm);
O
Osier Yang 已提交
1167 1168
    event = virDomainEventPMWakeupNewFromObj(vm);

1169 1170 1171 1172 1173 1174 1175 1176 1177
    /* Don't set domain status back to running if it wasn't paused
     * from guest side, otherwise it can just cause confusion.
     */
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PMSUSPENDED) {
        VIR_DEBUG("Transitioned guest %s from pmsuspended to running "
                  "state due to QMP wakeup event", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                             VIR_DOMAIN_RUNNING_WAKEUP);
1178
        lifecycleEvent = virDomainEventLifecycleNewFromObj(vm,
1179 1180 1181
                                                  VIR_DOMAIN_EVENT_STARTED,
                                                  VIR_DOMAIN_EVENT_STARTED_WAKEUP);

1182
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
1183 1184 1185 1186 1187
            VIR_WARN("Unable to save status on vm %s after wakeup event",
                     vm->def->name);
        }
    }

1188
    virObjectUnlock(vm);
1189 1190
    qemuDomainEventQueue(driver, event);
    qemuDomainEventQueue(driver, lifecycleEvent);
1191
    virObjectUnref(cfg);
O
Osier Yang 已提交
1192 1193
    return 0;
}
1194

O
Osier Yang 已提交
1195 1196
static int
qemuProcessHandlePMSuspend(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1197 1198
                           virDomainObjPtr vm,
                           void *opaque)
O
Osier Yang 已提交
1199
{
1200
    virQEMUDriverPtr driver = opaque;
1201 1202
    virObjectEventPtr event = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
1203
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
O
Osier Yang 已提交
1204

1205
    virObjectLock(vm);
O
Osier Yang 已提交
1206 1207
    event = virDomainEventPMSuspendNewFromObj(vm);

1208
    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
1209
        qemuDomainObjPrivatePtr priv = vm->privateData;
1210 1211 1212 1213 1214
        VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
                  "QMP suspend event", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
                             VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
J
Jiri Denemark 已提交
1215
        lifecycleEvent =
1216
            virDomainEventLifecycleNewFromObj(vm,
J
Jiri Denemark 已提交
1217 1218
                                     VIR_DOMAIN_EVENT_PMSUSPENDED,
                                     VIR_DOMAIN_EVENT_PMSUSPENDED_MEMORY);
1219

1220
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
1221 1222 1223
            VIR_WARN("Unable to save status on vm %s after suspend event",
                     vm->def->name);
        }
1224 1225 1226

        if (priv->agent)
            qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
1227 1228
    }

1229
    virObjectUnlock(vm);
O
Osier Yang 已提交
1230

1231 1232
    qemuDomainEventQueue(driver, event);
    qemuDomainEventQueue(driver, lifecycleEvent);
1233
    virObjectUnref(cfg);
O
Osier Yang 已提交
1234 1235 1236
    return 0;
}

1237 1238 1239
/*
 * Monitor callback for memory balloon size change notifications.
 *
 * Queues a balloon change event carrying @actual, stores @actual as the
 * domain's current balloon value and saves the domain status.
 *
 * Always returns 0; a failure to save the status is only logged.
 */
static int
qemuProcessHandleBalloonChange(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               unsigned long long actual,
                               void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr event = NULL;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);

    virObjectLock(vm);
    event = virDomainEventBalloonChangeNewFromObj(vm, actual);

    /* Both values are unsigned long long, so they must be printed with
     * %llu rather than the signed %lld conversion. */
    VIR_DEBUG("Updating balloon from %llu to %llu kb",
              vm->def->mem.cur_balloon, actual);
    vm->def->mem.cur_balloon = actual;

    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
        VIR_WARN("unable to save domain status with balloon change");

    virObjectUnlock(vm);

    qemuDomainEventQueue(driver, event);
    virObjectUnref(cfg);
    return 0;
}

1264 1265
static int
qemuProcessHandlePMSuspendDisk(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1266 1267
                               virDomainObjPtr vm,
                               void *opaque)
1268
{
1269
    virQEMUDriverPtr driver = opaque;
1270 1271
    virObjectEventPtr event = NULL;
    virObjectEventPtr lifecycleEvent = NULL;
1272
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
1273

1274
    virObjectLock(vm);
1275 1276 1277 1278 1279 1280 1281 1282 1283 1284
    event = virDomainEventPMSuspendDiskNewFromObj(vm);

    if (virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING) {
        qemuDomainObjPrivatePtr priv = vm->privateData;
        VIR_DEBUG("Transitioned guest %s to pmsuspended state due to "
                  "QMP suspend_disk event", vm->def->name);

        virDomainObjSetState(vm, VIR_DOMAIN_PMSUSPENDED,
                             VIR_DOMAIN_PMSUSPENDED_UNKNOWN);
        lifecycleEvent =
1285
            virDomainEventLifecycleNewFromObj(vm,
1286 1287 1288
                                     VIR_DOMAIN_EVENT_PMSUSPENDED,
                                     VIR_DOMAIN_EVENT_PMSUSPENDED_DISK);

1289
        if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0) {
1290 1291 1292 1293 1294 1295 1296 1297
            VIR_WARN("Unable to save status on vm %s after suspend event",
                     vm->def->name);
        }

        if (priv->agent)
            qemuAgentNotifyEvent(priv->agent, QEMU_AGENT_EVENT_SUSPEND);
    }

1298
    virObjectUnlock(vm);
1299

1300 1301
    qemuDomainEventQueue(driver, event);
    qemuDomainEventQueue(driver, lifecycleEvent);
1302 1303
    virObjectUnref(cfg);

1304 1305 1306
    return 0;
}

1307

1308 1309
static int
qemuProcessHandleGuestPanic(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
1310 1311
                            virDomainObjPtr vm,
                            void *opaque)
1312
{
1313
    virQEMUDriverPtr driver = opaque;
1314 1315 1316
    struct qemuProcessEvent *processEvent;

    virObjectLock(vm);
1317
    if (VIR_ALLOC(processEvent) < 0)
1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332
        goto cleanup;

    processEvent->eventType = QEMU_PROCESS_EVENT_GUESTPANIC;
    processEvent->action = vm->def->onCrash;
    processEvent->vm = vm;
    /* Hold an extra reference because we can't allow 'vm' to be
     * deleted before handling guest panic event is finished.
     */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
        if (!virObjectUnref(vm))
            vm = NULL;
        VIR_FREE(processEvent);
    }

1333
 cleanup:
1334
    if (vm)
1335
        virObjectUnlock(vm);
1336 1337 1338 1339 1340

    return 0;
}


1341
int
1342 1343
qemuProcessHandleDeviceDeleted(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
1344 1345
                               const char *devAlias,
                               void *opaque)
1346
{
1347
    virQEMUDriverPtr driver = opaque;
1348 1349
    struct qemuProcessEvent *processEvent = NULL;
    char *data;
1350 1351 1352 1353 1354 1355

    virObjectLock(vm);

    VIR_DEBUG("Device %s removed from domain %p %s",
              devAlias, vm, vm->def->name);

1356 1357
    if (qemuDomainSignalDeviceRemoval(vm, devAlias,
                                      QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_OK))
1358
        goto cleanup;
1359

1360 1361
    if (VIR_ALLOC(processEvent) < 0)
        goto error;
1362

1363 1364 1365 1366 1367
    processEvent->eventType = QEMU_PROCESS_EVENT_DEVICE_DELETED;
    if (VIR_STRDUP(data, devAlias) < 0)
        goto error;
    processEvent->data = data;
    processEvent->vm = vm;
1368

1369 1370 1371 1372 1373
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
        ignore_value(virObjectUnref(vm));
        goto error;
    }
1374

1375
 cleanup:
1376 1377
    virObjectUnlock(vm);
    return 0;
1378 1379 1380 1381 1382
 error:
    if (processEvent)
        VIR_FREE(processEvent->data);
    VIR_FREE(processEvent);
    goto cleanup;
1383 1384 1385
}


1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453
/**
 * qemuProcessHandleAcpiOstInfo:
 *
 * Monitor callback for ACPI OST (OSPM Status Indication) notifications.
 *
 * Field meanings according to the ACPI standard:
 *
 * @source:
 *   0x00 - 0xff: notification values, as passed at request time
 *   0x100:       Operating System Shutdown Processing
 *   0x103:       Ejection processing
 *   0x200:       Insertion processing
 *   anything else is reserved
 *
 * @status:
 *   generic values
 *     0x00:        success
 *     0x01:        non-specific failure
 *     0x02:        unrecognized notify code
 *     0x03 - 0x7f: reserved
 *     remaining values depend on the notification type
 *
 *   additional standardized codes for source 0x100
 *     0x80: OS Shutdown request denied
 *     0x81: OS Shutdown in progress
 *     0x82: OS Shutdown completed
 *     0x83: OS Graceful shutdown not supported
 *     anything else is reserved
 *
 * The remaining fields and their semantics are specific to how qemu
 * reports the event:
 *  - @alias may be NULL for successful unplug operations
 *  - @slotType narrows down the device type; 'DIMM' is the only value
 *    known at present
 *  - @slot identifies the specific device
 *
 * Note that qemu does not emit the event for every documented source or
 * device.
 *
 * The only case acted upon here is a DIMM with an alias and status 1: the
 * pending device removal is signalled as rejected by the guest and a
 * "device removal failed" event is queued.  Always returns 0.
 */
static int
qemuProcessHandleAcpiOstInfo(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                             virDomainObjPtr vm,
                             const char *alias,
                             const char *slotType,
                             const char *slot,
                             unsigned int source,
                             unsigned int status,
                             void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    virObjectEventPtr failedEvent = NULL;

    virObjectLock(vm);

    VIR_DEBUG("ACPI OST info for device %s domain %p %s. "
              "slotType='%s' slot='%s' source=%u status=%u",
              NULLSTR(alias), vm, vm->def->name, slotType, slot, source, status);

    /* Everything except a failed memory unplug is ignored. */
    if (!(STREQ(slotType, "DIMM") && alias && status == 1))
        goto unlock;

    qemuDomainSignalDeviceRemoval(vm, alias,
                                  QEMU_DOMAIN_UNPLUGGING_DEVICE_STATUS_GUEST_REJECTED);

    failedEvent = virDomainEventDeviceRemovalFailedNewFromObj(vm, alias);

 unlock:
    virObjectUnlock(vm);
    qemuDomainEventQueue(driver, failedEvent);

    return 0;
}


1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494
/*
 * Monitor callback for NIC RX filter change notifications.
 *
 * The work is deferred: a qemuProcessEvent carrying a copy of @devAlias is
 * pushed to the driver's worker pool.
 *
 * Always returns 0; allocation or dispatch failures drop the notification.
 */
static int
qemuProcessHandleNicRxFilterChanged(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                    virDomainObjPtr vm,
                                    const char *devAlias,
                                    void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    struct qemuProcessEvent *job = NULL;
    char *aliasCopy = NULL;

    virObjectLock(vm);

    VIR_DEBUG("Device %s RX Filter changed in domain %p %s",
              devAlias, vm, vm->def->name);

    if (VIR_ALLOC(job) < 0)
        goto cleanup;

    if (VIR_STRDUP(aliasCopy, devAlias) < 0) {
        VIR_FREE(job);
        goto cleanup;
    }

    job->eventType = QEMU_PROCESS_EVENT_NIC_RX_FILTER_CHANGED;
    job->data = aliasCopy;
    job->vm = vm;

    /* The queued job carries its own reference on the domain object. */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, job) < 0) {
        ignore_value(virObjectUnref(vm));
        VIR_FREE(job->data);
        VIR_FREE(job);
    }

 cleanup:
    virObjectUnlock(vm);
    return 0;
}


1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537
/*
 * Monitor callback for serial port connection state changes.
 *
 * The work is deferred: a qemuProcessEvent carrying a copy of @devAlias and
 * the @connected flag (stored in the action field) is pushed to the
 * driver's worker pool.
 *
 * Always returns 0; allocation or dispatch failures drop the notification.
 */
static int
qemuProcessHandleSerialChanged(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               const char *devAlias,
                               bool connected,
                               void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    struct qemuProcessEvent *job = NULL;
    char *aliasCopy = NULL;

    virObjectLock(vm);

    VIR_DEBUG("Serial port %s state changed to '%d' in domain %p %s",
              devAlias, connected, vm, vm->def->name);

    if (VIR_ALLOC(job) < 0)
        goto cleanup;

    if (VIR_STRDUP(aliasCopy, devAlias) < 0) {
        VIR_FREE(job);
        goto cleanup;
    }

    job->eventType = QEMU_PROCESS_EVENT_SERIAL_CHANGED;
    job->data = aliasCopy;
    job->action = connected;
    job->vm = vm;

    /* The queued job carries its own reference on the domain object. */
    virObjectRef(vm);
    if (virThreadPoolSendJob(driver->workerPool, 0, job) < 0) {
        ignore_value(virObjectUnref(vm));
        VIR_FREE(job->data);
        VIR_FREE(job);
    }

 cleanup:
    virObjectUnlock(vm);
    return 0;
}


1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556
/*
 * Monitor callback for the "SPICE migration completed" notification.
 *
 * When an outgoing migration job is active, the spiceMigrated flag of the
 * job is set and waiters on the domain are woken up; otherwise the event
 * is only logged.  Always returns 0.
 */
static int
qemuProcessHandleSpiceMigrated(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               void *opaque ATTRIBUTE_UNUSED)
{
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    VIR_DEBUG("Spice migration completed for domain %p %s",
              vm, vm->def->name);

    priv = vm->privateData;
    if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) {
        priv->job.spiceMigrated = true;
        virDomainObjBroadcast(vm);
    } else {
        VIR_DEBUG("got SPICE_MIGRATE_COMPLETED event without a migration job");
    }

    virObjectUnlock(vm);
    return 0;
}


1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579
/*
 * Monitor callback for migration status changes.
 *
 * When an async job is active, @status is stored in the statistics of the
 * current job and waiters on the domain are woken up; otherwise the event
 * is only logged.  Always returns 0.
 */
static int
qemuProcessHandleMigrationStatus(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                 virDomainObjPtr vm,
                                 int status,
                                 void *opaque ATTRIBUTE_UNUSED)
{
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    VIR_DEBUG("Migration of domain %p %s changed state to %s",
              vm, vm->def->name,
              qemuMonitorMigrationStatusTypeToString(status));

    priv = vm->privateData;
    if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE) {
        priv->job.current->stats.status = status;
        virDomainObjBroadcast(vm);
    } else {
        VIR_DEBUG("got MIGRATION event without a migration job");
    }

    virObjectUnlock(vm);
    return 0;
}


1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622
/*
 * Monitor callback for migration iteration (pass) notifications.
 *
 * When an async job is active, a migration iteration event carrying @pass
 * is queued; otherwise the notification is only logged.  Always returns 0.
 */
static int
qemuProcessHandleMigrationPass(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                               virDomainObjPtr vm,
                               int pass,
                               void *opaque)
{
    virQEMUDriverPtr driver = opaque;
    qemuDomainObjPrivatePtr priv;

    virObjectLock(vm);

    VIR_DEBUG("Migrating domain %p %s, iteration %d",
              vm, vm->def->name, pass);

    priv = vm->privateData;
    if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE) {
        virObjectEventPtr iterationEvent;

        iterationEvent = virDomainEventMigrationIterationNewFromObj(vm, pass);
        qemuDomainEventQueue(driver, iterationEvent);
    } else {
        VIR_DEBUG("got MIGRATION_PASS event without a migration job");
    }

    virObjectUnlock(vm);
    return 0;
}


1623 1624
static qemuMonitorCallbacks monitorCallbacks = {
    .eofNotify = qemuProcessHandleMonitorEOF,
1625
    .errorNotify = qemuProcessHandleMonitorError,
1626
    .diskSecretLookup = qemuProcessFindVolumeQcowPassphrase,
1627
    .domainEvent = qemuProcessHandleEvent,
1628 1629
    .domainShutdown = qemuProcessHandleShutdown,
    .domainStop = qemuProcessHandleStop,
1630
    .domainResume = qemuProcessHandleResume,
1631 1632 1633 1634 1635
    .domainReset = qemuProcessHandleReset,
    .domainRTCChange = qemuProcessHandleRTCChange,
    .domainWatchdog = qemuProcessHandleWatchdog,
    .domainIOError = qemuProcessHandleIOError,
    .domainGraphics = qemuProcessHandleGraphics,
1636
    .domainBlockJob = qemuProcessHandleBlockJob,
1637
    .domainTrayChange = qemuProcessHandleTrayChange,
O
Osier Yang 已提交
1638
    .domainPMWakeup = qemuProcessHandlePMWakeup,
O
Osier Yang 已提交
1639
    .domainPMSuspend = qemuProcessHandlePMSuspend,
1640
    .domainBalloonChange = qemuProcessHandleBalloonChange,
1641
    .domainPMSuspendDisk = qemuProcessHandlePMSuspendDisk,
1642
    .domainGuestPanic = qemuProcessHandleGuestPanic,
1643
    .domainDeviceDeleted = qemuProcessHandleDeviceDeleted,
1644
    .domainNicRxFilterChanged = qemuProcessHandleNicRxFilterChanged,
1645
    .domainSerialChange = qemuProcessHandleSerialChanged,
1646
    .domainSpiceMigrated = qemuProcessHandleSpiceMigrated,
1647
    .domainMigrationStatus = qemuProcessHandleMigrationStatus,
1648
    .domainMigrationPass = qemuProcessHandleMigrationPass,
1649
    .domainAcpiOstInfo = qemuProcessHandleAcpiOstInfo,
1650 1651
};

1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664
/* Forward declaration: qemuConnectMonitor() registers this function as the
 * monitor's log error callback before its definition appears further down
 * in this file. */
static void
qemuProcessMonitorReportLogError(qemuMonitorPtr mon,
                                 const char *msg,
                                 void *opaque);


/*
 * Destructor registered together with the monitor's log callback: @opaque
 * is the domain log context that was handed to qemuMonitorSetDomainLog().
 */
static void
qemuProcessMonitorLogFree(void *opaque)
{
    qemuDomainLogContextFree(opaque);
}

1665
static int
1666
qemuConnectMonitor(virQEMUDriverPtr driver, virDomainObjPtr vm, int asyncJob,
1667
                   qemuDomainLogContextPtr logCtxt)
1668 1669 1670
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret = -1;
1671
    qemuMonitorPtr mon = NULL;
1672

1673
    if (virSecurityManagerSetDaemonSocketLabel(driver->securityManager,
1674
                                               vm->def) < 0) {
1675 1676
        VIR_ERROR(_("Failed to set security context for monitor for %s"),
                  vm->def->name);
1677
        return -1;
1678 1679 1680
    }

    /* Hold an extra reference because we can't allow 'vm' to be
M
Michal Privoznik 已提交
1681
     * deleted unitl the monitor gets its own reference. */
1682
    virObjectRef(vm);
1683

1684
    ignore_value(virTimeMillisNow(&priv->monStart));
1685
    virObjectUnlock(vm);
1686 1687 1688 1689

    mon = qemuMonitorOpen(vm,
                          priv->monConfig,
                          priv->monJSON,
1690 1691
                          &monitorCallbacks,
                          driver);
1692

1693 1694 1695 1696 1697 1698 1699
    if (mon && logCtxt) {
        qemuDomainLogContextRef(logCtxt);
        qemuMonitorSetDomainLog(mon,
                                qemuProcessMonitorReportLogError,
                                logCtxt,
                                qemuProcessMonitorLogFree);
    }
1700

1701
    virObjectLock(vm);
M
Michal Privoznik 已提交
1702
    virObjectUnref(vm);
1703
    priv->monStart = 0;
1704

M
Michal Privoznik 已提交
1705
    if (!virDomainObjIsActive(vm)) {
1706
        qemuMonitorClose(mon);
1707
        mon = NULL;
1708 1709 1710
    }
    priv->mon = mon;

1711
    if (virSecurityManagerClearSocketLabel(driver->securityManager, vm->def) < 0) {
1712 1713
        VIR_ERROR(_("Failed to clear security context for monitor for %s"),
                  vm->def->name);
1714
        return -1;
1715 1716 1717 1718
    }

    if (priv->mon == NULL) {
        VIR_INFO("Failed to connect monitor for %s", vm->def->name);
1719
        return -1;
1720 1721 1722
    }


1723
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
1724
        return -1;
1725

1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739
    if (qemuMonitorSetCapabilities(priv->mon) < 0)
        goto cleanup;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MONITOR_JSON) &&
        virQEMUCapsProbeQMP(priv->qemuCaps, priv->mon) < 0)
        goto cleanup;

    if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT) &&
        qemuMonitorSetMigrationCapability(priv->mon,
                                          QEMU_MONITOR_MIGRATION_CAPS_EVENTS,
                                          true) < 0) {
        VIR_DEBUG("Cannot enable migration events; clearing capability");
        virQEMUCapsClear(priv->qemuCaps, QEMU_CAPS_MIGRATION_EVENT);
    }
1740

1741 1742 1743 1744 1745
    ret = 0;

 cleanup:
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
1746 1747 1748
    return ret;
}

1749 1750 1751

/**
 * qemuProcessReadLog: Read log file of a qemu VM
1752
 * @logCtxt: the domain log context
1753
 * @msg: pointer to buffer to store the read messages in
1754 1755
 *
 * Reads log of a qemu VM. Skips messages not produced by qemu or irrelevant
1756
 * messages. Returns returns 0 on success or -1 on error
1757
 */
1758
static int
1759
qemuProcessReadLog(qemuDomainLogContextPtr logCtxt, char **msg)
1760
{
1761 1762
    char *buf;
    ssize_t got;
1763
    char *eol;
1764
    char *filter_next;
1765

1766
    if ((got = qemuDomainLogContextRead(logCtxt, &buf)) < 0)
1767
        return -1;
1768

1769 1770 1771 1772 1773 1774 1775
    /* Filter out debug messages from intermediate libvirt process */
    filter_next = buf;
    while ((eol = strchr(filter_next, '\n'))) {
        *eol = '\0';
        if (virLogProbablyLogMessage(filter_next) ||
            STRPREFIX(filter_next, "char device redirected to")) {
            size_t skip = (eol + 1) - filter_next;
1776
            memmove(filter_next, eol + 1, buf + got - eol);
1777 1778 1779 1780
            got -= skip;
        } else {
            filter_next = eol + 1;
            *eol = '\n';
1781 1782
        }
    }
1783
    filter_next = NULL; /* silence false coverity warning */
1784

1785 1786
    if (got > 0 &&
        buf[got - 1] == '\n') {
1787 1788
        buf[got - 1] = '\0';
        got--;
1789
    }
1790
    ignore_value(VIR_REALLOC_N_QUIET(buf, got + 1));
1791 1792 1793
    *msg = buf;
    return 0;
}
1794 1795


1796 1797
/*
 * Reads the (filtered) qemu log via qemuProcessReadLog() and reports an
 * internal error made of @msgprefix, followed by the log text when the
 * log is not empty.  Any previously recorded error is reset first.
 *
 * Returns 0 once the error was reported, -1 if the log could not be read.
 */
static int
qemuProcessReportLogError(qemuDomainLogContextPtr logCtxt,
                          const char *msgprefix)
{
    char *text = NULL;
    int rc = qemuProcessReadLog(logCtxt, &text);

    if (rc < 0)
        return -1;

    virResetLastError();
    if (!virStringIsEmpty(text))
        virReportError(VIR_ERR_INTERNAL_ERROR, _("%s: %s"), msgprefix, text);
    else
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s", msgprefix);

    VIR_FREE(text);
    return 0;
}


1816 1817 1818 1819 1820 1821 1822 1823 1824 1825
/*
 * Monitor callback adapter: @opaque carries the domain log context, which
 * is forwarded together with @msg to qemuProcessReportLogError().  The
 * result of that call is not propagated.
 */
static void
qemuProcessMonitorReportLogError(qemuMonitorPtr mon ATTRIBUTE_UNUSED,
                                 const char *msg,
                                 void *opaque)
{
    qemuDomainLogContextPtr ctxt = opaque;

    qemuProcessReportLogError(ctxt, msg);
}


1826
static int
1827 1828 1829
qemuProcessLookupPTYs(virDomainDefPtr def,
                      virQEMUCapsPtr qemuCaps,
                      virDomainChrDefPtr *devices,
1830
                      int count,
1831
                      virHashTablePtr info)
1832
{
1833
    size_t i;
1834

1835
    for (i = 0; i < count; i++) {
1836
        virDomainChrDefPtr chr = devices[i];
1837 1838
        bool chardevfmt = virQEMUCapsSupportsChardev(def, qemuCaps, chr);

1839
        if (chr->source.type == VIR_DOMAIN_CHR_TYPE_PTY) {
C
Cole Robinson 已提交
1840
            char id[32];
1841
            qemuMonitorChardevInfoPtr entry;
1842

C
Cole Robinson 已提交
1843
            if (snprintf(id, sizeof(id), "%s%s",
1844
                         chardevfmt ? "char" : "",
1845 1846 1847 1848
                         chr->info.alias) >= sizeof(id)) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("failed to format device alias "
                                 "for PTY retrieval"));
1849
                return -1;
1850
            }
1851

1852 1853
            entry = virHashLookup(info, id);
            if (!entry || !entry->ptyPath) {
1854 1855 1856 1857
                if (chr->source.data.file.path == NULL) {
                    /* neither the log output nor 'info chardev' had a
                     * pty path for this chardev, report an error
                     */
1858 1859
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   _("no assigned pty for device %s"), id);
1860 1861 1862 1863 1864 1865 1866 1867 1868 1869
                    return -1;
                } else {
                    /* 'info chardev' had no pty path for this chardev,
                     * but the log output had, so we're fine
                     */
                    continue;
                }
            }

            VIR_FREE(chr->source.data.file.path);
1870
            if (VIR_STRDUP(chr->source.data.file.path, entry->ptyPath) < 0)
1871 1872 1873 1874 1875 1876 1877
                return -1;
        }
    }

    return 0;
}

1878 1879
static int
qemuProcessFindCharDevicePTYsMonitor(virDomainObjPtr vm,
1880
                                     virQEMUCapsPtr qemuCaps,
1881
                                     virHashTablePtr info)
1882
{
1883
    size_t i = 0;
C
Cole Robinson 已提交
1884

1885 1886
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->serials, vm->def->nserials,
1887
                              info) < 0)
1888 1889
        return -1;

1890 1891
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->parallels, vm->def->nparallels,
1892
                              info) < 0)
1893
        return -1;
1894

1895 1896
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->channels, vm->def->nchannels,
1897
                              info) < 0)
1898
        return -1;
1899 1900 1901 1902
    /* For historical reasons, console[0] can be just an alias
     * for serial[0]. That's why we need to update it as well. */
    if (vm->def->nconsoles) {
        virDomainChrDefPtr chr = vm->def->consoles[0];
1903

1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914
        if (vm->def->nserials &&
            chr->deviceType == VIR_DOMAIN_CHR_DEVICE_TYPE_CONSOLE &&
            chr->targetType == VIR_DOMAIN_CHR_CONSOLE_TARGET_TYPE_SERIAL) {
            /* yes, the first console is just an alias for serials[0] */
            i = 1;
            if (virDomainChrSourceDefCopy(&chr->source,
                                          &((vm->def->serials[0])->source)) < 0)
                return -1;
        }
    }

1915 1916
    if (qemuProcessLookupPTYs(vm->def, qemuCaps,
                              vm->def->consoles + i, vm->def->nconsoles - i,
1917
                              info) < 0)
1918
        return -1;
1919 1920 1921 1922 1923

    return 0;
}


1924
static int
1925 1926 1927 1928
qemuProcessRefreshChannelVirtioState(virQEMUDriverPtr driver,
                                     virDomainObjPtr vm,
                                     virHashTablePtr info,
                                     int booted)
1929 1930
{
    size_t i;
1931
    int agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_CHANNEL;
1932
    qemuMonitorChardevInfoPtr entry;
1933
    virObjectEventPtr event = NULL;
1934 1935
    char id[32];

1936 1937 1938
    if (booted)
        agentReason = VIR_CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_REASON_DOMAIN_STARTED;

1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954
    for (i = 0; i < vm->def->nchannels; i++) {
        virDomainChrDefPtr chr = vm->def->channels[i];
        if (chr->targetType == VIR_DOMAIN_CHR_CHANNEL_TARGET_TYPE_VIRTIO) {
            if (snprintf(id, sizeof(id), "char%s",
                         chr->info.alias) >= sizeof(id)) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("failed to format device alias "
                                 "for PTY retrieval"));
                return -1;
            }

            /* port state not reported */
            if (!(entry = virHashLookup(info, id)) ||
                !entry->state)
                continue;

1955 1956 1957 1958 1959 1960
            if (entry->state != VIR_DOMAIN_CHR_DEVICE_STATE_DEFAULT &&
                STREQ_NULLABLE(chr->target.name, "org.qemu.guest_agent.0") &&
                (event = virDomainEventAgentLifecycleNewFromObj(vm, entry->state,
                                                                agentReason)))
                qemuDomainEventQueue(driver, event);

1961 1962 1963 1964 1965 1966 1967 1968
            chr->state = entry->state;
        }
    }

    return 0;
}


1969 1970
int
qemuRefreshVirtioChannelState(virQEMUDriverPtr driver,
1971 1972
                              virDomainObjPtr vm,
                              qemuDomainAsyncJob asyncJob)
1973 1974 1975 1976 1977
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virHashTablePtr info = NULL;
    int ret = -1;

1978 1979 1980
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;

1981
    ret = qemuMonitorGetChardevInfo(priv->mon, &info);
1982 1983
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
1984 1985 1986 1987

    if (ret < 0)
        goto cleanup;

1988
    ret = qemuProcessRefreshChannelVirtioState(driver, vm, info, false);
1989 1990 1991 1992 1993 1994

 cleanup:
    virHashFree(info);
    return ret;
}

1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030
/*
 * qemuRefreshRTC:
 * @driver: qemu driver
 * @vm: domain object
 *
 * For domains whose clock offset is VIR_DOMAIN_CLOCK_OFFSET_VARIABLE,
 * reads the guest RTC time through the monitor and stores the difference
 * to the current host time (corrected by the local UTC offset) as the
 * clock adjustment in the domain definition.
 *
 * Nothing is returned: on any failure the stored adjustment is simply
 * left untouched.
 */
static void
qemuRefreshRTC(virQEMUDriverPtr driver,
               virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    time_t now, then;
    struct tm thenbits;
    long localOffset;
    int rv;

    /* Only the variable clock offset tracks an adjustment */
    if (vm->def->clock.offset != VIR_DOMAIN_CLOCK_OFFSET_VARIABLE)
        return;

    memset(&thenbits, 0, sizeof(thenbits));
    qemuDomainObjEnterMonitor(driver, vm);
    /* Sample the host time right before querying the guest RTC */
    now = time(NULL);
    rv = qemuMonitorGetRTCTime(priv->mon, &thenbits);
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        rv = -1;

    if (rv < 0)
        return;

    /* A negative tm_isdst lets mktime() work out DST on its own */
    thenbits.tm_isdst = -1;
    if ((then = mktime(&thenbits)) == (time_t) -1) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Unable to convert time"));
        return;
    }

    /* Thing is, @now is in local TZ but @then in UTC. */
    if (virTimeLocalOffsetFromUTC(&localOffset) < 0)
        return;

    vm->def->clock.data.variable.adjustment = then - now + localOffset;
}
2031

2032
int
2033 2034 2035 2036 2037 2038 2039 2040 2041
qemuProcessRefreshBalloonState(virQEMUDriverPtr driver,
                               virDomainObjPtr vm,
                               int asyncJob)
{
    unsigned long long balloon;
    int rc;

    /* if no ballooning is available, the current size equals to the current
     * full memory size */
2042
    if (!virDomainDefHasMemballoon(vm->def)) {
2043
        vm->def->mem.cur_balloon = virDomainDefGetMemoryTotal(vm->def);
2044 2045 2046 2047 2048 2049 2050
        return 0;
    }

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;

    rc = qemuMonitorGetBalloonInfo(qemuDomainGetMonitor(vm), &balloon);
2051
    if (qemuDomainObjExitMonitor(driver, vm) < 0 || rc < 0)
2052 2053 2054 2055 2056 2057 2058 2059
        return -1;

    vm->def->mem.cur_balloon = balloon;

    return 0;
}


2060
static int
2061
qemuProcessWaitForMonitor(virQEMUDriverPtr driver,
C
Cole Robinson 已提交
2062
                          virDomainObjPtr vm,
2063
                          int asyncJob,
2064
                          virQEMUCapsPtr qemuCaps,
2065
                          qemuDomainLogContextPtr logCtxt)
2066 2067
{
    int ret = -1;
2068
    virHashTablePtr info = NULL;
2069
    qemuDomainObjPrivatePtr priv;
2070 2071

    VIR_DEBUG("Connect monitor to %p '%s'", vm, vm->def->name);
2072
    if (qemuConnectMonitor(driver, vm, asyncJob, logCtxt) < 0)
2073 2074 2075 2076 2077 2078
        goto cleanup;

    /* Try to get the pty path mappings again via the monitor. This is much more
     * reliable if it's available.
     * Note that the monitor itself can be on a pty, so we still need to try the
     * log output method. */
2079
    priv = vm->privateData;
2080 2081
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;
2082 2083
    ret = qemuMonitorGetChardevInfo(priv->mon, &info);
    VIR_DEBUG("qemuMonitorGetChardevInfo returned %i", ret);
2084 2085 2086
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;

2087 2088 2089 2090 2091
    if (ret == 0) {
        if ((ret = qemuProcessFindCharDevicePTYsMonitor(vm, qemuCaps,
                                                        info)) < 0)
            goto cleanup;

2092 2093
        if ((ret = qemuProcessRefreshChannelVirtioState(driver, vm, info,
                                                        true)) < 0)
2094 2095
            goto cleanup;
    }
2096

2097
 cleanup:
2098
    virHashFree(info);
2099

2100 2101
    if (logCtxt && kill(vm->pid, 0) == -1 && errno == ESRCH) {
        qemuProcessReportLogError(logCtxt,
2102
                                  _("process exited while connecting to monitor"));
2103 2104 2105 2106 2107 2108
        ret = -1;
    }

    return ret;
}

2109

2110 2111 2112 2113 2114 2115
/*
 * Queries qemu for its IOThreads and records the thread id of each one in
 * the matching iothreadids entry of the domain definition.
 *
 * When qemu lacks QEMU_CAPS_OBJECT_IOTHREAD, all iothread definitions are
 * dropped from the domain definition instead.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessDetectIOThreadPIDs(virQEMUDriverPtr driver,
                              virDomainObjPtr vm,
                              int asyncJob)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    qemuMonitorIOThreadInfoPtr *infos = NULL;
    int ninfos = 0;
    int ret = -1;
    size_t n;

    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_OBJECT_IOTHREAD)) {
        /* The following check is because at one time a domain could
         * define iothreadids and start the domain - only failing the
         * capability check when attempting to add a disk. Because the
         * iothreads and [n]iothreadids were left untouched other code
         * assumed it could use the ->thread_id value to make thread_id
         * based adjustments (e.g. pinning, scheduling) which while
         * succeeding would execute on the calling thread.
         */
        if (!vm->def->niothreadids)
            return 0;

        for (n = 0; n < vm->def->niothreadids; n++) {
            virDomainIOThreadIDDefPtr stale = vm->def->iothreadids[n];

            /* Tell the user about explicitly defined iothreadid
             * elements that are being removed */
            if (!stale->autofill)
                VIR_INFO("IOThreads not supported, remove iothread id '%u'",
                         stale->iothread_id);
            virDomainIOThreadIDDefFree(stale);
        }

        /* Remove any trace */
        VIR_FREE(vm->def->iothreadids);
        vm->def->niothreadids = 0;
        vm->def->iothreads = 0;
        return 0;
    }

    /* Get the list of IOThreads from qemu */
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;
    ninfos = qemuMonitorGetIOThreads(priv->mon, &infos);
    if (qemuDomainObjExitMonitor(driver, vm) < 0 || ninfos < 0)
        goto cleanup;

    if (ninfos != vm->def->niothreadids) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("got wrong number of IOThread pids from QEMU monitor. "
                         "got %d, wanted %zu"),
                       ninfos, vm->def->niothreadids);
        goto cleanup;
    }

    /* With no IOThreads at all this loop is simply skipped */
    for (n = 0; n < ninfos; n++) {
        virDomainIOThreadIDDefPtr match;

        match = virDomainIOThreadIDFind(vm->def, infos[n]->iothread_id);
        if (!match) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("iothread %d not found"),
                           infos[n]->iothread_id);
            goto cleanup;
        }
        match->thread_id = infos[n]->thread_id;
    }

    ret = 0;

 cleanup:
    if (infos) {
        for (n = 0; n < ninfos; n++)
            VIR_FREE(infos[n]);
        VIR_FREE(infos);
    }
    return ret;
}

2195 2196 2197 2198 2199

/*
 * To be run between fork/exec of QEMU only
 */
static int
2200
qemuProcessInitCpuAffinity(virDomainObjPtr vm)
2201 2202 2203 2204
{
    int ret = -1;
    virBitmapPtr cpumap = NULL;
    virBitmapPtr cpumapToSet = NULL;
2205
    qemuDomainObjPrivatePtr priv = vm->privateData;
2206

2207 2208 2209 2210 2211 2212
    if (!vm->pid) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Cannot setup CPU affinity until process is started"));
        return -1;
    }

2213 2214
    if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) {
        VIR_DEBUG("Set CPU affinity with advisory nodeset from numad");
2215
        cpumapToSet = priv->autoCpuset;
2216
    } else {
2217
        VIR_DEBUG("Set CPU affinity with specified cpuset");
O
Osier Yang 已提交
2218
        if (vm->def->cpumask) {
H
Hu Tao 已提交
2219
            cpumapToSet = vm->def->cpumask;
O
Osier Yang 已提交
2220 2221 2222 2223 2224
        } else {
            /* You may think this is redundant, but we can't assume libvirtd
             * itself is running on all pCPUs, so we need to explicitly set
             * the spawned QEMU instance to all pCPUs if no map is given in
             * its config file */
2225 2226 2227 2228
            int hostcpus;

            /* setaffinity fails if you set bits for CPUs which
             * aren't present, so we have to limit ourselves */
2229
            if ((hostcpus = virHostCPUGetCount()) < 0)
2230 2231 2232 2233 2234 2235 2236 2237
                goto cleanup;

            if (hostcpus > QEMUD_CPUMASK_LEN)
                hostcpus = QEMUD_CPUMASK_LEN;

            if (!(cpumap = virBitmapNew(hostcpus)))
                goto cleanup;

2238
            virBitmapSetAll(cpumap);
2239 2240

            cpumapToSet = cpumap;
O
Osier Yang 已提交
2241
        }
2242 2243
    }

2244
    if (virProcessSetAffinity(vm->pid, cpumapToSet) < 0)
2245
        goto cleanup;
2246

2247 2248
    ret = 0;

2249
 cleanup:
2250
    virBitmapFree(cpumap);
2251
    return ret;
2252 2253
}

2254 2255
/* set link states to down on interfaces at qemu start */
static int
2256 2257 2258
qemuProcessSetLinkStates(virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
                         qemuDomainAsyncJob asyncJob)
2259 2260 2261
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
2262
    size_t i;
2263 2264 2265 2266 2267
    int ret = -1;
    int rv;

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;
2268 2269 2270

    for (i = 0; i < def->nnets; i++) {
        if (def->nets[i]->linkstate == VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN) {
2271 2272 2273
            if (!def->nets[i]->info.alias) {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("missing alias for network device"));
2274
                goto cleanup;
2275 2276
            }

2277 2278
            VIR_DEBUG("Setting link state: %s", def->nets[i]->info.alias);

2279
            if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NETDEV)) {
2280
                virReportError(VIR_ERR_OPERATION_UNSUPPORTED, "%s",
2281
                               _("Setting of link state is not supported by this qemu"));
2282
                goto cleanup;
2283 2284
            }

2285 2286 2287 2288
            rv = qemuMonitorSetLink(priv->mon,
                                    def->nets[i]->info.alias,
                                    VIR_DOMAIN_NET_INTERFACE_LINK_STATE_DOWN);
            if (rv < 0) {
2289
                virReportError(VIR_ERR_OPERATION_FAILED,
2290 2291 2292
                               _("Couldn't set link state on interface: %s"),
                               def->nets[i]->info.alias);
                goto cleanup;
2293 2294 2295 2296
            }
        }
    }

2297 2298 2299 2300 2301
    ret = 0;

 cleanup:
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
2302 2303 2304
    return ret;
}

2305

2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413
/**
 * qemuProcessSetupPid:
 * @vm: domain object
 * @pid: process or thread id the settings are applied to
 * @nameval: kind of thread, used to name the per-thread cgroup
 * @id: index of the thread, used to name the per-thread cgroup
 * @cpumask: CPU mask to use; if NULL, the automatic cpuset (automatic
 *           placement) or the domain-wide cpumask is used instead
 * @period: CPU bandwidth period, 0 if unset
 * @quota: CPU bandwidth quota, 0 if unset
 * @sched: optional scheduler policy and priority, may be NULL
 *
 * This function sets resource properties (affinity, cgroups,
 * scheduler) for any PID associated with a domain.  It should be used
 * to set up emulator PIDs as well as vCPU and I/O thread pids to
 * ensure they are all handled the same way.
 *
 * If the function fails after the per-thread cgroup was created, that
 * cgroup is removed again.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupPid(virDomainObjPtr vm,
                    pid_t pid,
                    virCgroupThreadName nameval,
                    int id,
                    virBitmapPtr cpumask,
                    unsigned long long period,
                    long long quota,
                    virDomainThreadSchedParamPtr sched)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainNumatuneMemMode mem_mode;
    virCgroupPtr cgroup = NULL;
    virBitmapPtr use_cpumask;
    char *mem_mask = NULL;
    int ret = -1;

    /* Bandwidth tuning cannot be applied without the CPU controller */
    if ((period || quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        goto cleanup;
    }

    /* Infer which cpumask shall be used. */
    if (cpumask)
        use_cpumask = cpumask;
    else if (vm->def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO)
        use_cpumask = priv->autoCpuset;
    else
        use_cpumask = vm->def->cpumask;

    /*
     * If CPU cgroup controller is not initialized here, then we need
     * neither period nor quota settings.  And if CPUSET controller is
     * not initialized either, then there's nothing to do anyway.
     */
    if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU) ||
        virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {

        /* The memory node mask is only formatted for strict numatune mode */
        if (virDomainNumatuneGetMode(vm->def->numa, -1, &mem_mode) == 0 &&
            mem_mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
            virDomainNumatuneMaybeFormatNodeset(vm->def->numa,
                                                priv->autoNodeset,
                                                &mem_mask, -1) < 0)
            goto cleanup;

        if (virCgroupNewThread(priv->cgroup, nameval, id, true, &cgroup) < 0)
            goto cleanup;

        if (virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPUSET)) {
            if (use_cpumask &&
                qemuSetupCgroupCpusetCpus(cgroup, use_cpumask) < 0)
                goto cleanup;

            /*
             * Don't setup cpuset.mems for the emulator, they need to
             * be set up after initialization in order for kvm
             * allocations to succeed.
             */
            if (nameval != VIR_CGROUP_THREAD_EMULATOR &&
                mem_mask && virCgroupSetCpusetMems(cgroup, mem_mask) < 0)
                goto cleanup;

        }

        if ((period || quota) &&
            qemuSetupCgroupVcpuBW(cgroup, period, quota) < 0)
            goto cleanup;

        /* Move the thread to the sub dir */
        if (virCgroupAddTask(cgroup, pid) < 0)
            goto cleanup;

    }

    /* Setup legacy affinity. */
    if (use_cpumask && virProcessSetAffinity(pid, use_cpumask) < 0)
        goto cleanup;

    /* Set scheduler type and priority. */
    if (sched &&
        virProcessSetScheduler(pid, sched->policy, sched->priority) < 0)
        goto cleanup;

    ret = 0;
 cleanup:
    VIR_FREE(mem_mask);
    if (cgroup) {
        /* Do not leave a half-configured cgroup behind on failure */
        if (ret < 0)
            virCgroupRemove(cgroup);
        virCgroupFree(&cgroup);
    }

    return ret;
}


2414
static int
2415
qemuProcessSetupEmulator(virDomainObjPtr vm)
2416
{
2417 2418 2419 2420 2421
    return qemuProcessSetupPid(vm, vm->pid, VIR_CGROUP_THREAD_EMULATOR,
                               0, vm->def->cputune.emulatorpin,
                               vm->def->cputune.emulator_period,
                               vm->def->cputune.emulator_quota,
                               NULL);
2422 2423
}

2424

2425 2426
static int
qemuProcessInitPasswords(virConnectPtr conn,
2427
                         virQEMUDriverPtr driver,
2428 2429
                         virDomainObjPtr vm,
                         int asyncJob)
2430 2431 2432
{
    int ret = 0;
    qemuDomainObjPrivatePtr priv = vm->privateData;
2433
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
2434
    size_t i;
2435 2436
    char *alias = NULL;
    char *secret = NULL;
2437

2438
    for (i = 0; i < vm->def->ngraphics; ++i) {
2439 2440
        virDomainGraphicsDefPtr graphics = vm->def->graphics[i];
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
2441 2442
            ret = qemuDomainChangeGraphicsPasswords(driver, vm,
                                                    VIR_DOMAIN_GRAPHICS_TYPE_VNC,
2443
                                                    &graphics->data.vnc.auth,
2444 2445
                                                    cfg->vncPassword,
                                                    asyncJob);
2446
        } else if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
2447 2448
            ret = qemuDomainChangeGraphicsPasswords(driver, vm,
                                                    VIR_DOMAIN_GRAPHICS_TYPE_SPICE,
2449
                                                    &graphics->data.spice.auth,
2450 2451
                                                    cfg->spicePassword,
                                                    asyncJob);
2452 2453
        }

2454 2455 2456
        if (ret < 0)
            goto cleanup;
    }
2457

2458 2459
    for (i = 0; i < vm->def->ndisks; i++) {
        size_t secretLen;
2460

2461 2462 2463
        if (!vm->def->disks[i]->src->encryption ||
            !virDomainDiskGetSource(vm->def->disks[i]))
            continue;
2464

2465 2466 2467 2468 2469 2470
        if (vm->def->disks[i]->src->encryption->format !=
            VIR_STORAGE_ENCRYPTION_FORMAT_DEFAULT &&
            vm->def->disks[i]->src->encryption->format !=
            VIR_STORAGE_ENCRYPTION_FORMAT_QCOW)
            continue;

2471 2472 2473 2474 2475
        VIR_FREE(secret);
        if (qemuProcessGetVolumeQcowPassphrase(conn,
                                               vm->def->disks[i],
                                               &secret, &secretLen) < 0)
            goto cleanup;
2476

2477
        VIR_FREE(alias);
2478
        if (!(alias = qemuAliasFromDisk(vm->def->disks[i])))
2479 2480 2481 2482 2483 2484 2485 2486
            goto cleanup;
        if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
            goto cleanup;
        ret = qemuMonitorSetDrivePassphrase(priv->mon, alias, secret);
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            ret = -1;
        if (ret < 0)
            goto cleanup;
2487 2488
    }

2489
 cleanup:
2490 2491
    VIR_FREE(alias);
    VIR_FREE(secret);
2492
    virObjectUnref(cfg);
2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519
    return ret;
}


/*
 * Character device callback: pre-creates the backing file of a file-based
 * chardev.  Devices of any other type are left alone.
 *
 * Returns 0 on success, -1 if the file could not be opened/created.
 */
static int
qemuProcessPrepareChardevDevice(virDomainDefPtr def ATTRIBUTE_UNUSED,
                                virDomainChrDefPtr dev,
                                void *opaque ATTRIBUTE_UNUSED)
{
    int fd;

    if (dev->source.type != VIR_DOMAIN_CHR_TYPE_FILE)
        return 0;

    fd = open(dev->source.data.file.path,
              O_CREAT | O_APPEND, S_IRUSR|S_IWUSR);
    if (fd < 0) {
        virReportSystemError(errno,
                             _("Unable to pre-create chardev file '%s'"),
                             dev->source.data.file.path);
        return -1;
    }

    /* Only the existence of the file matters, not the descriptor */
    VIR_FORCE_CLOSE(fd);

    return 0;
}


2520 2521 2522 2523 2524 2525
/*
 * Character device callback: removes the socket file of a listening UNIX
 * socket chardev that has a path.  The result of unlink() is not checked.
 *
 * Always returns 0.
 */
static int
qemuProcessCleanupChardevDevice(virDomainDefPtr def ATTRIBUTE_UNUSED,
                                virDomainChrDefPtr dev,
                                void *opaque ATTRIBUTE_UNUSED)
{
    if (dev->source.type != VIR_DOMAIN_CHR_TYPE_UNIX)
        return 0;

    if (!dev->source.data.nix.listen || !dev->source.data.nix.path)
        return 0;

    unlink(dev->source.data.nix.path);

    return 0;
}


2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569
/**
 * Loads and updates the video memory sizes of video devices from the
 * running QEMU process, as QEMU will silently adjust the values passed on
 * its command line.  The refreshed values are then saved to the status XML.
 *
 * This fails if the values cannot be loaded from QEMU, because having the
 * correct video memory size in the status XML is mandatory to not break
 * migration.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessUpdateVideoRamSize(virQEMUDriverPtr driver,
                              virDomainObjPtr vm,
                              int asyncJob)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = NULL;
    virDomainVideoDefPtr video = NULL;
    int ret = -1;
    ssize_t i;

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;

    for (i = 0; i < vm->def->nvideos; i++) {
        video = vm->def->videos[i];

        switch (video->type) {
        case VIR_DOMAIN_VIDEO_TYPE_VGA:
            if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VGA_VGAMEM) &&
                qemuMonitorUpdateVideoMemorySize(priv->mon, video, "VGA") < 0)
                goto error;
            break;

        case VIR_DOMAIN_VIDEO_TYPE_QXL:
            /* The first video device is queried as "qxl-vga", all
             * further ones as "qxl" */
            if (i == 0) {
                if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGA_VGAMEM)) {
                    if (qemuMonitorUpdateVideoMemorySize(priv->mon, video,
                                                         "qxl-vga") < 0)
                        goto error;
                }

                if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGA_VRAM64)) {
                    if (qemuMonitorUpdateVideoVram64Size(priv->mon, video,
                                                         "qxl-vga") < 0)
                        goto error;
                }
            } else {
                if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VGAMEM)) {
                    if (qemuMonitorUpdateVideoMemorySize(priv->mon, video,
                                                         "qxl") < 0)
                        goto error;
                }

                if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_QXL_VRAM64)) {
                    if (qemuMonitorUpdateVideoVram64Size(priv->mon, video,
                                                         "qxl") < 0)
                        goto error;
                }
            }
            break;

        case VIR_DOMAIN_VIDEO_TYPE_VMVGA:
            if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_VMWARE_SVGA_VGAMEM) &&
                qemuMonitorUpdateVideoMemorySize(priv->mon, video,
                                                 "vmware-svga") < 0)
                goto error;
            break;

        case VIR_DOMAIN_VIDEO_TYPE_CIRRUS:
        case VIR_DOMAIN_VIDEO_TYPE_XEN:
        case VIR_DOMAIN_VIDEO_TYPE_VBOX:
        case VIR_DOMAIN_VIDEO_TYPE_LAST:
            break;
        }
    }

    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        return -1;

    /* Persist the refreshed sizes in the status XML */
    cfg = virQEMUDriverGetConfig(driver);
    ret = virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps);
    virObjectUnref(cfg);

    return ret;

 error:
    /* Leave the monitor; the update failure is what gets reported */
    ignore_value(qemuDomainObjExitMonitor(driver, vm));
    return -1;
}


2622 2623 2624
struct qemuProcessHookData {
    virConnectPtr conn;
    virDomainObjPtr vm;
2625
    virQEMUDriverPtr driver;
2626
    virQEMUDriverConfigPtr cfg;
2627 2628 2629 2630 2631
};

static int qemuProcessHook(void *data)
{
    struct qemuProcessHookData *h = data;
2632
    qemuDomainObjPrivatePtr priv = h->vm->privateData;
2633
    int ret = -1;
2634
    int fd;
2635 2636 2637
    virBitmapPtr nodeset = NULL;
    virDomainNumatuneMemMode mode;

2638 2639 2640 2641 2642
    /* This method cannot use any mutexes, which are not
     * protected across fork()
     */

    virSecurityManagerPostFork(h->driver->securityManager);
2643 2644 2645 2646 2647

    /* Some later calls want pid present */
    h->vm->pid = getpid();

    VIR_DEBUG("Obtaining domain lock");
2648 2649 2650 2651 2652 2653 2654
    /*
     * Since we're going to leak the returned FD to QEMU,
     * we need to make sure it gets a sensible label.
     * This mildly sucks, because there could be other
     * sockets the lock driver opens that we don't want
     * labelled. So far we're ok though.
     */
2655
    if (virSecurityManagerSetSocketLabel(h->driver->securityManager, h->vm->def) < 0)
2656
        goto cleanup;
2657
    if (virDomainLockProcessStart(h->driver->lockManager,
2658
                                  h->cfg->uri,
2659
                                  h->vm,
J
Ján Tomko 已提交
2660
                                  /* QEMU is always paused initially */
2661 2662
                                  true,
                                  &fd) < 0)
2663
        goto cleanup;
2664
    if (virSecurityManagerClearSocketLabel(h->driver->securityManager, h->vm->def) < 0)
2665
        goto cleanup;
2666

2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677
    if (virDomainNumatuneGetMode(h->vm->def->numa, -1, &mode) == 0) {
        if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
            h->cfg->cgroupControllers & (1 << VIR_CGROUP_CONTROLLER_CPUSET) &&
            virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
            /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
             * there's no way for us to change it. Rely on cgroups (if available
             * and enabled in the config) rather than virNuma*. */
            VIR_DEBUG("Relying on CGroups for memory binding");
        } else {
            nodeset = virDomainNumatuneGetNodeset(h->vm->def->numa,
                                                  priv->autoNodeset, -1);
2678

2679 2680 2681
            if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
                goto cleanup;
        }
2682
    }
2683

2684 2685
    ret = 0;

2686
 cleanup:
2687
    virObjectUnref(h->cfg);
2688 2689
    VIR_DEBUG("Hook complete ret=%d", ret);
    return ret;
2690 2691 2692
}

int
2693 2694
qemuProcessPrepareMonitorChr(virDomainChrSourceDefPtr monConfig,
                             const char *domainDir)
2695 2696 2697 2698
{
    monConfig->type = VIR_DOMAIN_CHR_TYPE_UNIX;
    monConfig->data.nix.listen = true;

2699 2700
    if (virAsprintf(&monConfig->data.nix.path, "%s/monitor.sock",
                    domainDir) < 0)
2701 2702
        return -1;
    return 0;
2703 2704 2705
}


2706
/*
2707 2708
 * Precondition: vm must be locked, and a job must be active.
 * This method will call {Enter,Exit}Monitor
2709
 */
E
Eric Blake 已提交
2710
int
2711
qemuProcessStartCPUs(virQEMUDriverPtr driver, virDomainObjPtr vm,
2712
                     virConnectPtr conn, virDomainRunningReason reason,
2713
                     qemuDomainAsyncJob asyncJob)
2714
{
2715
    int ret = -1;
2716
    qemuDomainObjPrivatePtr priv = vm->privateData;
2717
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
2718

2719
    /* Bring up netdevs before starting CPUs */
2720
    if (qemuInterfaceStartDevices(vm->def) < 0)
2721 2722
       goto cleanup;

2723
    VIR_DEBUG("Using lock state '%s'", NULLSTR(priv->lockState));
2724
    if (virDomainLockProcessResume(driver->lockManager, cfg->uri,
2725
                                   vm, priv->lockState) < 0) {
2726 2727 2728 2729
        /* Don't free priv->lockState on error, because we need
         * to make sure we have state still present if the user
         * tries to resume again
         */
2730
        goto cleanup;
2731 2732 2733
    }
    VIR_FREE(priv->lockState);

2734 2735
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto release;
J
Jiri Denemark 已提交
2736

2737
    ret = qemuMonitorStartCPUs(priv->mon, conn);
2738 2739
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
2740 2741 2742 2743 2744

    if (ret < 0)
        goto release;

    virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
2745

2746
 cleanup:
2747
    virObjectUnref(cfg);
2748
    return ret;
2749 2750 2751 2752 2753 2754

 release:
    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
        VIR_WARN("Unable to release lease on %s", vm->def->name);
    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
    goto cleanup;
2755 2756 2757
}


2758 2759
int qemuProcessStopCPUs(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
2760
                        virDomainPausedReason reason,
2761
                        qemuDomainAsyncJob asyncJob)
2762
{
2763
    int ret = -1;
2764 2765
    qemuDomainObjPrivatePtr priv = vm->privateData;

2766
    VIR_FREE(priv->lockState);
J
Jiri Denemark 已提交
2767

2768 2769
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        goto cleanup;
J
Jiri Denemark 已提交
2770

2771
    ret = qemuMonitorStopCPUs(priv->mon);
2772 2773
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
2774 2775 2776 2777

    if (ret < 0)
        goto cleanup;

2778 2779 2780
    /* de-activate netdevs after stopping CPUs */
    ignore_value(qemuInterfaceStopDevices(vm->def));

2781 2782 2783
    if (priv->job.current)
        ignore_value(virTimeMillisNow(&priv->job.current->stopped));

2784 2785 2786 2787
    virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
    if (virDomainLockProcessPause(driver->lockManager, vm, &priv->lockState) < 0)
        VIR_WARN("Unable to release lease on %s", vm->def->name);
    VIR_DEBUG("Preserving lock state '%s'", NULLSTR(priv->lockState));
J
Jiri Denemark 已提交
2788

2789
 cleanup:
2790 2791 2792 2793 2794
    return ret;
}



2795 2796 2797
/*
 * qemuProcessNotifyNets:
 * @def: domain definition whose interfaces are walked
 *
 * Calls networkNotifyActualDevice() for every interface of @def. For
 * interfaces whose actual type is VIR_DOMAIN_NET_TYPE_DIRECT the macvtap
 * device name is reserved first.
 *
 * Returns 0 on success, -1 as soon as one notification fails.
 */
static int
qemuProcessNotifyNets(virDomainDefPtr def)
{
    size_t idx = 0;

    while (idx < def->nnets) {
        virDomainNetDefPtr iface = def->nets[idx++];

        /* keep others from trying to use the macvtap device name, but
         * don't return error if this happens, since that causes the
         * domain to be unceremoniously killed, which would be *very*
         * impolite.
         */
        if (virDomainNetGetActualType(iface) == VIR_DOMAIN_NET_TYPE_DIRECT)
            ignore_value(virNetDevMacVLanReserveName(iface->ifname, false));

        if (networkNotifyActualDevice(def, iface) < 0)
            return -1;
    }

    return 0;
}

2816
static int
2817
qemuProcessFiltersInstantiate(virDomainDefPtr def)
2818
{
2819
    size_t i;
2820

2821
    for (i = 0; i < def->nnets; i++) {
2822 2823
        virDomainNetDefPtr net = def->nets[i];
        if ((net->filter) && (net->ifname)) {
2824
            if (virDomainConfNWFilterInstantiate(def->uuid, net) < 0)
J
Ján Tomko 已提交
2825
                return 1;
2826 2827 2828
        }
    }

J
Ján Tomko 已提交
2829
    return 0;
2830 2831
}

2832
static int
2833
qemuProcessUpdateState(virQEMUDriverPtr driver, virDomainObjPtr vm)
2834 2835 2836
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainState state;
2837
    virDomainPausedReason reason;
2838
    virDomainState newState = VIR_DOMAIN_NOSTATE;
2839
    int oldReason;
2840
    int newReason;
2841
    bool running;
2842
    char *msg = NULL;
2843 2844
    int ret;

2845
    qemuDomainObjEnterMonitor(driver, vm);
2846
    ret = qemuMonitorGetStatus(priv->mon, &running, &reason);
2847 2848
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        return -1;
2849

2850
    if (ret < 0)
2851 2852
        return -1;

2853
    state = virDomainObjGetState(vm, &oldReason);
2854

2855 2856 2857 2858 2859 2860 2861 2862
    if (running &&
        (state == VIR_DOMAIN_SHUTOFF ||
         (state == VIR_DOMAIN_PAUSED &&
          oldReason == VIR_DOMAIN_PAUSED_STARTING_UP))) {
        newState = VIR_DOMAIN_RUNNING;
        newReason = VIR_DOMAIN_RUNNING_BOOTED;
        ignore_value(VIR_STRDUP_QUIET(msg, "finished booting"));
    } else if (state == VIR_DOMAIN_PAUSED && running) {
2863 2864
        newState = VIR_DOMAIN_RUNNING;
        newReason = VIR_DOMAIN_RUNNING_UNPAUSED;
2865
        ignore_value(VIR_STRDUP_QUIET(msg, "was unpaused"));
2866
    } else if (state == VIR_DOMAIN_RUNNING && !running) {
2867 2868 2869
        if (reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN) {
            newState = VIR_DOMAIN_SHUTDOWN;
            newReason = VIR_DOMAIN_SHUTDOWN_UNKNOWN;
2870
            ignore_value(VIR_STRDUP_QUIET(msg, "shutdown"));
2871
        } else if (reason == VIR_DOMAIN_PAUSED_CRASHED) {
2872 2873 2874
            newState = VIR_DOMAIN_CRASHED;
            newReason = VIR_DOMAIN_CRASHED_PANICKED;
            ignore_value(VIR_STRDUP_QUIET(msg, "crashed"));
2875 2876 2877
        } else {
            newState = VIR_DOMAIN_PAUSED;
            newReason = reason;
S
Stefan Berger 已提交
2878 2879
            ignore_value(virAsprintf(&msg, "was paused (%s)",
                                 virDomainPausedReasonTypeToString(reason)));
2880 2881 2882 2883 2884 2885 2886
        }
    }

    if (newState != VIR_DOMAIN_NOSTATE) {
        VIR_DEBUG("Domain %s %s while its monitor was disconnected;"
                  " changing state to %s (%s)",
                  vm->def->name,
2887
                  NULLSTR(msg),
2888 2889 2890 2891
                  virDomainStateTypeToString(newState),
                  virDomainStateReasonToString(newState, newReason));
        VIR_FREE(msg);
        virDomainObjSetState(vm, newState, newReason);
2892 2893 2894 2895 2896
    }

    return 0;
}

2897
static int
2898 2899 2900 2901 2902
qemuProcessRecoverMigrationIn(virQEMUDriverPtr driver,
                              virDomainObjPtr vm,
                              virConnectPtr conn,
                              qemuMigrationJobPhase phase,
                              virDomainState state,
2903
                              int reason)
2904
{
2905 2906 2907 2908 2909
    bool postcopy = (state == VIR_DOMAIN_PAUSED &&
                     reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED) ||
                    (state == VIR_DOMAIN_RUNNING &&
                     reason == VIR_DOMAIN_RUNNING_POSTCOPY);

2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925
    switch (phase) {
    case QEMU_MIGRATION_PHASE_NONE:
    case QEMU_MIGRATION_PHASE_PERFORM2:
    case QEMU_MIGRATION_PHASE_BEGIN3:
    case QEMU_MIGRATION_PHASE_PERFORM3:
    case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
    case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
    case QEMU_MIGRATION_PHASE_CONFIRM3:
    case QEMU_MIGRATION_PHASE_LAST:
        /* N/A for incoming migration */
        break;

    case QEMU_MIGRATION_PHASE_PREPARE:
        VIR_DEBUG("Killing unfinished incoming migration for domain %s",
                  vm->def->name);
        return -1;
2926

2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941
    case QEMU_MIGRATION_PHASE_FINISH2:
        /* source domain is already killed so let's just resume the domain
         * and hope we are all set */
        VIR_DEBUG("Incoming migration finished, resuming domain %s",
                  vm->def->name);
        if (qemuProcessStartCPUs(driver, vm, conn,
                                 VIR_DOMAIN_RUNNING_UNPAUSED,
                                 QEMU_ASYNC_JOB_NONE) < 0) {
            VIR_WARN("Could not resume domain %s", vm->def->name);
        }
        break;

    case QEMU_MIGRATION_PHASE_FINISH3:
        /* migration finished, we started resuming the domain but didn't
         * confirm success or failure yet; killing it seems safest unless
2942 2943 2944 2945
         * we already started guest CPUs or we were in post-copy mode */
        if (postcopy) {
            qemuMigrationPostcopyFailed(driver, vm);
        } else if (state != VIR_DOMAIN_RUNNING) {
2946
            VIR_DEBUG("Killing migrated domain %s", vm->def->name);
2947
            return -1;
2948 2949 2950
        }
        break;
    }
2951

2952 2953
    return 0;
}
2954

2955 2956 2957 2958 2959 2960 2961 2962
/*
 * qemuProcessRecoverMigrationOut:
 * @phase: phase the interrupted outgoing migration job had reached
 * @state: current domain state
 * @reason: reason belonging to @state
 *
 * Decides what to do with a domain that was the source of a migration
 * when the job was interrupted: cancel and resume, flag a post-copy
 * failure, leave it alone, or have it killed.
 *
 * Returns 0 if the domain may be kept, -1 if the caller should kill it.
 */
static int
qemuProcessRecoverMigrationOut(virQEMUDriverPtr driver,
                               virDomainObjPtr vm,
                               virConnectPtr conn,
                               qemuMigrationJobPhase phase,
                               virDomainState state,
                               int reason)
{
    bool wantResume = false;
    bool postcopy = false;

    if (state == VIR_DOMAIN_PAUSED) {
        postcopy = reason == VIR_DOMAIN_PAUSED_POSTCOPY ||
                   reason == VIR_DOMAIN_PAUSED_POSTCOPY_FAILED;
    }

    switch (phase) {
    case QEMU_MIGRATION_PHASE_PERFORM2:
    case QEMU_MIGRATION_PHASE_PERFORM3:
        /* migration is still in progress, let's cancel it and resume the
         * domain; however we can only do that before migration enters
         * post-copy mode
         */
        if (postcopy) {
            qemuMigrationPostcopyFailed(driver, vm);
            break;
        }
        VIR_DEBUG("Cancelling unfinished migration of domain %s",
                  vm->def->name);
        if (qemuMigrationCancel(driver, vm) < 0) {
            VIR_WARN("Could not cancel ongoing migration of domain %s",
                     vm->def->name);
        }
        wantResume = true;
        break;

    case QEMU_MIGRATION_PHASE_PERFORM3_DONE:
        /* migration finished but we didn't have a chance to get the result
         * of Finish3 step; third party needs to check what to do next; in
         * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this
         */
        if (postcopy)
            qemuMigrationPostcopyFailed(driver, vm);
        break;

    case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED:
        /* Finish3 failed, we need to resume the domain, but once we enter
         * post-copy mode there's no way back, so let's just mark the domain
         * as broken in that case
         */
        if (postcopy) {
            qemuMigrationPostcopyFailed(driver, vm);
            break;
        }
        VIR_DEBUG("Resuming domain %s after failed migration",
                  vm->def->name);
        wantResume = true;
        break;

    case QEMU_MIGRATION_PHASE_CONFIRM3:
        /* migration completed, we need to kill the domain here */
        return -1;

    case QEMU_MIGRATION_PHASE_BEGIN3:
        /* nothing happened so far, just forget we were about to migrate the
         * domain */
        break;

    case QEMU_MIGRATION_PHASE_NONE:
    case QEMU_MIGRATION_PHASE_PREPARE:
    case QEMU_MIGRATION_PHASE_FINISH2:
    case QEMU_MIGRATION_PHASE_FINISH3:
    case QEMU_MIGRATION_PHASE_LAST:
        /* N/A for outgoing migration */
        break;
    }

    if (!wantResume)
        return 0;

    /* resume the domain but only if it was paused as a result of
     * migration
     */
    if (state != VIR_DOMAIN_PAUSED)
        return 0;

    if (reason != VIR_DOMAIN_PAUSED_MIGRATION &&
        reason != VIR_DOMAIN_PAUSED_UNKNOWN)
        return 0;

    if (qemuProcessStartCPUs(driver, vm, conn,
                             VIR_DOMAIN_RUNNING_UNPAUSED,
                             QEMU_ASYNC_JOB_NONE) < 0) {
        VIR_WARN("Could not resume domain %s", vm->def->name);
    }

    return 0;
}

3046
static int
3047
qemuProcessRecoverJob(virQEMUDriverPtr driver,
3048 3049 3050 3051
                      virDomainObjPtr vm,
                      virConnectPtr conn,
                      const struct qemuDomainJobObj *job)
{
3052
    qemuDomainObjPrivatePtr priv = vm->privateData;
3053 3054 3055 3056 3057 3058 3059
    virDomainState state;
    int reason;

    state = virDomainObjGetState(vm, &reason);

    switch (job->asyncJob) {
    case QEMU_ASYNC_JOB_MIGRATION_OUT:
3060 3061 3062 3063 3064
        if (qemuProcessRecoverMigrationOut(driver, vm, conn, job->phase,
                                           state, reason) < 0)
            return -1;
        break;

3065
    case QEMU_ASYNC_JOB_MIGRATION_IN:
3066 3067
        if (qemuProcessRecoverMigrationIn(driver, vm, conn, job->phase,
                                          state, reason) < 0)
3068
            return -1;
3069 3070 3071 3072
        break;

    case QEMU_ASYNC_JOB_SAVE:
    case QEMU_ASYNC_JOB_DUMP:
3073
    case QEMU_ASYNC_JOB_SNAPSHOT:
3074
        qemuDomainObjEnterMonitor(driver, vm);
3075
        ignore_value(qemuMonitorMigrateCancel(priv->mon));
3076 3077
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            return -1;
3078
        /* resume the domain but only if it was paused as a result of
3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094
         * running a migration-to-file operation.  Although we are
         * recovering an async job, this function is run at startup
         * and must resume things using sync monitor connections.  */
         if (state == VIR_DOMAIN_PAUSED &&
             ((job->asyncJob == QEMU_ASYNC_JOB_DUMP &&
               reason == VIR_DOMAIN_PAUSED_DUMP) ||
              (job->asyncJob == QEMU_ASYNC_JOB_SAVE &&
               reason == VIR_DOMAIN_PAUSED_SAVE) ||
              (job->asyncJob == QEMU_ASYNC_JOB_SNAPSHOT &&
               reason == VIR_DOMAIN_PAUSED_SNAPSHOT) ||
              reason == VIR_DOMAIN_PAUSED_UNKNOWN)) {
             if (qemuProcessStartCPUs(driver, vm, conn,
                                      VIR_DOMAIN_RUNNING_UNPAUSED,
                                      QEMU_ASYNC_JOB_NONE) < 0) {
                 VIR_WARN("Could not resume domain '%s' after migration to file",
                          vm->def->name);
3095 3096 3097 3098
            }
        }
        break;

3099 3100 3101 3102
    case QEMU_ASYNC_JOB_START:
        /* Already handled in VIR_DOMAIN_PAUSED_STARTING_UP check. */
        break;

3103 3104 3105 3106 3107 3108 3109 3110
    case QEMU_ASYNC_JOB_NONE:
    case QEMU_ASYNC_JOB_LAST:
        break;
    }

    if (!virDomainObjIsActive(vm))
        return -1;

3111 3112 3113 3114
    /* In case any special handling is added for job type that has been ignored
     * before, QEMU_DOMAIN_TRACK_JOBS (from qemu_domain.h) needs to be updated
     * for the job to be properly tracked in domain state XML.
     */
3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134
    switch (job->active) {
    case QEMU_JOB_QUERY:
        /* harmless */
        break;

    case QEMU_JOB_DESTROY:
        VIR_DEBUG("Domain %s should have already been destroyed",
                  vm->def->name);
        return -1;

    case QEMU_JOB_SUSPEND:
        /* mostly harmless */
        break;

    case QEMU_JOB_MODIFY:
        /* XXX depending on the command we may be in an inconsistent state and
         * we should probably fall back to "monitor error" state and refuse to
         */
        break;

3135
    case QEMU_JOB_MIGRATION_OP:
3136
    case QEMU_JOB_ABORT:
3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147
    case QEMU_JOB_ASYNC:
    case QEMU_JOB_ASYNC_NESTED:
        /* async job was already handled above */
    case QEMU_JOB_NONE:
    case QEMU_JOB_LAST:
        break;
    }

    return 0;
}

3148 3149 3150 3151 3152 3153
/*
 * qemuProcessUpdateDevices:
 *
 * Does nothing unless QEMU_CAPS_DEVICE_DEL_EVENT is available.
 * Otherwise refreshes the device list kept in the domain's private
 * data and, for every name that was in the previous list but is
 * missing from the new one, removes the matching device (if one is
 * found in the domain definition).
 *
 * The previous list is freed in all cases once it has been detached.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessUpdateDevices(virQEMUDriverPtr driver,
                         virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDeviceDef dev;
    const char **current;
    char **previous;
    char **name;
    int ret = -1;

    if (!virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_DEVICE_DEL_EVENT))
        return 0;

    /* detach the old list so the refresh below builds a new one */
    previous = priv->qemuDevices;
    priv->qemuDevices = NULL;

    if (qemuDomainUpdateDeviceList(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
        goto cleanup;

    current = (const char **) priv->qemuDevices;

    for (name = previous; name && *name; name++) {
        if (virStringArrayHasString(current, *name))
            continue;

        if (virDomainDefFindDevice(vm->def, *name, &dev, false) != 0)
            continue;

        if (qemuDomainRemoveDevice(driver, vm, &dev) < 0)
            goto cleanup;
    }

    ret = 0;

 cleanup:
    virStringFreeList(previous);
    return ret;
}

3185 3186 3187 3188 3189 3190 3191
/*
 * qemuDomainPerfRestart:
 *
 * Creates a fresh perf object for @vm and re-enables every perf event
 * that the domain definition marks as VIR_TRISTATE_BOOL_YES. An event
 * that cannot be re-enabled is switched to VIR_TRISTATE_BOOL_NO in the
 * definition instead of failing the whole call.
 *
 * Returns 0 on success, -1 if the perf object could not be created.
 */
static int
qemuDomainPerfRestart(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virDomainDefPtr def = vm->def;
    size_t ev;

    priv->perf = virPerfNew();
    if (!priv->perf)
        return -1;

    for (ev = 0; ev < VIR_PERF_EVENT_LAST; ev++) {
        if (!def->perf.events[ev] ||
            def->perf.events[ev] != VIR_TRISTATE_BOOL_YES)
            continue;

        /* Failure to re-enable the perf event should not be fatal */
        if (virPerfEventEnable(priv->perf, ev, vm->pid) < 0)
            def->perf.events[ev] = VIR_TRISTATE_BOOL_NO;
    }

    return 0;
}

3208 3209
struct qemuProcessReconnectData {
    virConnectPtr conn;
3210
    virQEMUDriverPtr driver;
3211
    virDomainObjPtr obj;
3212 3213 3214 3215
};
/*
 * Open an existing VM's monitor, re-detect VCPU threads
 * and re-reserve the security labels in use
S
Stefan Berger 已提交
3216 3217 3218 3219
 *
 * We own the virConnectPtr we are passed here - whoever started
 * this thread function has increased the reference counter to it
 * so that we now have to close it.
3220
 *
3221
 * This function also inherits a locked and ref'd domain object.
3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233
 *
 * This function needs to:
 * 1. Enter job
 * 1. just before monitor reconnect do lightweight MonitorEnter
 *    (increase VM refcount and unlock VM)
 * 2. reconnect to monitor
 * 3. do lightweight MonitorExit (lock VM)
 * 4. continue reconnect process
 * 5. EndJob
 *
 * We can't do normal MonitorEnter & MonitorExit because these two lock the
 * monitor lock, which does not exists in this early phase.
3234 3235
 */
static void
3236
qemuProcessReconnect(void *opaque)
3237 3238
{
    struct qemuProcessReconnectData *data = opaque;
3239
    virQEMUDriverPtr driver = data->driver;
3240
    virDomainObjPtr obj = data->obj;
3241 3242
    qemuDomainObjPrivatePtr priv;
    virConnectPtr conn = data->conn;
3243
    struct qemuDomainJobObj oldjob;
3244 3245
    int state;
    int reason;
3246
    virQEMUDriverConfigPtr cfg;
3247
    size_t i;
3248
    int ret;
3249
    unsigned int stopFlags = 0;
3250
    bool jobStarted = false;
3251

3252 3253
    VIR_FREE(data);

3254
    qemuDomainObjRestoreJob(obj, &oldjob);
3255 3256
    if (oldjob.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
3257

3258 3259 3260
    cfg = virQEMUDriverGetConfig(driver);
    priv = obj->privateData;

3261 3262 3263 3264
    if (qemuDomainObjBeginJob(driver, obj, QEMU_JOB_MODIFY) < 0)
        goto error;
    jobStarted = true;

3265 3266 3267
    /* XXX If we ever gonna change pid file pattern, come up with
     * some intelligence here to deal with old paths. */
    if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, obj->def->name)))
3268
        goto error;
3269

J
John Ferlan 已提交
3270 3271 3272 3273
    /* Restore the masterKey */
    if (qemuDomainMasterKeyReadFile(priv) < 0)
        goto error;

3274
    virNWFilterReadLockFilterUpdates();
3275 3276 3277 3278

    VIR_DEBUG("Reconnect monitor to %p '%s'", obj, obj->def->name);

    /* XXX check PID liveliness & EXE path */
3279
    if (qemuConnectMonitor(driver, obj, QEMU_ASYNC_JOB_NONE, NULL) < 0)
3280 3281
        goto error;

3282
    if (qemuHostdevUpdateActiveDomainDevices(driver, obj->def) < 0)
3283 3284
        goto error;

3285
    if (qemuConnectCgroup(driver, obj) < 0)
3286 3287
        goto error;

3288 3289 3290
    if (qemuDomainPerfRestart(obj) < 0)
        goto error;

3291
    /* XXX: Need to change as long as lock is introduced for
3292
     * qemu_driver->sharedDevices.
3293 3294
     */
    for (i = 0; i < obj->def->ndisks; i++) {
3295 3296
        virDomainDeviceDef dev;

3297
        if (virStorageTranslateDiskSourcePool(conn, obj->def->disks[i]) < 0)
3298
            goto error;
3299

3300 3301 3302 3303 3304 3305
        /* XXX we should be able to restore all data from XML in the future.
         * This should be the only place that calls qemuDomainDetermineDiskChain
         * with @report_broken == false to guarantee best-effort domain
         * reconnect */
        if (qemuDomainDetermineDiskChain(driver, obj, obj->def->disks[i],
                                         true, false) < 0)
3306 3307
            goto error;

3308 3309 3310
        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = obj->def->disks[i];
        if (qemuAddSharedDevice(driver, &dev, obj->def->name) < 0)
3311 3312 3313
            goto error;
    }

3314 3315 3316
    if (qemuProcessUpdateState(driver, obj) < 0)
        goto error;

3317
    state = virDomainObjGetState(obj, &reason);
3318 3319 3320
    if (state == VIR_DOMAIN_SHUTOFF ||
        (state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_STARTING_UP)) {
3321 3322 3323 3324 3325
        VIR_DEBUG("Domain '%s' wasn't fully started yet, killing it",
                  obj->def->name);
        goto error;
    }

3326 3327 3328
    /* If upgrading from old libvirtd we won't have found any
     * caps in the domain status, so re-query them
     */
3329 3330
    if (!priv->qemuCaps &&
        !(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
3331 3332
                                                      obj->def->emulator,
                                                      obj->def->os.machine)))
3333 3334
        goto error;

3335
    /* In case the domain shutdown while we were not running,
3336
     * we need to finish the shutdown process. And we need to do it after
3337
     * we have virQEMUCaps filled in.
3338
     */
3339 3340 3341 3342 3343 3344
    if (state == VIR_DOMAIN_SHUTDOWN ||
        (state == VIR_DOMAIN_PAUSED &&
         reason == VIR_DOMAIN_PAUSED_SHUTTING_DOWN)) {
        VIR_DEBUG("Finishing shutdown sequence for domain %s",
                  obj->def->name);
        qemuProcessShutdownOrReboot(driver, obj);
3345
        goto cleanup;
3346 3347
    }

3348
    if ((qemuDomainAssignAddresses(obj->def, priv->qemuCaps, obj, false)) < 0)
3349
        goto error;
3350

3351 3352 3353 3354 3355 3356
    /* if domain requests security driver we haven't loaded, report error, but
     * do not kill the domain
     */
    ignore_value(virSecurityManagerCheckAllLabel(driver->securityManager,
                                                 obj->def));

3357 3358 3359
    if (qemuDomainRefreshVcpuInfo(driver, obj, QEMU_ASYNC_JOB_NONE, true) < 0)
        goto error;

3360
    if (virSecurityManagerReserveLabel(driver->securityManager, obj->def, obj->pid) < 0)
3361 3362
        goto error;

3363 3364 3365
    if (qemuProcessNotifyNets(obj->def) < 0)
        goto error;

3366
    if (qemuProcessFiltersInstantiate(obj->def))
3367 3368
        goto error;

3369
    if (qemuProcessRefreshDisks(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
3370 3371
        goto error;

3372
    if (qemuRefreshVirtioChannelState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
3373 3374
        goto error;

3375 3376 3377
    /* If querying of guest's RTC failed, report error, but do not kill the domain. */
    qemuRefreshRTC(driver, obj);

3378 3379 3380
    if (qemuProcessRefreshBalloonState(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
        goto error;

3381 3382 3383
    if (qemuProcessRecoverJob(driver, obj, conn, &oldjob) < 0)
        goto error;

3384 3385 3386
    if (qemuProcessUpdateDevices(driver, obj) < 0)
        goto error;

3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397
    /* Failure to connect to agent shouldn't be fatal */
    if ((ret = qemuConnectAgent(driver, obj)) < 0) {
        if (ret == -2)
            goto error;

        VIR_WARN("Cannot connect to QEMU guest agent for %s",
                 obj->def->name);
        virResetLastError();
        priv->agentError = true;
    }

3398
    /* update domain state XML with possibly updated state in virDomainObj */
3399
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, obj, driver->caps) < 0)
3400 3401
        goto error;

3402 3403
    /* Run an hook to allow admins to do some magic */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
3404
        char *xml = qemuDomainDefFormatXML(driver, obj->def, 0);
3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, obj->def->name,
                              VIR_HOOK_QEMU_OP_RECONNECT, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
            goto error;
    }

3419
    if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
3420 3421
        driver->inhibitCallback(true, driver->inhibitOpaque);

3422 3423 3424 3425 3426 3427 3428 3429 3430 3431
 cleanup:
    if (jobStarted)
        qemuDomainObjEndJob(driver, obj);
    if (!virDomainObjIsActive(obj))
        qemuDomainRemoveInactive(driver, obj);
    virDomainObjEndAPI(&obj);
    virObjectUnref(conn);
    virObjectUnref(cfg);
    virNWFilterUnlockFilterUpdates();
    return;
3432

3433
 error:
3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447
    if (virDomainObjIsActive(obj)) {
        /* We can't get the monitor back, so must kill the VM
         * to remove danger of it ending up running twice if
         * user tries to start it again later
         */
        if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_NO_SHUTDOWN)) {
            /* If we couldn't get the monitor and qemu supports
             * no-shutdown, we can safely say that the domain
             * crashed ... */
            state = VIR_DOMAIN_SHUTOFF_CRASHED;
        } else {
            /* ... but if it doesn't we can't say what the state
             * really is and FAILED means "failed to start" */
            state = VIR_DOMAIN_SHUTOFF_UNKNOWN;
3448
        }
3449 3450 3451 3452 3453
        /* If BeginJob failed, we jumped here without a job, let's hope another
         * thread didn't have a chance to start playing with the domain yet
         * (it's all we can do anyway).
         */
        qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
3454
    }
3455
    goto cleanup;
3456 3457
}

3458 3459
/*
 * qemuProcessReconnectHelper:
 * @obj: domain object being visited
 * @opaque: struct qemuProcessReconnectData template carrying conn and driver
 *
 * Callback for virDomainObjListForEach. For every domain that has a PID
 * it starts a thread running qemuProcessReconnect on a private copy of
 * the template data. If the thread cannot be created the domain is
 * stopped and removed.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 */
static int
qemuProcessReconnectHelper(virDomainObjPtr obj,
                           void *opaque)
{
    struct qemuProcessReconnectData *tmpl = opaque;
    struct qemuProcessReconnectData *args;
    virThread worker;

    /* A domain without a PID was inactive; no reconnect is needed */
    if (!obj->pid)
        return 0;

    if (VIR_ALLOC(args) < 0)
        return -1;

    /* The thread gets its own copy of the shared data plus the domain */
    *args = *tmpl;
    args->obj = obj;

    /* This lock and reference are eventually handed over to the thread
     * that performs the reconnect */
    virObjectLock(obj);
    virObjectRef(obj);

    /* The connection is closed later on, so keep it alive for the whole
     * lifetime of the thread by taking an extra reference. */
    virObjectRef(args->conn);

    if (virThreadCreate(&worker, false, qemuProcessReconnect, args) >= 0)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                   _("Could not create thread. QEMU initialization "
                     "might be incomplete"));

    /* Without a thread we cannot connect to the monitor, so kill qemu.
     * Calling qemuProcessStop without a job is safe here because no other
     * thread can be working with this domain object yet.
     */
    qemuProcessStop(tmpl->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
                    QEMU_ASYNC_JOB_NONE, 0);
    qemuDomainRemoveInactive(tmpl->driver, obj);

    virDomainObjEndAPI(&obj);
    virObjectUnref(args->conn);
    VIR_FREE(args);
    return -1;
}

/**
 * qemuProcessReconnectAll
 *
 * Try to re-open the resources for live VMs that we care
 * about.
 */
void
3516
qemuProcessReconnectAll(virConnectPtr conn, virQEMUDriverPtr driver)
3517
{
3518
    struct qemuProcessReconnectData data = {.conn = conn, .driver = driver};
3519
    virDomainObjListForEach(driver->domains, qemuProcessReconnectHelper, &data);
3520 3521
}

3522
static int
3523
qemuProcessVNCAllocatePorts(virQEMUDriverPtr driver,
3524 3525
                            virDomainGraphicsDefPtr graphics,
                            bool allocate)
3526 3527 3528
{
    unsigned short port;

3529 3530 3531 3532 3533 3534 3535
    if (!allocate) {
        if (graphics->data.vnc.autoport)
            graphics->data.vnc.port = 5900;

        return 0;
    }

3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549
    if (graphics->data.vnc.autoport) {
        if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
            return -1;
        graphics->data.vnc.port = port;
    }

    if (graphics->data.vnc.websocket == -1) {
        if (virPortAllocatorAcquire(driver->webSocketPorts, &port) < 0)
            return -1;
        graphics->data.vnc.websocket = port;
    }

    return 0;
}
3550

3551
static int
3552
qemuProcessSPICEAllocatePorts(virQEMUDriverPtr driver,
3553 3554
                              virDomainGraphicsDefPtr graphics,
                              bool allocate)
3555
{
3556
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
3557 3558
    unsigned short port = 0;
    unsigned short tlsPort;
3559
    size_t i;
3560
    int defaultMode = graphics->data.spice.defaultMode;
3561
    int ret = -1;
3562 3563 3564 3565 3566 3567

    bool needTLSPort = false;
    bool needPort = false;

    if (graphics->data.spice.autoport) {
        /* check if tlsPort or port need allocation */
3568
        for (i = 0; i < VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_LAST; i++) {
3569 3570 3571 3572 3573 3574 3575 3576 3577 3578
            switch (graphics->data.spice.channels[i]) {
            case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
                needTLSPort = true;
                break;

            case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
                needPort = true;
                break;

            case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
3579
                /* default mode will be used */
3580 3581 3582
                break;
            }
        }
3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597
        switch (defaultMode) {
        case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_SECURE:
            needTLSPort = true;
            break;

        case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_INSECURE:
            needPort = true;
            break;

        case VIR_DOMAIN_GRAPHICS_SPICE_CHANNEL_MODE_ANY:
            if (cfg->spiceTLS)
                needTLSPort = true;
            needPort = true;
            break;
        }
3598 3599
    }

3600 3601 3602 3603 3604 3605 3606
    if (!allocate) {
        if (needPort || graphics->data.spice.port == -1)
            graphics->data.spice.port = 5901;

        if (needTLSPort || graphics->data.spice.tlsPort == -1)
            graphics->data.spice.tlsPort = 5902;

3607 3608
        ret = 0;
        goto cleanup;
3609 3610
    }

3611
    if (needPort || graphics->data.spice.port == -1) {
3612
        if (virPortAllocatorAcquire(driver->remotePorts, &port) < 0)
3613
            goto cleanup;
3614 3615

        graphics->data.spice.port = port;
3616 3617 3618

        if (!graphics->data.spice.autoport)
            graphics->data.spice.portReserved = true;
3619 3620
    }

3621 3622
    if (needTLSPort || graphics->data.spice.tlsPort == -1) {
        if (!cfg->spiceTLS) {
3623 3624 3625
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("Auto allocation of spice TLS port requested "
                             "but spice TLS is disabled in qemu.conf"));
3626
            goto cleanup;
3627
        }
3628 3629

        if (virPortAllocatorAcquire(driver->remotePorts, &tlsPort) < 0)
3630
            goto cleanup;
3631 3632

        graphics->data.spice.tlsPort = tlsPort;
3633 3634 3635

        if (!graphics->data.spice.autoport)
            graphics->data.spice.tlsPortReserved = true;
3636 3637
    }

3638
    ret = 0;
3639

3640
 cleanup:
3641
    virPortAllocatorRelease(driver->remotePorts, port);
3642 3643
    virObjectUnref(cfg);
    return ret;
3644 3645 3646
}


3647 3648 3649
/*
 * qemuValidateCpuCount:
 * @def: domain definition
 * @qemuCaps: capabilities used to look up the machine type's CPU limit
 *
 * Rejects a domain with zero vCPUs and a domain whose maximum vCPU count
 * exceeds the limit reported for its machine type. A reported limit of 0
 * disables the second check.
 *
 * Returns 0 if the CPU count is acceptable, -1 (with an error reported)
 * otherwise.
 */
static int
qemuValidateCpuCount(virDomainDefPtr def,
                     virQEMUCapsPtr qemuCaps)
{
    unsigned int machineLimit;

    machineLimit = virQEMUCapsGetMachineMaxCpus(qemuCaps, def->os.machine);

    if (virDomainDefGetVcpus(def) == 0) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Domain requires at least 1 vCPU"));
        return -1;
    }

    if (machineLimit == 0 || virDomainDefGetVcpusMax(def) <= machineLimit)
        return 0;

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("Maximum CPUs greater than specified machine type limit"));
    return -1;
}

3668 3669

static bool
3670 3671 3672
qemuProcessVerifyGuestCPU(virQEMUDriverPtr driver,
                          virDomainObjPtr vm,
                          int asyncJob)
3673 3674 3675 3676 3677
{
    virDomainDefPtr def = vm->def;
    virArch arch = def->os.arch;
    virCPUDataPtr guestcpu = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
3678
    int rc;
3679
    bool ret = false;
J
Ján Tomko 已提交
3680
    size_t i;
3681 3682 3683 3684

    switch (arch) {
    case VIR_ARCH_I686:
    case VIR_ARCH_X86_64:
3685 3686
        if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
            return false;
3687
        rc = qemuMonitorGetGuestCPU(priv->mon, arch, &guestcpu);
3688 3689
        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            return false;
3690

3691 3692 3693 3694
        if (rc < 0) {
            if (rc == -2)
                break;

3695
            goto cleanup;
3696
        }
3697

J
Ján Tomko 已提交
3698
        if (def->features[VIR_DOMAIN_FEATURE_PVSPINLOCK] == VIR_TRISTATE_SWITCH_ON) {
3699 3700 3701 3702 3703 3704
            if (!cpuHasFeature(guestcpu, VIR_CPU_x86_KVM_PV_UNHALT)) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                               _("host doesn't support paravirtual spinlocks"));
                goto cleanup;
            }
        }
J
Ján Tomko 已提交
3705

3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730
        for (i = 0; i < VIR_DOMAIN_HYPERV_LAST; i++) {
            if (def->hyperv_features[i] == VIR_TRISTATE_SWITCH_ON) {
                char *cpuFeature;
                if (virAsprintf(&cpuFeature, "__kvm_hv_%s",
                                virDomainHypervTypeToString(i)) < 0)
                    goto cleanup;
                if (!cpuHasFeature(guestcpu, cpuFeature)) {
                    switch ((virDomainHyperv) i) {
                    case VIR_DOMAIN_HYPERV_RELAXED:
                    case VIR_DOMAIN_HYPERV_VAPIC:
                    case VIR_DOMAIN_HYPERV_SPINLOCKS:
                        VIR_WARN("host doesn't support hyperv '%s' feature",
                                 virDomainHypervTypeToString(i));
                        break;
                    case VIR_DOMAIN_HYPERV_VPINDEX:
                    case VIR_DOMAIN_HYPERV_RUNTIME:
                    case VIR_DOMAIN_HYPERV_SYNIC:
                    case VIR_DOMAIN_HYPERV_STIMER:
                    case VIR_DOMAIN_HYPERV_RESET:
                    case VIR_DOMAIN_HYPERV_VENDOR_ID:
                        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                                       _("host doesn't support hyperv '%s' feature"),
                                       virDomainHypervTypeToString(i));
                        goto cleanup;
                        break;
3731 3732

                    /* coverity[dead_error_begin] */
3733 3734 3735 3736 3737 3738
                    case VIR_DOMAIN_HYPERV_LAST:
                        break;
                    }
                }
            }
        }
J
Ján Tomko 已提交
3739

3740 3741 3742
        if (def->cpu && def->cpu->mode != VIR_CPU_MODE_HOST_PASSTHROUGH) {
            for (i = 0; i < def->cpu->nfeatures; i++) {
                virCPUFeatureDefPtr feature = &def->cpu->features[i];
J
Ján Tomko 已提交
3743

3744 3745 3746 3747 3748 3749 3750 3751 3752
                if (feature->policy != VIR_CPU_FEATURE_REQUIRE)
                    continue;

                if (STREQ(feature->name, "invtsc") &&
                    !cpuHasFeature(guestcpu, feature->name)) {
                    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                                   _("host doesn't support invariant TSC"));
                    goto cleanup;
                }
J
Ján Tomko 已提交
3753 3754
            }
        }
3755 3756 3757 3758 3759 3760 3761 3762
        break;

    default:
        break;
    }

    ret = true;

3763
 cleanup:
3764 3765 3766 3767 3768
    cpuDataFree(guestcpu);
    return ret;
}


3769 3770
static int
qemuPrepareNVRAM(virQEMUDriverConfigPtr cfg,
3771
                 virDomainObjPtr vm)
3772 3773 3774 3775
{
    int ret = -1;
    int srcFD = -1;
    int dstFD = -1;
3776
    virDomainLoaderDefPtr loader = vm->def->os.loader;
3777
    bool created = false;
3778 3779
    const char *master_nvram_path;
    ssize_t r;
3780

3781
    if (!loader || !loader->nvram || virFileExists(loader->nvram))
3782 3783
        return 0;

3784 3785 3786
    master_nvram_path = loader->templt;
    if (!loader->templt) {
        size_t i;
3787 3788 3789
        for (i = 0; i < cfg->nfirmwares; i++) {
            if (STREQ(cfg->firmwares[i]->name, loader->path)) {
                master_nvram_path = cfg->firmwares[i]->nvram;
3790
                break;
3791 3792
            }
        }
3793
    }
3794

3795 3796 3797 3798 3799 3800
    if (!master_nvram_path) {
        virReportError(VIR_ERR_OPERATION_FAILED,
                       _("unable to find any master var store for "
                         "loader: %s"), loader->path);
        goto cleanup;
    }
3801

3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818
    if ((srcFD = virFileOpenAs(master_nvram_path, O_RDONLY,
                               0, -1, -1, 0)) < 0) {
        virReportSystemError(-srcFD,
                             _("Failed to open file '%s'"),
                             master_nvram_path);
        goto cleanup;
    }
    if ((dstFD = virFileOpenAs(loader->nvram,
                               O_WRONLY | O_CREAT | O_EXCL,
                               S_IRUSR | S_IWUSR,
                               cfg->user, cfg->group, 0)) < 0) {
        virReportSystemError(-dstFD,
                             _("Failed to create file '%s'"),
                             loader->nvram);
        goto cleanup;
    }
    created = true;
3819

3820 3821
    do {
        char buf[1024];
3822

3823
        if ((r = saferead(srcFD, buf, sizeof(buf))) < 0) {
3824
            virReportSystemError(errno,
3825
                                 _("Unable to read from file '%s'"),
3826 3827 3828
                                 master_nvram_path);
            goto cleanup;
        }
3829 3830

        if (safewrite(dstFD, buf, r) < 0) {
3831
            virReportSystemError(errno,
3832
                                 _("Unable to write to file '%s'"),
3833 3834 3835
                                 loader->nvram);
            goto cleanup;
        }
3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848
    } while (r);

    if (VIR_CLOSE(srcFD) < 0) {
        virReportSystemError(errno,
                             _("Unable to close file '%s'"),
                             master_nvram_path);
        goto cleanup;
    }
    if (VIR_CLOSE(dstFD) < 0) {
        virReportSystemError(errno,
                             _("Unable to close file '%s'"),
                             loader->nvram);
        goto cleanup;
3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865
    }

    ret = 0;
 cleanup:
    /* We successfully generated the nvram path, but failed to
     * copy the file content. Roll back. */
    if (ret < 0) {
        if (created)
            unlink(loader->nvram);
    }

    VIR_FORCE_CLOSE(srcFD);
    VIR_FORCE_CLOSE(dstFD);
    return ret;
}


3866 3867 3868
static void
qemuLogOperation(virDomainObjPtr vm,
                 const char *msg,
3869 3870
                 virCommandPtr cmd,
                 qemuDomainLogContextPtr logCtxt)
3871 3872 3873 3874 3875
{
    char *timestamp;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int qemuVersion = virQEMUCapsGetVersion(priv->qemuCaps);
    const char *package = virQEMUCapsGetPackage(priv->qemuCaps);
3876
    char *hostname = virGetHostname();
3877 3878

    if ((timestamp = virTimeStringNow()) == NULL)
3879
        goto cleanup;
3880

3881 3882
    if (qemuDomainLogContextWrite(logCtxt,
                                  "%s: %s %s, qemu version: %d.%d.%d%s, hostname: %s\n",
3883 3884 3885 3886
                                  timestamp, msg, VIR_LOG_VERSION_STRING,
                                  (qemuVersion / 1000000) % 1000,
                                  (qemuVersion / 1000) % 1000,
                                  qemuVersion % 1000,
3887 3888
                                  package ? package : "",
                                  hostname ? hostname : "") < 0)
3889
        goto cleanup;
3890

3891 3892 3893 3894 3895
    if (cmd) {
        char *args = virCommandToString(cmd);
        qemuDomainLogContextWrite(logCtxt, "%s\n", args);
        VIR_FREE(args);
    }
3896 3897

 cleanup:
3898
    VIR_FREE(hostname);
3899 3900 3901
    VIR_FREE(timestamp);
}

3902 3903 3904 3905 3906 3907 3908

/*
 * qemuProcessIncomingDefFree:
 * @inc: incoming migration definition to free, may be NULL
 *
 * Frees the address and both URIs owned by @inc and then @inc itself.
 * The path member is not freed here.
 */
void
qemuProcessIncomingDefFree(qemuProcessIncomingDefPtr inc)
{
    if (inc) {
        VIR_FREE(inc->address);
        VIR_FREE(inc->launchURI);
        VIR_FREE(inc->deferredURI);
        VIR_FREE(inc);
    }
}


/*
 * qemuProcessIncomingDefNew:
 * @qemuCaps: QEMU capabilities
 * @listenAddress: address to copy into the definition
 * @migrateFrom: migration source used to build the incoming URI
 * @fd: file descriptor stored in the definition
 * @path: path stored in the definition (not copied)
 *
 * This function does not copy @path, the caller is responsible for keeping
 * the @path pointer valid during the lifetime of the allocated
 * qemuProcessIncomingDef structure.
 *
 * When QEMU has QEMU_CAPS_INCOMING_DEFER, the real URI is kept as the
 * deferred URI and the launch URI becomes "defer".
 *
 * Returns the new definition, or NULL on failure.
 */
qemuProcessIncomingDefPtr
qemuProcessIncomingDefNew(virQEMUCapsPtr qemuCaps,
                          const char *listenAddress,
                          const char *migrateFrom,
                          int fd,
                          const char *path)
{
    qemuProcessIncomingDefPtr def = NULL;

    if (qemuMigrationCheckIncoming(qemuCaps, migrateFrom) < 0 ||
        VIR_ALLOC(def) < 0)
        return NULL;

    def->fd = fd;
    def->path = path;

    if (VIR_STRDUP(def->address, listenAddress) < 0)
        goto error;

    if (!(def->launchURI = qemuMigrationIncomingURI(migrateFrom, fd)))
        goto error;

    if (virQEMUCapsGet(qemuCaps, QEMU_CAPS_INCOMING_DEFER)) {
        /* Move the real URI aside and launch with "defer" instead */
        def->deferredURI = def->launchURI;
        if (VIR_STRDUP(def->launchURI, "defer") < 0)
            goto error;
    }

    return def;

 error:
    qemuProcessIncomingDefFree(def);
    return NULL;
}


3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989
/*
 * qemuProcessBeginJob:
 * @driver: qemu driver
 * @vm: domain object
 *
 * Starts a new QEMU_ASYNC_JOB_START async job on @vm. The caller has to end
 * the job with qemuProcessEndJob and must pass QEMU_ASYNC_JOB_START as the
 * @asyncJob argument to every function that takes one in between.
 *
 * Returns 0 on success, -1 if the async job could not be started.
 */
int
qemuProcessBeginJob(virQEMUDriverPtr driver,
                    virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv;

    if (qemuDomainObjBeginAsyncJob(driver, vm, QEMU_ASYNC_JOB_START) < 0)
        return -1;

    qemuDomainObjSetAsyncJobMask(vm, QEMU_JOB_NONE);

    priv = vm->privateData;
    priv->job.current->type = VIR_DOMAIN_JOB_UNBOUNDED;

    return 0;
}


/*
 * qemuProcessEndJob:
 * @driver: qemu driver
 * @vm: domain object
 *
 * Ends the async job on @vm by calling qemuDomainObjEndAsyncJob. This is
 * the counterpart of qemuProcessBeginJob.
 */
void
qemuProcessEndJob(virQEMUDriverPtr driver,
                  virDomainObjPtr vm)
{
    qemuDomainObjEndAsyncJob(driver, vm);
}


3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012
/*
 * qemuProcessStartHook:
 * @driver: qemu driver
 * @vm: domain object whose definition is passed to the hook as XML
 * @op: hook operation
 * @subop: hook sub-operation
 *
 * Runs the QEMU hook for @vm when one is present.
 *
 * Returns 0 when no hook is present, -1 if the domain XML could not be
 * formatted, otherwise the return value of virHookCall.
 */
static int
qemuProcessStartHook(virQEMUDriverPtr driver,
                     virDomainObjPtr vm,
                     virHookQemuOpType op,
                     virHookSubopType subop)
{
    char *domxml;
    int hookrc;

    if (!virHookPresent(VIR_HOOK_DRIVER_QEMU))
        return 0;

    domxml = qemuDomainDefFormatXML(driver, vm->def, 0);
    if (!domxml)
        return -1;

    hookrc = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, op, subop,
                         NULL, domxml, NULL);
    VIR_FREE(domxml);

    return hookrc;
}


4013
static int
4014
qemuProcessGraphicsReservePorts(virQEMUDriverPtr driver,
4015
                                virDomainGraphicsDefPtr graphics)
4016
{
4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027
    virDomainGraphicsListenDefPtr glisten;

    if (graphics->nListens <= 0)
        return 0;

    glisten = &graphics->listens[0];

    if (glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
        glisten->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
        return 0;

4028 4029 4030 4031 4032 4033 4034
    if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC &&
        !graphics->data.vnc.autoport) {
        if (virPortAllocatorSetUsed(driver->remotePorts,
                                    graphics->data.vnc.port,
                                    true) < 0)
            return -1;
        graphics->data.vnc.portReserved = true;
4035

4036 4037 4038
    } else if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE &&
               !graphics->data.spice.autoport) {
        if (graphics->data.spice.port > 0) {
4039
            if (virPortAllocatorSetUsed(driver->remotePorts,
4040
                                        graphics->data.spice.port,
4041
                                        true) < 0)
4042
                return -1;
4043 4044
            graphics->data.spice.portReserved = true;
        }
4045

4046 4047 4048 4049 4050 4051
        if (graphics->data.spice.tlsPort > 0) {
            if (virPortAllocatorSetUsed(driver->remotePorts,
                                        graphics->data.spice.tlsPort,
                                        true) < 0)
                return -1;
            graphics->data.spice.tlsPortReserved = true;
4052 4053 4054
        }
    }

4055 4056 4057 4058
    return 0;
}


4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096
/*
 * qemuProcessGraphicsAllocatePorts:
 * @driver: qemu driver
 * @graphics: graphics definition
 * @allocate: passed through to the VNC/SPICE specific helpers
 *
 * Dispatches to qemuProcessVNCAllocatePorts or
 * qemuProcessSPICEAllocatePorts for graphics whose first listen is of type
 * address or network. Other graphics types need no ports.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessGraphicsAllocatePorts(virQEMUDriverPtr driver,
                                 virDomainGraphicsDefPtr graphics,
                                 bool allocate)
{
    virDomainGraphicsListenDefPtr first;
    int rc = 0;

    if (graphics->nListens <= 0)
        return 0;

    first = &graphics->listens[0];

    if (first->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS &&
        first->type != VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK)
        return 0;

    switch (graphics->type) {
    case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
        rc = qemuProcessVNCAllocatePorts(driver, graphics, allocate);
        break;

    case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
        rc = qemuProcessSPICEAllocatePorts(driver, graphics, allocate);
        break;

    case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
    case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
    case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
    case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
        break;
    }

    return rc < 0 ? -1 : 0;
}


4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123
/*
 * qemuProcessGraphicsSetupNetworkAddress:
 * @glisten: network listen definition to fill in
 * @listenAddr: fallback address used when no network is specified
 *
 * Sets glisten->address either to a copy of @listenAddr (no network given)
 * or to the address obtained from networkGetNetworkAddress.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessGraphicsSetupNetworkAddress(virDomainGraphicsListenDefPtr glisten,
                                       const char *listenAddr)
{
    int rc;

    /* TODO: reject configuration without network specified for network listen */
    if (!glisten->network)
        return VIR_STRDUP(glisten->address, listenAddr) < 0 ? -1 : 0;

    rc = networkGetNetworkAddress(glisten->network, &glisten->address);
    if (rc >= 0)
        return 0;

    /* A result of -2 or lower gets a dedicated error message here */
    if (rc <= -2) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("network-based listen isn't possible, "
                         "network driver isn't present"));
    }

    return -1;
}


4124
static int
4125
qemuProcessGraphicsSetupListen(virQEMUDriverPtr driver,
4126 4127
                               virDomainGraphicsDefPtr graphics,
                               virDomainObjPtr vm)
4128
{
4129
    qemuDomainObjPrivatePtr priv = vm->privateData;
4130
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
4131
    const char *type = virDomainGraphicsTypeToString(graphics->type);
4132
    char *listenAddr = NULL;
4133
    bool useSocket = false;
4134
    size_t i;
4135
    int ret = -1;
4136 4137 4138

    switch (graphics->type) {
    case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
4139
        useSocket = cfg->vncAutoUnixSocket;
4140 4141 4142 4143
        listenAddr = cfg->vncListen;
        break;

    case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
4144
        useSocket = cfg->spiceAutoUnixSocket;
4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159
        listenAddr = cfg->spiceListen;
        break;

    case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
    case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
    case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
    case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
        break;
    }

    for (i = 0; i < graphics->nListens; i++) {
        virDomainGraphicsListenDefPtr glisten = &graphics->listens[i];

        switch (glisten->type) {
        case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_ADDRESS:
4160 4161 4162 4163 4164
            if (!glisten->address) {
                /* If there is no address specified and qemu.conf has
                 * *_auto_unix_socket set we should use unix socket as
                 * default instead of tcp listen. */
                if (useSocket) {
4165 4166
                    memset(glisten, 0, sizeof(virDomainGraphicsListenDef));
                    if (virAsprintf(&glisten->socket, "%s/%s.sock",
4167
                                    priv->libDir, type) < 0)
4168
                        goto cleanup;
4169 4170
                    glisten->fromConfig = true;
                    glisten->type = VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET;
4171 4172
                } else if (listenAddr) {
                    if (VIR_STRDUP(glisten->address, listenAddr) < 0)
4173
                        goto cleanup;
4174 4175 4176
                    glisten->fromConfig = true;
                }
            }
4177 4178 4179 4180 4181 4182 4183 4184
            break;

        case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NETWORK:
            if (glisten->address || !listenAddr)
                continue;

            if (qemuProcessGraphicsSetupNetworkAddress(glisten,
                                                       listenAddr) < 0)
4185
                goto cleanup;
4186 4187
            break;

4188 4189 4190 4191
        case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_SOCKET:
            if (!glisten->socket) {
                if (virAsprintf(&glisten->socket, "%s/%s.sock",
                                priv->libDir, type) < 0)
4192
                    goto cleanup;
4193 4194 4195 4196
                glisten->autoGenerated = true;
            }
            break;

4197 4198 4199 4200 4201 4202
        case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_NONE:
        case VIR_DOMAIN_GRAPHICS_LISTEN_TYPE_LAST:
            break;
        }
    }

4203 4204 4205 4206 4207
    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    return ret;
4208 4209 4210
}


4211 4212
static int
qemuProcessSetupGraphics(virQEMUDriverPtr driver,
4213 4214
                         virDomainObjPtr vm,
                         unsigned int flags)
4215
{
4216
    virDomainGraphicsDefPtr graphics;
4217
    bool allocate = !(flags & VIR_QEMU_PROCESS_START_PRETEND);
4218
    size_t i;
4219 4220
    int ret = -1;

4221 4222 4223 4224 4225 4226 4227
    for (i = 0; i < vm->def->ngraphics; i++) {
        graphics = vm->def->graphics[i];

        if (qemuProcessGraphicsSetupListen(driver, graphics, vm) < 0)
            goto cleanup;
    }

4228 4229 4230 4231 4232 4233 4234 4235
    if (allocate) {
        for (i = 0; i < vm->def->ngraphics; i++) {
            graphics = vm->def->graphics[i];

            if (qemuProcessGraphicsReservePorts(driver, graphics) < 0)
                goto cleanup;
        }
    }
4236

4237
    for (i = 0; i < vm->def->ngraphics; ++i) {
4238
        graphics = vm->def->graphics[i];
4239

4240 4241
        if (qemuProcessGraphicsAllocatePorts(driver, graphics, allocate) < 0)
            goto cleanup;
4242 4243 4244 4245 4246 4247 4248 4249 4250
    }

    ret = 0;

 cleanup:
    return ret;
}


4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309
/*
 * qemuProcessSetupRawIO:
 * @driver: qemu driver
 * @vm: domain object
 * @cmd: command that is allowed CAP_SYS_RAWIO when rawio is requested
 *
 * Registers every disk of @vm as a shared device and applies its
 * unprivileged SGIO setting. If any disk or SCSI host device requests
 * rawio, CAP_SYS_RAWIO is allowed on @cmd; on platforms without
 * CAP_SYS_RAWIO such a request is an error.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessSetupRawIO(virQEMUDriverPtr driver,
                      virDomainObjPtr vm,
                      virCommandPtr cmd ATTRIBUTE_UNUSED)
{
    bool rawio = false;
    size_t i;
    int ret = -1;

    /* in case a certain disk is desirous of CAP_SYS_RAWIO, add this */
    for (i = 0; i < vm->def->ndisks; i++) {
        virDomainDeviceDef dev;
        virDomainDiskDefPtr disk = vm->def->disks[i];

        if (disk->rawio == VIR_TRISTATE_BOOL_YES) {
            rawio = true;
#ifndef CAP_SYS_RAWIO
            /* rawio cannot be granted here, so stop early; the error is
             * reported in the cleanup section */
            break;
#endif
        }

        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = disk;
        if (qemuAddSharedDevice(driver, &dev, vm->def->name) < 0)
            goto cleanup;

        if (qemuSetUnprivSGIO(&dev) < 0)
            goto cleanup;
    }

    /* If rawio not already set, check hostdevs as well */
    if (!rawio) {
        for (i = 0; i < vm->def->nhostdevs; i++) {
            virDomainHostdevDefPtr hostdev = vm->def->hostdevs[i];
            virDomainHostdevSubsysSCSIPtr scsisrc;

            /* source.subsys.u is a union: its scsi member may only be
             * read for SCSI subsystem host devices */
            if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
                hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_SCSI)
                continue;

            scsisrc = &hostdev->source.subsys.u.scsi;
            if (scsisrc->rawio == VIR_TRISTATE_BOOL_YES) {
                rawio = true;
                break;
            }
        }
    }

    ret = 0;

 cleanup:
    if (rawio) {
#ifdef CAP_SYS_RAWIO
        if (ret == 0)
            virCommandAllowCap(cmd, CAP_SYS_RAWIO);
#else
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Raw I/O is not supported on this platform"));
        ret = -1;
#endif
    }
    return ret;
}


4310 4311 4312 4313 4314 4315 4316 4317 4318
/*
 * qemuProcessSetupBalloon:
 * @driver: qemu driver
 * @vm: domain object
 * @asyncJob: async job passed to qemuDomainObjEnterMonitorAsync
 *
 * For domains with a memory balloon, sets the memory statistics period
 * (when one is configured) and the current balloon size through the
 * monitor.
 *
 * Returns 0 on success or when the domain has no balloon, -1 on failure.
 */
static int
qemuProcessSetupBalloon(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
                        qemuDomainAsyncJob asyncJob)
{
    unsigned long long balloon = vm->def->mem.cur_balloon;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int ret = -1;

    if (!virDomainDefHasMemballoon(vm->def))
        return 0;

    /* Return directly on failure: the monitor was never entered, so it
     * must not be exited in the cleanup section. */
    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
        return -1;

    /* The result of setting the stats period is deliberately ignored */
    if (vm->def->memballoon->period)
        qemuMonitorSetMemoryStatsPeriod(priv->mon, vm->def->memballoon,
                                        vm->def->memballoon->period);
    if (qemuMonitorSetBalloon(priv->mon, balloon) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        ret = -1;
    return ret;
}


J
Jiri Denemark 已提交
4340 4341 4342
/*
 * qemuProcessMakeDir:
 * @driver: qemu driver (its security manager labels the path)
 * @vm: domain object
 * @path: directory to create
 *
 * Creates @path with mode 0750 and sets the domain's security label on it.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
qemuProcessMakeDir(virQEMUDriverPtr driver,
                   virDomainObjPtr vm,
                   const char *path)
{
    if (virFileMakePathWithMode(path, 0750) < 0) {
        virReportSystemError(errno, _("Cannot create directory '%s'"), path);
        return -1;
    }

    if (virSecurityManagerDomainSetPathLabel(driver->securityManager,
                                             vm->def, path) < 0)
        return -1;

    return 0;
}


4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411
/**
 * qemuProcessStartWarnShmem:
 * @vm: domain object
 *
 * Logs a warning when the domain has a vhost-user network interface but no
 * shared memory that could back it.  Nothing is ever rejected here: libvirt
 * cannot really verify that the setup will work, so failing would only give
 * a false sense of validation.
 */
static void
qemuProcessStartWarnShmem(virDomainObjPtr vm)
{
    bool has_vhostuser = false;
    bool has_shared = vm->def->nshmems != 0;
    size_t n;

    /* Only domains with at least one vhost-user interface are of interest. */
    for (n = 0; !has_vhostuser && n < vm->def->nnets; n++) {
        has_vhostuser = virDomainNetGetActualType(vm->def->nets[n]) ==
                        VIR_DOMAIN_NET_TYPE_VHOSTUSER;
    }

    if (!has_vhostuser)
        return;

    /*
     * Without shmem devices, fall back to looking for hugepages combined
     * with a NUMA node using shared memory access.  This is a rough
     * heuristic: it only checks that *some* hugepages and *some* shared
     * NUMA node exist.
     */
    if (!has_shared && vm->def->mem.nhugepages) {
        for (n = 0;
             !has_shared && n < virDomainNumaGetNodeCount(vm->def->numa);
             n++) {
            has_shared = virDomainNumaGetNodeMemoryAccessMode(vm->def->numa, n) ==
                         VIR_NUMA_MEM_ACCESS_SHARED;
        }
    }

    if (has_shared)
        return;

    VIR_WARN("Detected vhost-user interface without any shared memory, "
             "the interface might not be operational");
}

4412
static int
4413 4414
qemuProcessStartValidateXML(virQEMUDriverPtr driver,
                            virDomainObjPtr vm,
4415
                            virQEMUCapsPtr qemuCaps,
4416
                            virCapsPtr caps,
4417
                            unsigned int flags)
4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429
{
    /* The bits we validate here are XML configs that we previously
     * accepted. We reject them at VM startup time rather than parse
     * time so that pre-existing VMs aren't rejected and dropped from
     * the VM list when libvirt is updated.
     *
     * If back compat isn't a concern, XML validation should probably
     * be done at parse time.
     */
    if (qemuValidateCpuCount(vm->def, qemuCaps) < 0)
        return -1;

4430 4431 4432
    /* checks below should not be executed when starting a qemu process for a
     * VM that was running before (migration, snapshots, save). It's more
     * important to start such VM than keep the configuration clean */
4433 4434 4435
    if ((flags & VIR_QEMU_PROCESS_START_NEW) &&
        virDomainDefValidate(vm->def, caps, 0, driver->xmlopt) < 0)
        return -1;
4436 4437 4438

    return 0;
}
4439

4440 4441 4442 4443 4444 4445
/**
 * qemuProcessStartValidate:
 * @vm: domain object
 * @qemuCaps: emulator capabilities
 * @migration: restoration of existing state
 *
4446 4447 4448 4449 4450
 * This function aggregates checks done prior to start of a VM.
 *
 * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
 * start the domain but create a valid qemu command.  If some code shouldn't be
 * executed in this case, make sure to check this flag.
4451
 */
4452
static int
4453 4454
qemuProcessStartValidate(virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
4455
                         virQEMUCapsPtr qemuCaps,
4456
                         virCapsPtr caps,
4457
                         unsigned int flags)
4458
{
4459 4460
    size_t i;

4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479
    if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
        if (vm->def->virtType == VIR_DOMAIN_VIRT_KVM) {
            VIR_DEBUG("Checking for KVM availability");
            if (!virFileExists("/dev/kvm")) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                               _("Domain requires KVM, but it is not available. "
                                 "Check that virtualization is enabled in the "
                                 "host BIOS, and host configuration is setup to "
                                 "load the kvm modules."));
                return -1;
            }
        }

        VIR_DEBUG("Checking domain and device security labels");
        if (virSecurityManagerCheckAllLabel(driver->securityManager, vm->def) < 0)
            return -1;

    }

4480
    if (qemuProcessStartValidateXML(driver, vm, qemuCaps, caps, flags) < 0)
4481 4482
        return -1;

4483 4484
    VIR_DEBUG("Checking for any possible (non-fatal) issues");

4485
    qemuProcessStartWarnShmem(vm);
4486

4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508
    for (i = 0; i < vm->def->ngraphics; i++) {
        virDomainGraphicsDefPtr graphics = vm->def->graphics[i];

        switch (graphics->type) {
        case VIR_DOMAIN_GRAPHICS_TYPE_VNC:
        case VIR_DOMAIN_GRAPHICS_TYPE_SPICE:
            if (graphics->nListens > 1) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                               _("QEMU does not support multiple listens for "
                                 "one graphics device."));
                return -1;
            }
            break;

        case VIR_DOMAIN_GRAPHICS_TYPE_SDL:
        case VIR_DOMAIN_GRAPHICS_TYPE_RDP:
        case VIR_DOMAIN_GRAPHICS_TYPE_DESKTOP:
        case VIR_DOMAIN_GRAPHICS_TYPE_LAST:
            break;
        }
    }

4509 4510 4511 4512
    return 0;
}


J
Jiri Denemark 已提交
4513 4514 4515 4516 4517 4518
/**
 * qemuProcessInit:
 *
 * Prepares the domain up to the point when priv->qemuCaps is initialized. The
 * function calls qemuProcessStop when needed.
 *
4519 4520 4521 4522
 * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
 * start the domain but create a valid qemu command.  If some code shouldn't be
 * executed in this case, make sure to check this flag.
 *
J
Jiri Denemark 已提交
4523 4524 4525 4526 4527
 * Returns 0 on success, -1 on error.
 */
int
qemuProcessInit(virQEMUDriverPtr driver,
                virDomainObjPtr vm,
4528
                qemuDomainAsyncJob asyncJob,
4529
                bool migration,
4530
                unsigned int flags)
J
Jiri Denemark 已提交
4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549
{
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    virCapsPtr caps = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    int stopFlags;
    int ret = -1;

    VIR_DEBUG("vm=%p name=%s id=%d migration=%d",
              vm, vm->def->name, vm->def->id, migration);

    VIR_DEBUG("Beginning VM startup process");

    if (virDomainObjIsActive(vm)) {
        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                       _("VM is already active"));
        goto cleanup;
    }

    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
4550 4551 4552 4553 4554 4555 4556 4557
        goto cleanup;

    VIR_DEBUG("Determining emulator version");
    virObjectUnref(priv->qemuCaps);
    if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
                                                      vm->def->emulator,
                                                      vm->def->os.machine)))
        goto cleanup;
J
Jiri Denemark 已提交
4558

4559
    if (qemuProcessStartValidate(driver, vm, priv->qemuCaps, caps, flags) < 0)
4560 4561
        goto cleanup;

J
Jiri Denemark 已提交
4562 4563 4564 4565 4566
    /* Do this upfront, so any part of the startup process can add
     * runtime state to vm->def that won't be persisted. This let's us
     * report implicit runtime defaults in the XML, like vnc listen/socket
     */
    VIR_DEBUG("Setting current domain def as transient");
4567
    if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm) < 0)
J
Jiri Denemark 已提交
4568 4569
        goto stop;

4570 4571 4572 4573
    if (!(flags & VIR_QEMU_PROCESS_START_PRETEND)) {
        vm->def->id = qemuDriverAllocateID(driver);
        qemuDomainSetFakeReboot(driver, vm, false);
        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, VIR_DOMAIN_PAUSED_STARTING_UP);
J
Jiri Denemark 已提交
4574

4575 4576
        if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
            driver->inhibitCallback(true, driver->inhibitOpaque);
J
Jiri Denemark 已提交
4577

4578 4579 4580 4581 4582 4583
        /* Run an early hook to set-up missing devices */
        if (qemuProcessStartHook(driver, vm,
                                 VIR_HOOK_QEMU_OP_PREPARE,
                                 VIR_HOOK_SUBOP_BEGIN) < 0)
            goto stop;
    }
J
Jiri Denemark 已提交
4584

4585
    if (qemuDomainSetPrivatePaths(driver, vm) < 0)
4586 4587
        goto cleanup;

J
Jiri Denemark 已提交
4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598
    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    virObjectUnref(caps);
    return ret;

 stop:
    stopFlags = VIR_QEMU_PROCESS_STOP_NO_RELABEL;
    if (migration)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
4599
    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
J
Jiri Denemark 已提交
4600 4601 4602 4603
    goto cleanup;
}


4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656
/**
 * qemuProcessNetworkPrepareDevices:
 * @def: domain definition whose network interfaces are processed
 *
 * Calls networkAllocateActualDevice for every interface of @def.  An
 * interface of type 'network' whose actual type turns out to be 'hostdev'
 * additionally gets its hostdev inserted into the hostdev list of @def;
 * finding that hostdev already present in the list is an error.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessNetworkPrepareDevices(virDomainDefPtr def)
{
    size_t n;

    for (n = 0; n < def->nnets; n++) {
        virDomainNetDefPtr iface = def->nets[n];
        virDomainHostdevDefPtr hostdev;
        virDomainHostdevSubsysPCIPtr pci;

        /* Where applicable this takes a physical device out of the
         * configured network's pool, or resolves the bridge device name
         * to the one from the network definition. */
        if (networkAllocateActualDevice(def, iface) < 0)
            return -1;

        if (virDomainNetGetActualType(iface) != VIR_DOMAIN_NET_TYPE_HOSTDEV ||
            iface->type != VIR_DOMAIN_NET_TYPE_NETWORK)
            continue;

        /* Every type='hostdev' interface needs a matching entry in the
         * hostdevs array.  The parser takes care of interfaces that are
         * hardcoded as type='hostdev'; the ones allocated from a network
         * at runtime have to be added here. */
        hostdev = virDomainNetGetActualHostdev(iface);
        pci = &hostdev->source.subsys.u.pci;

        if (virDomainHostdevFind(def, hostdev, NULL) >= 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("PCI device %04x:%02x:%02x.%x "
                             "allocated from network %s is already "
                             "in use by domain %s"),
                           pci->addr.domain, pci->addr.bus,
                           pci->addr.slot, pci->addr.function,
                           iface->data.network.name, def->name);
            return -1;
        }

        if (virDomainHostdevInsert(def, hostdev) < 0)
            return -1;
    }

    return 0;
}


4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672
/**
 * qemuProcessSetupVcpu:
 * @vm: domain object
 * @vcpuid: id of VCPU to set defaults
 *
 * Applies the resource properties (cgroups, affinity, scheduler) of a single
 * vCPU.  The vCPU is expected to be online and the vCPU pids are expected to
 * have been detected correctly by the time this is called.
 *
 * Returns 0 on success, -1 on error.
 */
int
qemuProcessSetupVcpu(virDomainObjPtr vm,
                     unsigned int vcpuid)
{
    pid_t pid = qemuDomainGetVcpuPid(vm, vcpuid);
    virDomainVcpuDefPtr vcpudef = virDomainDefGetVcpu(vm->def, vcpuid);
    unsigned long long period = vm->def->cputune.period;
    long long quota = vm->def->cputune.quota;

    return qemuProcessSetupPid(vm, pid, VIR_CGROUP_THREAD_VCPU,
                               vcpuid, vcpudef->cpumask,
                               period, quota,
                               &vcpudef->sched);
}


/**
 * qemuProcessSetupVcpus:
 * @vm: domain object
 *
 * Sets up every online vCPU of @vm via qemuProcessSetupVcpu.  When no vCPU
 * pids are known, nothing is set up; instead any online vCPU whose affinity
 * differs from the domain-wide one is rejected.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupVcpus(virDomainObjPtr vm)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned int nvcpus = virDomainDefGetVcpusMax(vm->def);
    bool havePids;
    size_t n;

    if ((vm->def->cputune.period || vm->def->cputune.quota) &&
        !virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_CPU)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("cgroup cpu is required for scheduler tuning"));
        return -1;
    }

    havePids = qemuDomainHasVcpuPids(vm);

    for (n = 0; n < nvcpus; n++) {
        virDomainVcpuDefPtr cur = virDomainDefGetVcpu(vm->def, n);

        if (!cur->online)
            continue;

        if (havePids) {
            if (qemuProcessSetupVcpu(vm, n) < 0)
                return -1;
            continue;
        }

        /* Without vCPU pids a custom affinity that differs from the VM
         * default affinity has to be rejected. */
        if (cur->cpumask &&
            !virBitmapEqual(vm->def->cpumask, cur->cpumask)) {
            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
                            _("cpu affinity is not supported"));
            return -1;
        }
    }

    return 0;
}


4732 4733 4734 4735 4736
/**
 * qemuProcessSetupIOThread:
 * @vm: domain object
 * @iothread: iothread definition to set up
 *
 * Hands the thread id, iothread id, cpumask and scheduler settings of
 * @iothread, together with the domain's iothread period and quota, to
 * qemuProcessSetupPid.
 *
 * Returns the result of qemuProcessSetupPid.
 */
int
qemuProcessSetupIOThread(virDomainObjPtr vm,
                         virDomainIOThreadIDDefPtr iothread)
{
    unsigned long long period = vm->def->cputune.iothread_period;
    long long quota = vm->def->cputune.iothread_quota;

    return qemuProcessSetupPid(vm, iothread->thread_id,
                               VIR_CGROUP_THREAD_IOTHREAD,
                               iothread->iothread_id,
                               iothread->cpumask,
                               period, quota,
                               &iothread->sched);
}


/**
 * qemuProcessSetupIOThreads:
 * @vm: domain object
 *
 * Runs qemuProcessSetupIOThread for each iothread of the domain, stopping
 * at the first failure.
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessSetupIOThreads(virDomainObjPtr vm)
{
    size_t n = 0;

    while (n < vm->def->niothreadids) {
        if (qemuProcessSetupIOThread(vm, vm->def->iothreadids[n]) < 0)
            return -1;
        n++;
    }

    return 0;
}


4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866
/**
 * qemuProcessValidateHotpluggableVcpus:
 * @def: domain definition
 *
 * Validates the vcpu hotplug configuration of @def:
 * - all hotpluggable entities to be hotplugged have the correct data
 * - vcpus belonging to a hotpluggable entity share configuration
 * - order of the hotpluggable entities is unique and within <1, maxvcpus>
 *
 * Returns 0 on success, -1 on error.
 */
static int
qemuProcessValidateHotpluggableVcpus(virDomainDefPtr def)
{
    virDomainVcpuDefPtr vcpu;
    virDomainVcpuDefPtr subvcpu;
    qemuDomainVcpuPrivatePtr vcpupriv;
    unsigned int maxvcpus = virDomainDefGetVcpusMax(def);
    size_t i = 0;
    size_t j;
    virBitmapPtr ordermap = NULL;
    int ret = -1;

    /* bit N of the map tracks usage of order N + 1 (order 0 means unset) */
    if (!(ordermap = virBitmapNew(maxvcpus)))
        goto cleanup;

    for (i = 0; i < maxvcpus; i++) {
        vcpu = virDomainDefGetVcpu(def, i);
        vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);

        /* entries with a zero vcpu count are skipped; presumably these are
         * vcpus that do not start a hotpluggable entity - TODO confirm */
        if (vcpupriv->vcpus == 0)
            continue;

        if (vcpu->order != 0) {
            /* report the order as configured, not the internal bit index */
            if (virBitmapIsBitSet(ordermap, vcpu->order - 1)) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("duplicate vcpu order '%u'"), vcpu->order);
                goto cleanup;
            }

            /* setting the bit fails when the order does not fit into the
             * map, i.e. when it is larger than the number of vcpus */
            if (virBitmapSetBit(ordermap, vcpu->order - 1) < 0) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("vcpu order '%u' exceeds vcpu count"),
                               vcpu->order);
                goto cleanup;
            }
        }

        /* all vcpus of one entity have to agree on their configuration */
        for (j = i + 1; j < (i + vcpupriv->vcpus); j++) {
            subvcpu = virDomainDefGetVcpu(def, j);
            if (subvcpu->hotpluggable != vcpu->hotpluggable ||
                subvcpu->online != vcpu->online ||
                subvcpu->order != vcpu->order) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("vcpus '%zu' and '%zu' are in the same hotplug "
                                 "group but differ in configuration"), i, j);
                goto cleanup;
            }
        }

        if (vcpu->online && vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES) {
            if ((vcpupriv->socket_id == -1 && vcpupriv->core_id == -1 &&
                 vcpupriv->thread_id == -1) ||
                !vcpupriv->type) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("vcpu '%zu' is missing hotplug data"), i);
                goto cleanup;
            }
        }
    }

    ret = 0;
 cleanup:
    virBitmapFree(ordermap);
    return ret;
}


/**
 * qemuDomainHasHotpluggableStartupVcpus:
 * @def: domain definition
 *
 * Returns true if at least one vcpu of @def is both online and marked as
 * hotpluggable, false otherwise.
 */
static int
qemuDomainHasHotpluggableStartupVcpus(virDomainDefPtr def)
{
    size_t total = virDomainDefGetVcpusMax(def);
    size_t n = 0;

    while (n < total) {
        virDomainVcpuDefPtr cur = virDomainDefGetVcpu(def, n);

        if (cur->online && cur->hotpluggable == VIR_TRISTATE_BOOL_YES)
            return true;

        n++;
    }

    return false;
}


/**
 * qemuProcessVcpusSortOrder:
 * @a: pointer to the first virDomainVcpuDefPtr array element
 * @b: pointer to the second virDomainVcpuDefPtr array element
 *
 * qsort() comparator ordering vcpu definitions by ascending 'order'.
 *
 * Returns a negative value, zero or a positive value if the order of @a is
 * lower than, equal to or higher than the order of @b.
 */
static int
qemuProcessVcpusSortOrder(const void *a,
                          const void *b)
{
    virDomainVcpuDefPtr vcpua = *((virDomainVcpuDefPtr *)a);
    virDomainVcpuDefPtr vcpub = *((virDomainVcpuDefPtr *)b);

    /* Compare explicitly rather than subtracting: 'order' is unsigned, so a
     * difference wraps around and only maps to the right sign by accident
     * of the conversion to int. */
    if (vcpua->order < vcpub->order)
        return -1;
    if (vcpua->order > vcpub->order)
        return 1;
    return 0;
}


/**
 * qemuProcessSetupHotpluggableVcpus:
 * @driver: qemu driver
 * @vm: domain object
 * @asyncJob: async job passed to qemuDomainObjEnterMonitorAsync
 *
 * Collects all vcpus that are online, hotpluggable and have a non-zero
 * vcpu count in their private data, assigns them a "vcpuN" alias, sorts
 * them by their configured order and adds them one by one through the
 * monitor.
 *
 * Returns 0 on success (including when there is nothing to add),
 * -1 on error.
 */
static int
qemuProcessSetupHotpluggableVcpus(virQEMUDriverPtr driver,
                                  virDomainObjPtr vm,
                                  qemuDomainAsyncJob asyncJob)
{
    unsigned int maxvcpus = virDomainDefGetVcpusMax(vm->def);
    qemuDomainObjPrivatePtr priv = vm->privateData;
    qemuCgroupEmulatorAllNodesDataPtr emulatorCgroup = NULL;
    virDomainVcpuDefPtr vcpu;
    qemuDomainVcpuPrivatePtr vcpupriv;
    virJSONValuePtr vcpuprops = NULL;
    size_t i;
    int ret = -1;
    int rc;

    virDomainVcpuDefPtr *bootHotplug = NULL;
    size_t nbootHotplug = 0;

    for (i = 0; i < maxvcpus; i++) {
        vcpu = virDomainDefGetVcpu(vm->def, i);
        vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);

        if (vcpu->hotpluggable == VIR_TRISTATE_BOOL_YES && vcpu->online &&
            vcpupriv->vcpus != 0) {
            if (virAsprintf(&vcpupriv->alias, "vcpu%zu", i) < 0)
                goto cleanup;

            if (VIR_APPEND_ELEMENT(bootHotplug, nbootHotplug, vcpu) < 0)
                goto cleanup;
        }
    }

    if (nbootHotplug == 0) {
        ret = 0;
        goto cleanup;
    }

    qsort(bootHotplug, nbootHotplug, sizeof(*bootHotplug),
          qemuProcessVcpusSortOrder);

    if (qemuCgroupEmulatorAllNodesAllow(priv->cgroup, &emulatorCgroup) < 0)
        goto cleanup;

    for (i = 0; i < nbootHotplug; i++) {
        vcpu = bootHotplug[i];

        if (!(vcpuprops = qemuBuildHotpluggableCPUProps(vcpu)))
            goto cleanup;

        if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
            goto cleanup;

        /* The props are handed over to the monitor call; the local pointer
         * is dropped right away so that the cleanup path does not free
         * them a second time.  No per-iteration free is needed. */
        rc = qemuMonitorAddDeviceArgs(qemuDomainGetMonitor(vm), vcpuprops);
        vcpuprops = NULL;

        if (qemuDomainObjExitMonitor(driver, vm) < 0)
            goto cleanup;

        if (rc < 0)
            goto cleanup;
    }

    ret = 0;

 cleanup:
    qemuCgrouEmulatorAllNodesRestore(emulatorCgroup);
    VIR_FREE(bootHotplug);
    virJSONValueFree(vcpuprops);
    return ret;
}


4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956
/**
 * qemuProcessPrepareDomain:
 * @conn: connection passed on to disk presence and secret preparation
 * @driver: qemu driver
 * @vm: domain object
 * @flags: VIR_QEMU_PROCESS_START_* flags
 *
 * This function groups all code that modifies only live XML of a domain which
 * is about to start and it's the only place to do those modifications.
 *
 * Flag VIR_QEMU_PROCESS_START_PRETEND tells, that we don't want to actually
 * start the domain but create a valid qemu command.  If some code shouldn't be
 * executed in this case, make sure to check this flag.
 *
 * TODO: move all XML modification from qemuBuildCommandLine into this function
 *
 * Returns 0 on success, -1 on error.
 */
int
qemuProcessPrepareDomain(virConnectPtr conn,
                         virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
                         unsigned int flags)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    virCapsPtr caps = NULL;
    char *numadNodes = NULL;
    bool pretend = flags & VIR_QEMU_PROCESS_START_PRETEND;
    bool newDomain = flags & VIR_QEMU_PROCESS_START_NEW;
    size_t n;
    int ret = -1;

    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
        goto cleanup;

    if (!pretend) {
        /* With a security driver using dynamic labelling a label is
         * generated here to isolate the domain. */
        VIR_DEBUG("Generating domain security label (if required)");
        if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0) {
            virDomainAuditSecurityLabel(vm, false);
            goto cleanup;
        }
        virDomainAuditSecurityLabel(vm, true);

        /* Ask numad for the advisory nodeset when 'placement' of either
         * <vcpu> or <numatune> is 'auto'. */
        if (virDomainDefNeedsPlacementAdvice(vm->def)) {
            numadNodes = virNumaGetAutoPlacementAdvice(virDomainDefGetVcpus(vm->def),
                                                       virDomainDefGetMemoryTotal(vm->def));
            if (!numadNodes)
                goto cleanup;

            VIR_DEBUG("Nodeset returned from numad: %s", numadNodes);

            if (virBitmapParse(numadNodes, &priv->autoNodeset,
                               VIR_DOMAIN_CPUMASK_LEN) < 0)
                goto cleanup;

            priv->autoCpuset = virCapabilitiesGetCpusForNodemask(caps,
                                                                 priv->autoNodeset);
            if (!priv->autoCpuset)
                goto cleanup;
        }
    }

    /*
     * PCI addresses are normally assigned by virDomainCreate or
     * virDomainDefine.  Some may still have to be assigned here to cope
     * with upgrades, and in any case the PCI address set cache has to be
     * populated for later use in hotplug.
     */
    VIR_DEBUG("Assigning domain PCI addresses");
    if (qemuDomainAssignAddresses(vm->def, priv->qemuCaps, vm, newDomain) < 0)
        goto cleanup;

    if (qemuAssignDeviceAliases(vm->def, priv->qemuCaps) < 0)
        goto cleanup;

    VIR_DEBUG("Setting graphics devices");
    if (qemuProcessSetupGraphics(driver, vm, flags) < 0)
        goto cleanup;

    /* Possibly missing disks get dropped from the definition here.  This
     * also resolves source pool/volume into a path, so it has to run after
     * the def is copied and the aliases are set. */
    if (qemuDomainCheckDiskPresence(conn, driver, vm, flags) < 0)
        goto cleanup;

    VIR_DEBUG("Create domain masterKey");
    if (qemuDomainMasterKeyCreate(vm) < 0)
        goto cleanup;

    VIR_DEBUG("Add secrets to disks and hostdevs");
    if (qemuDomainSecretPrepare(conn, vm) < 0)
        goto cleanup;

    for (n = 0; n < vm->def->nchannels; n++) {
        if (qemuDomainPrepareChannel(vm->def->channels[n],
                                     priv->channelTargetDir) < 0)
            goto cleanup;
    }

    if (VIR_ALLOC(priv->monConfig) < 0)
        goto cleanup;

    VIR_DEBUG("Preparing monitor state");
    if (qemuProcessPrepareMonitorChr(priv->monConfig, priv->libDir) < 0)
        goto cleanup;

    /* reset the per-run monitor bookkeeping */
    priv->monJSON = virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_MONITOR_JSON);
    priv->monError = false;
    priv->monStart = 0;
    priv->gotShutdown = false;

    ret = 0;

 cleanup:
    VIR_FREE(numadNodes);
    virObjectUnref(caps);
    virObjectUnref(cfg);
    return ret;
}


J
Jiri Denemark 已提交
5054
/**
5055
 * qemuProcessPrepareHost
J
Jiri Denemark 已提交
5056
 *
5057 5058 5059
 * This function groups all code that modifies host system (which also may
 * update live XML) to prepare environment for a domain which is about to start
 * and it's the only place to do those modifications.
J
Jiri Denemark 已提交
5060
 *
5061
 * TODO: move all host modification from qemuBuildCommandLine into this function
J
Jiri Denemark 已提交
5062 5063
 */
int
5064 5065 5066
qemuProcessPrepareHost(virQEMUDriverPtr driver,
                       virDomainObjPtr vm,
                       bool incoming)
5067
{
5068
    int ret = -1;
5069
    unsigned int hostdev_flags = 0;
5070 5071 5072
    size_t i;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
5073

5074
    if (qemuPrepareNVRAM(cfg, vm) < 0)
J
Jiri Denemark 已提交
5075
        goto cleanup;
5076

5077 5078 5079 5080 5081
    /* network devices must be "prepared" before hostdevs, because
     * setting up a network device might create a new hostdev that
     * will need to be setup.
     */
    VIR_DEBUG("Preparing network devices");
5082
    if (qemuProcessNetworkPrepareDevices(vm->def) < 0)
J
Jiri Denemark 已提交
5083
        goto cleanup;
5084

5085
    /* Must be run before security labelling */
5086
    VIR_DEBUG("Preparing host devices");
5087 5088
    if (!cfg->relaxedACS)
        hostdev_flags |= VIR_HOSTDEV_STRICT_ACS_CHECK;
J
Jiri Denemark 已提交
5089
    if (!incoming)
5090
        hostdev_flags |= VIR_HOSTDEV_COLD_BOOT;
5091 5092
    if (qemuHostdevPrepareDomainDevices(driver, vm->def, priv->qemuCaps,
                                        hostdev_flags) < 0)
J
Jiri Denemark 已提交
5093
        goto cleanup;
5094

5095
    VIR_DEBUG("Preparing chr devices");
5096 5097 5098 5099
    if (virDomainChrDefForeach(vm->def,
                               true,
                               qemuProcessPrepareChardevDevice,
                               NULL) < 0)
J
Jiri Denemark 已提交
5100
        goto cleanup;
5101

5102
    if (vm->def->mem.nhugepages) {
5103 5104 5105 5106
        for (i = 0; i < cfg->nhugetlbfs; i++) {
            char *hugepagePath = qemuGetHugepagePath(&cfg->hugetlbfs[i]);

            if (!hugepagePath)
J
Jiri Denemark 已提交
5107
                goto cleanup;
5108 5109 5110 5111 5112 5113

            if (virSecurityManagerSetHugepages(driver->securityManager,
                                               vm->def, hugepagePath) < 0) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               "%s", _("Unable to set huge path in security driver"));
                VIR_FREE(hugepagePath);
J
Jiri Denemark 已提交
5114
                goto cleanup;
5115 5116
            }
            VIR_FREE(hugepagePath);
5117 5118 5119
        }
    }

5120 5121
    /* Ensure no historical cgroup for this VM is lying around bogus
     * settings */
5122
    VIR_DEBUG("Ensuring no historical cgroup is lying around");
5123
    qemuRemoveCgroup(vm);
5124

5125
    if (virFileMakePath(cfg->logDir) < 0) {
5126 5127
        virReportSystemError(errno,
                             _("cannot create log directory %s"),
5128
                             cfg->logDir);
J
Jiri Denemark 已提交
5129
        goto cleanup;
5130 5131
    }

5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154
    VIR_FREE(priv->pidfile);
    if (!(priv->pidfile = virPidFileBuildPath(cfg->stateDir, vm->def->name))) {
        virReportSystemError(errno,
                             "%s", _("Failed to build pidfile path."));
        goto cleanup;
    }

    if (unlink(priv->pidfile) < 0 &&
        errno != ENOENT) {
        virReportSystemError(errno,
                             _("Cannot remove stale PID file %s"),
                             priv->pidfile);
        goto cleanup;
    }

    /*
     * Create all per-domain directories in order to make sure domain
     * with any possible seclabels can access it.
     */
    if (qemuProcessMakeDir(driver, vm, priv->libDir) < 0 ||
        qemuProcessMakeDir(driver, vm, priv->channelTargetDir) < 0)
        goto cleanup;

5155 5156
    VIR_DEBUG("Write domain masterKey");
    if (qemuDomainWriteMasterKeyFile(driver, vm) < 0)
J
John Ferlan 已提交
5157 5158
        goto cleanup;

5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199
    ret = 0;
 cleanup:
    virObjectUnref(cfg);
    return ret;
}


/**
 * qemuProcessLaunch:
 * @conn: connection stored in the pre-exec hook data and handed to the
 *        password setup and auto-destroy registration
 * @driver: QEMU driver
 * @vm: domain object to start
 * @asyncJob: async job passed through to the monitor-using helpers
 * @incoming: optional incoming migration definition (may be NULL)
 * @snapshot: optional snapshot passed to the command line builder
 * @vmop: vport profile operation passed to the command line builder
 * @flags: bitwise-OR of VIR_QEMU_PROCESS_START_{COLD,PAUSED,AUTODESTROY,NEW}
 *
 * Launch a new QEMU process with stopped virtual CPUs.
 *
 * The caller is supposed to call qemuProcessStop with appropriate
 * flags in case of failure.
 *
 * Returns 0 on success,
 *        -1 on error which happened before devices were labeled and thus
 *           there is no need to restore them,
 *        -2 on error requesting security labels to be restored.
 */
int
qemuProcessLaunch(virConnectPtr conn,
                  virQEMUDriverPtr driver,
                  virDomainObjPtr vm,
                  qemuDomainAsyncJob asyncJob,
                  qemuProcessIncomingDefPtr incoming,
                  virDomainSnapshotObjPtr snapshot,
                  virNetDevVPortProfileOp vmop,
                  unsigned int flags)
{
    int ret = -1;
    int rv;
    int logfile = -1;
    qemuDomainLogContextPtr logCtxt = NULL;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virCommandPtr cmd = NULL;
    struct qemuProcessHookData hookData;
    virQEMUDriverConfigPtr cfg;
    virCapsPtr caps = NULL;
    size_t nnicindexes = 0;
    int *nicindexes = NULL;
    size_t i;

    VIR_DEBUG("vm=%p name=%s id=%d asyncJob=%d "
              "incoming.launchURI=%s incoming.deferredURI=%s "
              "incoming.fd=%d incoming.path=%s "
              "snapshot=%p vmop=%d flags=0x%x",
              vm, vm->def->name, vm->def->id, asyncJob,
              NULLSTR(incoming ? incoming->launchURI : NULL),
              NULLSTR(incoming ? incoming->deferredURI : NULL),
              incoming ? incoming->fd : -1,
              NULLSTR(incoming ? incoming->path : NULL),
              snapshot, vmop, flags);

    /* Okay, these are just internal flags,
     * but doesn't hurt to check */
    virCheckFlags(VIR_QEMU_PROCESS_START_COLD |
                  VIR_QEMU_PROCESS_START_PAUSED |
                  VIR_QEMU_PROCESS_START_AUTODESTROY |
                  VIR_QEMU_PROCESS_START_NEW, -1);

    cfg = virQEMUDriverGetConfig(driver);

    hookData.conn = conn;
    hookData.vm = vm;
    hookData.driver = driver;
    /* We don't increase cfg's reference counter here. */
    hookData.cfg = cfg;

    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
        goto cleanup;

    VIR_DEBUG("Creating domain log file");
    if (!(logCtxt = qemuDomainLogContextNew(driver, vm,
                                            QEMU_DOMAIN_LOG_CONTEXT_MODE_START)))
        goto cleanup;
    logfile = qemuDomainLogContextGetWriteFD(logCtxt);

    VIR_DEBUG("Building emulator command line");
    if (!(cmd = qemuBuildCommandLine(driver,
                                     qemuDomainLogContextGetManager(logCtxt),
                                     vm->def, priv->monConfig,
                                     priv->monJSON, priv->qemuCaps,
                                     incoming ? incoming->launchURI : NULL,
                                     snapshot, vmop,
                                     false,
                                     qemuCheckFips(),
                                     priv->autoNodeset,
                                     &nnicindexes, &nicindexes,
                                     priv->libDir)))
        goto cleanup;

    if (incoming && incoming->fd != -1)
        virCommandPassFD(cmd, incoming->fd, 0);

    /* now that we know it is about to start call the hook if present */
    if (qemuProcessStartHook(driver, vm,
                             VIR_HOOK_QEMU_OP_START,
                             VIR_HOOK_SUBOP_BEGIN) < 0)
        goto cleanup;

    qemuLogOperation(vm, "starting up", cmd, logCtxt);

    qemuDomainObjCheckTaint(driver, vm, logCtxt);

    qemuDomainLogContextMarkPosition(logCtxt);

    VIR_DEBUG("Clear emulator capabilities: %d",
              cfg->clearEmulatorCapabilities);
    if (cfg->clearEmulatorCapabilities)
        virCommandClearCaps(cmd);

    VIR_DEBUG("Setting up raw IO");
    if (qemuProcessSetupRawIO(driver, vm, cmd) < 0)
        goto cleanup;

    virCommandSetPreExecHook(cmd, qemuProcessHook, &hookData);
    virCommandSetMaxProcesses(cmd, cfg->maxProcesses);
    virCommandSetMaxFiles(cmd, cfg->maxFiles);
    virCommandSetMaxCoreSize(cmd, cfg->maxCore);
    virCommandSetUmask(cmd, 0x002);

    VIR_DEBUG("Setting up security labelling");
    if (virSecurityManagerSetChildProcessLabel(driver->securityManager,
                                               vm->def, cmd) < 0)
        goto cleanup;

    /* Both stdout and stderr of the child go to the domain log file */
    virCommandSetOutputFD(cmd, &logfile);
    virCommandSetErrorFD(cmd, &logfile);
    virCommandNonblockingFDs(cmd);
    virCommandSetPidFile(cmd, priv->pidfile);
    virCommandDaemonize(cmd);
    virCommandRequireHandshake(cmd);

    if (virSecurityManagerPreFork(driver->securityManager) < 0)
        goto cleanup;
    rv = virCommandRun(cmd, NULL);
    virSecurityManagerPostFork(driver->securityManager);

    /* wait for qemu process to show up */
    if (rv == 0) {
        if (virPidFileReadPath(priv->pidfile, &vm->pid) < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Domain %s didn't show up"), vm->def->name);
            rv = -1;
        } else {
            /* Only claim the process is running once its pid was read */
            VIR_DEBUG("QEMU vm=%p name=%s running with pid=%llu",
                      vm, vm->def->name, (unsigned long long)vm->pid);
        }
    } else {
        VIR_DEBUG("QEMU vm=%p name=%s failed to spawn",
                  vm, vm->def->name);
    }

    /* Note that a spawn failure (rv != 0) is not acted upon until after
     * the handshake below; the steps in between run regardless. */
    VIR_DEBUG("Writing early domain status to disk");
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
        goto cleanup;

    VIR_DEBUG("Waiting for handshake from child");
    if (virCommandHandshakeWait(cmd) < 0) {
        /* Read errors from child that occurred between fork and exec. */
        qemuProcessReportLogError(logCtxt,
                                  _("Process exited prior to exec"));
        goto cleanup;
    }

    VIR_DEBUG("Setting up domain cgroup (if required)");
    if (qemuSetupCgroup(driver, vm, nnicindexes, nicindexes) < 0)
        goto cleanup;

    if (!(priv->perf = virPerfNew()))
        goto cleanup;

    /* Enable every perf event the domain definition asks for */
    for (i = 0; i < VIR_PERF_EVENT_LAST; i++) {
        if (vm->def->perf.events[i] == VIR_TRISTATE_BOOL_YES &&
            virPerfEventEnable(priv->perf, i, vm->pid) < 0)
            goto cleanup;
    }

    /* This must be done after cgroup placement to avoid resetting CPU
     * affinity */
    if (!vm->def->cputune.emulatorpin &&
        qemuProcessInitCpuAffinity(vm) < 0)
        goto cleanup;

    VIR_DEBUG("Setting emulator tuning/settings");
    if (qemuProcessSetupEmulator(vm) < 0)
        goto cleanup;

    VIR_DEBUG("Setting domain security labels");
    if (virSecurityManagerSetAllLabel(driver->securityManager,
                                      vm->def,
                                      incoming ? incoming->path : NULL) < 0)
        goto cleanup;

    /* Security manager labeled all devices, therefore
     * if any operation from now on fails, we need to ask the caller to
     * restore labels.
     */
    ret = -2;

    if (incoming && incoming->fd != -1) {
        /* if there's an fd to migrate from, and it's a pipe, put the
         * proper security label on it
         */
        struct stat stdin_sb;

        VIR_DEBUG("setting security label on pipe used for migration");

        if (fstat(incoming->fd, &stdin_sb) < 0) {
            virReportSystemError(errno,
                                 _("cannot stat fd %d"), incoming->fd);
            goto cleanup;
        }
        if (S_ISFIFO(stdin_sb.st_mode) &&
            virSecurityManagerSetImageFDLabel(driver->securityManager,
                                              vm->def, incoming->fd) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Labelling done, completing handshake to child");
    if (virCommandHandshakeNotify(cmd) < 0)
        goto cleanup;
    VIR_DEBUG("Handshake complete, child running");

    if (rv == -1) {
        /* The VM failed to start; tear filters before taps */
        virDomainConfVMNWFilterTeardown(vm);
        goto cleanup;
    }

    VIR_DEBUG("Waiting for monitor to show up");
    if (qemuProcessWaitForMonitor(driver, vm, asyncJob, priv->qemuCaps, logCtxt) < 0)
        goto cleanup;

    /* Failure to connect to agent shouldn't be fatal; only a -2 return
     * from qemuConnectAgent aborts the startup */
    if ((rv = qemuConnectAgent(driver, vm)) < 0) {
        if (rv == -2)
            goto cleanup;

        VIR_WARN("Cannot connect to QEMU guest agent for %s",
                 vm->def->name);
        virResetLastError();
        priv->agentError = true;
    }

    VIR_DEBUG("Detecting if required emulator features are present");
    if (!qemuProcessVerifyGuestCPU(driver, vm, asyncJob))
        goto cleanup;

    VIR_DEBUG("Setting up post-init cgroup restrictions");
    if (qemuSetupCpusetMems(vm) < 0)
        goto cleanup;

    VIR_DEBUG("setting up hotpluggable cpus");
    if (qemuDomainHasHotpluggableStartupVcpus(vm->def)) {
        if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
            goto cleanup;

        if (qemuProcessValidateHotpluggableVcpus(vm->def) < 0)
            goto cleanup;

        if (qemuProcessSetupHotpluggableVcpus(driver, vm, asyncJob) < 0)
            goto cleanup;
    }

    VIR_DEBUG("Refreshing VCPU info");
    if (qemuDomainRefreshVcpuInfo(driver, vm, asyncJob, false) < 0)
        goto cleanup;

    if (qemuDomainValidateVcpuInfo(vm) < 0)
        goto cleanup;

    qemuDomainVcpuPersistOrder(vm->def);

    VIR_DEBUG("Detecting IOThread PIDs");
    if (qemuProcessDetectIOThreadPIDs(driver, vm, asyncJob) < 0)
        goto cleanup;

    VIR_DEBUG("Setting global CPU cgroup (if required)");
    if (qemuSetupGlobalCpuCgroup(vm) < 0)
        goto cleanup;

    VIR_DEBUG("Setting vCPU tuning/settings");
    if (qemuProcessSetupVcpus(vm) < 0)
        goto cleanup;

    VIR_DEBUG("Setting IOThread tuning/settings");
    if (qemuProcessSetupIOThreads(vm) < 0)
        goto cleanup;

    VIR_DEBUG("Setting any required VM passwords");
    if (qemuProcessInitPasswords(conn, driver, vm, asyncJob) < 0)
        goto cleanup;

    /* set default link states */
    /* qemu doesn't support setting this on the command line, so
     * enter the monitor */
    VIR_DEBUG("Setting network link states");
    if (qemuProcessSetLinkStates(driver, vm, asyncJob) < 0)
        goto cleanup;

    VIR_DEBUG("Fetching list of active devices");
    if (qemuDomainUpdateDeviceList(driver, vm, asyncJob) < 0)
        goto cleanup;

    VIR_DEBUG("Updating info of memory devices");
    if (qemuDomainUpdateMemoryDeviceInfo(driver, vm, asyncJob) < 0)
        goto cleanup;

    VIR_DEBUG("Setting initial memory amount");
    if (qemuProcessSetupBalloon(driver, vm, asyncJob) < 0)
        goto cleanup;

    /* Since CPUs were not started yet, the balloon could not return the memory
     * to the host and thus cur_balloon needs to be updated so that GetXMLdesc
     * and friends return the correct size in case they can't grab the job */
    if (!incoming && !snapshot &&
        qemuProcessRefreshBalloonState(driver, vm, asyncJob) < 0)
        goto cleanup;

    VIR_DEBUG("Detecting actual memory size for video device");
    if (qemuProcessUpdateVideoRamSize(driver, vm, asyncJob) < 0)
        goto cleanup;

    VIR_DEBUG("Updating disk data");
    if (qemuProcessRefreshDisks(driver, vm, asyncJob) < 0)
        goto cleanup;

    if (flags & VIR_QEMU_PROCESS_START_AUTODESTROY &&
        qemuProcessAutoDestroyAdd(driver, vm, conn) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    qemuDomainSecretDestroy(vm);
    virCommandFree(cmd);
    qemuDomainLogContextFree(logCtxt);
    virObjectUnref(cfg);
    virObjectUnref(caps);
    VIR_FREE(nicindexes);
    return ret;
}


5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534
/**
 * qemuProcessFinishStartup:
 * @conn: connection passed to qemuProcessStartCPUs
 * @driver: QEMU driver
 * @vm: domain object
 * @asyncJob: async job passed to qemuProcessStartCPUs
 * @startCPUs: whether to start the virtual CPUs
 * @pausedReason: reason recorded for the paused state when @startCPUs
 *                is false
 *
 * Finish starting a new domain: either start its CPUs or mark it paused,
 * save the domain status and run the "started" hook.
 *
 * Returns 0 on success, -1 on failure.
 */
int
qemuProcessFinishStartup(virConnectPtr conn,
                         virQEMUDriverPtr driver,
                         virDomainObjPtr vm,
                         qemuDomainAsyncJob asyncJob,
                         bool startCPUs,
                         virDomainPausedReason pausedReason)
{
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
    int ret = -1;

    if (startCPUs) {
        VIR_DEBUG("Starting domain CPUs");
        if (qemuProcessStartCPUs(driver, vm, conn,
                                 VIR_DOMAIN_RUNNING_BOOTED,
                                 asyncJob) < 0) {
            /* Make sure the caller always gets some error reported */
            if (!virGetLastError())
                virReportError(VIR_ERR_OPERATION_FAILED, "%s",
                               _("resume operation failed"));
            goto cleanup;
        }
    } else {
        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, pausedReason);
    }

    VIR_DEBUG("Writing domain status to disk");
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
        goto cleanup;

    if (qemuProcessStartHook(driver, vm,
                             VIR_HOOK_QEMU_OP_STARTED,
                             VIR_HOOK_SUBOP_BEGIN) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virObjectUnref(cfg);
    return ret;
}


J
Jiri Denemark 已提交
5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581
/**
 * qemuProcessStart:
 * @conn: connection passed to the prepare/launch/finish steps
 * @driver: QEMU driver
 * @vm: domain object to start
 * @asyncJob: async job passed to the individual steps
 * @migrateFrom: incoming migration source, or NULL for a non-migration start
 * @migrateFd: file descriptor used for the incoming migration definition
 * @migratePath: path used for the incoming migration definition
 * @snapshot: optional snapshot passed to qemuProcessLaunch
 * @vmop: vport profile operation passed to qemuProcessLaunch
 * @flags: bitwise-OR of VIR_QEMU_PROCESS_START_{COLD,PAUSED,AUTODESTROY}
 *
 * Runs the whole startup sequence: qemuProcessInit, domain and host
 * preparation, qemuProcessLaunch, the deferred incoming migration (if
 * any) and qemuProcessFinishStartup.  VIR_QEMU_PROCESS_START_NEW is added
 * to @flags when neither @migrateFrom nor @snapshot is given.
 *
 * If any step after a successful qemuProcessInit fails, the domain is
 * torn down via qemuProcessStop with VIR_DOMAIN_SHUTOFF_FAILED.
 *
 * Returns 0 on success, -1 on failure.
 */
int
qemuProcessStart(virConnectPtr conn,
                 virQEMUDriverPtr driver,
                 virDomainObjPtr vm,
                 qemuDomainAsyncJob asyncJob,
                 const char *migrateFrom,
                 int migrateFd,
                 const char *migratePath,
                 virDomainSnapshotObjPtr snapshot,
                 virNetDevVPortProfileOp vmop,
                 unsigned int flags)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    qemuProcessIncomingDefPtr incoming = NULL;
    unsigned int stopFlags;
    bool relabel = false;
    int ret = -1;
    int rv;

    VIR_DEBUG("conn=%p driver=%p vm=%p name=%s id=%d asyncJob=%s "
              "migrateFrom=%s migrateFd=%d migratePath=%s "
              "snapshot=%p vmop=%d flags=0x%x",
              conn, driver, vm, vm->def->name, vm->def->id,
              qemuDomainAsyncJobTypeToString(asyncJob),
              NULLSTR(migrateFrom), migrateFd, NULLSTR(migratePath),
              snapshot, vmop, flags);

    virCheckFlagsGoto(VIR_QEMU_PROCESS_START_COLD |
                      VIR_QEMU_PROCESS_START_PAUSED |
                      VIR_QEMU_PROCESS_START_AUTODESTROY, cleanup);

    if (!migrateFrom && !snapshot)
        flags |= VIR_QEMU_PROCESS_START_NEW;

    if (qemuProcessInit(driver, vm, asyncJob, !!migrateFrom, flags) < 0)
        goto cleanup;

    if (migrateFrom) {
        incoming = qemuProcessIncomingDefNew(priv->qemuCaps, NULL, migrateFrom,
                                             migrateFd, migratePath);
        if (!incoming)
            goto stop;
    }

    if (qemuProcessPrepareDomain(conn, driver, vm, flags) < 0)
        goto stop;

    if (qemuProcessPrepareHost(driver, vm, !!incoming) < 0)
        goto stop;

    if ((rv = qemuProcessLaunch(conn, driver, vm, asyncJob, incoming,
                                snapshot, vmop, flags)) < 0) {
        /* -2 means devices were already labeled and need restoring */
        if (rv == -2)
            relabel = true;
        goto stop;
    }
    relabel = true;

    if (incoming &&
        incoming->deferredURI &&
        qemuMigrationRunIncoming(driver, vm, incoming->deferredURI, asyncJob) < 0)
        goto stop;

    if (qemuProcessFinishStartup(conn, driver, vm, asyncJob,
                                 !(flags & VIR_QEMU_PROCESS_START_PAUSED),
                                 incoming ?
                                 VIR_DOMAIN_PAUSED_MIGRATION :
                                 VIR_DOMAIN_PAUSED_USER) < 0)
        goto stop;

    /* Keep watching qemu log for errors during incoming migration, otherwise
     * unset reporting errors from qemu log. */
    if (!incoming)
        qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);

    ret = 0;

 cleanup:
    qemuProcessIncomingDefFree(incoming);
    return ret;

 stop:
    /* Failure path: stop the half-started domain, then fall back to the
     * common cleanup above. */
    stopFlags = 0;
    if (!relabel)
        stopFlags |= VIR_QEMU_PROCESS_STOP_NO_RELABEL;
    if (migrateFrom)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;
    if (priv->mon)
        qemuMonitorSetDomainLog(priv->mon, NULL, NULL, NULL);
    qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, stopFlags);
    goto cleanup;
}


5645 5646 5647 5648 5649
/**
 * qemuProcessCreatePretendCmd:
 * @conn: connection passed to qemuProcessPrepareDomain
 * @driver: QEMU driver
 * @vm: domain object
 * @migrateURI: migration URI passed to the command line builder (may be NULL)
 * @enableFips: passed to the command line builder
 * @standalone: passed to the command line builder
 * @flags: bitwise-OR of VIR_QEMU_PROCESS_START_{COLD,PAUSED,AUTODESTROY}
 *
 * Builds the QEMU command line for @vm without launching anything.
 * VIR_QEMU_PROCESS_START_PRETEND and VIR_QEMU_PROCESS_START_NEW are
 * added to @flags before the init and prepare steps run.
 *
 * Returns the command built by qemuBuildCommandLine, or NULL if an
 * earlier step failed.
 */
virCommandPtr
qemuProcessCreatePretendCmd(virConnectPtr conn,
                            virQEMUDriverPtr driver,
                            virDomainObjPtr vm,
                            const char *migrateURI,
                            bool enableFips,
                            bool standalone,
                            unsigned int flags)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virCommandPtr cmd = NULL;

    virCheckFlagsGoto(VIR_QEMU_PROCESS_START_COLD |
                      VIR_QEMU_PROCESS_START_PAUSED |
                      VIR_QEMU_PROCESS_START_AUTODESTROY, cleanup);

    flags |= VIR_QEMU_PROCESS_START_PRETEND;
    flags |= VIR_QEMU_PROCESS_START_NEW;

    if (qemuProcessInit(driver, vm, QEMU_ASYNC_JOB_NONE, !!migrateURI, flags) < 0)
        goto cleanup;

    if (qemuProcessPrepareDomain(conn, driver, vm, flags) < 0)
        goto cleanup;

    VIR_DEBUG("Building emulator command line");
    cmd = qemuBuildCommandLine(driver,
                               NULL,
                               vm->def,
                               priv->monConfig,
                               priv->monJSON,
                               priv->qemuCaps,
                               migrateURI,
                               NULL,
                               VIR_NETDEV_VPORT_PROFILE_OP_NO_OP,
                               standalone,
                               enableFips,
                               priv->autoNodeset,
                               NULL,
                               NULL,
                               priv->libDir);

 cleanup:
    return cmd;
}


5692
int
5693
qemuProcessKill(virDomainObjPtr vm, unsigned int flags)
5694
{
5695
    int ret;
5696

5697 5698 5699
    VIR_DEBUG("vm=%p name=%s pid=%llu flags=%x",
              vm, vm->def->name,
              (unsigned long long)vm->pid, flags);
5700

5701 5702 5703 5704 5705
    if (!(flags & VIR_QEMU_PROCESS_KILL_NOCHECK)) {
        if (!virDomainObjIsActive(vm)) {
            VIR_DEBUG("VM '%s' not active", vm->def->name);
            return 0;
        }
5706 5707
    }

5708
    if (flags & VIR_QEMU_PROCESS_KILL_NOWAIT) {
5709 5710 5711 5712 5713
        virProcessKill(vm->pid,
                       (flags & VIR_QEMU_PROCESS_KILL_FORCE) ?
                       SIGKILL : SIGTERM);
        return 0;
    }
5714

5715 5716
    ret = virProcessKillPainfully(vm->pid,
                                  !!(flags & VIR_QEMU_PROCESS_KILL_FORCE));
5717

5718
    return ret;
5719 5720 5721
}


5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760
/**
 * qemuProcessBeginStopJob:
 * @driver: QEMU driver
 * @vm: domain object
 * @job: job to begin once the process has been killed
 * @forceKill: request VIR_QEMU_PROCESS_KILL_FORCE when killing
 *
 * Stop all current jobs by killing the domain and start a new one for
 * qemuProcessStop.
 *
 * Returns 0 on success, -1 if killing the process or beginning the job
 * failed.
 */
int
qemuProcessBeginStopJob(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
                        qemuDomainJob job,
                        bool forceKill)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    unsigned int killFlags = 0;
    int ret = -1;

    if (forceKill)
        killFlags = VIR_QEMU_PROCESS_KILL_FORCE;

    /* While the vm is unlocked inside the BeginJob/ProcessKill API the
     * monitor EOF callback must not do our work (and send misleading
     * events), so flag the domain as being destroyed for that window.
     */
    priv->beingDestroyed = true;

    if (qemuProcessKill(vm, killFlags) >= 0) {
        /* Wake up anything waiting on domain condition */
        virDomainObjBroadcast(vm);

        if (qemuDomainObjBeginJob(driver, vm, job) >= 0)
            ret = 0;
    }

    priv->beingDestroyed = false;
    return ret;
}


5761
void qemuProcessStop(virQEMUDriverPtr driver,
5762
                     virDomainObjPtr vm,
5763
                     virDomainShutoffReason reason,
5764
                     qemuDomainAsyncJob asyncJob,
5765
                     unsigned int flags)
5766 5767 5768 5769 5770 5771
{
    int ret;
    int retries = 0;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virErrorPtr orig_err;
    virDomainDefPtr def;
A
Ansis Atteka 已提交
5772
    virNetDevVPortProfilePtr vport = NULL;
5773
    size_t i;
5774
    char *timestamp;
5775
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
5776

5777 5778
    VIR_DEBUG("Shutting down vm=%p name=%s id=%d pid=%llu, "
              "reason=%s, asyncJob=%s, flags=%x",
5779
              vm, vm->def->name, vm->def->id,
5780 5781 5782 5783
              (unsigned long long)vm->pid,
              virDomainShutoffReasonTypeToString(reason),
              qemuDomainAsyncJobTypeToString(asyncJob),
              flags);
5784

5785 5786 5787 5788
    /* This method is routinely used in clean up paths. Disable error
     * reporting so we don't squash a legit error. */
    orig_err = virSaveLastError();

5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803
    if (asyncJob != QEMU_ASYNC_JOB_NONE) {
        if (qemuDomainObjBeginNestedJob(driver, vm, asyncJob) < 0)
            goto cleanup;
    } else if (priv->job.asyncJob != QEMU_ASYNC_JOB_NONE &&
               priv->job.asyncOwner == virThreadSelfID() &&
               priv->job.active != QEMU_JOB_ASYNC_NESTED) {
        VIR_WARN("qemuProcessStop called without a nested job (async=%s)",
                 qemuDomainAsyncJobTypeToString(asyncJob));
    }

    if (!virDomainObjIsActive(vm)) {
        VIR_DEBUG("VM '%s' not active", vm->def->name);
        goto endjob;
    }

5804 5805
    vm->def->id = -1;

5806
    if (virAtomicIntDecAndTest(&driver->nactive) && driver->inhibitCallback)
5807 5808
        driver->inhibitCallback(false, driver->inhibitOpaque);

5809 5810
    /* Wake up anything waiting on domain condition */
    virDomainObjBroadcast(vm);
5811

5812 5813 5814
    if ((timestamp = virTimeStringNow()) != NULL) {
        qemuDomainLogAppendMessage(driver, vm, "%s: shutting down\n", timestamp);
        VIR_FREE(timestamp);
5815 5816
    }

5817 5818 5819
    /* Clear network bandwidth */
    virDomainClearNetBandwidth(vm);

5820 5821
    virDomainConfVMNWFilterTeardown(vm);

5822
    if (cfg->macFilter) {
5823
        def = vm->def;
5824
        for (i = 0; i < def->nnets; i++) {
5825 5826 5827
            virDomainNetDefPtr net = def->nets[i];
            if (net->ifname == NULL)
                continue;
5828 5829 5830
            ignore_value(ebtablesRemoveForwardAllowIn(driver->ebtables,
                                                      net->ifname,
                                                      &net->mac));
5831 5832 5833
        }
    }

5834
    virPortAllocatorRelease(driver->migrationPorts, priv->nbdPort);
5835
    priv->nbdPort = 0;
5836

D
Daniel P. Berrange 已提交
5837 5838 5839 5840 5841 5842
    if (priv->agent) {
        qemuAgentClose(priv->agent);
        priv->agent = NULL;
        priv->agentError = false;
    }

5843
    if (priv->mon) {
5844
        qemuMonitorClose(priv->mon);
5845 5846
        priv->mon = NULL;
    }
5847 5848 5849 5850 5851 5852 5853 5854

    if (priv->monConfig) {
        if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX)
            unlink(priv->monConfig->data.nix.path);
        virDomainChrSourceDefFree(priv->monConfig);
        priv->monConfig = NULL;
    }

J
John Ferlan 已提交
5855 5856 5857
    /* Remove the master key */
    qemuDomainMasterKeyRemove(priv);

5858 5859
    virFileDeleteTree(priv->libDir);
    virFileDeleteTree(priv->channelTargetDir);
5860

5861 5862
    qemuDomainClearPrivatePaths(vm);

5863 5864 5865 5866 5867 5868
    ignore_value(virDomainChrDefForeach(vm->def,
                                        false,
                                        qemuProcessCleanupChardevDevice,
                                        NULL));


5869
    /* shut it off for sure */
5870 5871 5872
    ignore_value(qemuProcessKill(vm,
                                 VIR_QEMU_PROCESS_KILL_FORCE|
                                 VIR_QEMU_PROCESS_KILL_NOCHECK));
5873

5874 5875
    qemuDomainCleanupRun(driver, vm);

5876
    /* Stop autodestroy in case guest is restarted */
5877
    qemuProcessAutoDestroyRemove(driver, vm);
5878

5879 5880
    /* now that we know it's stopped call the hook if present */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
5881
        char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
5882 5883

        /* we can't stop the operation even if the script raised an error */
5884 5885 5886
        ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
                                 VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END,
                                 NULL, xml, NULL));
5887 5888 5889
        VIR_FREE(xml);
    }

5890 5891 5892 5893
    /* Reset Security Labels unless caller don't want us to */
    if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL))
        virSecurityManagerRestoreAllLabel(driver->securityManager,
                                          vm->def,
5894
                                          !!(flags & VIR_QEMU_PROCESS_STOP_MIGRATED));
5895
    virSecurityManagerReleaseLabel(driver->securityManager, vm->def);
5896

5897
    for (i = 0; i < vm->def->ndisks; i++) {
5898
        virDomainDeviceDef dev;
5899
        virDomainDiskDefPtr disk = vm->def->disks[i];
5900 5901 5902 5903

        dev.type = VIR_DOMAIN_DEVICE_DISK;
        dev.data.disk = disk;
        ignore_value(qemuRemoveSharedDevice(driver, &dev, vm->def->name));
5904 5905
    }

5906
    /* Clear out dynamically assigned labels */
5907
    for (i = 0; i < vm->def->nseclabels; i++) {
5908
        if (vm->def->seclabels[i]->type == VIR_DOMAIN_SECLABEL_DYNAMIC)
5909 5910
            VIR_FREE(vm->def->seclabels[i]->label);
        VIR_FREE(vm->def->seclabels[i]->imagelabel);
5911 5912
    }

5913 5914 5915
    virStringFreeList(priv->qemuDevices);
    priv->qemuDevices = NULL;

5916
    qemuHostdevReAttachDomainDevices(driver, vm->def);
5917 5918 5919 5920

    def = vm->def;
    for (i = 0; i < def->nnets; i++) {
        virDomainNetDefPtr net = def->nets[i];
5921 5922 5923 5924
        vport = virDomainNetGetActualVirtPortProfile(net);

        switch (virDomainNetGetActualType(net)) {
        case VIR_DOMAIN_NET_TYPE_DIRECT:
5925
            ignore_value(virNetDevMacVLanDeleteWithVPortProfile(
5926
                             net->ifname, &net->mac,
5927 5928
                             virDomainNetGetActualDirectDev(net),
                             virDomainNetGetActualDirectMode(net),
5929
                             virDomainNetGetActualVirtPortProfile(net),
5930
                             cfg->stateDir));
5931
            break;
5932 5933 5934 5935 5936 5937
        case VIR_DOMAIN_NET_TYPE_ETHERNET:
            if (net->ifname) {
                ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
                VIR_FREE(net->ifname);
            }
            break;
5938 5939 5940 5941
        case VIR_DOMAIN_NET_TYPE_BRIDGE:
        case VIR_DOMAIN_NET_TYPE_NETWORK:
#ifdef VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP
            if (!(vport && vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH))
5942
                ignore_value(virNetDevTapDelete(net->ifname, net->backend.tap));
5943 5944
#endif
            break;
5945
        }
5946 5947 5948
        /* release the physical device (or any other resources used by
         * this interface in the network driver
         */
5949 5950 5951 5952 5953 5954 5955 5956 5957
        if (vport) {
            if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_MIDONET) {
                ignore_value(virNetDevMidonetUnbindPort(vport));
            } else if (vport->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
                ignore_value(virNetDevOpenvswitchRemovePort(
                                 virDomainNetGetActualBridgeName(net),
                                 net->ifname));
            }
        }
A
Ansis Atteka 已提交
5958

5959 5960
        /* kick the device out of the hostdev list too */
        virDomainNetRemoveHostdev(def, net);
5961
        networkReleaseActualDevice(vm->def, net);
5962
    }
5963

5964
 retry:
5965
    if ((ret = qemuRemoveCgroup(vm)) < 0) {
5966 5967 5968 5969 5970 5971 5972
        if (ret == -EBUSY && (retries++ < 5)) {
            usleep(200*1000);
            goto retry;
        }
        VIR_WARN("Failed to remove cgroup for %s",
                 vm->def->name);
    }
5973
    virCgroupFree(&priv->cgroup);
5974

5975
    virPerfFree(priv->perf);
5976
    priv->perf = NULL;
5977

5978 5979
    qemuProcessRemoveDomainStatus(driver, vm);

5980 5981
    /* Remove VNC and Spice ports from port reservation bitmap, but only if
       they were reserved by the driver (autoport=yes)
5982
    */
5983
    for (i = 0; i < vm->def->ngraphics; ++i) {
5984
        virDomainGraphicsDefPtr graphics = vm->def->graphics[i];
5985 5986
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_VNC) {
            if (graphics->data.vnc.autoport) {
5987 5988
                virPortAllocatorRelease(driver->remotePorts,
                                        graphics->data.vnc.port);
5989
            } else if (graphics->data.vnc.portReserved) {
5990 5991 5992 5993 5994
                virPortAllocatorSetUsed(driver->remotePorts,
                                        graphics->data.spice.port,
                                        false);
                graphics->data.vnc.portReserved = false;
            }
5995 5996
            virPortAllocatorRelease(driver->webSocketPorts,
                                    graphics->data.vnc.websocket);
5997
        }
5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018
        if (graphics->type == VIR_DOMAIN_GRAPHICS_TYPE_SPICE) {
            if (graphics->data.spice.autoport) {
                virPortAllocatorRelease(driver->remotePorts,
                                        graphics->data.spice.port);
                virPortAllocatorRelease(driver->remotePorts,
                                        graphics->data.spice.tlsPort);
            } else {
                if (graphics->data.spice.portReserved) {
                    virPortAllocatorSetUsed(driver->remotePorts,
                                            graphics->data.spice.port,
                                            false);
                    graphics->data.spice.portReserved = false;
                }

                if (graphics->data.spice.tlsPortReserved) {
                    virPortAllocatorSetUsed(driver->remotePorts,
                                            graphics->data.spice.tlsPort,
                                            false);
                    graphics->data.spice.tlsPortReserved = false;
                }
            }
6019
        }
6020 6021
    }

6022
    vm->taint = 0;
6023
    vm->pid = -1;
J
Jiri Denemark 已提交
6024
    virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason);
6025 6026
    for (i = 0; i < vm->def->niothreadids; i++)
        vm->def->iothreadids[i]->thread_id = 0;
6027 6028
    virObjectUnref(priv->qemuCaps);
    priv->qemuCaps = NULL;
6029
    VIR_FREE(priv->pidfile);
6030

6031
    /* The "release" hook cleans up additional resources */
6032
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
6033
        char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
6034 6035 6036

        /* we can't stop the operation even if the script raised an error */
        virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
6037 6038
                    VIR_HOOK_QEMU_OP_RELEASE, VIR_HOOK_SUBOP_END,
                    NULL, xml, NULL);
6039 6040 6041
        VIR_FREE(xml);
    }

6042
    virDomainObjRemoveTransientDef(vm);
6043

6044 6045 6046 6047 6048
 endjob:
    if (asyncJob != QEMU_ASYNC_JOB_NONE)
        qemuDomainObjEndJob(driver, vm);

 cleanup:
6049 6050 6051 6052
    if (orig_err) {
        virSetError(orig_err);
        virFreeError(orig_err);
    }
6053
    virObjectUnref(cfg);
6054
}
6055 6056


6057
int qemuProcessAttach(virConnectPtr conn ATTRIBUTE_UNUSED,
6058
                      virQEMUDriverPtr driver,
6059
                      virDomainObjPtr vm,
6060
                      pid_t pid,
6061 6062 6063 6064
                      const char *pidfile,
                      virDomainChrSourceDefPtr monConfig,
                      bool monJSON)
{
6065
    size_t i;
6066
    qemuDomainLogContextPtr logCtxt = NULL;
6067 6068 6069
    char *timestamp;
    qemuDomainObjPrivatePtr priv = vm->privateData;
    bool running = true;
6070
    virDomainPausedReason reason;
6071
    virSecurityLabelPtr seclabel = NULL;
6072
    virSecurityLabelDefPtr seclabeldef = NULL;
6073
    bool seclabelgen = false;
6074 6075
    virSecurityManagerPtr* sec_managers = NULL;
    const char *model;
6076
    virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
6077
    virCapsPtr caps = NULL;
6078
    bool active = false;
6079
    int ret;
6080 6081 6082 6083

    VIR_DEBUG("Beginning VM attach process");

    if (virDomainObjIsActive(vm)) {
6084 6085
        virReportError(VIR_ERR_OPERATION_INVALID,
                       "%s", _("VM is already active"));
6086
        virObjectUnref(cfg);
6087 6088 6089
        return -1;
    }

6090
    if (!(caps = virQEMUDriverGetCapabilities(driver, false)))
6091
        goto error;
6092

6093 6094 6095 6096 6097
    /* Do this upfront, so any part of the startup process can add
     * runtime state to vm->def that won't be persisted. This let's us
     * report implicit runtime defaults in the XML, like vnc listen/socket
     */
    VIR_DEBUG("Setting current domain def as transient");
6098
    if (virDomainObjSetDefTransient(caps, driver->xmlopt, vm) < 0)
6099
        goto error;
6100

6101
    vm->def->id = qemuDriverAllocateID(driver);
6102

6103
    if (virAtomicIntInc(&driver->nactive) == 1 && driver->inhibitCallback)
6104
        driver->inhibitCallback(true, driver->inhibitOpaque);
6105
    active = true;
6106

6107
    if (virFileMakePath(cfg->logDir) < 0) {
6108 6109
        virReportSystemError(errno,
                             _("cannot create log directory %s"),
6110
                             cfg->logDir);
6111
        goto error;
6112 6113 6114
    }

    VIR_FREE(priv->pidfile);
6115
    if (VIR_STRDUP(priv->pidfile, pidfile) < 0)
6116
        goto error;
6117

6118 6119
    vm->pid = pid;

6120
    VIR_DEBUG("Detect security driver config");
6121
    sec_managers = virSecurityManagerGetNested(driver->securityManager);
6122 6123
    if (sec_managers == NULL)
        goto error;
6124 6125

    for (i = 0; sec_managers[i]; i++) {
6126
        seclabelgen = false;
6127 6128
        model = virSecurityManagerGetModel(sec_managers[i]);
        seclabeldef = virDomainDefGetSecurityLabelDef(vm->def, model);
6129
        if (seclabeldef == NULL) {
6130
            if (!(seclabeldef = virSecurityLabelDefNew(model)))
6131 6132 6133
                goto error;
            seclabelgen = true;
        }
6134 6135
        seclabeldef->type = VIR_DOMAIN_SECLABEL_STATIC;
        if (VIR_ALLOC(seclabel) < 0)
6136
            goto error;
6137
        if (virSecurityManagerGetProcessLabel(sec_managers[i],
6138
                                              vm->def, vm->pid, seclabel) < 0)
6139
            goto error;
6140

6141
        if (VIR_STRDUP(seclabeldef->model, model) < 0)
6142
            goto error;
6143

6144
        if (VIR_STRDUP(seclabeldef->label, seclabel->label) < 0)
6145
            goto error;
6146
        VIR_FREE(seclabel);
6147 6148 6149 6150 6151 6152

        if (seclabelgen) {
            if (VIR_APPEND_ELEMENT(vm->def->seclabels, vm->def->nseclabels, seclabeldef) < 0)
                goto error;
            seclabelgen = false;
        }
6153
    }
6154

6155 6156
    if (virSecurityManagerCheckAllLabel(driver->securityManager, vm->def) < 0)
        goto error;
6157 6158 6159
    if (virSecurityManagerGenLabel(driver->securityManager, vm->def) < 0)
        goto error;

6160 6161 6162
    if (qemuDomainPerfRestart(vm) < 0)
        goto error;

6163
    VIR_DEBUG("Creating domain log file");
6164 6165
    if (!(logCtxt = qemuDomainLogContextNew(driver, vm,
                                            QEMU_DOMAIN_LOG_CONTEXT_MODE_ATTACH)))
6166
        goto error;
6167 6168

    VIR_DEBUG("Determining emulator version");
6169 6170
    virObjectUnref(priv->qemuCaps);
    if (!(priv->qemuCaps = virQEMUCapsCacheLookupCopy(driver->qemuCapsCache,
6171 6172
                                                      vm->def->emulator,
                                                      vm->def->os.machine)))
6173
        goto error;
6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185

    VIR_DEBUG("Preparing monitor state");
    priv->monConfig = monConfig;
    monConfig = NULL;
    priv->monJSON = monJSON;

    priv->gotShutdown = false;

    /*
     * Normally PCI addresses are assigned in the virDomainCreate
     * or virDomainDefine methods. We might still need to assign
     * some here to cope with the question of upgrades. Regardless
M
Martin Kletzander 已提交
6186
     * we also need to populate the PCI address set cache for later
6187 6188
     * use in hotplug
     */
6189
    VIR_DEBUG("Assigning domain PCI addresses");
6190
    if ((qemuDomainAssignAddresses(vm->def, priv->qemuCaps, vm, false)) < 0)
6191
        goto error;
6192

6193
    if ((timestamp = virTimeStringNow()) == NULL)
6194
        goto error;
6195

6196
    qemuDomainLogContextWrite(logCtxt, "%s: attaching\n", timestamp);
6197
    VIR_FREE(timestamp);
6198

6199
    qemuDomainObjTaint(driver, vm, VIR_DOMAIN_TAINT_EXTERNAL_LAUNCH, logCtxt);
6200 6201

    VIR_DEBUG("Waiting for monitor to show up");
6202
    if (qemuProcessWaitForMonitor(driver, vm, QEMU_ASYNC_JOB_NONE, priv->qemuCaps, NULL) < 0)
6203
        goto error;
6204

D
Daniel P. Berrange 已提交
6205
    /* Failure to connect to agent shouldn't be fatal */
6206 6207 6208 6209
    if ((ret = qemuConnectAgent(driver, vm)) < 0) {
        if (ret == -2)
            goto error;

D
Daniel P. Berrange 已提交
6210 6211 6212 6213 6214 6215
        VIR_WARN("Cannot connect to QEMU guest agent for %s",
                 vm->def->name);
        virResetLastError();
        priv->agentError = true;
    }

6216
    VIR_DEBUG("Detecting VCPU PIDs");
6217
    if (qemuDomainRefreshVcpuInfo(driver, vm, QEMU_ASYNC_JOB_NONE, false) < 0)
6218 6219
        goto error;

6220 6221 6222
    if (qemuDomainValidateVcpuInfo(vm) < 0)
        goto error;

6223 6224
    VIR_DEBUG("Detecting IOThread PIDs");
    if (qemuProcessDetectIOThreadPIDs(driver, vm, QEMU_ASYNC_JOB_NONE) < 0)
6225
        goto error;
6226 6227

    VIR_DEBUG("Getting initial memory amount");
6228
    qemuDomainObjEnterMonitor(driver, vm);
6229 6230 6231 6232 6233 6234 6235
    if (qemuMonitorGetBalloonInfo(priv->mon, &vm->def->mem.cur_balloon) < 0)
        goto exit_monitor;
    if (qemuMonitorGetStatus(priv->mon, &running, &reason) < 0)
        goto exit_monitor;
    if (qemuMonitorGetVirtType(priv->mon, &vm->def->virtType) < 0)
        goto exit_monitor;
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
6236
        goto error;
6237

6238
    if (running) {
6239 6240
        virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                             VIR_DOMAIN_RUNNING_UNPAUSED);
6241 6242 6243
        if (vm->def->memballoon &&
            vm->def->memballoon->model == VIR_DOMAIN_MEMBALLOON_MODEL_VIRTIO &&
            vm->def->memballoon->period) {
6244
            qemuDomainObjEnterMonitor(driver, vm);
6245
            qemuMonitorSetMemoryStatsPeriod(priv->mon, vm->def->memballoon,
6246
                                            vm->def->memballoon->period);
6247 6248
            if (qemuDomainObjExitMonitor(driver, vm) < 0)
                goto error;
6249 6250
        }
    } else {
6251
        virDomainObjSetState(vm, VIR_DOMAIN_PAUSED, reason);
6252
    }
6253 6254

    VIR_DEBUG("Writing domain status to disk");
6255
    if (virDomainSaveStatus(driver->xmlopt, cfg->stateDir, vm, driver->caps) < 0)
6256
        goto error;
6257

6258 6259
    /* Run an hook to allow admins to do some magic */
    if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) {
6260
        char *xml = qemuDomainDefFormatXML(driver, vm->def, 0);
6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271
        int hookret;

        hookret = virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name,
                              VIR_HOOK_QEMU_OP_ATTACH, VIR_HOOK_SUBOP_BEGIN,
                              NULL, xml, NULL);
        VIR_FREE(xml);

        /*
         * If the script raised an error abort the launch
         */
        if (hookret < 0)
6272
            goto error;
6273 6274
    }

6275
    qemuDomainLogContextFree(logCtxt);
6276
    VIR_FREE(seclabel);
6277
    VIR_FREE(sec_managers);
6278
    virObjectUnref(cfg);
6279
    virObjectUnref(caps);
6280 6281 6282

    return 0;

6283 6284
 exit_monitor:
    ignore_value(qemuDomainObjExitMonitor(driver, vm));
6285
 error:
6286 6287 6288 6289 6290 6291
    /* We jump here if we failed to attach to the VM for any reason.
     * Leave the domain running, but pretend we never attempted to
     * attach to it.  */
    if (active && virAtomicIntDecAndTest(&driver->nactive) &&
        driver->inhibitCallback)
        driver->inhibitCallback(false, driver->inhibitOpaque);
6292
    qemuDomainLogContextFree(logCtxt);
6293
    VIR_FREE(seclabel);
6294
    VIR_FREE(sec_managers);
6295 6296
    if (seclabelgen)
        virSecurityLabelDefFree(seclabeldef);
6297
    virDomainChrSourceDefFree(monConfig);
6298
    virObjectUnref(cfg);
6299
    virObjectUnref(caps);
6300 6301 6302 6303
    return -1;
}


6304
static virDomainObjPtr
6305 6306 6307
qemuProcessAutoDestroy(virDomainObjPtr dom,
                       virConnectPtr conn,
                       void *opaque)
6308
{
6309
    virQEMUDriverPtr driver = opaque;
6310
    qemuDomainObjPrivatePtr priv = dom->privateData;
6311
    virObjectEventPtr event = NULL;
6312
    unsigned int stopFlags = 0;
6313

6314
    VIR_DEBUG("vm=%s, conn=%p", dom->def->name, conn);
6315

6316 6317
    virObjectRef(dom);

6318 6319 6320
    if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_IN)
        stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED;

6321 6322
    if (priv->job.asyncJob) {
        VIR_DEBUG("vm=%s has long-term job active, cancelling",
6323
                  dom->def->name);
6324
        qemuDomainObjDiscardAsyncJob(driver, dom);
6325 6326 6327
    }

    VIR_DEBUG("Killing domain");
6328

6329 6330 6331 6332 6333
    if (qemuProcessBeginStopJob(driver, dom, QEMU_JOB_DESTROY, true) < 0)
        goto cleanup;

    qemuProcessStop(driver, dom, VIR_DOMAIN_SHUTOFF_DESTROYED,
                    QEMU_ASYNC_JOB_NONE, stopFlags);
6334

6335
    virDomainAuditStop(dom, "destroyed");
6336
    event = virDomainEventLifecycleNewFromObj(dom,
6337 6338
                                     VIR_DOMAIN_EVENT_STOPPED,
                                     VIR_DOMAIN_EVENT_STOPPED_DESTROYED);
6339

6340 6341
    qemuDomainObjEndJob(driver, dom);

6342
    qemuDomainRemoveInactive(driver, dom);
6343

6344
    qemuDomainEventQueue(driver, event);
6345

6346
 cleanup:
6347
    virDomainObjEndAPI(&dom);
6348
    return dom;
6349 6350
}

6351
int qemuProcessAutoDestroyAdd(virQEMUDriverPtr driver,
6352 6353 6354
                              virDomainObjPtr vm,
                              virConnectPtr conn)
{
6355
    VIR_DEBUG("vm=%s, conn=%p", vm->def->name, conn);
6356 6357
    return virCloseCallbacksSet(driver->closeCallbacks, vm, conn,
                                qemuProcessAutoDestroy);
6358 6359
}

6360
int qemuProcessAutoDestroyRemove(virQEMUDriverPtr driver,
6361 6362
                                 virDomainObjPtr vm)
{
6363
    int ret;
6364
    VIR_DEBUG("vm=%s", vm->def->name);
6365 6366 6367
    ret = virCloseCallbacksUnset(driver->closeCallbacks, vm,
                                 qemuProcessAutoDestroy);
    return ret;
6368
}
6369

6370
bool qemuProcessAutoDestroyActive(virQEMUDriverPtr driver,
6371 6372
                                  virDomainObjPtr vm)
{
6373
    virCloseCallback cb;
6374
    VIR_DEBUG("vm=%s", vm->def->name);
6375
    cb = virCloseCallbacksGet(driver->closeCallbacks, vm, NULL);
6376
    return cb == qemuProcessAutoDestroy;
6377
}
6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400


/*
 * Refresh per-disk state of @vm from the block info reported by the
 * monitor (queried within @asyncJob).
 *
 * For each disk whose alias is found in the block info table: a
 * removable disk reported as empty gets its source cleared, a removable
 * disk with a tray gets its tray status updated, and the removable/tray
 * flags are copied into the disk's private data. Disks without an entry
 * in the table are left untouched.
 *
 * Returns 0 on success, -1 if the monitor could not be entered or
 * exited cleanly, or if no block info was obtained.
 */
int
qemuProcessRefreshDisks(virQEMUDriverPtr driver,
                        virDomainObjPtr vm,
                        qemuDomainAsyncJob asyncJob)
{
    qemuDomainObjPrivatePtr priv = vm->privateData;
    virHashTablePtr blockInfo = NULL;
    int ret = -1;
    size_t idx;

    if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) != 0)
        goto cleanup;

    blockInfo = qemuMonitorGetBlockInfo(priv->mon);
    if (qemuDomainObjExitMonitor(driver, vm) < 0)
        goto cleanup;

    if (blockInfo == NULL)
        goto cleanup;

    for (idx = 0; idx < vm->def->ndisks; idx++) {
        virDomainDiskDefPtr disk = vm->def->disks[idx];
        qemuDomainDiskPrivatePtr diskpriv = QEMU_DOMAIN_DISK_PRIVATE(disk);
        struct qemuDomainDiskInfo *info;

        info = virHashLookup(blockInfo, disk->info.alias);
        if (info == NULL)
            continue;

        if (info->removable) {
            /* Failure to clear the source is deliberately ignored */
            if (info->empty)
                ignore_value(virDomainDiskSetSource(disk, NULL));

            if (info->tray) {
                disk->tray_status = info->tray_open
                    ? VIR_DOMAIN_DISK_TRAY_OPEN
                    : VIR_DOMAIN_DISK_TRAY_CLOSED;
            }
        }

        /* fill in additional data */
        diskpriv->removable = info->removable;
        diskpriv->tray = info->tray;
    }

    ret = 0;

 cleanup:
    virHashFree(blockInfo);
    return ret;
}