kvm-all.c 34.3 KB
Newer Older
A
aliguori 已提交
1 2 3 4
/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
19
#include <stdarg.h>
A
aliguori 已提交
20 21 22 23

#include <linux/kvm.h>

#include "qemu-common.h"
24
#include "qemu-barrier.h"
A
aliguori 已提交
25
#include "sysemu.h"
J
Jan Kiszka 已提交
26
#include "hw/hw.h"
27
#include "gdbstub.h"
A
aliguori 已提交
28
#include "kvm.h"
29
#include "bswap.h"
A
aliguori 已提交
30

31 32 33 34 35
/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif

A
aliguori 已提交
36 37 38
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

A
aliguori 已提交
39 40 41
//#define DEBUG_KVM

/*
 * DPRINTF - debug trace to stderr.
 * Expands to an fprintf() only when DEBUG_KVM is defined; otherwise it
 * expands to an empty statement and its arguments are not evaluated.
 */
#if defined(DEBUG_KVM)
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

A
aliguori 已提交
49 50
typedef struct KVMSlot
{
A
Anthony Liguori 已提交
51 52 53
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
A
aliguori 已提交
54 55 56
    int slot;
    int flags;
} KVMSlot;
A
aliguori 已提交
57

58 59
typedef struct kvm_dirty_log KVMDirtyLog;

A
aliguori 已提交
60 61 62 63 64
/* Global accelerator state; a single instance is published via kvm_state. */
struct KVMState {
    KVMSlot slots[32];          /* slot table, indexed by kernel slot number */
    int fd;                     /* descriptor of /dev/kvm */
    int vmfd;                   /* descriptor returned by KVM_CREATE_VM */
    int coalesced_mmio;         /* KVM_CAP_COALESCED_MMIO result (ring page offset) */
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;  /* set unless KVM_CAP_JOIN_MEMORY_REGIONS_WORKS */
    int migration_log;          /* force dirty logging on every slot */
    int vcpu_events;            /* KVM_CAP_VCPU_EVENTS result */
    int robust_singlestep;      /* KVM_CAP_X86_ROBUST_SINGLESTEP result */
    int debugregs;              /* KVM_CAP_DEBUGREGS result */
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
    int xsave;                  /* KVM_CAP_XSAVE result */
    int xcrs;                   /* KVM_CAP_XCRS result */
    int many_ioeventfds;        /* result of kvm_check_many_ioeventfds() */
};

static KVMState *kvm_state;

/*
 * Return the first unused entry of the slot table (memory_size == 0),
 * never handing out indices 8..11, which are KVM private memory slots.
 * Aborts the process when the table is exhausted, so it never returns NULL.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(s->slots); idx++) {
        KVMSlot *candidate = &s->slots[idx];
        int is_private = (idx >= 8 && idx < 12);

        if (!is_private && candidate->memory_size == 0) {
            return candidate;
        }
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

/*
 * Find the slot that covers exactly [start_addr, end_addr).
 * Returns NULL when no slot has precisely these bounds.
 */
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int idx = 0;

    while (idx < ARRAY_SIZE(s->slots)) {
        KVMSlot *slot = &s->slots[idx++];

        if (slot->start_addr != start_addr) {
            continue;
        }
        if (slot->start_addr + slot->memory_size == end_addr) {
            return slot;
        }
    }

    return NULL;
}

121 122 123 124
/*
 * Among the in-use slots that intersect [start_addr, end_addr), return the
 * one with the lowest start address, or NULL if nothing overlaps.
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *lowest = NULL;
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(s->slots); idx++) {
        KVMSlot *slot = &s->slots[idx];

        /* Unused entries never overlap anything. */
        if (slot->memory_size == 0) {
            continue;
        }
        /* A candidate starting above the current best cannot improve it. */
        if (lowest != NULL && lowest->start_addr < slot->start_addr) {
            continue;
        }

        if (end_addr > slot->start_addr &&
            start_addr < slot->start_addr + slot->memory_size) {
            lowest = slot;
        }
    }

    return lowest;
}

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
/*
 * Translate a ram offset into a guest-physical address using the slot table.
 *
 * On success stores the address in *phys_addr and returns 1; returns 0 and
 * leaves *phys_addr untouched when no slot backs ram_addr.
 */
int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
                                      target_phys_addr_t *phys_addr)
{
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(s->slots); idx++) {
        KVMSlot *slot = &s->slots[idx];

        if (ram_addr < slot->phys_offset) {
            continue;
        }
        if (ram_addr < slot->phys_offset + slot->memory_size) {
            *phys_addr = slot->start_addr + (ram_addr - slot->phys_offset);
            return 1;
        }
    }

    return 0;
}

166 167 168 169 170 171 172
/*
 * Push the current description of @slot to the kernel with
 * KVM_SET_USER_MEMORY_REGION. Dirty logging is forced on while
 * s->migration_log is set, regardless of the slot's own flags.
 * Returns the result of the ioctl wrapper.
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region region = {
        .slot = slot->slot,
        .flags = slot->flags,
        .guest_phys_addr = slot->start_addr,
        .memory_size = slot->memory_size,
        .userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset),
    };

    if (s->migration_log) {
        region.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
}

J
Jan Kiszka 已提交
181 182 183 184
static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

J
Jan Kiszka 已提交
185
    kvm_arch_reset_vcpu(env);
J
Jan Kiszka 已提交
186
}
187

188 189 190 191 192 193 194 195 196 197 198
int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}


A
aliguori 已提交
199 200 201 202 203 204
/*
 * Create the kernel vcpu for @env and map its shared kvm_run area.
 *
 * Steps: KVM_CREATE_VCPU, query the mmap size, mmap the vcpu fd, locate the
 * coalesced MMIO ring (once, on the first vcpu that gets here), then run the
 * architecture-specific init. On success the reset handler is registered and
 * the vcpu is reset once.
 *
 * Returns 0 on success or a negative errno-style value on failure.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        /* ret still holds the (non-negative) vcpu fd here; without this
         * assignment the failure would be reported as success. */
        ret = mmap_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
        /* The capability value is the page offset of the ring inside the
         * vcpu mapping. Byte-pointer arithmetic avoids relying on the GNU
         * void-pointer arithmetic extension. */
        s->coalesced_mmio_ring =
            (void *)((uint8_t *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE);
    }
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
    }
err:
    return ret;
}

246 247 248
/*
 * dirty pages logging control
 */
A
Anthony Liguori 已提交
249 250
/*
 * Update the flag bits selected by @mask to @flags on the slot that exactly
 * covers [phys_addr, phys_addr + size).
 *
 * Returns -EINVAL when no slot has exactly these bounds, 0 when the flags
 * the kernel would see are unchanged, otherwise the result of
 * re-registering the slot.
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *slot = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int previous;
    int effective;

    if (!slot) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    previous = slot->flags;
    slot->flags = (previous & ~mask) | flags;

    /* If nothing changed effectively, no need to issue ioctl */
    effective = slot->flags;
    if (s->migration_log) {
        effective |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (effective == previous) {
        return 0;
    }

    return kvm_set_user_memory_region(s, slot);
}

A
Anthony Liguori 已提交
279
/* Turn on dirty-page logging for the slot covering [phys_addr, +size). */
int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    const int dirty_bit = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, size, dirty_bit, dirty_bit);
}

A
Anthony Liguori 已提交
285
/* Turn off dirty-page logging for the slot covering [phys_addr, +size). */
int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    const int dirty_bit = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, size, 0, dirty_bit);
}

291
static int kvm_set_migration_log(int enable)
292 293 294 295 296 297 298 299 300 301
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

302 303 304
        if (!mem->memory_size) {
            continue;
        }
305 306 307 308 309 310 311 312 313 314 315
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

316 317 318 319 320
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned long *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
A
Alexander Graf 已提交
321
{
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
    unsigned int i, j;
    unsigned long page_number, addr, addr1, c;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
        HOST_LONG_BITS;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        if (bitmap[i] != 0) {
            c = leul_to_cpu(bitmap[i]);
            do {
                j = ffsl(c) - 1;
                c &= ~(1ul << j);
                page_number = i * HOST_LONG_BITS + j;
                addr1 = page_number * TARGET_PAGE_SIZE;
                addr = offset + addr1;
                ram_addr = cpu_get_physical_page_desc(addr);
                cpu_physical_memory_set_dirty(ram_addr);
            } while (c != 0);
        }
    }
    return 0;
A
Alexander Graf 已提交
347 348
}

349 350
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))

351 352 353 354 355
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
 * This means all bits are set to dirty.
 *
356
 * @start_add: start of logged region.
357 358
 * @end_addr: end of logged region.
 */
359
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
J
Jan Kiszka 已提交
360
                                          target_phys_addr_t end_addr)
361 362
{
    KVMState *s = kvm_state;
363 364 365 366
    unsigned long size, allocated_size = 0;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;
367

368 369 370 371 372 373
    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }
374

375
        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), HOST_LONG_BITS) / 8;
376 377 378 379 380 381 382
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);
383

384
        d.slot = mem->slot;
385

386
        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
387
            DPRINTF("ioctl failed %d\n", errno);
388 389 390
            ret = -1;
            break;
        }
391

392 393 394
        kvm_get_dirty_pages_log_range(mem->start_addr, d.dirty_bitmap,
                                      mem->start_addr, mem->memory_size);
        start_addr = mem->start_addr + mem->memory_size;
395 396
    }
    qemu_free(d.dirty_bitmap);
397 398

    return ret;
399 400
}

A
Anthony Liguori 已提交
401
/*
 * Register [start, start + size) as a coalesced MMIO zone.
 * Returns -ENOSYS when coalesced MMIO is not compiled in or not reported by
 * the kernel, otherwise the result of KVM_REGISTER_COALESCED_MMIO.
 */
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
#else
    return -ENOSYS;
#endif
}

A
Anthony Liguori 已提交
420
/*
 * Unregister the coalesced MMIO zone [start, start + size).
 * Returns -ENOSYS when coalesced MMIO is not compiled in or not reported by
 * the kernel, otherwise the result of KVM_UNREGISTER_COALESCED_MMIO.
 */
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
#else
    return -ENOSYS;
#endif
}

439 440 441 442 443 444 445 446 447 448 449 450
/*
 * Query KVM_CHECK_EXTENSION for @extension.
 * Returns the kernel's answer, with any error mapped to 0 ("not supported").
 */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int answer = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    return answer < 0 ? 0 : answer;
}

451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483
/*
 * Probe whether the kernel accepts more than six ioeventfds.
 *
 * Older kernels have a 6 device limit on the KVM io bus, so seven probe
 * eventfds are registered; the answer is 1 only if all seven succeed.
 * Every probe that was registered is unregistered and closed again.
 * Returns 0 when eventfd support is not configured.
 */
static int kvm_check_many_ioeventfds(void)
{
#ifdef CONFIG_EVENTFD
    int probes[7];
    int count, rc = 0;
    int supported;

    for (count = 0; count < ARRAY_SIZE(probes); count++) {
        probes[count] = eventfd(0, EFD_CLOEXEC);
        if (probes[count] < 0) {
            break;
        }
        rc = kvm_set_ioeventfd_pio_word(probes[count], 0, count, true);
        if (rc < 0) {
            /* Not registered, so only the descriptor needs releasing. */
            close(probes[count]);
            break;
        }
    }

    /* Decide whether many devices are supported or not */
    supported = count == ARRAY_SIZE(probes);

    /* Tear down, in reverse order, everything that was registered. */
    while (count-- > 0) {
        kvm_set_ioeventfd_pio_word(probes[count], 0, count, false);
        close(probes[count]);
    }
    return supported;
#else
    return 0;
#endif
}

J
Jan Kiszka 已提交
484 485
/*
 * Make the kernel's slot layout reflect a new mapping of
 * [start_addr, start_addr + size) to @phys_offset.
 *
 * Every existing slot overlapping the range is unregistered; the parts of it
 * that lie outside the range are re-registered as prefix/suffix slots.
 * Finally the new range itself is registered, unless the low bits of
 * @phys_offset mark it as memory KVM does not need to know about.
 * Any failure to (un)register a slot aborts the process.
 */
static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem;
    KVMSlot prev;
    int rc;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. */
    size = TARGET_PAGE_ALIGN(size);
    start_addr = TARGET_PAGE_ALIGN(start_addr);

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while ((mem = kvm_lookup_overlapping_slot(s, start_addr,
                                              start_addr + size)) != NULL) {
        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        /* Keep a copy: the table entry is about to be cleared. */
        prev = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        rc = kvm_set_user_memory_region(s, mem);
        if (rc) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-rc));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            prev.start_addr == start_addr && prev.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = prev.memory_size;
            mem->start_addr = prev.start_addr;
            mem->phys_offset = prev.phys_offset;
            mem->flags = 0;

            rc = kvm_set_user_memory_region(s, mem);
            if (rc) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-rc));
                abort();
            }

            /* The old fragment stays; continue with the remainder. */
            start_addr += prev.memory_size;
            phys_offset += prev.memory_size;
            size -= prev.memory_size;
            continue;
        }

        /* register prefix slot */
        if (prev.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - prev.start_addr;
            mem->start_addr = prev.start_addr;
            mem->phys_offset = prev.phys_offset;
            mem->flags = 0;

            rc = kvm_set_user_memory_region(s, mem);
            if (rc) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-rc));
                abort();
            }
        }

        /* register suffix slot */
        if (prev.start_addr + prev.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - prev.start_addr;
            mem->memory_size = prev.memory_size - size_delta;
            mem->phys_offset = prev.phys_offset + size_delta;
            mem->flags = 0;

            rc = kvm_set_user_memory_region(s, mem);
            if (rc) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-rc));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size) {
        return;
    }
    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED) {
        return;
    }

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    rc = kvm_set_user_memory_region(s, mem);
    if (rc) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-rc));
        abort();
    }
}

613
static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
J
Jan Kiszka 已提交
614 615
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size, ram_addr_t phys_offset)
616
{
J
Jan Kiszka 已提交
617
    kvm_set_phys_mem(start_addr, size, phys_offset);
618 619 620
}

/*
 * CPUPhysMemoryClient hook: forwards to kvm_physical_sync_dirty_bitmap()
 * and returns its result; @client is unused.
 */
static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    (void)client;
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

/*
 * CPUPhysMemoryClient hook: forwards to kvm_set_migration_log() and returns
 * its result; @client is unused.
 */
static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    (void)client;
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
J
Jan Kiszka 已提交
634 635 636
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
637 638
};

A
aliguori 已提交
639 640
int kvm_init(int smp_cpus)
{
641 642 643
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
A
aliguori 已提交
644 645 646 647 648 649
    KVMState *s;
    int ret;
    int i;

    s = qemu_mallocz(sizeof(KVMState));

650
#ifdef KVM_CAP_SET_GUEST_DEBUG
B
Blue Swirl 已提交
651
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
652
#endif
J
Jan Kiszka 已提交
653
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
A
aliguori 已提交
654
        s->slots[i].slot = i;
J
Jan Kiszka 已提交
655
    }
A
aliguori 已提交
656
    s->vmfd = -1;
K
Kevin Wolf 已提交
657
    s->fd = qemu_open("/dev/kvm", O_RDWR);
A
aliguori 已提交
658 659 660 661 662 663 664 665
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
J
Jan Kiszka 已提交
666
        if (ret > 0) {
A
aliguori 已提交
667
            ret = -EINVAL;
J
Jan Kiszka 已提交
668
        }
A
aliguori 已提交
669 670 671 672 673 674 675 676 677 678 679
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
680 681 682 683 684
    if (s->vmfd < 0) {
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
A
aliguori 已提交
685
        goto err;
686
    }
A
aliguori 已提交
687 688 689

    /* initially, KVM allocated its own memory and we had to jump through
     * hooks to make phys_ram_base point to this.  Modern versions of KVM
P
pbrook 已提交
690
     * just use a user allocated buffer so we can use regular pages
A
aliguori 已提交
691 692
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
693 694
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
695 696
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
A
aliguori 已提交
697 698 699
        goto err;
    }

700 701 702
    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
703 704
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;
705 706

        fprintf(stderr,
707 708
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
709 710 711
        goto err;
    }

712
    s->coalesced_mmio = 0;
A
aliguori 已提交
713
#ifdef KVM_CAP_COALESCED_MMIO
714
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
715
    s->coalesced_mmio_ring = NULL;
A
aliguori 已提交
716 717
#endif

718 719
    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
720
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
721 722 723 724 725
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

726 727 728 729 730
    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

731 732 733 734 735 736
    s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif

737 738 739 740 741
    s->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

742 743 744 745 746 747 748 749 750 751
    s->xsave = 0;
#ifdef KVM_CAP_XSAVE
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
#endif

    s->xcrs = 0;
#ifdef KVM_CAP_XCRS
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
#endif

A
aliguori 已提交
752
    ret = kvm_arch_init(s, smp_cpus);
J
Jan Kiszka 已提交
753
    if (ret < 0) {
A
aliguori 已提交
754
        goto err;
J
Jan Kiszka 已提交
755
    }
A
aliguori 已提交
756 757

    kvm_state = s;
758
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
A
aliguori 已提交
759

760 761
    s->many_ioeventfds = kvm_check_many_ioeventfds();

A
aliguori 已提交
762 763 764 765
    return 0;

err:
    if (s) {
J
Jan Kiszka 已提交
766
        if (s->vmfd != -1) {
A
aliguori 已提交
767
            close(s->vmfd);
J
Jan Kiszka 已提交
768 769
        }
        if (s->fd != -1) {
A
aliguori 已提交
770
            close(s->fd);
J
Jan Kiszka 已提交
771
        }
A
aliguori 已提交
772 773 774 775 776 777
    }
    qemu_free(s);

    return ret;
}

778 779
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
A
aliguori 已提交
780 781 782 783 784 785 786 787
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
788
                stb_p(ptr, cpu_inb(port));
A
aliguori 已提交
789 790
                break;
            case 2:
791
                stw_p(ptr, cpu_inw(port));
A
aliguori 已提交
792 793
                break;
            case 4:
794
                stl_p(ptr, cpu_inl(port));
A
aliguori 已提交
795 796 797 798 799
                break;
            }
        } else {
            switch (size) {
            case 1:
800
                cpu_outb(port, ldub_p(ptr));
A
aliguori 已提交
801 802
                break;
            case 2:
803
                cpu_outw(port, lduw_p(ptr));
A
aliguori 已提交
804 805
                break;
            case 4:
806
                cpu_outl(port, ldl_p(ptr));
A
aliguori 已提交
807 808 809 810 811 812 813 814 815 816
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

M
Marcelo Tosatti 已提交
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
/*
 * Report a KVM_EXIT_INTERNAL_ERROR exit: print the suberror and its extra
 * data words (when the kernel provides them), dump the CPU state, and stop
 * the VM. For emulation failures the VM is stopped only if
 * kvm_arch_stop_on_emulation_error() says so.
 */
static void kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
{
    int has_data = kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA);

    if (has_data) {
        int word;

        fprintf(stderr, "KVM internal error. Suberror: %d\n",
                run->internal.suberror);

        for (word = 0; word < run->internal.ndata; ++word) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    word, (uint64_t)run->internal.data[word]);
        }
    }

    cpu_dump_state(env, stderr, fprintf, 0);

    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
        fprintf(stderr, "emulation failure\n");
        if (!kvm_arch_stop_on_emulation_error(env)) {
            return;
        }
    }

    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    vm_stop(0);
}
#endif

846
/*
 * Replay every pending entry of the coalesced MMIO ring as a physical
 * memory write, advancing ring->first after each entry.
 * Does nothing when no ring has been set up or support is compiled out.
 */
void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *ring = kvm_state->coalesced_mmio_ring;

    if (!ring) {
        return;
    }

    while (ring->first != ring->last) {
        struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[ring->first];

        cpu_physical_memory_write(entry->phys_addr, entry->data, entry->len);
        /* Write barrier between consuming the entry and releasing it. */
        smp_wmb();
        ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
    }
#endif
}

865
static void do_kvm_cpu_synchronize_state(void *_env)
866
{
867 868
    CPUState *env = _env;

J
Jan Kiszka 已提交
869
    if (!env->kvm_vcpu_dirty) {
870
        kvm_arch_get_registers(env);
J
Jan Kiszka 已提交
871
        env->kvm_vcpu_dirty = 1;
872 873 874
    }
}

875 876
/*
 * Make sure qemu's register copy for @env is current, dispatching the fetch
 * through run_on_cpu() when the copy is not already marked dirty.
 */
void kvm_cpu_synchronize_state(CPUState *env)
{
    if (env->kvm_vcpu_dirty) {
        return;
    }

    run_on_cpu(env, do_kvm_cpu_synchronize_state, env);
}

882 883 884 885 886 887 888 889 890 891 892 893
/* After a reset: write the reset-level state to the kernel, clear the dirty mark. */
void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    const int level = KVM_PUT_RESET_STATE;

    kvm_arch_put_registers(env, level);
    env->kvm_vcpu_dirty = 0;
}

/* After init: write the full state to the kernel, clear the dirty mark. */
void kvm_cpu_synchronize_post_init(CPUState *env)
{
    const int level = KVM_PUT_FULL_STATE;

    kvm_arch_put_registers(env, level);
    env->kvm_vcpu_dirty = 0;
}

A
aliguori 已提交
894 895 896 897 898
/*
 * Run @env inside KVM until an exit needs attention outside this loop.
 *
 * Each iteration writes back dirty registers, issues KVM_RUN with the
 * iothread lock released, flushes coalesced MMIO and dispatches on the exit
 * reason. Handlers that return a positive value re-enter the guest.
 *
 * Returns 0 or a negative value from the exit handler. A pending
 * exit_request is converted into EXCP_INTERRUPT before returning, except on
 * the debug path, which returns 0 directly with EXCP_DEBUG set.
 * A KVM_RUN failure other than EINTR/EAGAIN aborts the process.
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    DPRINTF("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (kvm_arch_process_irqchip_events(env)) {
            ret = 0;
            break;
        }

        /* Push qemu-side register changes to the kernel before running. */
        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        cpu_single_env = NULL;
        /* The iothread lock is not held while the guest runs. */
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        cpu_single_env = env;
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            DPRINTF("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            /* Always say why we are about to abort, not only in debug
             * builds. */
            fprintf(stderr, "error: kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            DPRINTF("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            DPRINTF("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            DPRINTF("kvm_exit_exception\n");
            break;
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
        case KVM_EXIT_INTERNAL_ERROR:
            kvm_handle_internal_error(env, run);
            break;
#endif
        case KVM_EXIT_DEBUG:
            DPRINTF("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

1008
/*
 * Issue an ioctl on the descriptor stored in s->fd.
 *
 * Exactly one variadic argument is read (as a void *) and forwarded to
 * ioctl() unchanged.
 *
 * Returns the ioctl() result; when ioctl() reports -1 the negated errno
 * value is returned instead.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->fd, type, param);
    return (rc == -1) ? -errno : rc;
}

1025
/*
 * Issue an ioctl on the descriptor stored in s->vmfd.
 *
 * Exactly one variadic argument is read (as a void *) and forwarded to
 * ioctl() unchanged.
 *
 * Returns the ioctl() result; when ioctl() reports -1 the negated errno
 * value is returned instead.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->vmfd, type, param);
    return (rc == -1) ? -errno : rc;
}

1042
/*
 * Issue an ioctl on the descriptor stored in env->kvm_fd.
 *
 * Exactly one variadic argument is read (as a void *) and forwarded to
 * ioctl() unchanged.
 *
 * Returns the ioctl() result; when ioctl() reports -1 the negated errno
 * value is returned instead.
 */
int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(env->kvm_fd, type, param);
    return (rc == -1) ? -errno : rc;
}
A
aliguori 已提交
1058 1059 1060

/*
 * Report the result of kvm_check_extension() for KVM_CAP_SYNC_MMU on the
 * global kvm_state.  When the headers do not define KVM_CAP_SYNC_MMU the
 * function always returns 0.
 */
int kvm_has_sync_mmu(void)
{
#ifndef KVM_CAP_SYNC_MMU
    return 0;
#else
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
#endif
}
1069

1070 1071 1072 1073 1074
int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

1075 1076 1077 1078 1079
int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

1080 1081 1082 1083 1084
int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
int kvm_has_xsave(void)
{
    return kvm_state->xsave;
}

int kvm_has_xcrs(void)
{
    return kvm_state->xcrs;
}

1095 1096 1097 1098 1099 1100 1101 1102
/*
 * Return the many_ioeventfds field of the global kvm_state, or 0 when
 * kvm_enabled() is false (kvm_state is not dereferenced in that case).
 */
int kvm_has_many_ioeventfds(void)
{
    return kvm_enabled() ? kvm_state->many_ioeventfds : 0;
}

1103 1104 1105
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
A
Andreas Färber 已提交
1106
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
1107 1108

        if (ret) {
A
Andreas Färber 已提交
1109 1110 1111
            perror("qemu_madvise");
            fprintf(stderr,
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
1112 1113 1114 1115 1116
            exit(1);
        }
    }
}

1117 1118 1119 1120 1121 1122
#ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * Look up the software breakpoint registered for guest address pc in
 * env->kvm_state->kvm_sw_breakpoints.
 *
 * Returns the first list entry whose pc field equals pc, or NULL when no
 * entry matches.
 */
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *found = NULL;
    struct kvm_sw_breakpoint *cur;

    QTAILQ_FOREACH(cur, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (cur->pc == pc) {
            found = cur;
            break;
        }
    }
    return found;
}

/*
 * Return 1 when env->kvm_state->kvm_sw_breakpoints holds at least one
 * entry, 0 when the list is empty.
 */
int kvm_sw_breakpoints_active(CPUState *env)
{
    KVMState *s = env->kvm_state;

    return QTAILQ_EMPTY(&s->kvm_sw_breakpoints) ? 0 : 1;
}

G
Glauber Costa 已提交
1136 1137 1138 1139 1140 1141 1142 1143 1144
/*
 * Argument bundle passed by kvm_update_guest_debug() to
 * kvm_invoke_set_guest_debug() through run_on_cpu().
 */
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg; /* payload handed to KVM_SET_GUEST_DEBUG */
    CPUState *env;              /* vcpu the ioctl is issued on */
    int err;                    /* kvm_vcpu_ioctl() result, set by the callback */
};

/*
 * run_on_cpu() callback: issue KVM_SET_GUEST_DEBUG on the vcpu named in the
 * request and store the ioctl result in the request's err field.
 *
 * data must point to a struct kvm_set_guest_debug_data.
 */
static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *req = data;

    req->err = kvm_vcpu_ioctl(req->env, KVM_SET_GUEST_DEBUG, &req->dbg);
}

1150 1151
/*
 * Build the guest-debug control word for env and apply it via
 * KVM_SET_GUEST_DEBUG.
 *
 * The control word starts as reinject_trap; KVM_GUESTDBG_ENABLE and
 * KVM_GUESTDBG_SINGLESTEP are added when env->singlestep_enabled is set.
 * kvm_arch_update_guest_debug() may then amend the request before it is
 * dispatched through run_on_cpu().
 *
 * Returns the result of the KVM_SET_GUEST_DEBUG ioctl.
 */
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data req;

    req.env = env;
    req.dbg.control = reinject_trap;
    if (env->singlestep_enabled) {
        req.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &req.dbg);

    run_on_cpu(env, kvm_invoke_set_guest_debug, &req);
    return req.err;
}

/*
 * Install a guest breakpoint and push the updated debug state to every vcpu.
 *
 * For GDB_BREAKPOINT_SW: if a software breakpoint already exists at addr its
 * use count is incremented and 0 is returned immediately.  Otherwise a new
 * entry is allocated, handed to kvm_arch_insert_sw_breakpoint() and, on
 * success, linked at the head of the kvm_sw_breakpoints list.
 * For any other type the request is forwarded to
 * kvm_arch_insert_hw_breakpoint().
 *
 * Returns 0 on success, -ENOMEM if the allocation yields NULL, or the first
 * non-zero error reported by the arch hook or kvm_update_guest_debug().  If
 * kvm_update_guest_debug() fails, the breakpoint stays registered.
 */
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(*bp));
        if (!bp) {
            return -ENOMEM;
        }

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            /* Allocated with qemu_malloc(), so release with qemu_free(). */
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    /* Every vcpu must pick up the new breakpoint configuration. */
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

/*
 * Remove a guest breakpoint and push the updated debug state to every vcpu.
 *
 * For GDB_BREAKPOINT_SW the entry at addr is looked up; -ENOENT is returned
 * when none exists.  While its use count is above 1 the count is merely
 * decremented and 0 is returned.  Otherwise the breakpoint is removed via
 * kvm_arch_remove_sw_breakpoint(), unlinked from the list and freed.
 * Any other type is forwarded to kvm_arch_remove_hw_breakpoint().
 *
 * Returns 0 on success or the first non-zero error reported by the arch
 * hook or kvm_update_guest_debug().
 */
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *entry_bp;
    CPUState *cpu;
    int rc;

    if (type != GDB_BREAKPOINT_SW) {
        rc = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (rc) {
            return rc;
        }
    } else {
        entry_bp = kvm_find_sw_breakpoint(current_env, addr);
        if (entry_bp == NULL) {
            return -ENOENT;
        }

        /* Still referenced by another user: only drop one reference. */
        if (entry_bp->use_count > 1) {
            entry_bp->use_count--;
            return 0;
        }

        rc = kvm_arch_remove_sw_breakpoint(current_env, entry_bp);
        if (rc) {
            return rc;
        }

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints,
                      entry_bp, entry);
        qemu_free(entry_bp);
    }

    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        rc = kvm_update_guest_debug(cpu, 0);
        if (rc) {
            return rc;
        }
    }
    return 0;
}

/*
 * Remove every software and hardware breakpoint and push the updated debug
 * state to every vcpu.
 *
 * Each software breakpoint is removed through
 * kvm_arch_remove_sw_breakpoint(), first on current_env and, if that fails,
 * on each vcpu in turn until one succeeds.  The entry is then unlinked from
 * the kvm_sw_breakpoints list and freed, so that the list is empty
 * afterwards and a later kvm_insert_breakpoint() at the same address
 * installs a fresh breakpoint instead of bumping the use count of a stale
 * entry.
 *
 * Errors from kvm_update_guest_debug() are ignored (best effort).
 */
void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) {
                    break;
                }
            }
        }
        /* The _SAFE iterator already holds the successor, so unlinking and
         * freeing the current element here is fine. */
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        kvm_update_guest_debug(env, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

/*
 * Variant compiled when KVM_CAP_SET_GUEST_DEBUG is not defined: guest
 * debugging is unavailable, so the call always fails with -EINVAL.
 */
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

/*
 * Variant compiled when KVM_CAP_SET_GUEST_DEBUG is not defined: no
 * breakpoint is installed and -EINVAL is always returned.
 */
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

/*
 * Variant compiled when KVM_CAP_SET_GUEST_DEBUG is not defined: nothing is
 * removed and -EINVAL is always returned.
 */
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

/*
 * Variant compiled when KVM_CAP_SET_GUEST_DEBUG is not defined: there is
 * nothing to remove, so this is a no-op.
 */
void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
1298 1299 1300 1301 1302 1303

/*
 * Apply a signal mask to a vcpu through KVM_SET_SIGNAL_MASK.
 *
 * When sigset is NULL the ioctl is issued with a NULL argument.  Otherwise
 * a temporary struct kvm_signal_mask is allocated, *sigset is copied into
 * its trailing sigset array, the ioctl is issued and the buffer is released.
 *
 * Returns the result of kvm_vcpu_ioctl().
 */
int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
    }

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    /*
     * NOTE(review): len is fixed at 8 although sizeof(*sigset) bytes are
     * copied; presumably 8 is the sigset size the kernel expects here --
     * confirm against the KVM_SET_SIGNAL_MASK documentation.
     */
    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    /* Allocated with qemu_malloc(), so release with qemu_free(). */
    qemu_free(sigmask);

    return r;
}
1317

1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349
/*
 * Assign or deassign an ioeventfd for a 4-byte access at addr that matches
 * val (KVM_IOEVENTFD_FLAG_DATAMATCH, without KVM_IOEVENTFD_FLAG_PIO).
 *
 * fd      eventfd descriptor placed in the request
 * addr    address to match
 * val     data value to match
 * assign  true to register, false to add KVM_IOEVENTFD_FLAG_DEASSIGN
 *
 * Returns 0 on success, -ENOSYS when KVM is not enabled or KVM_IOEVENTFD is
 * not defined at build time, or the negative error code returned by
 * kvm_vm_ioctl().
 */
int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
{
#ifdef KVM_IOEVENTFD
    int ret;
    /* Designated initialiser zero-fills every member not named here, so no
     * indeterminate bytes are handed to the kernel. */
    struct kvm_ioeventfd iofd = {
        .datamatch = val,
        .addr = addr,
        .len = 4,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH,
        .fd = fd,
    };

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    /* kvm_vm_ioctl() already returns the negated errno; pass it through
     * instead of re-reading errno. */
    if (ret < 0) {
        return ret;
    }

    return 0;
#else
    return -ENOSYS;
#endif
}

1350 1351
/*
 * Assign or deassign an ioeventfd for a 2-byte port I/O access at addr that
 * matches val (KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO).
 *
 * fd      eventfd descriptor placed in the request
 * addr    port address to match
 * val     data value to match
 * assign  true to register, false to add KVM_IOEVENTFD_FLAG_DEASSIGN
 *
 * Returns 0 on success, -ENOSYS when KVM is not enabled or KVM_IOEVENTFD is
 * not defined at build time, or the negative value returned by
 * kvm_vm_ioctl().
 */
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
#ifndef KVM_IOEVENTFD
    return -ENOSYS;
#else
    struct kvm_ioeventfd req = {
        .fd = fd,
        .addr = addr,
        .len = 2,
        .datamatch = val,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO |
                 (assign ? 0 : KVM_IOEVENTFD_FLAG_DEASSIGN),
    };
    int rc;

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    rc = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &req);
    return (rc < 0) ? rc : 0;
#endif
}