kvm-all.c 35.7 KB
Newer Older
A
aliguori 已提交
1 2 3 4
/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
5
 *           Red Hat, Inc. 2008
A
aliguori 已提交
6 7 8
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *  Glauber Costa     <gcosta@redhat.com>
A
aliguori 已提交
10 11 12 13 14 15 16 17 18
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
19
#include <stdarg.h>
A
aliguori 已提交
20 21 22 23

#include <linux/kvm.h>

#include "qemu-common.h"
24
#include "qemu-barrier.h"
A
aliguori 已提交
25
#include "sysemu.h"
J
Jan Kiszka 已提交
26
#include "hw/hw.h"
27
#include "gdbstub.h"
A
aliguori 已提交
28
#include "kvm.h"
29
#include "bswap.h"
A
aliguori 已提交
30

31 32 33 34 35
/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif

A
aliguori 已提交
36 37 38
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

A
aliguori 已提交
39 40 41
//#define DEBUG_KVM

/*
 * Debug tracing helper: DPRINTF() prints to stderr when DEBUG_KVM is
 * defined and expands to an empty statement otherwise.
 */
#ifndef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif

A
aliguori 已提交
49 50
typedef struct KVMSlot
{
A
Anthony Liguori 已提交
51 52 53
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
A
aliguori 已提交
54 55 56
    int slot;
    int flags;
} KVMSlot;
A
aliguori 已提交
57

58 59
typedef struct kvm_dirty_log KVMDirtyLog;

A
aliguori 已提交
60 61 62 63 64
/*
 * Global KVM state: the memory slot table, the /dev/kvm and VM file
 * descriptors, and the capability results probed by kvm_init().
 */
struct KVMState {
    KVMSlot slots[32];
    int fd;                         /* descriptor for /dev/kvm */
    int vmfd;                       /* descriptor from KVM_CREATE_VM */
    int coalesced_mmio;             /* KVM_CAP_COALESCED_MMIO result */
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
    bool coalesced_flush_in_progress;   /* guards nested ring flushes */
    int broken_set_mem_region;      /* 0 if KVM_CAP_JOIN_MEMORY_REGIONS_WORKS */
    int migration_log;              /* forces dirty logging on all slots */
    int vcpu_events;
    int robust_singlestep;
    int debugregs;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
    int xsave;
    int xcrs;
    int many_ioeventfds;            /* result of kvm_check_many_ioeventfds() */
};

82
KVMState *kvm_state;
A
aliguori 已提交
83

84 85 86 87 88 89
/*
 * Capabilities that kvm_init() checks via kvm_check_extension_list();
 * initialization fails with -EINVAL if any of them is missing.  The list
 * is terminated by KVM_CAP_LAST_INFO.
 */
static const KVMCapabilityInfo kvm_required_capabilites[] = {
    KVM_CAP_INFO(USER_MEMORY),
    KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
    KVM_CAP_LAST_INFO
};

A
aliguori 已提交
90 91 92 93 94
/*
 * Return the first slot whose memory_size is 0.
 *
 * If every slot is in use a message is printed and the process aborts, so
 * the function never returns NULL.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    KVMSlot *candidate = s->slots;
    KVMSlot *const end = s->slots + ARRAY_SIZE(s->slots);

    for (; candidate != end; candidate++) {
        if (candidate->memory_size == 0) {
            return candidate;
        }
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

/*
 * Find the slot that covers exactly [start_addr, end_addr).
 *
 * Returns the slot, or NULL if no slot has precisely these bounds.
 */
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    KVMSlot *slot = s->slots;
    KVMSlot *const end = s->slots + ARRAY_SIZE(s->slots);

    for (; slot != end; slot++) {
        if (slot->start_addr != start_addr) {
            continue;
        }
        if (slot->start_addr + slot->memory_size == end_addr) {
            return slot;
        }
    }

    return NULL;
}

122 123 124 125
/*
 * Among the in-use slots that intersect [start_addr, end_addr), return the
 * one with the lowest start address, or NULL if nothing overlaps.
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *lowest = NULL;
    int n;

    for (n = 0; n < ARRAY_SIZE(s->slots); n++) {
        KVMSlot *slot = &s->slots[n];
        bool in_use = slot->memory_size != 0;
        /* Only slots starting at or below the current best can replace it. */
        bool may_replace = !lowest || lowest->start_addr >= slot->start_addr;

        if (in_use && may_replace &&
            end_addr > slot->start_addr &&
            start_addr < slot->start_addr + slot->memory_size) {
            lowest = slot;
        }
    }

    return lowest;
}

149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
/*
 * Translate a ram address into a guest physical address using the slot
 * table.
 *
 * On a hit, *phys_addr receives the translated address and 1 is returned.
 * If no slot's [phys_offset, phys_offset + memory_size) range contains
 * ram_addr, *phys_addr is left untouched and 0 is returned.
 */
int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
                                      target_phys_addr_t *phys_addr)
{
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(s->slots); idx++) {
        KVMSlot *slot = &s->slots[idx];

        if (ram_addr < slot->phys_offset) {
            continue;
        }
        if (ram_addr >= slot->phys_offset + slot->memory_size) {
            continue;
        }

        *phys_addr = slot->start_addr + (ram_addr - slot->phys_offset);
        return 1;
    }

    return 0;
}

167 168 169 170 171 172 173
/*
 * Push one slot's description to the kernel via KVM_SET_USER_MEMORY_REGION.
 *
 * The slot's own flags are used, with KVM_MEM_LOG_DIRTY_PAGES added while
 * s->migration_log is set.  Returns the result of kvm_vm_ioctl().
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region region;

    region.slot = slot->slot;
    region.flags = slot->flags;
    if (s->migration_log) {
        region.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    region.guest_phys_addr = slot->start_addr;
    region.memory_size = slot->memory_size;
    region.userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset);

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
}

J
Jan Kiszka 已提交
182 183 184 185
/*
 * Reset callback registered by kvm_init_vcpu(); @opaque is the CPUState
 * that was passed at registration time.
 */
static void kvm_reset_vcpu(void *opaque)
{
    CPUState *cpu = opaque;

    kvm_arch_reset_vcpu(cpu);
}
188

189 190 191 192 193 194 195 196 197 198
int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}

A
aliguori 已提交
199 200 201 202 203 204
/*
 * Create the KVM vcpu for @env, map its kvm_run area and run the
 * architecture-specific vcpu setup.
 *
 * Returns 0 on success or the negative error of the step that failed.
 * Nothing acquired earlier is released on failure.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long run_size;
    int rc;

    DPRINTF("kvm_init_vcpu\n");

    rc = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (rc < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        return rc;
    }

    env->kvm_fd = rc;
    env->kvm_state = s;
    env->kvm_vcpu_dirty = 1;

    run_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (run_size < 0) {
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        return run_size;
    }

    env->kvm_run = mmap(NULL, run_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        rc = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        return rc;
    }

    /*
     * The coalesced MMIO ring is located s->coalesced_mmio pages into the
     * kvm_run mapping; only the first vcpu to get here records it.
     */
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
        s->coalesced_mmio_ring =
            (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
    }

    rc = kvm_arch_init_vcpu(env);
    if (rc != 0) {
        return rc;
    }

    qemu_register_reset(kvm_reset_vcpu, env);
    kvm_arch_reset_vcpu(env);
    return 0;
}

246 247 248
/*
 * dirty pages logging control
 */
249 250 251 252 253 254 255

/*
 * Build the slot flags for the requested logging mode:
 * KVM_MEM_LOG_DIRTY_PAGES if @log_dirty is set, 0 otherwise.  @s is unused.
 */
static int kvm_mem_flags(KVMState *s, bool log_dirty)
{
    if (log_dirty) {
        return KVM_MEM_LOG_DIRTY_PAGES;
    }
    return 0;
}

/*
 * Switch dirty logging on or off for one slot.
 *
 * The slot's stored flags are always updated.  The kernel is only told
 * when the flags that would be sent (the slot's flags plus the global
 * migration_log override) differ from the previously stored ones.
 * Returns 0 if no ioctl was needed, otherwise the ioctl result.
 */
static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
{
    KVMState *s = kvm_state;
    const int log_bit = KVM_MEM_LOG_DIRTY_PAGES;
    const int previous = mem->flags;
    int effective;

    mem->flags = (previous & ~log_bit) | kvm_mem_flags(s, log_dirty);

    /* If nothing changed effectively, no need to issue ioctl */
    effective = s->migration_log ? (mem->flags | log_bit) : mem->flags;
    if (effective == previous) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
/*
 * Change dirty logging for the slot covering exactly
 * [phys_addr, phys_addr + size).
 *
 * Returns -EINVAL (after printing a diagnostic) if no slot matches those
 * bounds, otherwise the result of kvm_slot_dirty_pages_log_change().
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, bool log_dirty)
{
    KVMSlot *slot = kvm_lookup_matching_slot(kvm_state, phys_addr,
                                             phys_addr + size);

    if (slot != NULL) {
        return kvm_slot_dirty_pages_log_change(slot, log_dirty);
    }

    fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
            TARGET_FMT_plx "\n", __func__, phys_addr,
            (target_phys_addr_t)(phys_addr + size - 1));
    return -EINVAL;
}

293 294
/* CPUPhysMemoryClient hook: enable dirty logging for the given range. */
static int kvm_log_start(CPUPhysMemoryClient *client,
                         target_phys_addr_t phys_addr, ram_addr_t size)
{
    const bool log_dirty = true;

    return kvm_dirty_pages_log_change(phys_addr, size, log_dirty);
}

299 300
/* CPUPhysMemoryClient hook: disable dirty logging for the given range. */
static int kvm_log_stop(CPUPhysMemoryClient *client,
                        target_phys_addr_t phys_addr, ram_addr_t size)
{
    const bool log_dirty = false;

    return kvm_dirty_pages_log_change(phys_addr, size, log_dirty);
}

305
static int kvm_set_migration_log(int enable)
306 307 308 309 310 311 312 313 314 315
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

316 317 318
        if (!mem->memory_size) {
            continue;
        }
319 320 321 322 323 324 325 326 327 328 329
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

330 331 332 333 334
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned long *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
A
Alexander Graf 已提交
335
{
336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
    unsigned int i, j;
    unsigned long page_number, addr, addr1, c;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
        HOST_LONG_BITS;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        if (bitmap[i] != 0) {
            c = leul_to_cpu(bitmap[i]);
            do {
                j = ffsl(c) - 1;
                c &= ~(1ul << j);
                page_number = i * HOST_LONG_BITS + j;
                addr1 = page_number * TARGET_PAGE_SIZE;
                addr = offset + addr1;
                ram_addr = cpu_get_physical_page_desc(addr);
                cpu_physical_memory_set_dirty(ram_addr);
            } while (c != 0);
        }
    }
    return 0;
A
Alexander Graf 已提交
361 362
}

363 364
/* Round x up to the next multiple of y; y must be a power of two. */
#define ALIGN(x, y)  (((x) + ((y) - 1)) & ~((y) - 1))

365 366 367 368 369
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
 * This means all bits are set to dirty.
 *
370
 * @start_add: start of logged region.
371 372
 * @end_addr: end of logged region.
 */
373
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
J
Jan Kiszka 已提交
374
                                          target_phys_addr_t end_addr)
375 376
{
    KVMState *s = kvm_state;
377 378 379 380
    unsigned long size, allocated_size = 0;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;
381

382 383 384 385 386 387
    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }
388

389 390 391 392 393 394 395 396 397 398 399 400 401 402
        /* XXX bad kernel interface alert
         * For dirty bitmap, kernel allocates array of size aligned to
         * bits-per-long.  But for case when the kernel is 64bits and
         * the userspace is 32bits, userspace can't align to the same
         * bits-per-long, since sizeof(long) is different between kernel
         * and user space.  This way, userspace will provide buffer which
         * may be 4 bytes less than the kernel will use, resulting in
         * userspace memory corruption (which is not detectable by valgrind
         * too, in most cases).
         * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
         * a hope that sizeof(long) wont become >8 any time soon.
         */
        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
                     /*HOST_LONG_BITS*/ 64) / 8;
403
        if (!d.dirty_bitmap) {
404
            d.dirty_bitmap = g_malloc(size);
405
        } else if (size > allocated_size) {
406
            d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
407 408 409
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);
410

411
        d.slot = mem->slot;
412

413
        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
414
            DPRINTF("ioctl failed %d\n", errno);
415 416 417
            ret = -1;
            break;
        }
418

419 420 421
        kvm_get_dirty_pages_log_range(mem->start_addr, d.dirty_bitmap,
                                      mem->start_addr, mem->memory_size);
        start_addr = mem->start_addr + mem->memory_size;
422
    }
423
    g_free(d.dirty_bitmap);
424 425

    return ret;
426 427
}

A
Anthony Liguori 已提交
428
/*
 * Register [start, start + size) as a coalesced MMIO zone.
 *
 * Returns -ENOSYS if coalesced MMIO is not available, otherwise the result
 * of the KVM_REGISTER_COALESCED_MMIO ioctl.
 */
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
}

A
Anthony Liguori 已提交
445
/*
 * Remove [start, start + size) from the coalesced MMIO zones.
 *
 * Returns -ENOSYS if coalesced MMIO is not available, otherwise the result
 * of the KVM_UNREGISTER_COALESCED_MMIO ioctl.
 */
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
}

462 463 464 465 466 467 468 469 470 471 472 473
/*
 * Query KVM_CHECK_EXTENSION for @extension.
 *
 * A failing ioctl is reported as 0, so the result is never negative.
 */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int value = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    return value < 0 ? 0 : value;
}

474 475
/*
 * Probe whether seven ioeventfds can be registered at the same time.
 *
 * Returns 1 if all of them could be set up, 0 otherwise (always 0 when
 * built without CONFIG_EVENTFD).  Every probe registration is undone and
 * its descriptor closed before returning.
 */
static int kvm_check_many_ioeventfds(void)
{
    /* Userspace can use ioeventfd for io notification.  This requires a host
     * that supports eventfd(2) and an I/O thread; since eventfd does not
     * support SIGIO it cannot interrupt the vcpu.
     *
     * Older kernels have a 6 device limit on the KVM io bus.  Find out so we
     * can avoid creating too many ioeventfds.
     */
#if defined(CONFIG_EVENTFD)
    int fds[7];
    int registered = 0;
    int many;

    while (registered < ARRAY_SIZE(fds)) {
        int fd = eventfd(0, EFD_CLOEXEC);

        fds[registered] = fd;
        if (fd < 0) {
            break;
        }
        if (kvm_set_ioeventfd_pio_word(fd, 0, registered, true) < 0) {
            close(fd);
            break;
        }
        registered++;
    }

    /* Decide whether many devices are supported or not */
    many = registered == ARRAY_SIZE(fds);

    /* Tear down in reverse order of registration. */
    for (registered--; registered >= 0; registered--) {
        kvm_set_ioeventfd_pio_word(fds[registered], 0, registered, false);
        close(fds[registered]);
    }
    return many;
#else
    return 0;
#endif
}

511 512 513 514 515 516 517 518 519 520 521 522
/*
 * Check every capability in @list, which ends at the first entry with a
 * NULL name.
 *
 * Returns the first entry for which kvm_check_extension() yields 0, or
 * NULL if all of them are supported.
 */
static const KVMCapabilityInfo *
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
{
    const KVMCapabilityInfo *cap;

    for (cap = list; cap->name; cap++) {
        if (!kvm_check_extension(s, cap->value)) {
            return cap;
        }
    }
    return NULL;
}

J
Jan Kiszka 已提交
523
/*
 * Make the KVM slot table reflect a new mapping of
 * [start_addr, start_addr + size) to @phys_offset.
 *
 * Every existing slot overlapping the range is unregistered; the parts of
 * it that lie outside the range are re-registered as prefix/suffix slots.
 * Finally, unless the range is not RAM-like (flags >= IO_MEM_UNASSIGNED)
 * or has been fully consumed by the old-kernel workaround, a slot for the
 * new range is registered.  Any ioctl failure aborts the process.
 */
static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                             ram_addr_t phys_offset, bool log_dirty)
{
    KVMState *kvm = kvm_state;
    /* Taken before IO_MEM_ROM is masked out of phys_offset below. */
    const ram_addr_t io_flags = phys_offset & ~TARGET_PAGE_MASK;
    const bool kvm_visible = io_flags < IO_MEM_UNASSIGNED;
    KVMSlot *slot, prev;
    int rc;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. */
    size = TARGET_PAGE_ALIGN(size);
    start_addr = TARGET_PAGE_ALIGN(start_addr);

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while ((slot = kvm_lookup_overlapping_slot(kvm, start_addr,
                                               start_addr + size)) != NULL) {
        if (kvm_visible && start_addr >= slot->start_addr &&
            (start_addr + size <= slot->start_addr + slot->memory_size) &&
            (phys_offset - start_addr ==
             slot->phys_offset - slot->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - update flags and done. */
            kvm_slot_dirty_pages_log_change(slot, log_dirty);
            return;
        }

        prev = *slot;

        /* unregister the overlapping slot */
        slot->memory_size = 0;
        rc = kvm_set_user_memory_region(kvm, slot);
        if (rc) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-rc));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (kvm->broken_set_mem_region &&
            prev.start_addr == start_addr && prev.memory_size < size &&
            kvm_visible) {
            slot = kvm_alloc_slot(kvm);
            slot->memory_size = prev.memory_size;
            slot->start_addr = prev.start_addr;
            slot->phys_offset = prev.phys_offset;
            slot->flags = kvm_mem_flags(kvm, log_dirty);

            rc = kvm_set_user_memory_region(kvm, slot);
            if (rc) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-rc));
                abort();
            }

            /* The re-registered piece is done; shrink the request. */
            start_addr += prev.memory_size;
            phys_offset += prev.memory_size;
            size -= prev.memory_size;
            continue;
        }

        /* register prefix slot */
        if (prev.start_addr < start_addr) {
            slot = kvm_alloc_slot(kvm);
            slot->memory_size = start_addr - prev.start_addr;
            slot->start_addr = prev.start_addr;
            slot->phys_offset = prev.phys_offset;
            slot->flags = kvm_mem_flags(kvm, log_dirty);

            rc = kvm_set_user_memory_region(kvm, slot);
            if (rc) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-rc));
#ifdef TARGET_PPC
                fprintf(stderr, "%s: This is probably because your kernel's " \
                                "PAGE_SIZE is too big. Please try to use 4k " \
                                "PAGE_SIZE!\n", __func__);
#endif
                abort();
            }
        }

        /* register suffix slot */
        if (prev.start_addr + prev.memory_size > start_addr + size) {
            ram_addr_t skipped;

            slot = kvm_alloc_slot(kvm);
            slot->start_addr = start_addr + size;
            skipped = slot->start_addr - prev.start_addr;
            slot->memory_size = prev.memory_size - skipped;
            slot->phys_offset = prev.phys_offset + skipped;
            slot->flags = kvm_mem_flags(kvm, log_dirty);

            rc = kvm_set_user_memory_region(kvm, slot);
            if (rc) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-rc));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (size == 0) {
        return;
    }
    /* KVM does not need to know about this memory */
    if (!kvm_visible) {
        return;
    }

    slot = kvm_alloc_slot(kvm);
    slot->memory_size = size;
    slot->start_addr = start_addr;
    slot->phys_offset = phys_offset;
    slot->flags = kvm_mem_flags(kvm, log_dirty);

    rc = kvm_set_user_memory_region(kvm, slot);
    if (rc) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-rc));
        abort();
    }
}

658
static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
J
Jan Kiszka 已提交
659
                                  target_phys_addr_t start_addr,
660 661
                                  ram_addr_t size, ram_addr_t phys_offset,
                                  bool log_dirty)
662
{
663
    kvm_set_phys_mem(start_addr, size, phys_offset, log_dirty);
664 665 666
}

/*
 * CPUPhysMemoryClient hook: forwards to kvm_physical_sync_dirty_bitmap()
 * and returns its result; @client is unused.
 */
static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    (void)client;

    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

/*
 * CPUPhysMemoryClient hook: forwards to kvm_set_migration_log() and
 * returns its result; @client is unused.
 */
static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    (void)client;

    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
J
Jan Kiszka 已提交
680 681 682
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
683 684
    .log_start = kvm_log_start,
    .log_stop = kvm_log_stop,
685 686
};

687 688 689 690 691 692 693 694 695
/*
 * Interrupt handler installed by kvm_init(): records @mask in the CPU's
 * interrupt_request and kicks the CPU unless the caller is that CPU itself.
 */
static void kvm_handle_interrupt(CPUState *env, int mask)
{
    env->interrupt_request |= mask;

    if (qemu_cpu_is_self(env)) {
        return;
    }
    qemu_cpu_kick(env);
}

696
int kvm_init(void)
A
aliguori 已提交
697
{
698 699 700
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
A
aliguori 已提交
701
    KVMState *s;
702
    const KVMCapabilityInfo *missing_cap;
A
aliguori 已提交
703 704 705
    int ret;
    int i;

706
    s = g_malloc0(sizeof(KVMState));
A
aliguori 已提交
707

708
#ifdef KVM_CAP_SET_GUEST_DEBUG
B
Blue Swirl 已提交
709
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
710
#endif
J
Jan Kiszka 已提交
711
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
A
aliguori 已提交
712
        s->slots[i].slot = i;
J
Jan Kiszka 已提交
713
    }
A
aliguori 已提交
714
    s->vmfd = -1;
K
Kevin Wolf 已提交
715
    s->fd = qemu_open("/dev/kvm", O_RDWR);
A
aliguori 已提交
716 717 718 719 720 721 722 723
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
J
Jan Kiszka 已提交
724
        if (ret > 0) {
A
aliguori 已提交
725
            ret = -EINVAL;
J
Jan Kiszka 已提交
726
        }
A
aliguori 已提交
727 728 729 730 731 732 733 734 735 736 737
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
738 739 740 741 742
    if (s->vmfd < 0) {
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
A
aliguori 已提交
743
        goto err;
744
    }
A
aliguori 已提交
745

746 747 748 749
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
    if (!missing_cap) {
        missing_cap =
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
A
aliguori 已提交
750
    }
751
    if (missing_cap) {
752
        ret = -EINVAL;
753 754
        fprintf(stderr, "kvm does not support %s\n%s",
                missing_cap->name, upgrade_note);
755 756 757
        goto err;
    }

758
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
A
aliguori 已提交
759

760
    s->broken_set_mem_region = 1;
761
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
762 763 764 765
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }

766 767 768 769
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

770 771 772
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);

773 774 775 776
#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

777 778 779 780 781 782 783 784
#ifdef KVM_CAP_XSAVE
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
#endif

#ifdef KVM_CAP_XCRS
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
#endif

785
    ret = kvm_arch_init(s);
J
Jan Kiszka 已提交
786
    if (ret < 0) {
A
aliguori 已提交
787
        goto err;
J
Jan Kiszka 已提交
788
    }
A
aliguori 已提交
789 790

    kvm_state = s;
791
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
A
aliguori 已提交
792

793 794
    s->many_ioeventfds = kvm_check_many_ioeventfds();

795 796
    cpu_interrupt_handler = kvm_handle_interrupt;

A
aliguori 已提交
797 798 799 800
    return 0;

err:
    if (s) {
J
Jan Kiszka 已提交
801
        if (s->vmfd != -1) {
A
aliguori 已提交
802
            close(s->vmfd);
J
Jan Kiszka 已提交
803 804
        }
        if (s->fd != -1) {
A
aliguori 已提交
805
            close(s->fd);
J
Jan Kiszka 已提交
806
        }
A
aliguori 已提交
807
    }
808
    g_free(s);
A
aliguori 已提交
809 810 811 812

    return ret;
}

813 814
static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
                          uint32_t count)
A
aliguori 已提交
815 816 817 818 819 820 821 822
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
823
                stb_p(ptr, cpu_inb(port));
A
aliguori 已提交
824 825
                break;
            case 2:
826
                stw_p(ptr, cpu_inw(port));
A
aliguori 已提交
827 828
                break;
            case 4:
829
                stl_p(ptr, cpu_inl(port));
A
aliguori 已提交
830 831 832 833 834
                break;
            }
        } else {
            switch (size) {
            case 1:
835
                cpu_outb(port, ldub_p(ptr));
A
aliguori 已提交
836 837
                break;
            case 2:
838
                cpu_outw(port, lduw_p(ptr));
A
aliguori 已提交
839 840
                break;
            case 4:
841
                cpu_outl(port, ldl_p(ptr));
A
aliguori 已提交
842 843 844 845 846 847 848 849
                break;
            }
        }

        ptr += size;
    }
}

J
Jan Kiszka 已提交
850
/*
 * Report a KVM internal error on stderr.
 *
 * Returns EXCP_INTERRUPT (after dumping the CPU state) for an emulation
 * failure on which kvm_arch_stop_on_emulation_error() says not to stop,
 * and -1 in every other case.
 */
static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
{
    fprintf(stderr, "KVM internal error.");
    if (!kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        fprintf(stderr, "\n");
    } else {
        int idx;

        fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
        for (idx = 0; idx < run->internal.ndata; ++idx) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    idx, (uint64_t)run->internal.data[idx]);
        }
    }

    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    if (run->internal.suberror != KVM_INTERNAL_ERROR_EMULATION) {
        return -1;
    }

    fprintf(stderr, "emulation failure\n");
    if (kvm_arch_stop_on_emulation_error(env)) {
        return -1;
    }

    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
    return EXCP_INTERRUPT;
}

877
void kvm_flush_coalesced_mmio_buffer(void)
A
aliguori 已提交
878 879
{
    KVMState *s = kvm_state;
880 881 882 883 884 885 886

    if (s->coalesced_flush_in_progress) {
        return;
    }

    s->coalesced_flush_in_progress = true;

887 888
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
A
aliguori 已提交
889 890 891 892 893 894
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
895
            smp_wmb();
A
aliguori 已提交
896 897 898
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
899 900

    s->coalesced_flush_in_progress = false;
A
aliguori 已提交
901 902
}

903
static void do_kvm_cpu_synchronize_state(void *_env)
904
{
905 906
    CPUState *env = _env;

J
Jan Kiszka 已提交
907
    if (!env->kvm_vcpu_dirty) {
908
        kvm_arch_get_registers(env);
J
Jan Kiszka 已提交
909
        env->kvm_vcpu_dirty = 1;
910 911 912
    }
}

913 914
/*
 * Make sure QEMU's copy of the vcpu registers is current: if the vcpu is
 * not marked dirty, run do_kvm_cpu_synchronize_state() on it.
 */
void kvm_cpu_synchronize_state(CPUState *env)
{
    if (env->kvm_vcpu_dirty) {
        return;
    }

    run_on_cpu(env, do_kvm_cpu_synchronize_state, env);
}

920 921 922 923 924 925 926 927 928 929 930 931
/* Write the reset-level register state to KVM and clear the dirty mark. */
void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    const int level = KVM_PUT_RESET_STATE;

    kvm_arch_put_registers(env, level);
    env->kvm_vcpu_dirty = 0;
}

/* Write the full register state to KVM and clear the dirty mark. */
void kvm_cpu_synchronize_post_init(CPUState *env)
{
    const int level = KVM_PUT_FULL_STATE;

    kvm_arch_put_registers(env, level);
    env->kvm_vcpu_dirty = 0;
}

A
aliguori 已提交
932 933 934
/*
 * Run the vCPU described by env inside KVM until an exit needs attention
 * outside this loop.
 *
 * Each iteration writes back dirty registers, issues KVM_RUN with the
 * iothread mutex released, flushes the coalesced MMIO buffer and then
 * dispatches on run->exit_reason.  The loop repeats while the handler for
 * the exit yields 0.
 *
 * Returns EXCP_HLT when kvm_arch_process_async_events() reports a non-zero
 * result, otherwise the non-zero value that ended the loop (for example
 * EXCP_INTERRUPT).  A negative value additionally dumps the CPU state and
 * stops the VM with RUN_STATE_INTERNAL_ERROR.  A KVM_RUN failure other than
 * -EINTR/-EAGAIN is fatal: it is reported on stderr and the process aborts.
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret, run_ret;

    DPRINTF("kvm_cpu_exec()\n");

    if (kvm_arch_process_async_events(env)) {
        env->exit_request = 0;
        return EXCP_HLT;
    }

    cpu_single_env = env;

    do {
        /* Write back registers that were fetched/modified outside KVM. */
        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            /*
             * KVM requires us to reenter the kernel after IO exits to complete
             * instruction emulation. This self-signal will ensure that we
             * leave ASAP again.
             */
            qemu_cpu_kick_self();
        }
        cpu_single_env = NULL;
        qemu_mutex_unlock_iothread();

        run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);

        qemu_mutex_lock_iothread();
        cpu_single_env = env;
        kvm_arch_post_run(env, run);

        kvm_flush_coalesced_mmio_buffer();

        if (run_ret < 0) {
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
                DPRINTF("io window exit\n");
                ret = EXCP_INTERRUPT;
                break;
            }
            /*
             * Always report the reason before aborting; DPRINTF would be
             * compiled out unless DEBUG_KVM is defined.
             */
            fprintf(stderr, "error: kvm run failed %s\n",
                    strerror(-run_ret));
            abort();
        }

        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            kvm_handle_io(run->io.port,
                          (uint8_t *)run + run->io.data_offset,
                          run->io.direction,
                          run->io.size,
                          run->io.count);
            ret = 0;
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 0;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_UNKNOWN:
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
                    (uint64_t)run->hw.hardware_exit_reason);
            ret = -1;
            break;
        case KVM_EXIT_INTERNAL_ERROR:
            ret = kvm_handle_internal_error(env, run);
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret == 0);

    if (ret < 0) {
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
        vm_stop(RUN_STATE_INTERNAL_ERROR);
    }

    env->exit_request = 0;
    cpu_single_env = NULL;
    return ret;
}

1035
/*
 * Issue an ioctl on s->fd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *payload;
    int rc;

    va_start(args, type);
    payload = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->fd, type, payload);
    return rc == -1 ? -errno : rc;
}

1052
/*
 * Issue an ioctl on s->vmfd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *payload;
    int rc;

    va_start(args, type);
    payload = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->vmfd, type, payload);
    return rc == -1 ? -errno : rc;
}

1069
/*
 * Issue an ioctl on env->kvm_fd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    va_list args;
    void *payload;
    int rc;

    va_start(args, type);
    payload = va_arg(args, void *);
    va_end(args);

    rc = ioctl(env->kvm_fd, type, payload);
    return rc == -1 ? -errno : rc;
}
A
aliguori 已提交
1085 1086 1087

int kvm_has_sync_mmu(void)
{
1088
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
A
aliguori 已提交
1089
}
1090

1091 1092 1093 1094 1095
/* Return the vcpu_events field of the global kvm_state. */
int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

1096 1097 1098 1099 1100
/* Return the robust_singlestep field of the global kvm_state. */
int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

1101 1102 1103 1104 1105
/* Return the debugregs field of the global kvm_state. */
int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/* Return the xsave field of the global kvm_state. */
int kvm_has_xsave(void)
{
    return kvm_state->xsave;
}

/* Return the xcrs field of the global kvm_state. */
int kvm_has_xcrs(void)
{
    return kvm_state->xcrs;
}

1116 1117 1118 1119 1120 1121 1122 1123
/*
 * Return the many_ioeventfds field of the global kvm_state, or 0 when
 * kvm_enabled() is false (kvm_state is not dereferenced in that case).
 */
int kvm_has_many_ioeventfds(void)
{
    return kvm_enabled() ? kvm_state->many_ioeventfds : 0;
}

1124 1125 1126
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
A
Andreas Färber 已提交
1127
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
1128 1129

        if (ret) {
A
Andreas Färber 已提交
1130 1131 1132
            perror("qemu_madvise");
            fprintf(stderr,
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
1133 1134 1135 1136 1137
            exit(1);
        }
    }
}

1138 1139 1140 1141 1142 1143
#ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * Look up the software breakpoint registered at address pc.
 *
 * Walks env->kvm_state->kvm_sw_breakpoints and returns the first entry
 * whose pc matches, or NULL when there is none.
 */
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *match = NULL;
    struct kvm_sw_breakpoint *cur;

    QTAILQ_FOREACH(cur, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (cur->pc == pc) {
            match = cur;
            break;
        }
    }

    return match;
}

/*
 * Return 1 when env->kvm_state->kvm_sw_breakpoints holds at least one
 * entry, 0 when the list is empty.
 */
int kvm_sw_breakpoints_active(CPUState *env)
{
    if (QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints)) {
        return 0;
    }
    return 1;
}

G
Glauber Costa 已提交
1157 1158 1159 1160 1161 1162 1163 1164 1165
/*
 * Argument bundle handed to kvm_invoke_set_guest_debug() through
 * run_on_cpu().
 */
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg; /* payload of the KVM_SET_GUEST_DEBUG ioctl */
    CPUState *env;              /* vCPU the ioctl is issued on */
    int err;                    /* out: return value of kvm_vcpu_ioctl() */
};

/*
 * run_on_cpu() callback: issue KVM_SET_GUEST_DEBUG for the vCPU named in
 * the request and store the ioctl result in its err field.
 *
 * data points to a struct kvm_set_guest_debug_data.
 */
static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *req = data;

    req->err = kvm_vcpu_ioctl(req->env, KVM_SET_GUEST_DEBUG, &req->dbg);
}

1171 1172
/*
 * Build the guest-debug control word for env and apply it with
 * KVM_SET_GUEST_DEBUG.
 *
 * The control word starts from reinject_trap, gains
 * KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP when
 * env->singlestep_enabled is set, and is then passed to
 * kvm_arch_update_guest_debug() before the ioctl is dispatched through
 * run_on_cpu().
 *
 * Returns the result of the KVM_SET_GUEST_DEBUG ioctl.
 */
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data req;

    req.env = env;
    req.dbg.control = reinject_trap;
    if (env->singlestep_enabled) {
        req.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &req.dbg);

    run_on_cpu(env, kvm_invoke_set_guest_debug, &req);

    return req.err;
}

/*
 * Insert a breakpoint and refresh the guest-debug settings of every vCPU.
 *
 * For GDB_BREAKPOINT_SW an existing breakpoint at addr only has its
 * use_count incremented; otherwise a new kvm_sw_breakpoint is allocated,
 * installed with kvm_arch_insert_sw_breakpoint() and linked at the head of
 * current_env->kvm_state->kvm_sw_breakpoints.  Every other type is passed
 * to kvm_arch_insert_hw_breakpoint().
 *
 * Returns 0 on success, or the first non-zero error reported by the arch
 * hooks or by kvm_update_guest_debug().
 */
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        /* g_malloc() aborts on allocation failure, so no NULL check. */
        bp = g_malloc(sizeof(struct kvm_sw_breakpoint));

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            g_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

/*
 * Remove a breakpoint and refresh the guest-debug settings of every vCPU.
 *
 * For GDB_BREAKPOINT_SW the breakpoint at addr must exist (-ENOENT
 * otherwise).  While its use_count is above 1 only the count is
 * decremented and 0 is returned without touching the vCPUs; the last
 * reference removes it via kvm_arch_remove_sw_breakpoint(), unlinks it from
 * the list and frees it.  Every other type is passed to
 * kvm_arch_remove_hw_breakpoint().
 *
 * Returns 0 on success, or the first non-zero error encountered.
 */
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *sw_bp;
    CPUState *cpu;
    int rc;

    if (type != GDB_BREAKPOINT_SW) {
        rc = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (rc) {
            return rc;
        }
    } else {
        sw_bp = kvm_find_sw_breakpoint(current_env, addr);
        if (sw_bp == NULL) {
            return -ENOENT;
        }

        /* Other users remain: just drop one reference. */
        if (sw_bp->use_count > 1) {
            sw_bp->use_count--;
            return 0;
        }

        rc = kvm_arch_remove_sw_breakpoint(current_env, sw_bp);
        if (rc) {
            return rc;
        }

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, sw_bp,
                      entry);
        g_free(sw_bp);
    }

    cpu = first_cpu;
    while (cpu != NULL) {
        rc = kvm_update_guest_debug(cpu, 0);
        if (rc) {
            return rc;
        }
        cpu = cpu->next_cpu;
    }
    return 0;
}

/*
 * Remove every software and hardware breakpoint and refresh the
 * guest-debug settings of every vCPU.
 *
 * Each software breakpoint is removed through
 * kvm_arch_remove_sw_breakpoint(), first on current_env and, if that
 * fails, on each vCPU in turn until one succeeds.  The entry is then
 * unlinked from the list and freed regardless of the outcome, so no stale
 * entries are left behind for kvm_find_sw_breakpoint() to match.
 * Errors from kvm_update_guest_debug() are ignored.
 */
void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) {
                    break;
                }
            }
        }
        /* Safe during iteration: 'next' was captured before the body ran. */
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        kvm_update_guest_debug(env, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

/* Stub for builds without KVM_CAP_SET_GUEST_DEBUG: always returns -EINVAL. */
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

/* Stub for builds without KVM_CAP_SET_GUEST_DEBUG: always returns -EINVAL. */
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

/* Stub for builds without KVM_CAP_SET_GUEST_DEBUG: always returns -EINVAL. */
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

/* Stub for builds without KVM_CAP_SET_GUEST_DEBUG: does nothing. */
void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
1319 1320 1321 1322 1323 1324

/*
 * Set the signal mask of the vCPU with KVM_SET_SIGNAL_MASK.
 *
 * A NULL sigset forwards NULL to the ioctl.  Otherwise a temporary
 * struct kvm_signal_mask is allocated, filled with a copy of *sigset,
 * passed to the ioctl and freed again.
 *
 * Returns the result of kvm_vcpu_ioctl().
 */
int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *mask;
    int rc;

    if (sigset == NULL) {
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
    }

    mask = g_malloc(sizeof(*mask) + sizeof(*sigset));

    /*
     * NOTE(review): len is hard-coded to 8 although sizeof(*sigset) bytes
     * are copied; presumably 8 is the sigset size the kernel expects --
     * confirm against the KVM API.
     */
    mask->len = 8;
    memcpy(mask->sigset, sigset, sizeof(*sigset));

    rc = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, mask);
    g_free(mask);

    return rc;
}
1338

1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366
int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
{
    int ret;
    struct kvm_ioeventfd iofd;

    iofd.datamatch = val;
    iofd.addr = addr;
    iofd.len = 4;
    iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH;
    iofd.fd = fd;

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    if (ret < 0) {
        return -errno;
    }

    return 0;
}

1367 1368 1369 1370 1371 1372 1373 1374 1375 1376
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
    struct kvm_ioeventfd kick = {
        .datamatch = val,
        .addr = addr,
        .len = 2,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
        .fd = fd,
    };
    int r;
J
Jan Kiszka 已提交
1377
    if (!kvm_enabled()) {
1378
        return -ENOSYS;
J
Jan Kiszka 已提交
1379 1380
    }
    if (!assign) {
1381
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
J
Jan Kiszka 已提交
1382
    }
1383
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
J
Jan Kiszka 已提交
1384
    if (r < 0) {
1385
        return r;
J
Jan Kiszka 已提交
1386
    }
1387
    return 0;
1388
}
1389 1390 1391 1392 1393 1394 1395 1396 1397 1398

/*
 * Forward env, code and addr unchanged to kvm_arch_on_sigbus_vcpu() and
 * return its result.
 */
int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
    return kvm_arch_on_sigbus_vcpu(env, code, addr);
}

/*
 * Forward code and addr unchanged to kvm_arch_on_sigbus() and return its
 * result.
 */
int kvm_on_sigbus(int code, void *addr)
{
    return kvm_arch_on_sigbus(code, addr);
}