kvm-all.c 48.4 KB
Newer Older
A
aliguori 已提交
1 2 3 4
/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
5
 *           Red Hat, Inc. 2008
A
aliguori 已提交
6 7 8
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *  Glauber Costa     <gcosta@redhat.com>
A
aliguori 已提交
10 11 12 13 14 15 16 17 18
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
19
#include <stdarg.h>
A
aliguori 已提交
20 21 22 23

#include <linux/kvm.h>

#include "qemu-common.h"
24
#include "qemu-barrier.h"
P
Paolo Bonzini 已提交
25 26
#include "qemu-option.h"
#include "qemu-config.h"
A
aliguori 已提交
27
#include "sysemu.h"
J
Jan Kiszka 已提交
28
#include "hw/hw.h"
29
#include "hw/msi.h"
30
#include "gdbstub.h"
A
aliguori 已提交
31
#include "kvm.h"
32
#include "bswap.h"
A
Avi Kivity 已提交
33
#include "memory.h"
34
#include "exec-memory.h"
35
#include "event_notifier.h"
A
aliguori 已提交
36

37 38 39 40 41
/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif

42
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
A
aliguori 已提交
43 44
#define PAGE_SIZE TARGET_PAGE_SIZE

A
aliguori 已提交
45 46 47
//#define DEBUG_KVM

#ifdef DEBUG_KVM
48
#define DPRINTF(fmt, ...) \
A
aliguori 已提交
49 50
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
51
#define DPRINTF(fmt, ...) \
A
aliguori 已提交
52 53 54
    do { } while (0)
#endif

55 56
#define KVM_MSI_HASHTAB_SIZE    256

A
aliguori 已提交
57 58
typedef struct KVMSlot
{
A
Anthony Liguori 已提交
59 60
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
61
    void *ram;
A
aliguori 已提交
62 63 64
    int slot;
    int flags;
} KVMSlot;
A
aliguori 已提交
65

66 67
typedef struct kvm_dirty_log KVMDirtyLog;

A
aliguori 已提交
68 69 70 71 72
struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
A
aliguori 已提交
73
    int coalesced_mmio;
74
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
75
    bool coalesced_flush_in_progress;
76
    int broken_set_mem_region;
77
    int migration_log;
78
    int vcpu_events;
79
    int robust_singlestep;
80
    int debugregs;
81 82 83
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
J
Jan Kiszka 已提交
84
    int pit_state2;
85
    int xsave, xcrs;
86
    int many_ioeventfds;
87 88 89 90
    /* The man page (and posix) say ioctl numbers are signed int, but
     * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
     * unsigned, and treating them as signed here can break things */
    unsigned irqchip_inject_ioctl;
91 92 93 94
#ifdef KVM_CAP_IRQ_ROUTING
    struct kvm_irq_routing *irq_routes;
    int nr_allocated_irq_routes;
    uint32_t *used_gsi_bitmap;
95
    unsigned int gsi_count;
96
    QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
97
    bool direct_msi;
98
#endif
A
aliguori 已提交
99 100
};

101
KVMState *kvm_state;
102
bool kvm_kernel_irqchip;
A
aliguori 已提交
103

104 105 106 107 108 109
static const KVMCapabilityInfo kvm_required_capabilites[] = {
    KVM_CAP_INFO(USER_MEMORY),
    KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
    KVM_CAP_LAST_INFO
};

A
aliguori 已提交
110 111 112 113 114
/* Return the first unused memory slot; a zero memory_size marks a slot
 * as free.  Aborts if the fixed-size slot array is exhausted. */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *slot = &s->slots[i];

        if (slot->memory_size == 0) {
            return slot;
        }
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

/* Find the slot whose boundaries match [start_addr, end_addr) exactly;
 * NULL if no registered slot has these precise bounds. */
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *slot = &s->slots[i];

        if (slot->start_addr == start_addr &&
            slot->start_addr + slot->memory_size == end_addr) {
            return slot;
        }
    }

    return NULL;
}

142 143 144 145
/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *best = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *slot = &s->slots[i];

        /* Skip free slots, and any slot starting above the current best. */
        if (slot->memory_size == 0 ||
            (best && best->start_addr < slot->start_addr)) {
            continue;
        }

        if (end_addr > slot->start_addr &&
            start_addr < slot->start_addr + slot->memory_size) {
            best = slot;
        }
    }

    return best;
}

169 170
/* Translate a host pointer back to the guest physical address it backs.
 * Returns 1 and stores the result in *phys_addr when @ram falls inside a
 * registered slot, 0 otherwise. */
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
                                       target_phys_addr_t *phys_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *slot = &s->slots[i];

        if (ram >= slot->ram && ram < slot->ram + slot->memory_size) {
            *phys_addr = slot->start_addr + (ram - slot->ram);
            return 1;
        }
    }

    return 0;
}

186 187 188 189 190 191 192
/* Push one KVMSlot to the kernel via KVM_SET_USER_MEMORY_REGION.
 * Returns the ioctl result (negative errno on failure). */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem = {
        .slot = slot->slot,
        .guest_phys_addr = slot->start_addr,
        .memory_size = slot->memory_size,
        .userspace_addr = (unsigned long)slot->ram,
        .flags = slot->flags,
    };

    /* Migration forces dirty logging on every slot, overriding its flags. */
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

J
Jan Kiszka 已提交
201 202
/* qemu_register_reset() callback: forward a system reset to the
 * architecture-specific vcpu reset handler. */
static void kvm_reset_vcpu(void *opaque)
{
    CPUArchState *env = opaque;

    kvm_arch_reset_vcpu(env);
}
207

208
/* Create the KVM vcpu backing @env: issue KVM_CREATE_VCPU, mmap the
 * shared kvm_run communication area, hook up the coalesced-MMIO ring and
 * run the architecture-specific init/reset.
 * Returns 0 on success, a negative errno-style value on failure. */
int kvm_init_vcpu(CPUArchState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;
    /* Force a full register write-back into the kernel before first run. */
    env->kvm_vcpu_dirty = 1;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        ret = mmap_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        goto err;
    }

    /* The coalesced MMIO ring lives at a page offset inside the kvm_run
     * mapping; latch it once, from whichever vcpu is created first. */
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
        s->coalesced_mmio_ring =
            (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
    }

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
    }
err:
    return ret;
}

255 256 257
/*
 * dirty pages logging control
 */
258 259 260 261 262 263 264

/* Map qemu's dirty-logging request onto KVM memory-slot flags.
 * @s is currently unused but kept for interface symmetry. */
static int kvm_mem_flags(KVMState *s, bool log_dirty)
{
    if (log_dirty) {
        return KVM_MEM_LOG_DIRTY_PAGES;
    }
    return 0;
}

/* Update a slot's dirty-logging flag and, if the effective flags sent to
 * the kernel would change, re-issue the memory region ioctl. */
static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
{
    KVMState *s = kvm_state;
    int old_flags = mem->flags;
    int flags;

    flags = (old_flags & ~KVM_MEM_LOG_DIRTY_PAGES) | kvm_mem_flags(s, log_dirty);
    mem->flags = flags;

    /* Migration logging forces the bit on regardless of the per-slot state. */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }

    /* If nothing changed effectively, no need to issue the ioctl. */
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
/* Toggle dirty logging on the slot exactly covering [phys_addr,
 * phys_addr + size).  Returns -EINVAL if no such slot is registered. */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, bool log_dirty)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;

    mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    if (!mem) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }
    return kvm_slot_dirty_pages_log_change(mem, log_dirty);
}

A
Avi Kivity 已提交
302 303
/* MemoryListener hook: enable dirty logging for @section; a failure here
 * is fatal since dirty tracking cannot be silently dropped. */
static void kvm_log_start(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    if (kvm_dirty_pages_log_change(section->offset_within_address_space,
                                   section->size, true) < 0) {
        abort();
    }
}

A
Avi Kivity 已提交
314 315
/* MemoryListener hook: disable dirty logging for @section. */
static void kvm_log_stop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    if (kvm_dirty_pages_log_change(section->offset_within_address_space,
                                   section->size, false) < 0) {
        abort();
    }
}

326
static int kvm_set_migration_log(int enable)
327 328 329 330 331 332 333 334 335 336
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

337 338 339
        if (!mem->memory_size) {
            continue;
        }
340 341 342 343 344 345 346 347 348 349 350
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

351
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
                                         unsigned long *bitmap)
{
    unsigned int i, j;
    unsigned long page_number, c;
    target_phys_addr_t addr, addr1;
    /* Number of host longs needed to cover the section, one bit per
     * target page. */
    unsigned int len = ((section->size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    /* Host pages may be larger than target pages; one kernel bitmap bit
     * then represents hpratio consecutive target pages. */
    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        if (bitmap[i] != 0) {
            /* Kernel bitmap is little-endian; normalize for ffsl(). */
            c = leul_to_cpu(bitmap[i]);
            do {
                j = ffsl(c) - 1;        /* lowest set bit in this word */
                c &= ~(1ul << j);       /* clear it and keep scanning */
                page_number = (i * HOST_LONG_BITS + j) * hpratio;
                addr1 = page_number * TARGET_PAGE_SIZE;
                addr = section->offset_within_region + addr1;
                memory_region_set_dirty(section->mr, addr,
                                        TARGET_PAGE_SIZE * hpratio);
            } while (c != 0);
        }
    }
    return 0;
}

382 383
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))

384 385
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using
 * memory_region_set_dirty().  This means all bits are set
 * to dirty.
 *
 * @start_add: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;
    target_phys_addr_t start_addr = section->offset_within_address_space;
    target_phys_addr_t end_addr = start_addr + section->size;

    /* The section may span several slots; walk them in address order,
     * reusing (and growing) one bitmap buffer across iterations. */
    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        /* XXX bad kernel interface alert
         * For dirty bitmap, kernel allocates array of size aligned to
         * bits-per-long.  But for case when the kernel is 64bits and
         * the userspace is 32bits, userspace can't align to the same
         * bits-per-long, since sizeof(long) is different between kernel
         * and user space.  This way, userspace will provide buffer which
         * may be 4 bytes less than the kernel will use, resulting in
         * userspace memory corruption (which is not detectable by valgrind
         * too, in most cases).
         * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
         * a hope that sizeof(long) wont become >8 any time soon.
         */
        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
                     /*HOST_LONG_BITS*/ 64) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = g_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            DPRINTF("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
        start_addr = mem->start_addr + mem->memory_size;
    }
    g_free(d.dirty_bitmap);

    return ret;
}

A
Anthony Liguori 已提交
448
/* Register [start, start + size) for coalesced MMIO handling.
 * Returns -ENOSYS when the kernel lacks the capability. */
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;
    zone.pad = 0;

    return kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
}

A
Anthony Liguori 已提交
466
/* Undo kvm_coalesce_mmio_region() for the given zone. */
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;
    zone.pad = 0;

    return kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
}

484 485 486 487 488 489 490 491 492 493 494 495
/* Query a KVM capability; any ioctl error is folded into "not present"
 * so callers can treat the result as a plain availability/count value. */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    return ret < 0 ? 0 : ret;
}

496 497
static int kvm_check_many_ioeventfds(void)
{
    /* Userspace can use ioeventfd for io notification.  This requires a host
     * that supports eventfd(2) and an I/O thread; since eventfd does not
     * support SIGIO it cannot interrupt the vcpu.
     *
     * Older kernels have a 6 device limit on the KVM io bus.  Find out so we
     * can avoid creating too many ioeventfds.
     */
#if defined(CONFIG_EVENTFD)
    int ioeventfds[7];
    int i, ret = 0;
    /* Probe by registering one more ioeventfd than the old limit allows;
     * stop at the first failure. */
    for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
        ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
        if (ioeventfds[i] < 0) {
            break;
        }
        ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true);
        if (ret < 0) {
            close(ioeventfds[i]);
            break;
        }
    }

    /* Decide whether many devices are supported or not */
    ret = i == ARRAY_SIZE(ioeventfds);

    /* Unregister and close everything we managed to set up above. */
    while (i-- > 0) {
        kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false);
        close(ioeventfds[i]);
    }
    return ret;
#else
    return 0;
#endif
}

533 534 535 536 537 538 539 540 541 542 543 544
/* Walk a KVM_CAP_LAST_INFO-terminated capability list and return the
 * first entry the kernel does not support, or NULL if all are present. */
static const KVMCapabilityInfo *
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
{
    const KVMCapabilityInfo *cap;

    for (cap = list; cap->name; cap++) {
        if (!kvm_check_extension(s, cap->value)) {
            return cap;
        }
    }
    return NULL;
}

A
Avi Kivity 已提交
545
/* Register (@add = true) or remove (@add = false) the KVM memory slots
 * covering @section.  Existing overlapping slots are unregistered and,
 * where they extend beyond the new region, re-registered as prefix and
 * suffix slots.  All kernel registration failures are fatal. */
static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
{
    KVMState *s = kvm_state;
    KVMSlot *mem, old;
    int err;
    MemoryRegion *mr = section->mr;
    bool log_dirty = memory_region_is_logging(mr);
    target_phys_addr_t start_addr = section->offset_within_address_space;
    ram_addr_t size = section->size;
    void *ram = NULL;
    unsigned delta;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. */
    delta = TARGET_PAGE_ALIGN(size) - size;
    if (delta > size) {
        return;
    }
    start_addr += delta;
    size -= delta;
    size &= TARGET_PAGE_MASK;
    if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
        return;
    }

    /* Only RAM-backed regions are mapped into KVM slots. */
    if (!memory_region_is_ram(mr)) {
        return;
    }

    ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (add && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (ram - start_addr == mem->ram - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - update flags and done. */
            kvm_slot_dirty_pages_log_change(mem, log_dirty);
            return;
        }

        old = *mem;

        /* Harvest pending dirty bits before the slot disappears. */
        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
            kvm_physical_sync_dirty_bitmap(section);
        }

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size && add) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->ram = old.ram;
            mem->flags = kvm_mem_flags(s, log_dirty);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            ram += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->ram = old.ram;
            mem->flags =  kvm_mem_flags(s, log_dirty);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
#ifdef TARGET_PPC
                fprintf(stderr, "%s: This is probably because your kernel's " \
                                "PAGE_SIZE is too big. Please try to use 4k " \
                                "PAGE_SIZE!\n", __func__);
#endif
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->ram = old.ram + size_delta;
            mem->flags = kvm_mem_flags(s, log_dirty);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size) {
        return;
    }
    if (!add) {
        return;
    }
    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->ram = ram;
    mem->flags = kvm_mem_flags(s, log_dirty);

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

697 698 699 700 701 702 703 704
/* Transaction begin hook: KVM applies slot changes eagerly, nothing to
 * prepare. */
static void kvm_begin(MemoryListener *listener)
{
}

/* Transaction commit hook: likewise a no-op. */
static void kvm_commit(MemoryListener *listener)
{
}

/* A memory section became visible: map it into a KVM slot. */
static void kvm_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    kvm_set_phys_mem(section, true);
}

/* A memory section went away: drop its KVM slot(s). */
static void kvm_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    kvm_set_phys_mem(section, false);
}

/* Section unchanged across the transaction: slots need no update. */
static void kvm_region_nop(MemoryListener *listener,
                           MemoryRegionSection *section)
{
}

A
Avi Kivity 已提交
722 723
/* MemoryListener hook: pull the kernel's dirty bitmap for @section into
 * qemu's dirty tracking; failure is fatal. */
static void kvm_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    if (kvm_physical_sync_dirty_bitmap(section) < 0) {
        abort();
    }
}

A
Avi Kivity 已提交
733
/* Migration started: force dirty logging on globally. */
static void kvm_log_global_start(struct MemoryListener *listener)
{
    int ret = kvm_set_migration_log(1);

    assert(ret >= 0);
}

A
Avi Kivity 已提交
741
/* Migration finished: drop the global dirty-logging override. */
static void kvm_log_global_stop(struct MemoryListener *listener)
{
    int ret = kvm_set_migration_log(0);

    assert(ret >= 0);
}

749 750 751 752 753
/* Wire an eventfd to MMIO writes of @data at the section's address.
 * KVM only supports data matching, hence the assert. */
static void kvm_mem_ioeventfd_add(MemoryRegionSection *section,
                                  bool match_data, uint64_t data, int fd)
{
    assert(match_data && section->size <= 8);

    if (kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
                               data, true, section->size) < 0) {
        abort();
    }
}

/* Remove an MMIO ioeventfd registration. */
static void kvm_mem_ioeventfd_del(MemoryRegionSection *section,
                                  bool match_data, uint64_t data, int fd)
{
    if (kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
                               data, false, section->size) < 0) {
        abort();
    }
}

/* Wire an eventfd to PIO word writes of @data at the section's port. */
static void kvm_io_ioeventfd_add(MemoryRegionSection *section,
                                 bool match_data, uint64_t data, int fd)
{
    assert(match_data && section->size == 2);

    if (kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
                                   data, true) < 0) {
        abort();
    }
}

/* Remove a PIO ioeventfd registration. */
static void kvm_io_ioeventfd_del(MemoryRegionSection *section,
                                 bool match_data, uint64_t data, int fd)

{
    if (kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
                                   data, false) < 0) {
        abort();
    }
}

/* MemoryListener hook: dispatch an eventfd registration to the MMIO or
 * PIO helper depending on which address space the section lives in. */
static void kvm_eventfd_add(MemoryListener *listener,
                            MemoryRegionSection *section,
                            bool match_data, uint64_t data,
                            EventNotifier *e)
{
    int fd = event_notifier_get_fd(e);

    if (section->address_space == get_system_memory()) {
        kvm_mem_ioeventfd_add(section, match_data, data, fd);
    } else {
        kvm_io_ioeventfd_add(section, match_data, data, fd);
    }
}

/* MemoryListener hook: remove an eventfd registration, mirroring
 * kvm_eventfd_add(). */
static void kvm_eventfd_del(MemoryListener *listener,
                            MemoryRegionSection *section,
                            bool match_data, uint64_t data,
                            EventNotifier *e)
{
    int fd = event_notifier_get_fd(e);

    if (section->address_space == get_system_memory()) {
        kvm_mem_ioeventfd_del(section, match_data, data, fd);
    } else {
        kvm_io_ioeventfd_del(section, match_data, data, fd);
    }
}

A
Avi Kivity 已提交
830
/* Hooks through which the memory core drives KVM slot management, dirty
 * logging and ioeventfd registration. */
static MemoryListener kvm_memory_listener = {
    .begin = kvm_begin,
    .commit = kvm_commit,
    .region_add = kvm_region_add,
    .region_del = kvm_region_del,
    .region_nop = kvm_region_nop,
    .log_start = kvm_log_start,
    .log_stop = kvm_log_stop,
    .log_sync = kvm_log_sync,
    .log_global_start = kvm_log_global_start,
    .log_global_stop = kvm_log_global_stop,
    .eventfd_add = kvm_eventfd_add,
    .eventfd_del = kvm_eventfd_del,
    .priority = 10,
};

846
/* Raise an interrupt request on @env and, if the caller is not that
 * vcpu's thread, kick it out of the kernel so the request is noticed. */
static void kvm_handle_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request |= mask;

    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
    }
}

855 856 857 858 859
/* Drive an in-kernel irqchip pin.  Returns 1 for the old KVM_IRQ_LINE
 * ioctl (which reports nothing), otherwise the kernel's status field. */
int kvm_irqchip_set_irq(KVMState *s, int irq, int level)
{
    struct kvm_irq_level event;
    int ret;

    assert(kvm_irqchip_in_kernel());

    event.irq = irq;
    event.level = level;
    ret = kvm_vm_ioctl(s, s->irqchip_inject_ioctl, &event);
    if (ret < 0) {
        perror("kvm_set_irqchip_line");
        abort();
    }

    if (s->irqchip_inject_ioctl == KVM_IRQ_LINE) {
        return 1;
    }
    return event.status;
}

#ifdef KVM_CAP_IRQ_ROUTING
874 875 876 877 878
/* One dynamically allocated MSI route, kept in a hash table keyed on the
 * MSI data word (see kvm_hash_msi). */
typedef struct KVMMSIRoute {
    struct kvm_irq_routing_entry kroute;
    QTAILQ_ENTRY(KVMMSIRoute) entry;
} KVMMSIRoute;

/* Mark @gsi as allocated in the used-GSI bitmap. */
static void set_gsi(KVMState *s, unsigned int gsi)
{
    s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
}

/* Mark @gsi as free again. */
static void clear_gsi(KVMState *s, unsigned int gsi)
{
    s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32));
}

889 890
/* Set up IRQ routing state: size the used-GSI bitmap from the kernel's
 * KVM_CAP_IRQ_ROUTING count, allocate an empty routing table and, when
 * direct MSI injection is unavailable, the dynamic MSI route hash table.
 * Fix vs. original: the inner loop variable no longer shadows the outer
 * `i` (-Wshadow; the shadowed declaration was confusing and fragile). */
static void kvm_init_irq_routing(KVMState *s)
{
    int gsi_count, i;

    gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
    if (gsi_count > 0) {
        unsigned int gsi_bits, j;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
        s->gsi_count = gsi_count;

        /* Mark any over-allocated bits as already in use */
        for (j = gsi_count; j < gsi_bits; j++) {
            set_gsi(s, j);
        }
    }

    s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
    s->nr_allocated_irq_routes = 0;

    if (!s->direct_msi) {
        for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
            QTAILQ_INIT(&s->msi_hashtab[i]);
        }
    }

    kvm_arch_init_irq_routing(s);
}

920 921 922 923 924 925 926 927 928
/* Push the full routing table to the kernel via KVM_SET_GSI_ROUTING.
 * A failure here would leave kernel and userspace routing out of sync,
 * so it is treated as fatal. */
static void kvm_irqchip_commit_routes(KVMState *s)
{
    int ret;

    s->irq_routes->flags = 0;
    ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
    assert(ret == 0);
}

929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953
/* Append @entry to the routing table (growing it geometrically, minimum
 * 64 entries), mark its GSI used and commit the table to the kernel. */
static void kvm_add_routing_entry(KVMState *s,
                                  struct kvm_irq_routing_entry *entry)
{
    struct kvm_irq_routing_entry *dst;
    int idx;

    if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
        int n = s->nr_allocated_irq_routes * 2;
        int size;

        if (n < 64) {
            n = 64;
        }
        size = sizeof(struct kvm_irq_routing) + n * sizeof(*dst);
        s->irq_routes = g_realloc(s->irq_routes, size);
        s->nr_allocated_irq_routes = n;
    }
    idx = s->irq_routes->nr++;
    dst = &s->irq_routes->entries[idx];
    memset(dst, 0, sizeof(*dst));
    dst->gsi = entry->gsi;
    dst->type = entry->type;
    dst->flags = entry->flags;
    dst->u = entry->u;

    set_gsi(s, entry->gsi);

    kvm_irqchip_commit_routes(s);
}

958
/* Route GSI @irq to @pin of the given in-kernel @irqchip. */
void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
{
    struct kvm_irq_routing_entry e = {
        .gsi = irq,
        .type = KVM_IRQ_ROUTING_IRQCHIP,
        .flags = 0,
    };

    assert(pin < s->gsi_count);

    e.u.irqchip.irqchip = irqchip;
    e.u.irqchip.pin = pin;
    kvm_add_routing_entry(s, &e);
}

972
/* Remove every routing entry for @virq, free the GSI and commit the new
 * table to the kernel.  Removal swaps the last entry into the vacated
 * index; fix vs. original: re-examine that index (i--) so a second entry
 * with the same GSI swapped into place is not skipped (a GSI may carry
 * multiple entries, e.g. one per irqchip). */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
    struct kvm_irq_routing_entry *e;
    int i;

    for (i = 0; i < s->irq_routes->nr; i++) {
        e = &s->irq_routes->entries[i];
        if (e->gsi == virq) {
            s->irq_routes->nr--;
            *e = s->irq_routes->entries[s->irq_routes->nr];
            /* Recheck the entry just swapped into this slot. */
            i--;
        }
    }
    clear_gsi(s, virq);

    kvm_irqchip_commit_routes(s);
}

/* Bucket index for the MSI route hash table: the low byte of the MSI
 * data word.  This is optimized for the IA32 MSI layout, where that
 * byte carries the vector; no other arch should repeat the mistake of
 * not providing a direct MSI injection API. */
static unsigned int kvm_hash_msi(uint32_t data)
{
    return data & 0xffu;
}

/* Tear down every dynamically allocated MSI route so its GSI can be
 * reused.  Called when the GSI space runs out. */
static void kvm_flush_dynamic_msi_routes(KVMState *s)
{
    unsigned int bucket;
    KVMMSIRoute *r, *tmp;

    for (bucket = 0; bucket < KVM_MSI_HASHTAB_SIZE; bucket++) {
        QTAILQ_FOREACH_SAFE(r, &s->msi_hashtab[bucket], entry, tmp) {
            kvm_irqchip_release_virq(s, r->kroute.gsi);
            QTAILQ_REMOVE(&s->msi_hashtab[bucket], r, entry);
            g_free(r);
        }
    }
}

/* Allocate the lowest GSI that is not marked used in the bitmap.
 * If none is free and MSIs go through the routing table (no direct
 * injection), flush the dynamic MSI routes once and retry.
 * Returns the GSI number or -ENOSPC when truly exhausted. */
static int kvm_irqchip_get_virq(KVMState *s)
{
    uint32_t *word = s->used_gsi_bitmap;
    int max_words = ALIGN(s->gsi_count, 32) / 32;
    int i, bit;
    bool flushed = false;

    for (;;) {
        for (i = 0; i < max_words; i++) {
            bit = ffs(~word[i]);
            if (bit) {
                return bit - 1 + i * 32;
            }
        }
        if (s->direct_msi || flushed) {
            return -ENOSPC;
        }
        flushed = true;
        kvm_flush_dynamic_msi_routes(s);
    }
}

/* Return the previously installed MSI route matching @msg, or NULL. */
static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
{
    KVMMSIRoute *r;

    QTAILQ_FOREACH(r, &s->msi_hashtab[kvm_hash_msi(msg.data)], entry) {
        bool match = r->kroute.u.msi.address_lo == (uint32_t)msg.address &&
                     r->kroute.u.msi.address_hi == (msg.address >> 32) &&
                     r->kroute.u.msi.data == msg.data;

        if (match) {
            return r;
        }
    }
    return NULL;
}

/* Inject the MSI described by @msg into the in-kernel irqchip.
 * When KVM_SIGNAL_MSI is available the message is delivered directly;
 * otherwise a routing entry is installed (or an existing one reused)
 * and the associated GSI is raised. */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    KVMMSIRoute *route;

    if (s->direct_msi) {
        struct kvm_msi msi;

        msi.address_lo = (uint32_t)msg.address;
        msi.address_hi = msg.address >> 32;
        msi.data = msg.data;
        msi.flags = 0;
        memset(msi.pad, 0, sizeof(msi.pad));

        return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
    }

    route = kvm_lookup_msi_route(s, msg);
    if (!route) {
        int virq = kvm_irqchip_get_virq(s);

        if (virq < 0) {
            return virq;
        }

        route = g_malloc(sizeof(KVMMSIRoute));
        route->kroute.gsi = virq;
        route->kroute.type = KVM_IRQ_ROUTING_MSI;
        route->kroute.flags = 0;
        route->kroute.u.msi.address_lo = (uint32_t)msg.address;
        route->kroute.u.msi.address_hi = msg.address >> 32;
        route->kroute.u.msi.data = msg.data;

        kvm_add_routing_entry(s, &route->kroute);

        QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
                           entry);
    }

    assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);

    return kvm_irqchip_set_irq(s, route->kroute.gsi, 1);
}

1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
    struct kvm_irq_routing_entry kroute;
    int virq;

    if (!kvm_irqchip_in_kernel()) {
        return -ENOSYS;
    }

    virq = kvm_irqchip_get_virq(s);
    if (virq < 0) {
        return virq;
    }

    kroute.gsi = virq;
    kroute.type = KVM_IRQ_ROUTING_MSI;
    kroute.flags = 0;
    kroute.u.msi.address_lo = (uint32_t)msg.address;
    kroute.u.msi.address_hi = msg.address >> 32;
    kroute.u.msi.data = msg.data;

    kvm_add_routing_entry(s, &kroute);

    return virq;
}

1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
{
    struct kvm_irqfd irqfd = {
        .fd = fd,
        .gsi = virq,
        .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
    };

    if (!kvm_irqchip_in_kernel()) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
}

1135 1136 1137 1138 1139
#else /* !KVM_CAP_IRQ_ROUTING */

/* No KVM_CAP_IRQ_ROUTING in this kernel's headers: nothing to set up. */
static void kvm_init_irq_routing(KVMState *s)
{
}
1140

1141 1142 1143 1144
/* Routing unsupported: no virqs are ever allocated, so nothing to free. */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
}

1145 1146 1147 1148
/* Must never be reached when routing is unsupported; callers are
 * expected to have checked capability first. */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    abort();
}
1149 1150 1151

/* Routing unsupported: report that MSI routes cannot be created. */
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
    return -ENOSYS;
}
1154 1155 1156 1157 1158

/* Must never be reached when routing is unsupported. */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
{
    abort();
}
1159 1160
#endif /* !KVM_CAP_IRQ_ROUTING */

1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
/* Convenience wrapper: bind eventfd @fd as the source for GSI @virq. */
int kvm_irqchip_add_irqfd(KVMState *s, int fd, int virq)
{
    return kvm_irqchip_assign_irqfd(s, fd, virq, true);
}

/* Convenience wrapper: unbind eventfd @fd from GSI @virq. */
int kvm_irqchip_remove_irqfd(KVMState *s, int fd, int virq)
{
    return kvm_irqchip_assign_irqfd(s, fd, virq, false);
}

1171 1172 1173 1174 1175 1176 1177
/* Create the in-kernel irqchip if the user asked for it (machine option
 * "kernel_irqchip", default on) and the kernel supports KVM_CAP_IRQCHIP.
 * On success, picks the IRQ injection ioctl and initializes GSI routing.
 * Returns 0 (also when the irqchip is simply not created) or a negative
 * errno on actual failure. */
static int kvm_irqchip_create(KVMState *s)
{
    QemuOptsList *list = qemu_find_opts("machine");
    int ret;

    if (QTAILQ_EMPTY(&list->head) ||
        !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
                           "kernel_irqchip", true) ||
        !kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
        return 0;
    }

    ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
    if (ret < 0) {
        fprintf(stderr, "Create kernel irqchip failed\n");
        return ret;
    }

    /* Prefer the variant that reports delivery status when available. */
    s->irqchip_inject_ioctl = KVM_IRQ_LINE;
    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
        s->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
    }

    kvm_kernel_irqchip = true;

    kvm_init_irq_routing(s);

    return 0;
}

1200
/* Global KVM initialization: open /dev/kvm, create the VM, probe
 * capabilities, run arch setup, and optionally create the in-kernel
 * irqchip.  Returns 0 on success or a negative errno; on failure all
 * opened fds are closed and the state is freed. */
int kvm_init(void)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    const KVMCapabilityInfo *missing_cap;
    int ret;
    int i;

    s = g_malloc0(sizeof(KVMState));

    /*
     * On systems where the kernel can support different base page
     * sizes, host page size may be different from TARGET_PAGE_SIZE,
     * even with KVM.  TARGET_PAGE_SIZE is assumed to be the minimum
     * page size for the system though.
     */
    assert(TARGET_PAGE_SIZE <= getpagesize());

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        s->slots[i].slot = i;
    }
    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    /* Reject both too-old and too-new (incompatible) kernel APIs. */
    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0) {
            ret = -EINVAL;
        }
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
        ret = s->vmfd;
        goto err;
    }

    /* Generic capabilities first, then per-arch required ones. */
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
    if (!missing_cap) {
        missing_cap =
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
    }
    if (missing_cap) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support %s\n%s",
                missing_cap->name, upgrade_note);
        goto err;
    }

    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);

    s->broken_set_mem_region = 1;
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }

#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);

#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

#ifdef KVM_CAP_XSAVE
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
#endif

#ifdef KVM_CAP_XCRS
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
#endif

#ifdef KVM_CAP_PIT_STATE2
    s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
#endif

#ifdef KVM_CAP_IRQ_ROUTING
    s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif

    ret = kvm_arch_init(s);
    if (ret < 0) {
        goto err;
    }

    ret = kvm_irqchip_create(s);
    if (ret < 0) {
        goto err;
    }

    kvm_state = s;
    memory_listener_register(&kvm_memory_listener, NULL);

    s->many_ioeventfds = kvm_check_many_ioeventfds();

    cpu_interrupt_handler = kvm_handle_interrupt;

    return 0;

err:
    /* goto-based cleanup: close whatever was opened before the failure. */
    if (s) {
        if (s->vmfd >= 0) {
            close(s->vmfd);
        }
        if (s->fd != -1) {
            close(s->fd);
        }
    }
    g_free(s);

    return ret;
}

1339 1340
/* Complete a KVM_EXIT_IO: perform @count port I/O accesses of @size
 * bytes each at @port, reading from or writing to the data area @data
 * shared with the kernel, advancing through it element by element. */
static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
                          uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }
}

1376
/* Report a KVM_EXIT_INTERNAL_ERROR.  Dumps the kernel-provided extra
 * data when available.  For emulation failures the arch hook may decide
 * to continue (returns EXCP_INTERRUPT); otherwise -1 stops the VM. */
static int kvm_handle_internal_error(CPUArchState *env, struct kvm_run *run)
{
    fprintf(stderr, "KVM internal error.");
    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        int i;

        fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
        for (i = 0; i < run->internal.ndata; ++i) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    i, (uint64_t)run->internal.data[i]);
        }
    } else {
        fprintf(stderr, "\n");
    }
    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
        fprintf(stderr, "emulation failure\n");
        if (!kvm_arch_stop_on_emulation_error(env)) {
            cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
            return EXCP_INTERRUPT;
        }
    }
    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    return -1;
}

1403
/* Drain the coalesced-MMIO ring shared with the kernel, replaying each
 * buffered write into guest memory.  The in-progress flag guards
 * against recursion: replaying a write can itself trigger a flush. */
void kvm_flush_coalesced_mmio_buffer(void)
{
    KVMState *s = kvm_state;

    if (s->coalesced_flush_in_progress) {
        return;
    }

    s->coalesced_flush_in_progress = true;

    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* Order the data read before publishing the new ring head
             * to the kernel. */
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }

    s->coalesced_flush_in_progress = false;
}

1429
/* run_on_cpu() worker: pull the vcpu register state out of the kernel
 * into @env unless QEMU's copy is already the authoritative one. */
static void do_kvm_cpu_synchronize_state(void *_env)
{
    CPUArchState *env = _env;

    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

1439
/* Ensure QEMU's copy of the vcpu registers is up to date, fetching them
 * on the vcpu's own thread if needed. */
void kvm_cpu_synchronize_state(CPUArchState *env)
{
    if (!env->kvm_vcpu_dirty) {
        run_on_cpu(env, do_kvm_cpu_synchronize_state, env);
    }
}

1446
/* Push the full reset register state into the kernel after a CPU reset. */
void kvm_cpu_synchronize_post_reset(CPUArchState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
    env->kvm_vcpu_dirty = 0;
}

1452
/* Push the complete register state into the kernel after machine init. */
void kvm_cpu_synchronize_post_init(CPUArchState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
    env->kvm_vcpu_dirty = 0;
}

1458
/* Main vcpu execution loop: repeatedly enter the guest via KVM_RUN and
 * dispatch each exit reason until one requires returning to the outer
 * CPU loop.  Returns an EXCP_* code, or a negative value on fatal
 * errors (which also stops the VM with a register dump). */
int kvm_cpu_exec(CPUArchState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret, run_ret;

    DPRINTF("kvm_cpu_exec()\n");

    if (kvm_arch_process_async_events(env)) {
        env->exit_request = 0;
        return EXCP_HLT;
    }

    do {
        /* Write back register state QEMU modified while we were out. */
        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            /*
             * KVM requires us to reenter the kernel after IO exits to complete
             * instruction emulation. This self-signal will ensure that we
             * leave ASAP again.
             */
            qemu_cpu_kick_self();
        }
        /* Drop the BQL while the guest runs; no QEMU state is touched
         * until we regain it below. */
        qemu_mutex_unlock_iothread();

        run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);

        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        kvm_flush_coalesced_mmio_buffer();

        if (run_ret < 0) {
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
                DPRINTF("io window exit\n");
                ret = EXCP_INTERRUPT;
                break;
            }
            fprintf(stderr, "error: kvm run failed %s\n",
                    strerror(-run_ret));
            abort();
        }

        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            kvm_handle_io(run->io.port,
                          (uint8_t *)run + run->io.data_offset,
                          run->io.direction,
                          run->io.size,
                          run->io.count);
            ret = 0;
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 0;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_UNKNOWN:
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
                    (uint64_t)run->hw.hardware_exit_reason);
            ret = -1;
            break;
        case KVM_EXIT_INTERNAL_ERROR:
            ret = kvm_handle_internal_error(env, run);
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret == 0);

    if (ret < 0) {
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
        vm_stop(RUN_STATE_INTERNAL_ERROR);
    }

    env->exit_request = 0;
    return ret;
}

1557
/* ioctl on the global /dev/kvm fd.  Takes one optional pointer-sized
 * argument; returns the ioctl result or -errno on failure. */
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

1574
/* ioctl on the VM fd.  Takes one optional pointer-sized argument;
 * returns the ioctl result or -errno on failure. */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

1591
/* ioctl on a vcpu fd.  Takes one optional pointer-sized argument;
 * returns the ioctl result or -errno on failure. */
int kvm_vcpu_ioctl(CPUArchState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}
A
aliguori 已提交
1607 1608 1609

/* Whether the kernel MMU notifier keeps KVM's mappings in sync with
 * the host MM (KVM_CAP_SYNC_MMU); queried live, not cached. */
int kvm_has_sync_mmu(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
}
1612

1613 1614 1615 1616 1617
/* Cached KVM_CAP_VCPU_EVENTS probe from kvm_init(). */
int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

1618 1619 1620 1621 1622
/* Cached KVM_CAP_X86_ROBUST_SINGLESTEP probe from kvm_init(). */
int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

1623 1624 1625 1626 1627
/* Cached KVM_CAP_DEBUGREGS probe from kvm_init(). */
int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

1628 1629 1630 1631 1632 1633 1634 1635 1636 1637
/* Cached KVM_CAP_XSAVE probe from kvm_init(). */
int kvm_has_xsave(void)
{
    return kvm_state->xsave;
}

/* Cached KVM_CAP_XCRS probe from kvm_init(). */
int kvm_has_xcrs(void)
{
    return kvm_state->xcrs;
}

J
Jan Kiszka 已提交
1638 1639 1640 1641 1642
/* Cached KVM_CAP_PIT_STATE2 probe from kvm_init(). */
int kvm_has_pit_state2(void)
{
    return kvm_state->pit_state2;
}

1643 1644 1645 1646 1647 1648 1649 1650
/* Whether registering many ioeventfds is safe; probed once in
 * kvm_init() and cached.  Returns 0 when KVM is not in use. */
int kvm_has_many_ioeventfds(void)
{
    if (!kvm_enabled()) {
        return 0;
    }
    return kvm_state->many_ioeventfds;
}

1651 1652
/* Whether the kernel supports user-defined GSI routing tables. */
int kvm_has_gsi_routing(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return false;
#endif
}

1660 1661
/* IRQ0 override is only deliverable with userspace irqchip or when the
 * in-kernel irqchip's routing can be rewritten. */
int kvm_allows_irq0_override(void)
{
    return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing();
}

1665 1666 1667
/* Mark guest RAM MADV_DONTFORK when the kernel lacks a synchronous
 * MMU, so a fork()ed child cannot corrupt KVM's view of the mappings.
 * Failure to do so is fatal. */
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);

        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr,
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
            exit(1);
        }
    }
}

1679
#ifdef KVM_CAP_SET_GUEST_DEBUG
1680
/* Linear lookup of the software breakpoint at guest address @pc in the
 * per-KVMState breakpoint list; NULL when not found. */
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUArchState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc) {
            return bp;
        }
    }
    return NULL;
}

1693
/* True when at least one software breakpoint is installed. */
int kvm_sw_breakpoints_active(CPUArchState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

G
Glauber Costa 已提交
1698 1699
/* Parameter bundle for kvm_invoke_set_guest_debug(), so the ioctl can
 * be marshalled onto the target vcpu's thread via run_on_cpu(). */
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;  /* control flags handed to the kernel */
    CPUArchState *env;           /* vcpu to apply them to */
    int err;                     /* ioctl result, read back by the caller */
};

/* run_on_cpu() worker: apply the guest-debug settings on the vcpu. */
static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUArchState *env = dbg_data->env;

    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

1712
/* Recompute and apply the vcpu's guest-debug control word: trap
 * reinjection, single-step, plus arch-specific breakpoint state.
 * Executes on the vcpu's thread; returns the ioctl result. */
int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (env->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &data.dbg);
    data.env = env;

    run_on_cpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

1728
/* Insert a gdb breakpoint.  Software breakpoints are reference-counted
 * per address; hardware breakpoints go to the arch hook.  Afterwards
 * the debug state of every vcpu is refreshed.  Returns 0 or -errno. */
int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUArchState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            /* Already present: just bump the refcount. */
            bp->use_count++;
            return 0;
        }

        bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp) {
            /* NOTE(review): g_malloc() aborts on OOM, so this branch
             * looks unreachable — kept for byte-identical behavior. */
            return -ENOMEM;
        }

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            g_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

1773
/* Remove a gdb breakpoint (mirror of kvm_insert_breakpoint).  A
 * software breakpoint is only physically removed when its refcount
 * drops to zero.  Returns 0 or -errno. */
int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUArchState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp) {
            return -ENOENT;
        }

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err) {
            return err;
        }

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

1814
/* Best-effort removal of every breakpoint (software and hardware) and
 * refresh of all vcpus' debug state, e.g. on debugger detach.
 * NOTE(review): removed sw breakpoints are neither unlinked from the
 * list nor freed here — verify whether that leak is intentional. */
void kvm_remove_all_breakpoints(CPUArchState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUArchState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) {
                    break;
                }
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        kvm_update_guest_debug(env, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

1839
/* Stubs when the kernel headers lack KVM_CAP_SET_GUEST_DEBUG:
 * guest debugging is unsupported, so all operations fail (or no-op). */
int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUArchState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
1860

1861
/* Set (or clear, when @sigset is NULL) the signal mask KVM applies
 * while the vcpu runs in KVM_RUN.  Returns the ioctl result. */
int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
    }

    sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));

    /* len = 8: the kernel expects the size of its own sigset layout. */
    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    g_free(sigmask);

    return r;
}
1879

1880 1881
int kvm_set_ioeventfd_mmio(int fd, uint32_t addr, uint32_t val, bool assign,
                           uint32_t size)
1882 1883 1884 1885 1886 1887
{
    int ret;
    struct kvm_ioeventfd iofd;

    iofd.datamatch = val;
    iofd.addr = addr;
1888
    iofd.len = size;
1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908
    iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH;
    iofd.fd = fd;

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    if (ret < 0) {
        return -errno;
    }

    return 0;
}

1909 1910 1911 1912 1913 1914 1915 1916 1917 1918
/* Register (or deregister, when !@assign) eventfd @fd to fire on a
 * 2-byte guest PIO write of value @val to port @addr.
 * Returns 0 on success, -ENOSYS without KVM, or the ioctl error. */
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
    struct kvm_ioeventfd kick = {
        .datamatch = val,
        .addr = addr,
        .len = 2,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
        .fd = fd,
    };
    int r;
    if (!kvm_enabled()) {
        return -ENOSYS;
    }
    if (!assign) {
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
    if (r < 0) {
        return r;
    }
    return 0;
}
1931

1932
/* Forward a SIGBUS raised on a vcpu thread to the arch handler. */
int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
{
    return kvm_arch_on_sigbus_vcpu(env, code, addr);
}

/* Forward a SIGBUS raised outside vcpu context to the arch handler. */
int kvm_on_sigbus(int code, void *addr)
{
    return kvm_arch_on_sigbus(code, addr);
}