kvm-all.c 34.0 KB
Newer Older
A
aliguori 已提交
1 2 3 4
/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
5
 *           Red Hat, Inc. 2008
A
aliguori 已提交
6 7 8
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
9
 *  Glauber Costa     <gcosta@redhat.com>
A
aliguori 已提交
10 11 12 13 14 15 16 17 18
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
19
#include <stdarg.h>
A
aliguori 已提交
20 21 22 23

#include <linux/kvm.h>

#include "qemu-common.h"
24
#include "qemu-barrier.h"
A
aliguori 已提交
25
#include "sysemu.h"
J
Jan Kiszka 已提交
26
#include "hw/hw.h"
27
#include "gdbstub.h"
A
aliguori 已提交
28
#include "kvm.h"
29
#include "bswap.h"
A
aliguori 已提交
30

31 32 33 34 35
/* This check must be after config-host.h is included */
#ifdef CONFIG_EVENTFD
#include <sys/eventfd.h>
#endif

A
aliguori 已提交
36 37 38
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

A
aliguori 已提交
39 40 41
/* Uncomment to make DPRINTF() print to stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug build: printf-style trace output on stderr. */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Non-debug build: DPRINTF() expands to an empty statement. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

A
aliguori 已提交
49 50
/*
 * Userspace bookkeeping for one KVM memory slot.  The fields are copied into
 * struct kvm_userspace_memory_region by kvm_set_user_memory_region().
 */
typedef struct KVMSlot
{
    target_phys_addr_t start_addr;  /* guest physical start address */
    ram_addr_t memory_size;         /* size in bytes; 0 marks the slot free */
    ram_addr_t phys_offset;         /* ram offset, resolved to a host pointer
                                     * via qemu_safe_ram_ptr() */
    int slot;                       /* index of this entry in KVMState.slots */
    int flags;                      /* KVM_MEM_* flags passed to the kernel */
} KVMSlot;
A
aliguori 已提交
57

58 59
/* Shorthand for the argument structure of the KVM_GET_DIRTY_LOG ioctl. */
typedef struct kvm_dirty_log KVMDirtyLog;

A
aliguori 已提交
60 61 62 63 64
/* Global KVM accelerator state; populated by kvm_init(). */
struct KVMState
{
    KVMSlot slots[32];          /* memory slot table, indexed by slot number */
    int fd;                     /* descriptor for /dev/kvm */
    int vmfd;                   /* descriptor returned by KVM_CREATE_VM */
    int coalesced_mmio;         /* KVM_CAP_COALESCED_MMIO result; also used as
                                 * the page offset of the ring inside the
                                 * kvm_run mapping */
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
    int broken_set_mem_region;  /* 1 unless KVM_CAP_JOIN_MEMORY_REGIONS_WORKS */
    int migration_log;          /* when set, KVM_MEM_LOG_DIRTY_PAGES is forced
                                 * on for every slot given to the kernel */
    int vcpu_events;            /* KVM_CAP_VCPU_EVENTS result */
    int robust_singlestep;      /* KVM_CAP_X86_ROBUST_SINGLESTEP result */
    int debugregs;              /* KVM_CAP_DEBUGREGS result */
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
    int xsave, xcrs;            /* KVM_CAP_XSAVE / KVM_CAP_XCRS results */
    int many_ioeventfds;        /* result of kvm_check_many_ioeventfds() */
};

/* The single KVM state instance; assigned by kvm_init() once setup succeeded. */
static KVMState *kvm_state;

83 84 85 86 87 88
/*
 * Capabilities that kvm_init() checks with kvm_check_extension_list() and
 * refuses to start without.  The list is terminated by KVM_CAP_LAST_INFO.
 */
static const KVMCapabilityInfo kvm_required_capabilites[] = {
    KVM_CAP_INFO(USER_MEMORY),
    KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
    KVM_CAP_LAST_INFO
};

A
aliguori 已提交
89 90 91 92 93
/*
 * Return the first unused entry (memory_size == 0) of s->slots, skipping the
 * indices reserved for KVM private memory slots.  Aborts the process when no
 * free entry is left.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(s->slots); idx++) {
        KVMSlot *candidate = &s->slots[idx];
        /* indices 8..11 are KVM private memory slots */
        int is_private = (idx >= 8 && idx < 12);

        if (!is_private && candidate->memory_size == 0) {
            return candidate;
        }
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

/*
 * Find the slot that covers exactly [start_addr, end_addr).
 * Returns NULL when no slot has precisely these bounds.
 */
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    KVMSlot *slot;
    KVMSlot *const limit = s->slots + ARRAY_SIZE(s->slots);

    for (slot = s->slots; slot < limit; slot++) {
        if (slot->start_addr != start_addr) {
            continue;
        }
        if (slot->start_addr + slot->memory_size == end_addr) {
            return slot;
        }
    }

    return NULL;
}

125 126 127 128
/*
 * Among the in-use slots that intersect [start_addr, end_addr), return the
 * one with the lowest start address, or NULL if nothing overlaps.
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *best = NULL;
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(s->slots); idx++) {
        KVMSlot *cur = &s->slots[idx];
        int overlaps;

        if (cur->memory_size == 0) {
            continue;
        }
        /* a previous hit that starts lower keeps priority */
        if (best != NULL && best->start_addr < cur->start_addr) {
            continue;
        }

        overlaps = start_addr < cur->start_addr + cur->memory_size &&
                   end_addr > cur->start_addr;
        if (overlaps) {
            best = cur;
        }
    }

    return best;
}

152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
/*
 * Translate a ram address into the guest physical address of the slot that
 * maps it.  On a hit *phys_addr is written and 1 is returned; otherwise
 * *phys_addr is left untouched and 0 is returned.
 */
int kvm_physical_memory_addr_from_ram(KVMState *s, ram_addr_t ram_addr,
                                      target_phys_addr_t *phys_addr)
{
    KVMSlot *slot;
    KVMSlot *const limit = s->slots + ARRAY_SIZE(s->slots);

    for (slot = s->slots; slot < limit; slot++) {
        if (ram_addr < slot->phys_offset) {
            continue;
        }
        if (ram_addr >= slot->phys_offset + slot->memory_size) {
            continue;
        }
        *phys_addr = slot->start_addr + (ram_addr - slot->phys_offset);
        return 1;
    }

    return 0;
}

170 171 172 173 174 175 176
/*
 * Push one slot's description to the kernel with KVM_SET_USER_MEMORY_REGION.
 * Dirty logging is added to the slot's own flags while s->migration_log is
 * set.  Returns the result of kvm_vm_ioctl().
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region region;
    int flags = slot->flags;

    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }

    region.slot = slot->slot;
    region.flags = flags;
    region.guest_phys_addr = slot->start_addr;
    region.memory_size = slot->memory_size;
    region.userspace_addr = (unsigned long)qemu_safe_ram_ptr(slot->phys_offset);

    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
}

J
Jan Kiszka 已提交
185 186 187 188
static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

J
Jan Kiszka 已提交
189
    kvm_arch_reset_vcpu(env);
J
Jan Kiszka 已提交
190
}
191

192 193 194 195 196 197 198 199 200 201 202
int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}


A
aliguori 已提交
203 204 205 206 207 208
/*
 * Create the KVM vcpu backing @env and map its shared kvm_run area.
 *
 * Steps: KVM_CREATE_VCPU on the VM descriptor, KVM_GET_VCPU_MMAP_SIZE on the
 * /dev/kvm descriptor, mmap() of the vcpu descriptor, one-time setup of the
 * coalesced MMIO ring pointer, then the architecture hook.  On success the
 * reset handler is registered and the vcpu is reset once.
 *
 * Returns 0 on success or a negative errno-style value on failure.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        /* ret still holds the (non-negative) vcpu fd here; without this
         * assignment the failure would be reported as success */
        ret = mmap_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        goto err;
    }

    /* the ring lives s->coalesced_mmio pages into the kvm_run mapping; only
     * the first vcpu to get here sets the pointer */
    if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
        s->coalesced_mmio_ring =
            (void *)env->kvm_run + s->coalesced_mmio * PAGE_SIZE;
    }

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
    }
err:
    return ret;
}

248 249 250
/*
 * dirty pages logging control
 */
A
Anthony Liguori 已提交
251 252
/*
 * Update the flag bits selected by @mask to @flags on the slot that covers
 * exactly [phys_addr, phys_addr + size) and notify the kernel if the
 * effective flags changed.
 *
 * Returns -EINVAL if no slot matches, 0 if no ioctl was needed, otherwise the
 * result of kvm_set_user_memory_region().
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *slot = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int prev_flags;
    int effective;

    if (!slot) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    prev_flags = slot->flags;
    slot->flags = (prev_flags & ~mask) | flags;

    /* what the kernel would see: migration logging forces the dirty flag */
    effective = slot->flags;
    if (s->migration_log) {
        effective |= KVM_MEM_LOG_DIRTY_PAGES;
    }

    /* If nothing changed effectively, no need to issue ioctl */
    if (effective == prev_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, slot);
}

A
Anthony Liguori 已提交
281
/* Turn dirty-page logging on for the slot matching [phys_addr, phys_addr + size). */
int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    const int dirty = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, size, dirty, dirty);
}

A
Anthony Liguori 已提交
287
/* Turn dirty-page logging off for the slot matching [phys_addr, phys_addr + size). */
int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    const int mask = KVM_MEM_LOG_DIRTY_PAGES;

    return kvm_dirty_pages_log_change(phys_addr, size, 0, mask);
}

293
static int kvm_set_migration_log(int enable)
294 295 296 297 298 299 300 301 302 303
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

304 305 306
        if (!mem->memory_size) {
            continue;
        }
307 308 309 310 311 312 313 314 315 316 317
        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

318 319 320 321 322
/* get kvm's dirty pages bitmap and update qemu's */
static int kvm_get_dirty_pages_log_range(unsigned long start_addr,
                                         unsigned long *bitmap,
                                         unsigned long offset,
                                         unsigned long mem_size)
A
Alexander Graf 已提交
323
{
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
    unsigned int i, j;
    unsigned long page_number, addr, addr1, c;
    ram_addr_t ram_addr;
    unsigned int len = ((mem_size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) /
        HOST_LONG_BITS;

    /*
     * bitmap-traveling is faster than memory-traveling (for addr...)
     * especially when most of the memory is not dirty.
     */
    for (i = 0; i < len; i++) {
        if (bitmap[i] != 0) {
            c = leul_to_cpu(bitmap[i]);
            do {
                j = ffsl(c) - 1;
                c &= ~(1ul << j);
                page_number = i * HOST_LONG_BITS + j;
                addr1 = page_number * TARGET_PAGE_SIZE;
                addr = offset + addr1;
                ram_addr = cpu_get_physical_page_desc(addr);
                cpu_physical_memory_set_dirty(ram_addr);
            } while (c != 0);
        }
    }
    return 0;
A
Alexander Graf 已提交
349 350
}

351 352
/* Round x up to a multiple of y; the mask arithmetic requires y to be a power of two. */
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))

353 354 355 356 357
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
 * This means all bits are set to dirty.
 *
358
 * @start_add: start of logged region.
359 360
 * @end_addr: end of logged region.
 */
361
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
J
Jan Kiszka 已提交
362
                                          target_phys_addr_t end_addr)
363 364
{
    KVMState *s = kvm_state;
365 366 367 368
    unsigned long size, allocated_size = 0;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;
369

370 371 372 373 374 375
    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }
376

377
        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), HOST_LONG_BITS) / 8;
378 379 380 381 382 383 384
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);
385

386
        d.slot = mem->slot;
387

388
        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
389
            DPRINTF("ioctl failed %d\n", errno);
390 391 392
            ret = -1;
            break;
        }
393

394 395 396
        kvm_get_dirty_pages_log_range(mem->start_addr, d.dirty_bitmap,
                                      mem->start_addr, mem->memory_size);
        start_addr = mem->start_addr + mem->memory_size;
397 398
    }
    qemu_free(d.dirty_bitmap);
399 400

    return ret;
401 402
}

A
Anthony Liguori 已提交
403
/*
 * Register [start, start + size) as a coalesced MMIO zone.
 * Returns -ENOSYS when coalesced MMIO is unavailable, otherwise the result of
 * the KVM_REGISTER_COALESCED_MMIO ioctl.
 */
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
}

A
Anthony Liguori 已提交
420
/*
 * Remove [start, start + size) from the coalesced MMIO zones.
 * Returns -ENOSYS when coalesced MMIO is unavailable, otherwise the result of
 * the KVM_UNREGISTER_COALESCED_MMIO ioctl.
 */
int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    KVMState *s = kvm_state;
    struct kvm_coalesced_mmio_zone zone;

    if (!s->coalesced_mmio) {
        return -ENOSYS;
    }

    zone.addr = start;
    zone.size = size;

    return kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
}

437 438 439 440 441 442 443 444 445 446 447 448
/*
 * Query KVM_CHECK_EXTENSION for @extension.
 * A failing ioctl is reported as 0; otherwise the ioctl's value is returned.
 */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    return ret < 0 ? 0 : ret;
}

449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481
/*
 * Probe whether seven PIO ioeventfds can be registered at once.
 * Returns 1 if all registrations succeeded, 0 otherwise (and always 0
 * without CONFIG_EVENTFD).  Everything registered by the probe is removed
 * again before returning.
 */
static int kvm_check_many_ioeventfds(void)
{
    /* Older kernels have a 6 device limit on the KVM io bus.  Find out so we
     * can avoid creating too many ioeventfds.
     */
#ifndef CONFIG_EVENTFD
    return 0;
#else
    int fds[7];
    int registered = 0;
    int all_ok;
    int idx;

    while (registered < ARRAY_SIZE(fds)) {
        int fd = eventfd(0, EFD_CLOEXEC);

        if (fd < 0) {
            break;
        }
        fds[registered] = fd;
        if (kvm_set_ioeventfd_pio_word(fd, 0, registered, true) < 0) {
            close(fd);
            break;
        }
        registered++;
    }

    /* Decide whether many devices are supported or not */
    all_ok = (registered == ARRAY_SIZE(fds));

    /* undo the registrations, most recent first */
    for (idx = registered - 1; idx >= 0; idx--) {
        kvm_set_ioeventfd_pio_word(fds[idx], 0, idx, false);
        close(fds[idx]);
    }
    return all_ok;
#endif
}

482 483 484 485 486 487 488 489 490 491 492 493
/*
 * Check each capability of a name-terminated list.
 * Returns the first entry that kvm_check_extension() reports as missing, or
 * NULL when every capability is present.
 */
static const KVMCapabilityInfo *
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
{
    const KVMCapabilityInfo *cap;

    for (cap = list; cap->name; cap++) {
        if (!kvm_check_extension(s, cap->value)) {
            return cap;
        }
    }
    return NULL;
}

J
Jan Kiszka 已提交
494 495
/*
 * Make the KVM slot table reflect a new mapping of
 * [start_addr, start_addr + size) to phys_offset.
 *
 * The bits of phys_offset below the page mask are taken as IO_MEM_* flags.
 * Every existing slot overlapping the range is unregistered; the parts of it
 * lying before and after the range are re-registered as prefix/suffix slots.
 * Finally, unless the flags are IO_MEM_UNASSIGNED or above, the new range
 * gets its own slot.  Any failing ioctl aborts the process.
 */
static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. */
    size = TARGET_PAGE_ALIGN(size);
    start_addr = TARGET_PAGE_ALIGN(start_addr);

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        /* keep a copy: the table entry is cleared next and may be reused by
         * kvm_alloc_slot() below */
        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            /* the re-registered old slot covers the head of the new range;
             * continue with the remainder */
            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            /* distance from the old slot's start to the suffix start */
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size) {
        return;
    }
    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED) {
        return;
    }
    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

623
/* set_memory hook of kvm_cpu_phys_memory_client; @client is unused. */
static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size, ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

/* sync_dirty_bitmap hook of kvm_cpu_phys_memory_client; @client is unused. */
static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

/* migration_log hook of kvm_cpu_phys_memory_client; @client is unused. */
static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

/* Callback table that kvm_init() passes to cpu_register_phys_memory_client(). */
static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

649
int kvm_init(void)
A
aliguori 已提交
650
{
651 652 653
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
A
aliguori 已提交
654
    KVMState *s;
655
    const KVMCapabilityInfo *missing_cap;
A
aliguori 已提交
656 657 658 659 660
    int ret;
    int i;

    s = qemu_mallocz(sizeof(KVMState));

661
#ifdef KVM_CAP_SET_GUEST_DEBUG
B
Blue Swirl 已提交
662
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
663
#endif
J
Jan Kiszka 已提交
664
    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
A
aliguori 已提交
665
        s->slots[i].slot = i;
J
Jan Kiszka 已提交
666
    }
A
aliguori 已提交
667
    s->vmfd = -1;
K
Kevin Wolf 已提交
668
    s->fd = qemu_open("/dev/kvm", O_RDWR);
A
aliguori 已提交
669 670 671 672 673 674 675 676
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
J
Jan Kiszka 已提交
677
        if (ret > 0) {
A
aliguori 已提交
678
            ret = -EINVAL;
J
Jan Kiszka 已提交
679
        }
A
aliguori 已提交
680 681 682 683 684 685 686 687 688 689 690
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
691 692 693 694 695
    if (s->vmfd < 0) {
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
A
aliguori 已提交
696
        goto err;
697
    }
A
aliguori 已提交
698

699 700 701 702
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
    if (!missing_cap) {
        missing_cap =
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
A
aliguori 已提交
703
    }
704
    if (missing_cap) {
705
        ret = -EINVAL;
706 707
        fprintf(stderr, "kvm does not support %s\n%s",
                missing_cap->name, upgrade_note);
708 709 710
        goto err;
    }

711
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
A
aliguori 已提交
712

713 714
    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
715
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
716 717 718 719 720
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

721 722 723 724 725
    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

726 727 728 729 730 731
    s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif

732 733 734 735 736
    s->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

737 738 739 740 741 742 743 744 745 746
    s->xsave = 0;
#ifdef KVM_CAP_XSAVE
    s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
#endif

    s->xcrs = 0;
#ifdef KVM_CAP_XCRS
    s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
#endif

747
    ret = kvm_arch_init(s);
J
Jan Kiszka 已提交
748
    if (ret < 0) {
A
aliguori 已提交
749
        goto err;
J
Jan Kiszka 已提交
750
    }
A
aliguori 已提交
751 752

    kvm_state = s;
753
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
A
aliguori 已提交
754

755 756
    s->many_ioeventfds = kvm_check_many_ioeventfds();

A
aliguori 已提交
757 758 759 760
    return 0;

err:
    if (s) {
J
Jan Kiszka 已提交
761
        if (s->vmfd != -1) {
A
aliguori 已提交
762
            close(s->vmfd);
J
Jan Kiszka 已提交
763 764
        }
        if (s->fd != -1) {
A
aliguori 已提交
765
            close(s->fd);
J
Jan Kiszka 已提交
766
        }
A
aliguori 已提交
767 768 769 770 771 772
    }
    qemu_free(s);

    return ret;
}

773 774
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
A
aliguori 已提交
775 776 777 778 779 780 781 782
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
783
                stb_p(ptr, cpu_inb(port));
A
aliguori 已提交
784 785
                break;
            case 2:
786
                stw_p(ptr, cpu_inw(port));
A
aliguori 已提交
787 788
                break;
            case 4:
789
                stl_p(ptr, cpu_inl(port));
A
aliguori 已提交
790 791 792 793 794
                break;
            }
        } else {
            switch (size) {
            case 1:
795
                cpu_outb(port, ldub_p(ptr));
A
aliguori 已提交
796 797
                break;
            case 2:
798
                cpu_outw(port, lduw_p(ptr));
A
aliguori 已提交
799 800
                break;
            case 4:
801
                cpu_outl(port, ldl_p(ptr));
A
aliguori 已提交
802 803 804 805 806 807 808 809 810 811
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

M
Marcelo Tosatti 已提交
812
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
/*
 * Report a KVM_EXIT_INTERNAL_ERROR on stderr, including the suberror and
 * extra data words when the kernel provides them.
 *
 * Returns 0 only for an emulation failure on which the architecture code
 * chooses not to stop (CPU state is dumped first); returns -1 otherwise.
 */
static int kvm_handle_internal_error(CPUState *env, struct kvm_run *run)
{
    int have_data = kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA);

    fprintf(stderr, "KVM internal error.");
    if (!have_data) {
        fprintf(stderr, "\n");
    } else {
        int idx;

        fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
        for (idx = 0; idx < run->internal.ndata; ++idx) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    idx, (uint64_t)run->internal.data[idx]);
        }
    }

    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    if (run->internal.suberror != KVM_INTERNAL_ERROR_EMULATION) {
        return -1;
    }

    fprintf(stderr, "emulation failure\n");
    if (kvm_arch_stop_on_emulation_error(env)) {
        return -1;
    }

    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
    return 0;
}
#endif

841
void kvm_flush_coalesced_mmio_buffer(void)
A
aliguori 已提交
842 843
{
    KVMState *s = kvm_state;
844 845
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
A
aliguori 已提交
846 847 848 849 850 851
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
852
            smp_wmb();
A
aliguori 已提交
853 854 855 856 857
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
}

858
static void do_kvm_cpu_synchronize_state(void *_env)
859
{
860 861
    CPUState *env = _env;

J
Jan Kiszka 已提交
862
    if (!env->kvm_vcpu_dirty) {
863
        kvm_arch_get_registers(env);
J
Jan Kiszka 已提交
864
        env->kvm_vcpu_dirty = 1;
865 866 867
    }
}

868 869
/*
 * Make sure qemu's register copy for @env is current, dispatching the fetch
 * through run_on_cpu() when the copy is not marked dirty.
 */
void kvm_cpu_synchronize_state(CPUState *env)
{
    if (env->kvm_vcpu_dirty) {
        return;
    }

    run_on_cpu(env, do_kvm_cpu_synchronize_state, env);
}

875 876 877 878 879 880 881 882 883 884 885 886
/*
 * Write qemu's register copy to KVM at the KVM_PUT_RESET_STATE level and
 * clear the dirty marker.
 */
void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
    env->kvm_vcpu_dirty = 0;
}

/*
 * Write qemu's register copy to KVM at the KVM_PUT_FULL_STATE level and
 * clear the dirty marker.
 */
void kvm_cpu_synchronize_post_init(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
    env->kvm_vcpu_dirty = 0;
}

A
aliguori 已提交
887 888 889 890 891
/*
 * Run the vcpu of @env until an exit needs attention outside this loop.
 *
 * Each iteration writes back dirty registers, issues KVM_RUN with the
 * iothread mutex released, flushes coalesced MMIO and dispatches on the exit
 * reason.  Inside the loop a handler result > 0 re-enters the guest, 0 leaves
 * the loop and < 0 additionally dumps the CPU state and stops the VM.
 *
 * Returns the last handler result (0 or negative).  A KVM_RUN failure other
 * than EINTR/EAGAIN is fatal: it is reported on stderr and the process
 * aborts.
 */
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    DPRINTF("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (kvm_arch_process_irqchip_events(env)) {
            ret = 0;
            break;
        }

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        /* the iothread mutex is dropped for the duration of KVM_RUN */
        cpu_single_env = NULL;
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        cpu_single_env = env;
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            DPRINTF("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            /* always say why we abort, not only in DEBUG_KVM builds */
            fprintf(stderr, "error: kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
                    (uint64_t)run->hw.hardware_exit_reason);
            ret = -1;
            break;
#ifdef KVM_CAP_INTERNAL_ERROR_DATA
        case KVM_EXIT_INTERNAL_ERROR:
            ret = kvm_handle_internal_error(env, run);
            break;
#endif
        case KVM_EXIT_DEBUG:
            DPRINTF("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                /* returns directly, skipping the exit_request handling below */
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (ret < 0) {
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
        vm_stop(0);
        env->exit_request = 1;
    }
    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

1002
/*
 * Issue an ioctl on the descriptor stored in s->fd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl(). Returns ioctl()'s result; a -1 failure is translated into the
 * negated errno value.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->fd, type, param);
    return (rc == -1) ? -errno : rc;
}

1019
/*
 * Issue an ioctl on the descriptor stored in s->vmfd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl(). Returns ioctl()'s result; a -1 failure is translated into the
 * negated errno value.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(s->vmfd, type, param);
    return (rc == -1) ? -errno : rc;
}

1036
/*
 * Issue an ioctl on the descriptor stored in env->kvm_fd.
 *
 * Exactly one pointer-sized variadic argument is read and forwarded to
 * ioctl(). Returns ioctl()'s result; a -1 failure is translated into the
 * negated errno value.
 */
int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    rc = ioctl(env->kvm_fd, type, param);
    return (rc == -1) ? -errno : rc;
}
A
aliguori 已提交
1052 1053 1054

int kvm_has_sync_mmu(void)
{
1055
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
A
aliguori 已提交
1056
}
1057

1058 1059 1060 1061 1062
/*
 * Return the vcpu_events field stored in kvm_state.
 * kvm_state is dereferenced unconditionally (no kvm_enabled() check here).
 */
int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

1063 1064 1065 1066 1067
/*
 * Return the robust_singlestep field stored in kvm_state.
 * kvm_state is dereferenced unconditionally (no kvm_enabled() check here).
 */
int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

1068 1069 1070 1071 1072
/*
 * Return the debugregs field stored in kvm_state.
 * kvm_state is dereferenced unconditionally (no kvm_enabled() check here).
 */
int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
/*
 * Return the xsave field stored in kvm_state.
 * kvm_state is dereferenced unconditionally (no kvm_enabled() check here).
 */
int kvm_has_xsave(void)
{
    return kvm_state->xsave;
}

/*
 * Return the xcrs field stored in kvm_state.
 * kvm_state is dereferenced unconditionally (no kvm_enabled() check here).
 */
int kvm_has_xcrs(void)
{
    return kvm_state->xcrs;
}

1083 1084 1085 1086 1087 1088 1089 1090
/*
 * Return kvm_state->many_ioeventfds, or 0 when kvm_enabled() is false.
 * kvm_state is only dereferenced after the kvm_enabled() check succeeds.
 */
int kvm_has_many_ioeventfds(void)
{
    return kvm_enabled() ? kvm_state->many_ioeventfds : 0;
}

1091 1092 1093
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
A
Andreas Färber 已提交
1094
        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
1095 1096

        if (ret) {
A
Andreas Färber 已提交
1097 1098 1099
            perror("qemu_madvise");
            fprintf(stderr,
                    "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
1100 1101 1102 1103 1104
            exit(1);
        }
    }
}

1105 1106 1107 1108 1109 1110
#ifdef KVM_CAP_SET_GUEST_DEBUG
/*
 * Look up the software breakpoint registered at address pc.
 *
 * Walks env->kvm_state->kvm_sw_breakpoints and returns the first entry whose
 * pc field equals pc, or NULL when no entry matches.
 */
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *cur;

    QTAILQ_FOREACH(cur, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (cur->pc != pc) {
            continue;
        }
        return cur;
    }
    return NULL;
}

/*
 * Return 1 when env->kvm_state->kvm_sw_breakpoints holds at least one
 * entry, 0 when the list is empty.
 */
int kvm_sw_breakpoints_active(CPUState *env)
{
    if (QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints)) {
        return 0;
    }
    return 1;
}

G
Glauber Costa 已提交
1124 1125 1126 1127 1128 1129 1130 1131 1132
/*
 * Argument bundle for kvm_invoke_set_guest_debug(), which receives it as an
 * opaque pointer.
 */
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg; /* payload passed to KVM_SET_GUEST_DEBUG */
    CPUState *env;              /* CPU the ioctl is issued on */
    int err;                    /* ioctl result, written by the callback */
};

/*
 * Callback taking a struct kvm_set_guest_debug_data through an opaque
 * pointer: issues KVM_SET_GUEST_DEBUG on the bundled CPU with the bundled
 * debug settings and stores the ioctl result in the bundle's err field.
 */
static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *req = data;

    req->err = kvm_vcpu_ioctl(req->env, KVM_SET_GUEST_DEBUG, &req->dbg);
}

1138 1139
/*
 * Rebuild the guest-debug control word for env and push it to KVM.
 *
 * The control word starts from reinject_trap, gains the ENABLE and
 * SINGLESTEP flags when env->singlestep_enabled is set, and is then handed
 * to kvm_arch_update_guest_debug() for further adjustment. The ioctl itself
 * is issued through run_on_cpu() via kvm_invoke_set_guest_debug().
 *
 * Returns the result recorded by that callback.
 */
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data request;
    struct kvm_guest_debug *dbg = &request.dbg;

    request.env = env;

    dbg->control = reinject_trap;
    if (env->singlestep_enabled) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, dbg);

    run_on_cpu(env, kvm_invoke_set_guest_debug, &request);
    return request.err;
}

/*
 * Insert a breakpoint of the given type at addr.
 *
 * For GDB_BREAKPOINT_SW an existing entry at addr only has its use count
 * incremented; otherwise a new entry is allocated, installed through
 * kvm_arch_insert_sw_breakpoint() and linked into the per-VM list. Any other
 * type is forwarded to kvm_arch_insert_hw_breakpoint().
 *
 * After a successful insertion the debug state of every CPU is refreshed
 * with kvm_update_guest_debug().
 *
 * Returns 0 on success, -ENOMEM if the allocation yields NULL, or the first
 * non-zero error reported by the arch hooks or kvm_update_guest_debug().
 */
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            /* Already installed: just take another reference. */
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(*bp));
        if (!bp) {
            return -ENOMEM;
        }

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            /* Release with the allocator that matches qemu_malloc(). */
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

/*
 * Remove a breakpoint of the given type at addr.
 *
 * For GDB_BREAKPOINT_SW the entry is looked up first: a use count above one
 * is merely decremented, otherwise the breakpoint is removed through
 * kvm_arch_remove_sw_breakpoint(), unlinked from the per-VM list and freed.
 * Any other type is forwarded to kvm_arch_remove_hw_breakpoint().
 *
 * After an actual removal the debug state of every CPU is refreshed with
 * kvm_update_guest_debug().
 *
 * Returns 0 on success, -ENOENT when no software breakpoint exists at addr,
 * or the first non-zero error from the arch hooks or
 * kvm_update_guest_debug().
 */
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *sw_bp;
    CPUState *cpu;
    int rc;

    if (type != GDB_BREAKPOINT_SW) {
        rc = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (rc) {
            return rc;
        }
    } else {
        sw_bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!sw_bp) {
            return -ENOENT;
        }

        /* Other users remain: drop one reference and leave it installed. */
        if (sw_bp->use_count > 1) {
            sw_bp->use_count--;
            return 0;
        }

        rc = kvm_arch_remove_sw_breakpoint(current_env, sw_bp);
        if (rc) {
            return rc;
        }

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, sw_bp, entry);
        qemu_free(sw_bp);
    }

    cpu = first_cpu;
    while (cpu != NULL) {
        rc = kvm_update_guest_debug(cpu, 0);
        if (rc) {
            return rc;
        }
        cpu = cpu->next_cpu;
    }
    return 0;
}

/*
 * Remove every software and hardware breakpoint and refresh the debug state
 * of all CPUs.
 *
 * Each software breakpoint is removed through
 * kvm_arch_remove_sw_breakpoint(), first on current_env and, if that fails,
 * on each CPU in turn until one succeeds. The entry is then unlinked from
 * the per-VM list and freed, so no stale entries stay visible to
 * kvm_find_sw_breakpoint() or kvm_sw_breakpoints_active().
 *
 * Errors from the arch hooks and from kvm_update_guest_debug() are ignored;
 * this is a best-effort teardown.
 */
void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    /* The _SAFE variant is needed because bp is unlinked inside the loop. */
    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) {
                    break;
                }
            }
        }
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        kvm_update_guest_debug(env, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

/*
 * Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: guest debugging is
 * unavailable, so the request is always rejected with -EINVAL.
 */
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

/*
 * Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: no breakpoint is
 * inserted and -EINVAL is always returned.
 */
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

/*
 * Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: nothing is removed
 * and -EINVAL is always returned.
 */
int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

/*
 * Stub used when KVM_CAP_SET_GUEST_DEBUG is not defined: there are no
 * breakpoints to remove, so this is a no-op.
 */
void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
1286 1287 1288 1289 1290 1291

/*
 * Set, or clear, the signal mask KVM applies for the given CPU.
 *
 * A NULL sigset issues KVM_SET_SIGNAL_MASK with a NULL argument. Otherwise a
 * temporary struct kvm_signal_mask carrying a copy of *sigset is built,
 * passed to the ioctl and released again.
 *
 * Returns the result of kvm_vcpu_ioctl().
 */
int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset) {
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
    }

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    /*
     * NOTE(review): len is hard-coded to 8 while sizeof(*sigset) bytes are
     * copied; presumably 8 is the sigset size the kernel expects - confirm.
     */
    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    /* Release with the allocator that matches qemu_malloc(). */
    qemu_free(sigmask);

    return r;
}
1305

1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337
/*
 * Assign or deassign an ioeventfd that matches 4-byte MMIO writes of val to
 * addr.
 *
 * fd:     eventfd descriptor placed in the request
 * addr:   address to match
 * val:    data value to match (KVM_IOEVENTFD_FLAG_DATAMATCH is always set)
 * assign: true to register, false to add KVM_IOEVENTFD_FLAG_DEASSIGN
 *
 * Returns 0 on success, -ENOSYS when KVM is not enabled or KVM_IOEVENTFD is
 * not available at build time, or the negative error code returned by
 * kvm_vm_ioctl().
 */
int kvm_set_ioeventfd_mmio_long(int fd, uint32_t addr, uint32_t val, bool assign)
{
#ifdef KVM_IOEVENTFD
    int ret;
    /* Designated initializer zeroes every member not named here. */
    struct kvm_ioeventfd iofd = {
        .datamatch = val,
        .addr = addr,
        .len = 4,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH,
        .fd = fd,
    };

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    /* kvm_vm_ioctl() already returns -errno; do not re-read errno here. */
    if (ret < 0) {
        return ret;
    }

    return 0;
#else
    return -ENOSYS;
#endif
}

1338 1339
/*
 * Assign or deassign an ioeventfd that matches 2-byte port-I/O writes of val
 * to addr.
 *
 * fd:     eventfd descriptor placed in the request
 * addr:   port address to match
 * val:    data value to match (DATAMATCH and PIO flags are always set)
 * assign: true to register, false to add KVM_IOEVENTFD_FLAG_DEASSIGN
 *
 * Returns 0 on success, -ENOSYS when KVM is not enabled or KVM_IOEVENTFD is
 * not available at build time, or the negative value returned by
 * kvm_vm_ioctl().
 */
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
#ifdef KVM_IOEVENTFD
    struct kvm_ioeventfd request = {
        .datamatch = val,
        .addr = addr,
        .len = 2,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
        .fd = fd,
    };
    int rc;

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    request.flags |= assign ? 0 : KVM_IOEVENTFD_FLAG_DEASSIGN;

    rc = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &request);
    return (rc < 0) ? rc : 0;
#else
    return -ENOSYS;
#endif
}