kqemu.c 27.8 KB
Newer Older
B
bellard 已提交
1 2
/*
 *  KQEMU support
3
 *
4
 *  Copyright (c) 2005-2008 Fabrice Bellard
B
bellard 已提交
5 6 7 8 9 10 11 12 13 14 15 16
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
B
bellard 已提交
18 19 20 21
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
22
#include <winioctl.h>
B
bellard 已提交
23 24 25
#else
#include <sys/types.h>
#include <sys/mman.h>
26
#include <sys/ioctl.h>
B
bellard 已提交
27
#endif
28
#ifdef CONFIG_SOLARIS
29
#include <sys/ioccom.h>
T
ths 已提交
30
#endif
B
bellard 已提交
31 32 33 34 35 36 37 38 39 40
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <inttypes.h>

#include "cpu.h"
#include "exec-all.h"
41
#include "qemu-common.h"
B
bellard 已提交
42

43
#ifdef CONFIG_KQEMU
B
bellard 已提交
44 45

#define DEBUG
B
bellard 已提交
46
//#define PROFILE
B
bellard 已提交
47

48 49

#ifdef DEBUG
50 51
#  define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
#  define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
52 53 54 55 56
#else
#  define LOG_INT(...) do { } while (0)
#  define LOG_INT_STATE(env) do { } while (0)
#endif

B
bellard 已提交
57 58
#include <unistd.h>
#include <fcntl.h>
B
bellard 已提交
59
#include "kqemu.h"
B
bellard 已提交
60

61 62 63
#ifdef _WIN32
#define KQEMU_DEVICE "\\\\.\\kqemu"
#else
B
bellard 已提交
64
#define KQEMU_DEVICE "/dev/kqemu"
65 66
#endif

67 68
static void qpi_init(void);

69 70 71 72 73 74 75 76 77
#ifdef _WIN32
#define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
HANDLE kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) CloseHandle(x)
#else
#define KQEMU_INVALID_FD -1
int kqemu_fd = KQEMU_INVALID_FD;
#define kqemu_closefd(x) close(x)
#endif
B
bellard 已提交
78

79 80 81 82
/* 0 = not allowed
   1 = user kqemu
   2 = kernel kqemu
*/
83
int kqemu_allowed = 0;
84
uint64_t *pages_to_flush;
B
bellard 已提交
85
unsigned int nb_pages_to_flush;
86
uint64_t *ram_pages_to_update;
B
bellard 已提交
87
unsigned int nb_ram_pages_to_update;
88
uint64_t *modified_ram_pages;
89 90
unsigned int nb_modified_ram_pages;
uint8_t *modified_ram_pages_table;
91 92
int qpi_io_memory;
uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
P
pbrook 已提交
93 94
ram_addr_t kqemu_phys_ram_size;
uint8_t *kqemu_phys_ram_base;
B
bellard 已提交
95 96 97 98 99 100

/* Execute the CPUID instruction for leaf 'index' and store the resulting
   EAX, EBX, ECX and EDX values in the lvalues 'eax', 'ebx', 'ecx' and
   'edx'. The "0" input constraint places 'index' in the same register as
   operand 0 (EAX). */
#define cpuid(index, eax, ebx, ecx, edx) \
  asm volatile ("cpuid" \
                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
                : "0" (index))

B
bellard 已提交
101 102 103 104 105 106
#ifdef __x86_64__
/* x86_64 variant: CPUID availability is reported unconditionally, so no
   run-time probe is performed. Returns 1. */
static int is_cpuid_supported(void)
{
    enum { CPUID_ALWAYS_PRESENT = 1 };

    return CPUID_ALWAYS_PRESENT;
}
#else
B
bellard 已提交
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
/* 32-bit variant: probe for CPUID by trying to toggle bit 0x00200000 of
   EFLAGS. The sequence reads EFLAGS into v0, keeps a copy in v1, flips
   the bit, writes the value back with popf and reads EFLAGS again into
   v0. Returns non-zero when the re-read value differs from the saved
   copy, i.e. when the bit could be changed. The modified EFLAGS value is
   not restored afterwards. */
static int is_cpuid_supported(void)
{
    int v0, v1;
    asm volatile ("pushf\n"
                  "popl %0\n"
                  "movl %0, %1\n"
                  "xorl $0x00200000, %0\n"
                  "pushl %0\n"
                  "popf\n"
                  "pushf\n"
                  "popl %0\n"
                  : "=a" (v0), "=d" (v1)
                  :
                  : "cc");
    return (v0 != v1);
}
B
bellard 已提交
123
#endif
B
bellard 已提交
124 125 126

/* Align selected CPUID feature bits of the target CPU 'env' with those of
   the host CPU. Only the bits listed in the two masks below are replaced;
   every other bit of env->cpuid_features / env->cpuid_ext_features is
   preserved. When the host has no CPUID instruction the selected bits are
   cleared. */
static void kqemu_update_cpuid(CPUState *env)
{
    /* These features are kept identical on the host and target CPUs
       because they are important for user code. Strictly speaking, only
       SSE really matters because the OS must support it if the user code
       uses it. */
    const int std_mask =
        CPUID_SEP | CPUID_SSE2 | CPUID_SSE | CPUID_MMX |
        CPUID_FXSR | CPUID_CX8 | CPUID_CMOV;
    const int ext_mask = CPUID_EXT_MONITOR | CPUID_EXT_SSE3;
    int host_std = 0;
    int host_ext = 0;

    if (is_cpuid_supported()) {
        uint32_t eax, ebx, ecx, edx;

        cpuid(1, eax, ebx, ecx, edx);
        host_std = edx;
        host_ext = ecx;
    }
#ifdef __x86_64__
    /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
       compatibility mode, so in order to have the best performances
       it is better not to use it */
    host_std &= ~CPUID_SEP;
#endif
    env->cpuid_features &= ~std_mask;
    env->cpuid_features |= host_std & std_mask;

    env->cpuid_ext_features &= ~ext_mask;
    env->cpuid_ext_features |= host_ext & ext_mask;
    /* XXX: we could update more of the target CPUID state so that the
       non accelerated code sees exactly the same CPU features as the
       accelerated code */
}

int kqemu_init(CPUState *env)
{
164
    struct kqemu_init kinit;
B
bellard 已提交
165
    int ret, version;
166 167 168
#ifdef _WIN32
    DWORD temp;
#endif
B
bellard 已提交
169 170 171 172

    if (!kqemu_allowed)
        return -1;

173 174 175 176 177
#ifdef _WIN32
    kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
                          FILE_SHARE_READ | FILE_SHARE_WRITE,
                          NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                          NULL);
178 179 180 181 182
    if (kqemu_fd == KQEMU_INVALID_FD) {
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
                KQEMU_DEVICE, GetLastError());
        return -1;
    }
183
#else
B
bellard 已提交
184
    kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
185
    if (kqemu_fd == KQEMU_INVALID_FD) {
186 187
        fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
                KQEMU_DEVICE, strerror(errno));
B
bellard 已提交
188 189
        return -1;
    }
190
#endif
B
bellard 已提交
191
    version = 0;
192 193 194 195
#ifdef _WIN32
    DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
                    &version, sizeof(version), &temp, NULL);
#else
B
bellard 已提交
196
    ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
197
#endif
B
bellard 已提交
198 199 200 201 202 203
    if (version != KQEMU_VERSION) {
        fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
                version, KQEMU_VERSION);
        goto fail;
    }

204
    pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
205
                                  sizeof(uint64_t));
B
bellard 已提交
206 207 208
    if (!pages_to_flush)
        goto fail;

209
    ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
210
                                       sizeof(uint64_t));
B
bellard 已提交
211 212 213
    if (!ram_pages_to_update)
        goto fail;

214
    modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
215
                                      sizeof(uint64_t));
216 217
    if (!modified_ram_pages)
        goto fail;
P
pbrook 已提交
218 219
    modified_ram_pages_table =
        qemu_mallocz(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
220 221 222
    if (!modified_ram_pages_table)
        goto fail;

223
    memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
P
pbrook 已提交
224 225
    kinit.ram_base = kqemu_phys_ram_base;
    kinit.ram_size = kqemu_phys_ram_size;
226 227 228 229
    kinit.ram_dirty = phys_ram_dirty;
    kinit.pages_to_flush = pages_to_flush;
    kinit.ram_pages_to_update = ram_pages_to_update;
    kinit.modified_ram_pages = modified_ram_pages;
230
#ifdef _WIN32
231
    ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
232 233
                          NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
#else
234
    ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
235
#endif
B
bellard 已提交
236 237 238
    if (ret < 0) {
        fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
    fail:
239 240
        kqemu_closefd(kqemu_fd);
        kqemu_fd = KQEMU_INVALID_FD;
B
bellard 已提交
241 242 243
        return -1;
    }
    kqemu_update_cpuid(env);
244
    env->kqemu_enabled = kqemu_allowed;
B
bellard 已提交
245
    nb_pages_to_flush = 0;
B
bellard 已提交
246
    nb_ram_pages_to_update = 0;
247 248

    qpi_init();
B
bellard 已提交
249 250 251 252 253
    return 0;
}

/* Queue the virtual address 'addr' in pages_to_flush[]. Once
   KQEMU_MAX_PAGES_TO_FLUSH entries are queued, the counter is switched to
   KQEMU_FLUSH_ALL instead of storing further addresses. 'env' is not
   used. */
void kqemu_flush_page(CPUState *env, target_ulong addr)
{
    LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    if (nb_pages_to_flush < KQEMU_MAX_PAGES_TO_FLUSH) {
        pages_to_flush[nb_pages_to_flush] = addr;
        nb_pages_to_flush++;
        return;
    }
    /* no room left in the list: request a complete flush */
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}

/* Request a complete flush by setting the pending page counter to
   KQEMU_FLUSH_ALL. Neither 'env' nor 'global' influences the result. */
void kqemu_flush(CPUState *env, int global)
{
    (void)env;
    (void)global;

    LOG_INT("kqemu_flush:\n");
    nb_pages_to_flush = KQEMU_FLUSH_ALL;
}

B
bellard 已提交
267 268
/* Queue 'ram_addr' in ram_pages_to_update[], but only when the dirty byte
   of its page in phys_ram_dirty[] currently equals 0xff. Once
   KQEMU_MAX_RAM_PAGES_TO_UPDATE entries are queued, the counter is
   switched to KQEMU_RAM_PAGES_UPDATE_ALL instead. 'env' is not used. */
void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
{
    LOG_INT("kqemu_set_notdirty: addr=%08lx\n", 
                (unsigned long)ram_addr);
    /* we only track transitions to dirty state */
    if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] == 0xff) {
        if (nb_ram_pages_to_update < KQEMU_MAX_RAM_PAGES_TO_UPDATE) {
            ram_pages_to_update[nb_ram_pages_to_update] = ram_addr;
            nb_ram_pages_to_update++;
        } else {
            nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
        }
    }
}

280 281 282 283
static void kqemu_reset_modified_ram_pages(void)
{
    int i;
    unsigned long page_index;
284

285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
    for(i = 0; i < nb_modified_ram_pages; i++) {
        page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
        modified_ram_pages_table[page_index] = 0;
    }
    nb_modified_ram_pages = 0;
}

/* Record that the RAM page containing 'ram_addr' was modified.

   A page already flagged in modified_ram_pages_table[] is ignored.
   Otherwise it is flagged and appended to modified_ram_pages[]. When the
   list reaches KQEMU_MAX_MODIFIED_RAM_PAGES entries it is handed to the
   driver with KQEMU_MODIFY_RAM_PAGES and then reset. A failure of that
   request is reported on stderr; the list is reset in either case.
   'env' is not used. */
void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
{
    unsigned long page_index;
    int ret;
#ifdef _WIN32
    DWORD temp;
#endif

    page_index = ram_addr >> TARGET_PAGE_BITS;
    if (modified_ram_pages_table[page_index])
        return;

    modified_ram_pages_table[page_index] = 1;
    modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
    if (nb_modified_ram_pages < KQEMU_MAX_MODIFIED_RAM_PAGES)
        return;

    /* flush */
#ifdef _WIN32
    /* DeviceIoControl() returns non-zero on success */
    ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                          &nb_modified_ram_pages,
                          sizeof(nb_modified_ram_pages),
                          NULL, 0, &temp, NULL) ? 0 : -1;
#else
    ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
                &nb_modified_ram_pages);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_MODIFY_RAM_PAGES error=%d: nb_modified_ram_pages=%u\n",
                ret, nb_modified_ram_pages);
    }
    kqemu_reset_modified_ram_pages();
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
/* Describe a guest physical memory range to the driver with
   KQEMU_SET_PHYS_MEM.

   start_addr:  guest physical start address; rounded down to a page
                boundary.
   size:        length of the range; the end is rounded up to a page
                boundary.
   phys_offset: the page-aligned part is passed as the RAM address, the
                bits below the page size select the I/O memory type.

   IO_MEM_RAM and IO_MEM_ROM are mapped to their KQEMU counterparts, the
   QPI I/O memory index to KQEMU_IO_MEM_COMM, and anything else to
   KQEMU_IO_MEM_UNASSIGNED. A failing request is reported on stderr. */
void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size, 
                        ram_addr_t phys_offset)
{
    struct kqemu_phys_mem kphys_mem;
    uint64_t end;
    int ret, io_index;

    /* set the paddings to zero, as done for struct kqemu_init */
    memset(&kphys_mem, 0, sizeof(kphys_mem));

    end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    start_addr &= TARGET_PAGE_MASK;
    kphys_mem.phys_addr = start_addr;
    kphys_mem.size = end - start_addr;
    kphys_mem.ram_addr = phys_offset & TARGET_PAGE_MASK;
    io_index = phys_offset & ~TARGET_PAGE_MASK;
    switch(io_index) {
    case IO_MEM_RAM:
        kphys_mem.io_index = KQEMU_IO_MEM_RAM;
        break;
    case IO_MEM_ROM:
        kphys_mem.io_index = KQEMU_IO_MEM_ROM;
        break;
    default:
        if (qpi_io_memory == io_index) {
            kphys_mem.io_index = KQEMU_IO_MEM_COMM;
        } else {
            kphys_mem.io_index = KQEMU_IO_MEM_UNASSIGNED;
        }
        break;
    }
#ifdef _WIN32
    {
        DWORD temp;
        /* DeviceIoControl() reports success with any non-zero value */
        ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM, 
                              &kphys_mem, sizeof(kphys_mem),
                              NULL, 0, &temp, NULL) ? 0 : -1;
    }
#else
    ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, &kphys_mem);
#endif
    if (ret < 0) {
        fprintf(stderr, "kqemu: KQEMU_SET_PHYS_MEM error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
                ret, start_addr, 
                (unsigned long)size, (unsigned long)phys_offset);
    }
}

B
bellard 已提交
368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
/* Memory image handed to the fsave/frstor instructions by
   save_native_fp_fsave() and restore_native_fp_frstor(). Each 16-bit
   register field is followed by a 16-bit filler. */
struct fpstate {
    uint16_t fpuc;      /* exchanged with env->fpuc */
    uint16_t dummy1;
    uint16_t fpus;      /* exchanged with env->fpus; bits 11-13 hold env->fpstt */
    uint16_t dummy2;
    uint16_t fptag;     /* two bits per register; 3 marks a register flagged in env->fptags[] */
    uint16_t dummy3;

    /* the four fields below are not accessed by the C code in this file */
    uint32_t fpip;
    uint32_t fpcs;
    uint32_t fpoo;
    uint32_t fpos;
    uint8_t fpregs1[8 * 10];    /* eight registers, 10 bytes each */
};

/* Memory image handed to the fxsave/fxrstor instructions by
   save_native_fp_fxsave() and restore_native_fp_fxrstor(). The members
   add up to 512 bytes. */
struct fpxstate {
    /* exchanged with env->fpuc, env->fpus (bits 11-13: env->fpstt) and
       env->fptags[] (one bit per register) */
    uint16_t fpuc;
    uint16_t fpus;
    uint16_t fptag;
    uint16_t fop;

    uint32_t fpuip;
    uint16_t cs_sel;
    uint16_t dummy0;

    uint32_t fpudp;
    uint16_t ds_sel;
    uint16_t dummy1;

    uint32_t mxcsr;
    uint32_t mxcsr_mask;

    uint8_t fpregs1[8 * 16];    /* eight 16-byte slots, 10 bytes used in each */
    uint8_t xmm_regs[16 * 16];  /* sixteen 16-byte slots */
    uint8_t dummy2[96];
};

static struct fpxstate fpx1 __attribute__((aligned(16)));

/* Load the host FPU from the FPU state held in 'env' using frstor.

   The control word, status word (with env->fpstt inserted in bits 11-13),
   tag word and the eight registers, starting at env->fpstt, are copied
   into a struct fpstate image which is then given to the instruction. */
static void restore_native_fp_frstor(CPUState *env)
{
    int fptag, i, j;
    struct fpstate fp1, *fp = &fp1;

    /* fields not filled in below are loaded as zero instead of
       indeterminate stack contents */
    memset(fp, 0, sizeof(*fp));
    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* the FPU automatically computes it */
        }
    }
    fp->fptag = fptag;
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    /* frstor READS the image, so it must be an input operand; declaring
       it as an output would let the compiler discard the stores above */
    asm volatile ("frstor %0" : : "m" (*fp));
}
427

B
bellard 已提交
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
/* Store the host FPU state into 'env' using fsave.

   The control word, status word (top-of-stack extracted into env->fpstt),
   tag word and the eight registers are copied out of the struct fpstate
   image written by the instruction. Afterwards a control word built from
   0x037f and the rounding bits of env->fpuc is loaded with fldcw. */
static void save_native_fp_fsave(CPUState *env)
{
    int fptag, i, j;
    uint16_t fpuc;
    struct fpstate fp1, *fp = &fp1;

    /* fsave WRITES the image, so it must be an output operand; as an
       input the compiler could assume *fp is unchanged by the asm */
    asm volatile ("fsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
        j = (j + 1) & 7;
    }
    /* we must restore the default rounding state */
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}

/* Load the host FPU (and, when env advertises CPUID_SSE, MXCSR and the
   XMM registers) from 'env' using fxrstor.

   The state is assembled in the static, 16-byte aligned image 'fpx1'.
   The tag byte holds one bit per register and is the complement of
   env->fptags[]. */
static void restore_native_fp_fxrstor(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int i, j, fptag;

    fp->fpuc = env->fpuc;
    fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++)
        fptag |= (env->fptags[i] << i);
    fp->fptag = fptag ^ 0xff;

    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        fp->mxcsr = env->mxcsr;
        /* XXX: check if DAZ is not available */
        fp->mxcsr_mask = 0xffff;
        memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    }
    /* fxrstor READS the image, so it must be an input operand; declaring
       it as an output would let the compiler discard the stores above */
    asm volatile ("fxrstor %0" : : "m" (*fp));
}

/* Store the host FPU state (and, when env advertises CPUID_SSE, MXCSR and
   the XMM registers) into 'env' using fxsave.

   The instruction writes the static image 'fpx1', from which the fields
   are copied out. Afterwards the FPU is reinitialized with fninit and a
   control word built from 0x037f and the rounding bits of env->fpuc is
   loaded with fldcw. */
static void save_native_fp_fxsave(CPUState *env)
{
    struct fpxstate *fp = &fpx1;
    int fptag, i, j;
    uint16_t fpuc;

    /* fxsave WRITES the image, so it must be an output operand; as an
       input the compiler could reuse values it stored there earlier */
    asm volatile ("fxsave %0" : "=m" (*fp));
    env->fpuc = fp->fpuc;
    env->fpstt = (fp->fpus >> 11) & 7;
    env->fpus = fp->fpus & ~0x3800;
    fptag = fp->fptag ^ 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = (fptag >> i) & 1;
    }
    j = env->fpstt;
    for (i = 0; i < 8; i++) {
        memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
        j = (j + 1) & 7;
    }
    if (env->cpuid_features & CPUID_SSE) {
        env->mxcsr = fp->mxcsr;
        memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    }

    /* we must restore the default rounding state */
    asm volatile ("fninit");
    fpuc = 0x037f | (env->fpuc & (3 << 10));
    asm volatile("fldcw %0" : : "m" (fpuc));
}

B
bellard 已提交
509 510 511 512
/* Apply the register and segment changes for a syscall instruction
   reported by kqemu (KQEMU_RET_SYSCALL).

   The CS selector is taken from bits 32-47 of env->star, SS is that
   selector plus 8, and the privilege level is set to 0. With
   TARGET_X86_64 and HF_LMA_MASK set, the return address goes to RCX, the
   flags to R11, env->fmask is cleared from eflags and execution continues
   at env->lstar or env->cstar. Otherwise the 32-bit return address goes
   to ECX, IF/RF/VM are cleared and execution continues at the low 32 bits
   of env->star. Always returns 2. */
static int do_syscall(CPUState *env,
                      struct kqemu_cpu_state *kenv)
{
    /* attributes of the stack segment, identical on both paths */
    const int ss_attrs = DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                         DESC_S_MASK |
                         DESC_W_MASK | DESC_A_MASK;
    const int selector = (env->star >> 32) & 0xffff;
    const int cs_sel = selector & 0xfffc;
    const int ss_sel = (selector + 8) & 0xfffc;

#ifdef TARGET_X86_64
    if (env->hflags & HF_LMA_MASK) {
        int from_code64;

        env->regs[R_ECX] = kenv->next_eip;
        env->regs[11] = env->eflags;

        /* sampled before the segment caches are reloaded below
           (presumably because reloading CS may alter hflags) */
        from_code64 = env->hflags & HF_CS64_MASK;

        cpu_x86_set_cpl(env, 0);
        cpu_x86_load_seg_cache(env, R_CS, cs_sel,
                               0, 0xffffffff,
                               DESC_G_MASK | DESC_P_MASK |
                               DESC_S_MASK |
                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
        cpu_x86_load_seg_cache(env, R_SS, ss_sel,
                               0, 0xffffffff,
                               ss_attrs);
        env->eflags &= ~env->fmask;
        env->eip = from_code64 ? env->lstar : env->cstar;
        return 2;
    }
#endif

    env->regs[R_ECX] = (uint32_t)kenv->next_eip;

    cpu_x86_set_cpl(env, 0);
    cpu_x86_load_seg_cache(env, R_CS, cs_sel,
                           0, 0xffffffff,
                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                           DESC_S_MASK |
                           DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
    cpu_x86_load_seg_cache(env, R_SS, ss_sel,
                           0, 0xffffffff,
                           ss_attrs);
    env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
    env->eip = (uint32_t)env->star;
    return 2;
}

562
#ifdef CONFIG_PROFILER
B
bellard 已提交
563 564 565 566 567 568 569 570 571 572 573

#define PC_REC_SIZE 1
#define PC_REC_HASH_BITS 16
#define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

/* One entry of the profiling hash table pc_rec_hash[]: counts how often
   kqemu_record_pc() was called with a given program counter. */
typedef struct PCRecord {
    unsigned long pc;           /* program counter value being counted */
    int64_t count;              /* number of times 'pc' was recorded */
    struct PCRecord *next;      /* next entry of the same hash bucket */
} PCRecord;

574 575
static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
static int nb_pc_records;
B
bellard 已提交
576

577
static void kqemu_record_pc(unsigned long pc)
B
bellard 已提交
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
{
    unsigned long h;
    PCRecord **pr, *r;

    h = pc / PC_REC_SIZE;
    h = h ^ (h >> PC_REC_HASH_BITS);
    h &= (PC_REC_HASH_SIZE - 1);
    pr = &pc_rec_hash[h];
    for(;;) {
        r = *pr;
        if (r == NULL)
            break;
        if (r->pc == pc) {
            r->count++;
            return;
        }
        pr = &r->next;
    }
    r = malloc(sizeof(PCRecord));
    r->count = 1;
    r->pc = pc;
    r->next = NULL;
    *pr = r;
    nb_pc_records++;
}

604
static int pc_rec_cmp(const void *p1, const void *p2)
B
bellard 已提交
605 606 607 608 609 610 611 612 613 614 615
{
    PCRecord *r1 = *(PCRecord **)p1;
    PCRecord *r2 = *(PCRecord **)p2;
    if (r1->count < r2->count)
        return 1;
    else if (r1->count == r2->count)
        return 0;
    else
        return -1;
}

616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
/* Free every record of the profiling hash table, leave all buckets empty
   and reset the record counter. */
static void kqemu_record_flush(void)
{
    int bucket;

    for (bucket = 0; bucket < PC_REC_HASH_SIZE; bucket++) {
        PCRecord *cur = pc_rec_hash[bucket];

        while (cur != NULL) {
            /* fetch the link before the record is released */
            PCRecord *following = cur->next;

            free(cur);
            cur = following;
        }
        pc_rec_hash[bucket] = NULL;
    }
    nb_pc_records = 0;
}

B
bellard 已提交
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647
void kqemu_record_dump(void)
{
    PCRecord **pr, *r;
    int i, h;
    FILE *f;
    int64_t total, sum;

    pr = malloc(sizeof(PCRecord *) * nb_pc_records);
    i = 0;
    total = 0;
    for(h = 0; h < PC_REC_HASH_SIZE; h++) {
        for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
            pr[i++] = r;
            total += r->count;
        }
    }
    qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
648

B
bellard 已提交
649 650 651 652 653
    f = fopen("/tmp/kqemu.stats", "w");
    if (!f) {
        perror("/tmp/kqemu.stats");
        exit(1);
    }
B
bellard 已提交
654
    fprintf(f, "total: %" PRId64 "\n", total);
B
bellard 已提交
655 656 657 658
    sum = 0;
    for(i = 0; i < nb_pc_records; i++) {
        r = pr[i];
        sum += r->count;
659 660 661
        fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                r->pc,
                r->count,
B
bellard 已提交
662 663 664 665 666
                (double)r->count / (double)total * 100.0,
                (double)sum / (double)total * 100.0);
    }
    fclose(f);
    free(pr);
667 668

    kqemu_record_flush();
B
bellard 已提交
669 670 671
}
#endif

672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
/* Copy the four fields of a QEMU segment cache 'sc' (selector, base,
   limit, flags) into the kqemu segment descriptor 'ksc'. */
static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
                                  const SegmentCache *sc)
{
    ksc->base = sc->base;
    ksc->limit = sc->limit;
    ksc->flags = sc->flags;
    ksc->selector = sc->selector;
}

/* Copy the four fields of a kqemu segment descriptor 'ksc' (selector,
   base, limit, flags) back into the QEMU segment cache 'sc'. */
static inline void kqemu_save_seg(SegmentCache *sc,
                                  const struct kqemu_segment_cache *ksc)
{
    sc->base = ksc->base;
    sc->limit = ksc->limit;
    sc->flags = ksc->flags;
    sc->selector = ksc->selector;
}

B
bellard 已提交
690 691 692
/* Run guest code through the kqemu driver.

   The CPU state of 'env' is copied into a struct kqemu_cpu_state, the
   native FPU state is loaded, KQEMU_EXEC is issued, and the resulting
   state is copied back into 'env'. Pending TLB flushes, dirty updates and
   modified pages reported by the driver are then processed and the hidden
   flags are recomputed.

   Return value, depending on the code reported by the driver:
     KQEMU_RET_SYSCALL                 -> result of do_syscall()
     KQEMU_RET_INT / _EXCEPTION class  -> 1, exception fields of env set
     KQEMU_RET_INTR                    -> 0
     KQEMU_RET_SOFTMMU                 -> 2
   Any other code dumps the CPU state and terminates the process. */
int kqemu_cpu_exec(CPUState *env)
{
    struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    int ret, cpl, i;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
#ifdef _WIN32
    DWORD temp;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    LOG_INT("kqemu: cpu_exec: enter\n");
    LOG_INT_STATE(env);
    for(i = 0; i < CPU_NB_REGS; i++)
        kenv->regs[i] = env->regs[i];
    kenv->eip = env->eip;
    kenv->eflags = env->eflags;
    for(i = 0; i < 6; i++)
        kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    kqemu_load_seg(&kenv->ldt, &env->ldt);
    kqemu_load_seg(&kenv->tr, &env->tr);
    kqemu_load_seg(&kenv->gdt, &env->gdt);
    kqemu_load_seg(&kenv->idt, &env->idt);
    kenv->cr0 = env->cr[0];
    kenv->cr2 = env->cr[2];
    kenv->cr3 = env->cr[3];
    kenv->cr4 = env->cr[4];
    kenv->a20_mask = env->a20_mask;
    kenv->efer = env->efer;
    kenv->tsc_offset = 0;
    kenv->star = env->star;
    kenv->sysenter_cs = env->sysenter_cs;
    kenv->sysenter_esp = env->sysenter_esp;
    kenv->sysenter_eip = env->sysenter_eip;
#ifdef TARGET_X86_64
    kenv->lstar = env->lstar;
    kenv->cstar = env->cstar;
    kenv->fmask = env->fmask;
    kenv->kernelgsbase = env->kernelgsbase;
#endif
    /* the debug address registers are only passed when a breakpoint
       enable bit is set in DR7 */
    if (env->dr[7] & 0xff) {
        kenv->dr7 = env->dr[7];
        kenv->dr0 = env->dr[0];
        kenv->dr1 = env->dr[1];
        kenv->dr2 = env->dr[2];
        kenv->dr3 = env->dr[3];
    } else {
        kenv->dr7 = 0;
    }
    kenv->dr6 = env->dr[6];
    cpl = (env->hflags & HF_CPL_MASK);
    kenv->cpl = cpl;
    kenv->nb_pages_to_flush = nb_pages_to_flush;
    kenv->user_only = (env->kqemu_enabled == 1);
    kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    nb_ram_pages_to_update = 0;
    kenv->nb_modified_ram_pages = nb_modified_ram_pages;

    /* the count was handed over above; the addresses stay in
       modified_ram_pages[] */
    kqemu_reset_modified_ram_pages();

    if (env->cpuid_features & CPUID_FXSR)
        restore_native_fp_fxrstor(env);
    else
        restore_native_fp_frstor(env);

#ifdef _WIN32
    if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
                        kenv, sizeof(struct kqemu_cpu_state),
                        kenv, sizeof(struct kqemu_cpu_state),
                        &temp, NULL)) {
        ret = kenv->retval;
    } else {
        ret = -1;
    }
#else
    /* kcpu_state lives on the stack: preset the result so that a failing
       ioctl yields -1, as in the Windows branch, instead of an
       indeterminate value */
    kenv->retval = -1;
    ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    ret = kenv->retval;
#endif
    if (env->cpuid_features & CPUID_FXSR)
        save_native_fp_fxsave(env);
    else
        save_native_fp_fsave(env);

    for(i = 0; i < CPU_NB_REGS; i++)
        env->regs[i] = kenv->regs[i];
    env->eip = kenv->eip;
    env->eflags = kenv->eflags;
    for(i = 0; i < 6; i++)
        kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    cpu_x86_set_cpl(env, kenv->cpl);
    kqemu_save_seg(&env->ldt, &kenv->ldt);
    env->cr[0] = kenv->cr0;
    env->cr[4] = kenv->cr4;
    env->cr[3] = kenv->cr3;
    env->cr[2] = kenv->cr2;
    env->dr[6] = kenv->dr6;
#ifdef TARGET_X86_64
    env->kernelgsbase = kenv->kernelgsbase;
#endif

    /* flush pages as indicated by kqemu */
    if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
        tlb_flush(env, 1);
    } else {
        for(i = 0; i < kenv->nb_pages_to_flush; i++) {
            tlb_flush_page(env, pages_to_flush[i]);
        }
    }
    nb_pages_to_flush = 0;

#ifdef CONFIG_PROFILER
    kqemu_time += profile_getclock() - ti;
    kqemu_exec_count++;
#endif

    if (kenv->nb_ram_pages_to_update > 0) {
        cpu_tlb_update_dirty(env);
    }

    /* invalidate the translated code of every page reported as modified */
    if (kenv->nb_modified_ram_pages > 0) {
        for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
            unsigned long addr;
            addr = modified_ram_pages[i];
            tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
        }
    }

    /* restore the hidden flags */
    {
        unsigned int new_hflags;
#ifdef TARGET_X86_64
        if ((env->hflags & HF_LMA_MASK) &&
            (env->segs[R_CS].flags & DESC_L_MASK)) {
            /* long mode */
            new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
        } else
#endif
        {
            /* legacy / compatibility case */
            new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_CS32_SHIFT);
            new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
                >> (DESC_B_SHIFT - HF_SS32_SHIFT);
            if (!(env->cr[0] & CR0_PE_MASK) ||
                   (env->eflags & VM_MASK) ||
                   !(env->hflags & HF_CS32_MASK)) {
                /* XXX: try to avoid this test. The problem comes from the
                   fact that in real mode or vm86 mode we only modify the
                   'base' and 'selector' fields of the segment cache to go
                   faster. A solution may be to force addseg to one in
                   translate-i386.c. */
                new_hflags |= HF_ADDSEG_MASK;
            } else {
                new_hflags |= ((env->segs[R_DS].base |
                                env->segs[R_ES].base |
                                env->segs[R_SS].base) != 0) <<
                    HF_ADDSEG_SHIFT;
            }
        }
        env->hflags = (env->hflags &
           ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
            new_hflags;
    }
    /* update FPU flags */
    env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
        ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    if (env->cr[4] & CR4_OSFXSR_MASK)
        env->hflags |= HF_OSFXSR_MASK;
    else
        env->hflags &= ~HF_OSFXSR_MASK;

    LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    if (ret == KQEMU_RET_SYSCALL) {
        /* syscall instruction */
        return do_syscall(env, kenv);
    } else
    if ((ret & 0xff00) == KQEMU_RET_INT) {
        env->exception_index = ret & 0xff;
        env->error_code = 0;
        env->exception_is_int = 1;
        env->exception_next_eip = kenv->next_eip;
#ifdef CONFIG_PROFILER
        kqemu_ret_int_count++;
#endif
        LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
        LOG_INT_STATE(env);
        return 1;
    } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
        env->exception_index = ret & 0xff;
        env->error_code = kenv->error_code;
        env->exception_is_int = 0;
        env->exception_next_eip = 0;
#ifdef CONFIG_PROFILER
        kqemu_ret_excp_count++;
#endif
        LOG_INT("kqemu: exception v=%02x e=%04x:\n",
                    env->exception_index, env->error_code);
        LOG_INT_STATE(env);
        return 1;
    } else if (ret == KQEMU_RET_INTR) {
#ifdef CONFIG_PROFILER
        kqemu_ret_intr_count++;
#endif
        LOG_INT_STATE(env);
        return 0;
    } else if (ret == KQEMU_RET_SOFTMMU) {
#ifdef CONFIG_PROFILER
        {
            unsigned long pc = env->eip + env->segs[R_CS].base;
            kqemu_record_pc(pc);
        }
#endif
        LOG_INT_STATE(env);
        return 2;
    } else {
        cpu_dump_state(env, stderr, fprintf, 0);
        fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
        exit(1);
    }
    return 0;
}

915 916
/* Interrupt a running KQEMU_EXEC request. Only Windows builds do any
   work here; on other hosts the function body is empty. */
void kqemu_cpu_interrupt(CPUState *env)
{
#ifdef _WIN32
    /* cancelling the I/O request causes KQEMU to finish executing the
       current block and successfully returning. */
    CancelIo(kqemu_fd);
#endif
}

924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991
/*
   QEMU paravirtualization interface. The current interface only
   allows modifying the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/

#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000

/* Byte read handler of the QPI page: no byte-sized access is
   implemented, the result is always 0. */
static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}

/* 16-bit read handler of the QPI page: no word-sized access is
   implemented, the result is always 0. */
static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
{
    (void)opaque;
    (void)addr;
    return 0;
}

/* Byte write handler of the QPI page: the write is ignored. */
static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}

/* 16-bit write handler of the QPI page: the write is ignored. */
static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    (void)opaque;
    (void)addr;
    (void)val;
}

/* 32-bit read handler of the QPI page: returns the IF and IOPL bits of
   the eflags of the CPU in cpu_single_env, or 0 when no CPU is set
   there. 'opaque' and 'addr' are not used. */
static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
{
    CPUState *cpu = cpu_single_env;

    return cpu ? (cpu->eflags & (IF_MASK | IOPL_MASK)) : 0;
}

/* 32-bit write handler of the QPI page: replaces the IF and IOPL bits of
   the eflags of the CPU in cpu_single_env with the corresponding bits of
   'val'. Nothing happens when no CPU is set there. 'opaque' and 'addr'
   are not used.

   Note: after writing to this address, the guest code must make sure
   it is exiting the current TB. pushf/popf can be used for that
   purpose. */
static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    CPUState *cpu = cpu_single_env;

    if (cpu) {
        cpu->eflags &= ~(IF_MASK | IOPL_MASK);
        cpu->eflags |= val & (IF_MASK | IOPL_MASK);
    }
}

/* Read handlers of the QPI page, in the order byte, word, long; passed
   to cpu_register_io_memory() by qpi_init(). */
static CPUReadMemoryFunc *qpi_mem_read[3] = {
    [0] = qpi_mem_readb,
    [1] = qpi_mem_readw,
    [2] = qpi_mem_readl,
};

/* Write handlers of the QPI page, in the order byte, word, long; passed
   to cpu_register_io_memory() by qpi_init(). */
static CPUWriteMemoryFunc *qpi_mem_write[3] = {
    [0] = qpi_mem_writeb,
    [1] = qpi_mem_writew,
    [2] = qpi_mem_writel,
};

static void qpi_init(void)
{
    kqemu_comm_base = 0xff000000 | 1;
992
    qpi_io_memory = cpu_register_io_memory(
993 994 995 996 997
                                           qpi_mem_read, 
                                           qpi_mem_write, NULL);
    cpu_register_physical_memory(kqemu_comm_base & ~0xfff, 
                                 0x1000, qpi_io_memory);
}
B
bellard 已提交
998
#endif