提交 c51d9cb5 编写于 作者: B Blue Swirl

Merge branch 'tci' of git://qemu.weilnetz.de/qemu

* 'tci' of git://qemu.weilnetz.de/qemu:
  tcg: Add tcg interpreter to configure / make
  tcg: Add tci disassembler
  tcg: Add interpreter for bytecode
  tcg: Add bytecode generator for tcg interpreter
  tcg: Make ARRAY_SIZE(tcg_op_defs) globally available
  tcg: TCG targets may define tcg_qemu_tb_exec
......@@ -69,6 +69,7 @@ all: $(PROGS) stap
# cpu emulator library
libobj-y = exec.o translate-all.o cpu-exec.o translate.o
libobj-y += tcg/tcg.o tcg/optimize.o
libobj-$(CONFIG_TCG_INTERPRETER) += tci.o
libobj-y += fpu/softfloat.o
libobj-y += op_helper.o helper.o
ifeq ($(TARGET_BASE_ARCH), i386)
......@@ -85,6 +86,9 @@ libobj-$(TARGET_SPARC) += int32_helper.o
libobj-$(TARGET_SPARC64) += int64_helper.o
libobj-y += disas.o
libobj-$(CONFIG_TCI_DIS) += tci-dis.o
tci-dis.o: QEMU_CFLAGS += -I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/tci
$(libobj-y): $(GENERATED_HEADERS)
......
......@@ -138,6 +138,7 @@ debug_tcg="no"
debug_mon="no"
debug="no"
strip_opt="yes"
tcg_interpreter="no"
bigendian="no"
mingw32="no"
EXESUF=""
......@@ -654,6 +655,10 @@ for opt do
;;
--enable-kvm) kvm="yes"
;;
--disable-tcg-interpreter) tcg_interpreter="no"
;;
--enable-tcg-interpreter) tcg_interpreter="yes"
;;
--disable-spice) spice="no"
;;
--enable-spice) spice="yes"
......@@ -1009,6 +1014,7 @@ echo " --enable-bluez enable bluez stack connectivity"
echo " --disable-slirp disable SLIRP userspace network connectivity"
echo " --disable-kvm disable KVM acceleration support"
echo " --enable-kvm enable KVM acceleration support"
echo " --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)"
echo " --disable-nptl disable usermode NPTL support"
echo " --enable-nptl enable usermode NPTL support"
echo " --enable-system enable all system emulation targets"
......@@ -2755,6 +2761,7 @@ echo "Linux AIO support $linux_aio"
echo "ATTR/XATTR support $attr"
echo "Install blobs $blobs"
echo "KVM support $kvm"
echo "TCG interpreter $tcg_interpreter"
echo "fdt support $fdt"
echo "preadv support $preadv"
echo "fdatasync $fdatasync"
......@@ -2803,6 +2810,15 @@ case "$cpu" in
armv4b|armv4l)
ARCH=arm
;;
*)
if test "$tcg_interpreter" = "yes" ; then
echo "Unsupported CPU = $cpu, will use TCG with TCI (experimental)"
ARCH=tci
else
echo "Unsupported CPU = $cpu, try --enable-tcg-interpreter"
exit 1
fi
;;
esac
echo "ARCH=$ARCH" >> $config_host_mak
if test "$debug_tcg" = "yes" ; then
......@@ -3036,6 +3052,9 @@ fi
if test "$signalfd" = "yes" ; then
echo "CONFIG_SIGNALFD=y" >> $config_host_mak
fi
if test "$tcg_interpreter" = "yes" ; then
echo "CONFIG_TCG_INTERPRETER=y" >> $config_host_mak
fi
if test "$need_offsetof" = "yes" ; then
echo "CONFIG_NEED_OFFSETOF=y" >> $config_host_mak
fi
......@@ -3509,7 +3528,9 @@ cflags=""
includes=""
ldflags=""
if test "$ARCH" = "sparc64" ; then
if test "$tcg_interpreter" = "yes"; then
includes="-I\$(SRC_PATH)/tcg/tci $includes"
elif test "$ARCH" = "sparc64" ; then
includes="-I\$(SRC_PATH)/tcg/sparc $includes"
elif test "$ARCH" = "s390x" ; then
includes="-I\$(SRC_PATH)/tcg/s390 $includes"
......@@ -3586,6 +3607,10 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
;;
esac
done
if test "$tcg_interpreter" = "yes" ; then
echo "CONFIG_TCI_DIS=y" >> $config_target_mak
echo "CONFIG_TCI_DIS=y" >> $libdis_config_mak
fi
case "$ARCH" in
alpha)
......@@ -3632,7 +3657,12 @@ if test "$gprof" = "yes" ; then
fi
fi
linker_script="-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/\$(ARCH).ld"
if test "$ARCH" = "tci"; then
linker_script=""
else
linker_script="-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/\$(ARCH).ld"
fi
if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
case "$ARCH" in
sparc)
......
......@@ -365,6 +365,7 @@ typedef struct disassemble_info {
target address. Return number of bytes processed. */
typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
int print_insn_tci(bfd_vma, disassemble_info*);
int print_insn_big_mips (bfd_vma, disassemble_info*);
int print_insn_little_mips (bfd_vma, disassemble_info*);
int print_insn_i386 (bfd_vma, disassemble_info*);
......
......@@ -273,7 +273,9 @@ void disas(FILE *out, void *code, unsigned long size)
#else
disasm_info.endian = BFD_ENDIAN_LITTLE;
#endif
#if defined(__i386__)
#if defined(CONFIG_TCG_INTERPRETER)
print_insn = print_insn_tci;
#elif defined(__i386__)
disasm_info.mach = bfd_mach_i386_i386;
print_insn = print_insn_i386;
#elif defined(__x86_64__)
......
......@@ -19,7 +19,12 @@
#if !defined(__DYNGEN_EXEC_H__)
#define __DYNGEN_EXEC_H__
#if defined(__i386__)
#if defined(CONFIG_TCG_INTERPRETER)
/* The TCG interpreter does not need a special register AREG0,
* but it is possible to use one by defining AREG0.
* On i386, register edi seems to work. */
/* Run without special register AREG0 or use a value defined elsewhere. */
#elif defined(__i386__)
#define AREG0 "ebp"
#elif defined(__x86_64__)
#define AREG0 "r14"
......@@ -55,6 +60,11 @@
#error unsupported CPU
#endif
#if defined(AREG0)
register CPUState *env asm(AREG0);
#else
/* TODO: Try env = cpu_single_env. */
extern CPUState *env;
#endif
#endif /* !defined(__DYNGEN_EXEC_H__) */
......@@ -122,6 +122,8 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
#if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__)
#define USE_DIRECT_JUMP
#elif defined(CONFIG_TCG_INTERPRETER)
#define USE_DIRECT_JUMP
#endif
struct TranslationBlock {
......@@ -189,7 +191,14 @@ extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
#if defined(USE_DIRECT_JUMP)
#if defined(_ARCH_PPC)
#if defined(CONFIG_TCG_INTERPRETER)
static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
{
/* patch the branch destination */
*(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
/* no need to flush icache explicitly */
}
#elif defined(_ARCH_PPC)
void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr);
#define tb_set_jmp_target1 ppc_tb_set_jmp_target
#elif defined(__i386__) || defined(__x86_64__)
......@@ -223,6 +232,8 @@ static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr
__asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
#endif
}
#else
#error tb_set_jmp_target1 is missing
#endif
static inline void tb_set_jmp_target(TranslationBlock *tb,
......@@ -269,7 +280,14 @@ extern int tb_invalidated_flag;
/* The return address may point to the start of the next instruction.
Subtracting one gets us the call instruction itself. */
#if defined(__s390__) && !defined(__s390x__)
#if defined(CONFIG_TCG_INTERPRETER)
/* Alpha and SH4 user mode emulations and Softmmu call GETPC().
For all others, GETPC remains undefined (which makes TCI a little faster. */
# if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4)
extern void *tci_tb_ptr;
# define GETPC() tci_tb_ptr
# endif
#elif defined(__s390__) && !defined(__s390x__)
# define GETPC() ((void*)(((unsigned long)__builtin_return_address(0) & 0x7fffffffUL) - 1))
#elif defined(__arm__)
/* Thumb return addresses have the low bit set, so we need to subtract two.
......
......@@ -96,3 +96,7 @@ enum {
#define TCG_AREG0 TCG_REG_R27
#define TCG_TARGET_HAS_GUEST_BASE
#define tcg_qemu_tb_exec(env, tb_ptr) \
((long REGPARM __attribute__ ((longcall)) \
(*)(void *, void *))code_gen_prologue)(env, tb_ptr)
......@@ -89,6 +89,7 @@ TCGOpDef tcg_op_defs[] = {
#include "tcg-opc.h"
#undef DEF
};
const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;
......
......@@ -525,7 +525,8 @@ typedef struct TCGOpDef {
} TCGOpDef;
extern TCGOpDef tcg_op_defs[];
extern const size_t tcg_op_defs_max;
typedef struct TCGTargetOpDef {
TCGOpcode op;
const char *args_ct_str[TCG_MAX_OP_ARGS];
......@@ -584,10 +585,9 @@ TCGv_i32 tcg_const_local_i32(int32_t val);
TCGv_i64 tcg_const_local_i64(int64_t val);
extern uint8_t code_gen_prologue[];
#if defined(_ARCH_PPC) && !defined(_ARCH_PPC64)
#define tcg_qemu_tb_exec(env, tb_ptr) \
((long REGPARM __attribute__ ((longcall)) (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
#else
#define tcg_qemu_tb_exec(env, tb_ptr) \
/* TCG targets may use a different definition of tcg_qemu_tb_exec. */
#if !defined(tcg_qemu_tb_exec)
# define tcg_qemu_tb_exec(env, tb_ptr) \
((long REGPARM (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
#endif
TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil.
This file is released under the BSD license.
1) Introduction
TCG (Tiny Code Generator) is a code generator which translates
code fragments ("basic blocks") from target code (any of the
targets supported by QEMU) to a code representation which
can be run on a host.
QEMU can create native code for some hosts (arm, hppa, i386, ia64, ppc, ppc64,
s390, sparc, x86_64). For others, unofficial host support was written.
By adding a code generator for a virtual machine and using an
interpreter for the generated bytecode, it is possible to
support (almost) any host.
This is what TCI (Tiny Code Interpreter) does.
2) Implementation
Like each TCG host frontend, TCI implements the code generator in
tcg-target.c, tcg-target.h. Both files are in directory tcg/tci.
The additional file tcg/tci.c adds the interpreter.
The bytecode consists of opcodes (same numeric values as those used by
TCG), command length and arguments of variable size and number.
3) Usage
For hosts without native TCG, the interpreter TCI must be enabled by
configure --enable-tcg-interpreter
If configure is called without --enable-tcg-interpreter, it will
suggest using this option. Setting it automatically would need
additional code in configure which must be fixed when new native TCG
implementations are added.
System emulation should work on any 32 or 64 bit host.
User mode emulation might work. Maybe a new linker script (*.ld)
is needed. Byte order might be wrong (on big endian hosts)
and need fixes in configure.
For hosts with native TCG, the interpreter TCI can be enabled by
configure --enable-tcg-interpreter
The only difference from running QEMU with TCI to running without TCI
should be speed. Especially during development of TCI, it was very
useful to compare runs with and without TCI. Create /tmp/qemu.log by
qemu-system-i386 -d in_asm,op_opt,cpu -singlestep
once with interpreter and once without interpreter and compare the resulting
qemu.log files. This is also useful to see the effects of additional
registers or additional opcodes (it is easy to modify the virtual machine).
It can also be used to verify native TCGs.
Hosts with native TCG can also enable TCI by claiming to be unsupported:
configure --cpu=unknown --enable-tcg-interpreter
configure then no longer uses the native linker script (*.ld) for
user mode emulation.
4) Status
TCI needs special implementation for 32 and 64 bit host, 32 and 64 bit target,
host and target with same or different endianness.
| host (le) host (be)
| 32 64 32 64
------------+------------------------------------------------------------
target (le) | s0, u0 s1, u1 s?, u? s?, u?
32 bit |
|
target (le) | sc, uc s1, u1 s?, u? s?, u?
64 bit |
|
target (be) | sc, u0 sc, uc s?, u? s?, u?
32 bit |
|
target (be) | sc, uc sc, uc s?, u? s?, u?
64 bit |
|
System emulation
s? = untested
sc = compiles
s0 = bios works
s1 = grub works
s2 = Linux boots
Linux user mode emulation
u? = untested
uc = compiles
u0 = static hello works
u1 = linux-user-test works
5) Todo list
* TCI is not widely tested. It was written and tested on a x86_64 host
running i386 and x86_64 system emulation and Linux user mode.
A cross compiled QEMU for i386 host also works with the same basic tests.
A cross compiled QEMU for mipsel host works, too. It is terribly slow
because I run it in a mips malta emulation, so it is an interpreted
emulation in an emulation.
A cross compiled QEMU for arm host works (tested with pc bios).
A cross compiled QEMU for ppc host works at least partially:
i386-linux-user/qemu-i386 can run a simple hello-world program
(tested in a ppc emulation).
* Some TCG opcodes are either missing in the code generator and/or
in the interpreter. These opcodes raise a runtime exception, so it is
possible to see where code must be added.
* The pseudo code is not optimized and still ugly. For hosts with special
alignment requirements, it needs some fixes (maybe aligned bytecode
would also improve speed for hosts which support byte alignment).
* A better disassembler for the pseudo code would be nice (a very primitive
disassembler is included in tcg-target.c).
* It might be useful to have a runtime option which selects the native TCG
or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
is a configure option, so you need two compilations of QEMU.
此差异已折叠。
/*
* Tiny Code Generator for QEMU
*
* Copyright (c) 2009, 2011 Stefan Weil
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* This code implements a TCG which does not generate machine code for some
* real target machine but which generates virtual machine code for an
* interpreter. Interpreted pseudo code is slow, but it works on any host.
*
* Some remarks might help in understanding the code:
*
* "target" or "TCG target" is the machine which runs the generated code.
* This is different to the usual meaning in QEMU where "target" is the
* emulated machine. So normally QEMU host is identical to TCG target.
* Here the TCG target is a virtual machine, but this virtual machine must
* use the same word size like the real machine.
* Therefore, we need both 32 and 64 bit virtual machines (interpreter).
*/
#if !defined(TCG_TARGET_H)
#define TCG_TARGET_H
#include "config-host.h"
#define TCG_TARGET_INTERPRETER 1
#ifdef CONFIG_DEBUG_TCG
/* Enable debug output. */
#define CONFIG_DEBUG_TCG_INTERPRETER
#endif
#if 0 /* TCI tries to emulate a little endian host. */
#if defined(HOST_WORDS_BIGENDIAN)
# define TCG_TARGET_WORDS_BIGENDIAN
#endif
#endif
/* Optional instructions. */
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
/* Not more than one of the next two defines must be 1. */
#define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_div2_i32 0
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
#define TCG_TARGET_HAS_ext8u_i32 1
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_andc_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_neg_i32 1
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_rot_i32 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_deposit_i64 0
/* Not more than one of the next two defines must be 1. */
#define TCG_TARGET_HAS_div_i64 0
#define TCG_TARGET_HAS_div2_i64 0
#define TCG_TARGET_HAS_ext8s_i64 1
#define TCG_TARGET_HAS_ext16s_i64 1
#define TCG_TARGET_HAS_ext32s_i64 1
#define TCG_TARGET_HAS_ext8u_i64 1
#define TCG_TARGET_HAS_ext16u_i64 1
#define TCG_TARGET_HAS_ext32u_i64 1
#define TCG_TARGET_HAS_andc_i64 0
#define TCG_TARGET_HAS_eqv_i64 0
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_neg_i64 1
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_orc_i64 0
#define TCG_TARGET_HAS_rot_i64 1
#endif /* TCG_TARGET_REG_BITS == 64 */
/* Offset to user memory in user mode. */
#define TCG_TARGET_HAS_GUEST_BASE
/* Number of registers available.
For 32 bit hosts, we need more than 8 registers (call arguments). */
/* #define TCG_TARGET_NB_REGS 8 */
#define TCG_TARGET_NB_REGS 16
/* #define TCG_TARGET_NB_REGS 32 */
/* List of registers which are used by TCG. */
typedef enum {
TCG_REG_R0 = 0,
TCG_REG_R1,
TCG_REG_R2,
TCG_REG_R3,
TCG_REG_R4,
TCG_REG_R5,
TCG_REG_R6,
TCG_REG_R7,
TCG_AREG0 = TCG_REG_R7,
#if TCG_TARGET_NB_REGS >= 16
TCG_REG_R8,
TCG_REG_R9,
TCG_REG_R10,
TCG_REG_R11,
TCG_REG_R12,
TCG_REG_R13,
TCG_REG_R14,
TCG_REG_R15,
#if TCG_TARGET_NB_REGS >= 32
TCG_REG_R16,
TCG_REG_R17,
TCG_REG_R18,
TCG_REG_R19,
TCG_REG_R20,
TCG_REG_R21,
TCG_REG_R22,
TCG_REG_R23,
TCG_REG_R24,
TCG_REG_R25,
TCG_REG_R26,
TCG_REG_R27,
TCG_REG_R28,
TCG_REG_R29,
TCG_REG_R30,
TCG_REG_R31,
#endif
#endif
/* Special value UINT8_MAX is used by TCI to encode constant values. */
TCG_CONST = UINT8_MAX
} TCGRegister;
void tci_disas(uint8_t opc);
unsigned long tcg_qemu_tb_exec(CPUState *env, uint8_t *tb_ptr);
#define tcg_qemu_tb_exec tcg_qemu_tb_exec
#endif /* TCG_TARGET_H */
/*
* Tiny Code Interpreter for QEMU - disassembler
*
* Copyright (c) 2011 Stefan Weil
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "dis-asm.h"
#include "tcg/tcg.h"
/* Disassemble TCI bytecode. */
int print_insn_tci(bfd_vma addr, disassemble_info *info)
{
int length;
uint8_t byte;
int status;
TCGOpcode op;
status = info->read_memory_func(addr, &byte, 1, info);
if (status != 0) {
info->memory_error_func(status, addr, info);
return -1;
}
op = byte;
addr++;
status = info->read_memory_func(addr, &byte, 1, info);
if (status != 0) {
info->memory_error_func(status, addr, info);
return -1;
}
length = byte;
if (op >= tcg_op_defs_max) {
info->fprintf_func(info->stream, "illegal opcode %d", op);
} else {
const TCGOpDef *def = &tcg_op_defs[op];
int nb_oargs = def->nb_oargs;
int nb_iargs = def->nb_iargs;
int nb_cargs = def->nb_cargs;
/* TODO: Improve disassembler output. */
info->fprintf_func(info->stream, "%s\to=%d i=%d c=%d",
def->name, nb_oargs, nb_iargs, nb_cargs);
}
return length;
}
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册