diff --git a/Makefile b/Makefile index e5f495807fc93f7d62fa8add9042253b7bae4c74..41651e7b62ede5ad47a57bad2768c01bae74906f 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,26 @@ SO_CFLAGS = -fPIC -D_SHARE=1 SO_LDLAGS = -shared -fPIC endif +DIFF ?= kvm +ifneq ($(ISA),x86) +ifneq ($(DIFF),qemu) +DIFF = qemu +$(info KVM is only supported with ISA=x86, use QEMU instead) +endif +endif + +ifeq ($(DIFF),qemu) +DIFF_REF_PATH = $(NEMU_HOME)/tools/qemu-diff +DIFF_REF_SO = $(DIFF_REF_PATH)/build/$(ISA)-qemu-so +CFLAGS += -D__DIFF_REF_QEMU__ +else ifeq ($(DIFF),kvm) +DIFF_REF_PATH = $(NEMU_HOME)/tools/kvm-diff +DIFF_REF_SO = $(DIFF_REF_PATH)/build/$(ISA)-kvm-so +CFLAGS += -D__DIFF_REF_KVM__ +else +$(error invalid DIFF. Supported: qemu kvm) +endif + OBJ_DIR ?= $(BUILD_DIR)/obj-$(ISA)-$(ENGINE)$(SO) BINARY ?= $(BUILD_DIR)/$(ISA)-$(NAME)-$(ENGINE)$(SO) @@ -40,12 +60,6 @@ INCLUDES = $(addprefix -I, $(INC_DIR)) CFLAGS += -O2 -MMD -Wall -Werror -ggdb3 $(INCLUDES) \ -D__ISA__=$(ISA) -D__ISA_$(ISA)__ -D_ISA_H_=\"isa/$(ISA).h\" -QEMU_DIFF_PATH = $(NEMU_HOME)/tools/qemu-diff -QEMU_SO = $(QEMU_DIFF_PATH)/build/$(ISA)-qemu-so - -$(QEMU_SO): - $(MAKE) -C $(QEMU_DIFF_PATH) - # Files to be compiled SRCS = $(shell find src/ -name "*.c" | grep -v "isa\|engine") SRCS += $(shell find src/isa/$(ISA) -name "*.c") @@ -64,11 +78,11 @@ $(OBJ_DIR)/%.o: src/%.c # Some convenient rules -.PHONY: app run gdb clean run-env $(QEMU_SO) +.PHONY: app run gdb clean run-env $(DIFF_REF_SO) app: $(BINARY) override ARGS ?= --log=$(BUILD_DIR)/nemu-log.txt -override ARGS += --diff=$(QEMU_SO) +override ARGS += --diff=$(DIFF_REF_SO) # Command to execute NEMU IMG := @@ -79,7 +93,7 @@ $(BINARY): $(OBJS) @echo + LD $@ @$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl -run-env: $(BINARY) $(QEMU_SO) +run-env: $(BINARY) $(DIFF_REF_SO) run: run-env $(call git_commit, "run") @@ -89,6 +103,9 @@ gdb: run-env $(call git_commit, "gdb") gdb -s $(BINARY) --args $(NEMU_EXEC) +$(DIFF_REF_SO): + $(MAKE) -C 
$(DIFF_REF_PATH) + clean: -rm -rf $(BUILD_DIR) $(MAKE) -C tools/gen-expr clean diff --git a/include/cpu/decode.h b/include/cpu/decode.h index 140b4d0ea5b2d0c314708df9fe944534b3a26aa9..1b7bb8150671e028fc1a8afd3d6eb9aa92eeab5b 100644 --- a/include/cpu/decode.h +++ b/include/cpu/decode.h @@ -26,7 +26,7 @@ typedef struct { vaddr_t jmp_pc; Operand src1, dest, src2; int width; - rtlreg_t tmp_reg[3]; + rtlreg_t tmp_reg[4]; ISADecodeInfo isa; } DecodeExecState; diff --git a/include/isa/x86.h b/include/isa/x86.h index 395990028824400652e7ab485219415919473f9c..d8ec89c8f9a4ae9a6b5b6056a4779999a306e8b8 100644 --- a/include/isa/x86.h +++ b/include/isa/x86.h @@ -1,6 +1,8 @@ #ifndef __ISA_X86_H__ #define __ISA_X86_H__ +#define LAZY_CC + #include // memory @@ -60,6 +62,12 @@ typedef struct { rtlreg_t OF, CF, SF, ZF, IF; +#ifdef LAZY_CC + rtlreg_t cc_dest, cc_src1, cc_src2; + uint32_t cc_width; + uint32_t cc_op; +#endif + struct { uint32_t limit :16; uint32_t base :32; diff --git a/include/rtl/rtl.h b/include/rtl/rtl.h index 500405975655fba1c32df1e43224203346a623a0..dd39b2a672edeff2eeb244a29128bb8253c99491 100644 --- a/include/rtl/rtl.h +++ b/include/rtl/rtl.h @@ -12,7 +12,8 @@ #define ddest (id_dest->preg) #define s0 (&s->tmp_reg[0]) #define s1 (&s->tmp_reg[1]) -#define t0 (&s->tmp_reg[2]) +#define s2 (&s->tmp_reg[2]) +#define t0 (&s->tmp_reg[3]) extern const rtlreg_t rzero; #define rz (&rzero) diff --git a/src/engine/rv64/isa/x86.c b/src/engine/rv64/isa/x86.c index 691c1fa84b7788980c84e993355c22c100faeeb2..c40426183f4573cfdd2840002c869bbb036abd9f 100644 --- a/src/engine/rv64/isa/x86.c +++ b/src/engine/rv64/isa/x86.c @@ -17,15 +17,22 @@ uint32_t reg_ptr2idx(DecodeExecState *s, const rtlreg_t* dest) { CASE(rz, 0) CASE(s0, 2) CASE(s1, 3) - CASE(t0, 4) + CASE(s2, 4) + CASE(t0, 5) CASE(&id_src1->val, 7) CASE(&id_src2->val, 8) CASE(&id_dest->val, 9) CASE(&s->isa.mbr, 10) +#ifdef LAZY_CC + CASE(&cpu.cc_dest, 13) + CASE(&cpu.cc_src1, 14) + CASE(&cpu.cc_src2, 15) +#else CASE(&cpu.CF, 13) 
CASE(&cpu.OF, 14) CASE(&cpu.ZF, 15) CASE(&cpu.SF, 1) +#endif panic("bad ptr = %p", dest); } diff --git a/src/engine/rv64/rv64-backend/exec.c b/src/engine/rv64/rv64-backend/exec.c index 0271222c89cfaef907c982ae853e1afa4d7ebb81..07eb51e7c4af03f7201135c66499039ed46a1157 100644 --- a/src/engine/rv64/rv64-backend/exec.c +++ b/src/engine/rv64/rv64-backend/exec.c @@ -13,14 +13,14 @@ void backend_exec_code(uint64_t pc, int nr_instr) { backend_exec(nr_instr); } -vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr) { +vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr, int npc_type, int nr_suffix) { // copy code to rv64 interpreter to execute it backend_memcpy_from_frontend(RV64_EXEC_PC - riscv64_PMEM_BASE, buf, sizeof(uint32_t) * nr_instr); // if the basic block is end with a branch instruction, // execute until the branch instruction // see rtl_jrelop() at rtl-basic.c - int nr_exec = (tran_next_pc == NEXT_PC_BRANCH ? nr_instr - (5+2*suffix_inst) : nr_instr); + int nr_exec = (npc_type == NEXT_PC_BRANCH ? 
nr_instr - (5+2*nr_suffix) : nr_instr); backend_exec_code(RV64_EXEC_PC, nr_exec); riscv64_CPU_state r; @@ -32,9 +32,9 @@ vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr) { backend_getregs(&r); } - if (tran_next_pc == NEXT_PC_BRANCH) { + if (npc_type == NEXT_PC_BRANCH) { // execute the branch instruction and load x86.pc to x30 - backend_exec(suffix_inst+3); + backend_exec(nr_suffix+3); backend_getregs(&r); } diff --git a/src/engine/rv64/rv64-backend/rtl-basic.c b/src/engine/rv64/rv64-backend/rtl-basic.c index bd5e12961a469018cc53296d5ec51b139c9b4b30..367bbe07c1d18e806b58bb5384152dacb4ac1483 100644 --- a/src/engine/rv64/rv64-backend/rtl-basic.c +++ b/src/engine/rv64/rv64-backend/rtl-basic.c @@ -391,8 +391,8 @@ make_rtl(jrelop, uint32_t relop, const rtlreg_t *src1, const rtlreg_t *src2, vad case RELOP_NE: rv64_bne(rs1, rs2, offset); break; case RELOP_LT: rv64_blt(rs1, rs2, offset); break; case RELOP_GE: rv64_bge(rs1, rs2, offset); break; - case RELOP_LTU: rv64_bltu(rs1, rs2, offset); return; - case RELOP_GEU: rv64_bgeu(rs1, rs2, offset); return; + case RELOP_LTU: rv64_bltu(rs1, rs2, offset); break; + case RELOP_GEU: rv64_bgeu(rs1, rs2, offset); break; case RELOP_LE: rv64_bge(rs2, rs1, offset); break; case RELOP_GT: rv64_blt(rs2, rs1, offset); break; diff --git a/src/engine/rv64/tran.c b/src/engine/rv64/tran.c index 8f998c4e56d09944e1a92b90810ed01717a3d54d..b1fccc0780f86ba7bd1cd3dcf479d0a8fc2b7165 100644 --- a/src/engine/rv64/tran.c +++ b/src/engine/rv64/tran.c @@ -6,6 +6,8 @@ #include "tran.h" #include "spill.h" +#define TOP_N 10 +//#define DUMP_RV64 #define BUF_SIZE 13000 //8192 uint32_t trans_buffer[BUF_SIZE] = {}; @@ -14,9 +16,73 @@ int tran_next_pc = NEXT_PC_SEQ; static void clear_trans_buffer() { trans_buffer_index = 0; } void asm_print(vaddr_t ori_pc, int instr_len, bool print_flag); -vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr); +vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr, int npc_type, int nr_suffix); void guest_getregs(CPU_state 
*cpu); +typedef struct TB { + vaddr_t pc; + int npc_type; + vaddr_t npc; + void *code; + uint32_t nr_instr; + uint32_t guest_nr_instr; + uint32_t hit_time; + uint32_t nr_suffix; + struct TB *next; +} TB; + +static TB head = { .next = NULL }; + +static TB* find_tb(vaddr_t pc) { + TB *tb; + for (tb = head.next; tb != NULL; tb = tb->next) { + if (tb->pc == pc) return tb; + } + return NULL; +} + +static int find_topn_min(TB **top) { + int i; + int min = 0; + for (i = 1; i < TOP_N; i ++) { + if (top[i]->hit_time < top[min]->hit_time) min = i; + } + return min; +} + +static TB** find_topn_tb() { + static TB *top[TOP_N]; + TB *p = head.next;; + int i; + for (i = 0; i < TOP_N; i ++) { + Assert(p != NULL, "i = %d", i); + top[i] = p; + p = p->next; + } + int min = find_topn_min(top); + for (; p != NULL; p = p->next) { + if (p->hit_time > top[min]->hit_time) { + top[min] = p; + min = find_topn_min(top); + } + } + + for (i = 0; i < TOP_N; i ++) { + int max = i; + int j; + for (j = i + 1; j < TOP_N; j ++) { + if (top[max]->hit_time < top[j]->hit_time) max = j; + } + if (max != i) { + TB *tmp = top[i]; + top[i] = top[max]; + top[max] = tmp; + } + } + + return top; +} + void write_ins(uint32_t ins) { assert(trans_buffer_index < BUF_SIZE); trans_buffer[trans_buffer_index++]=ins; @@ -29,47 +95,88 @@ void mainloop() { nemu_state.state = NEMU_RUNNING; uint64_t total_instr = 0; while (1) { - __attribute__((unused)) vaddr_t ori_pc = cpu.pc; - __attribute__((unused)) vaddr_t seq_pc = isa_exec_once(); + vaddr_t tb_start = cpu.pc; + TB *tb = find_tb(tb_start); + if (tb == NULL) { + clear_trans_buffer(); + tran_next_pc = NEXT_PC_SEQ; + int guest_nr_instr = 0; + while (1) { + __attribute__((unused)) vaddr_t ori_pc = cpu.pc; + __attribute__((unused)) vaddr_t seq_pc = isa_exec_once(); + guest_nr_instr ++; #ifdef REG_SPILLING - spill_cleanall(); + spill_cleanall(); #endif - if (nemu_state.state != NEMU_RUNNING) tran_next_pc = NEXT_PC_END; + if (nemu_state.state != NEMU_RUNNING) tran_next_pc = 
NEXT_PC_END; #ifdef DEBUG - asm_print(ori_pc, seq_pc - ori_pc, true); + asm_print(ori_pc, seq_pc - ori_pc, true); #endif - -#ifndef DIFF_TEST - if (tran_next_pc != NEXT_PC_SEQ) { +#ifdef DIFF_TEST + if (true) +#else + if (tran_next_pc != NEXT_PC_SEQ) #endif - vaddr_t next_pc = rv64_exec_trans_buffer(trans_buffer, trans_buffer_index); - total_instr += trans_buffer_index; + { + tb = malloc(sizeof(TB)); + tb->pc = tb_start; + tb->nr_instr = trans_buffer_index; + tb->nr_suffix = suffix_inst; + tb->guest_nr_instr = guest_nr_instr; + tb->code = malloc(tb->nr_instr * 4); + memcpy(tb->code, trans_buffer, tb->nr_instr * 4); + tb->npc_type = tran_next_pc; + tb->npc = cpu.pc; + tb->hit_time = 0; + tb->next = head.next; + head.next = tb; + break; + } + } + } - if (tran_next_pc == NEXT_PC_END) { - // get cpu.eax and interpret `nemu_trap` again - guest_getregs(&cpu); - cpu.pc = ori_pc; + //Log("enter tb with pc = %x, nr_instr = %d", tb->pc, tb->nr_instr); + vaddr_t next_pc = rv64_exec_trans_buffer(tb->code, tb->nr_instr, tb->npc_type, tb->nr_suffix); + total_instr += tb->nr_instr; + tb->hit_time ++; + + if (tb->npc_type == NEXT_PC_END) { + // get cpu.eax and interpret `nemu_trap` again + guest_getregs(&cpu); + cpu.pc = tb_start; + nemu_state.state = NEMU_RUNNING; + while (nemu_state.state == NEMU_RUNNING) { isa_exec_once(); - break; } - - if (tran_next_pc != NEXT_PC_SEQ) cpu.pc = next_pc; - // Log("new basic block pc = %x", cpu.pc); - clear_trans_buffer(); - tran_next_pc = NEXT_PC_SEQ; -#ifndef DIFF_TEST + break; } -#endif + + if (tb->npc_type != NEXT_PC_SEQ) cpu.pc = next_pc; + else cpu.pc = tb->npc; #ifdef DIFF_TEST guest_getregs(&cpu); - difftest_step(ori_pc, cpu.pc); + difftest_step(tb_start, cpu.pc); if (nemu_state.state == NEMU_ABORT) break; #endif } + // display the top-n hot basic block + TB **top = find_topn_tb(); + int i; + for (i = 0; i < TOP_N; i ++) { + printf("%3d: pc = " FMT_WORD "(instr: %d -> %d), \thit time = %d\n", + i + 1, top[i]->pc, top[i]->guest_nr_instr, 
top[i]->nr_instr, top[i]->hit_time); +#ifdef DUMP_RV64 + int j; + for (j = 0; j < top[i]->nr_instr; j ++) { + printf("\t.word 0x%08x\n", ((uint32_t *)top[i]->code)[j]); + } +#endif + } + switch (nemu_state.state) { case NEMU_RUNNING: nemu_state.state = NEMU_STOP; break; diff --git a/src/isa/x86/exec/arith.h b/src/isa/x86/exec/arith.h index 8cf1d130300599e547932013f2e6eff436909fce..8c3b266979951968c3c3290eaf6139afda4c6f96 100644 --- a/src/isa/x86/exec/arith.h +++ b/src/isa/x86/exec/arith.h @@ -1,10 +1,12 @@ +#include "cc.h" + static inline make_EHelper(add) { // TODO(); rtl_add(s, s0, ddest, dsrc1); - operand_write(s, id_dest, s0); - +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, ddest, NULL, LAZYCC_ADD, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); - if (id_dest->width != 4) { rtl_andi(s, s0, s0, 0xffffffffu >> ((4 - id_dest->width) * 8)); } @@ -12,137 +14,131 @@ static inline make_EHelper(add) { rtl_set_CF(s, s1); rtl_is_add_overflow(s, s1, s0, ddest, dsrc1, id_dest->width); rtl_set_OF(s, s1); - +#endif + operand_write(s, id_dest, s0); print_asm_template2(add); } // dest <- sub result static inline void cmp_internal(DecodeExecState *s) { rtl_sub(s, s0, ddest, dsrc1); - +#ifdef LAZY_CC + rtl_set_lazycc(s, ddest, dsrc1, NULL, LAZYCC_SUB, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); - - if (id_dest->width != 4) { - rtl_andi(s, s0, s0, 0xffffffffu >> ((4 - id_dest->width) * 8)); - } - rtl_is_sub_carry(s, s1, s0, ddest); + rtl_is_sub_carry(s, s1, ddest, dsrc1); rtl_set_CF(s, s1); rtl_is_sub_overflow(s, s1, s0, ddest, dsrc1, id_dest->width); rtl_set_OF(s, s1); +#endif } static inline make_EHelper(sub) { // TODO(); cmp_internal(s); - operand_write(s, id_dest, s0); - print_asm_template2(sub); } static inline make_EHelper(cmp) { // TODO(); cmp_internal(s); - print_asm_template2(cmp); } static inline make_EHelper(inc) { // TODO(); rtl_addi(s, s0, ddest, 1); - operand_write(s, id_dest, s0); - +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, NULL, NULL, 
LAZYCC_INC, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); - - rtl_setrelopi(s, RELOP_EQ, s1, s0, 0x80000000); + rtl_setrelopi(s, RELOP_EQ, s1, s0, 0x1u << (id_dest->width * 8 - 1)); rtl_set_OF(s, s1); - +#endif + operand_write(s, id_dest, s0); print_asm_template1(inc); } static inline make_EHelper(dec) { // TODO(); rtl_subi(s, s0, ddest, 1); - operand_write(s, id_dest, s0); - +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_DEC, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); - - rtl_setrelopi(s, RELOP_EQ, s1, s0, 0x7fffffff); + rtl_setrelopi(s, RELOP_EQ, s1, ddest, 0x1u << (id_dest->width * 8 - 1)); rtl_set_OF(s, s1); - +#endif + operand_write(s, id_dest, s0); print_asm_template1(dec); } static inline make_EHelper(neg) { // TODO(); rtl_sub(s, s0, rz, ddest); - +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_NEG, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); - rtl_setrelopi(s, RELOP_NE, s1, ddest, 0); rtl_set_CF(s, s1); - rtl_setrelopi(s, RELOP_EQ, s1, ddest, 0x80000000); + rtl_setrelopi(s, RELOP_EQ, s1, ddest, 0x1u << (id_dest->width * 8 - 1)); rtl_set_OF(s, s1); - +#endif operand_write(s, id_dest, s0); - print_asm_template1(neg); } static inline make_EHelper(adc) { - // s0 = dest + src - rtl_add(s, s0, ddest, dsrc1); - // s1 = s0 + CF - rtl_get_CF(s, s1); - rtl_add(s, s1, s0, s1); - - operand_write(s, id_dest, s1); - +#ifdef LAZY_CC + rtl_lazy_setcc(s, s0, CC_B); // reading CC_B is to read CF +#else + rtl_get_CF(s, s0); +#endif + rtl_add(s, s0, dsrc1, s0); + rtl_add(s, s1, ddest, s0); + +#ifdef LAZY_CC + rtl_set_lazycc(s, s1, s0, dsrc1, LAZYCC_ADC, id_dest->width); +#else + rtl_update_ZFSF(s, s1, id_dest->width); + rtl_is_add_overflow(s, s2, s1, ddest, dsrc1, id_dest->width); + rtl_set_OF(s, s2); if (id_dest->width != 4) { rtl_andi(s, s1, s1, 0xffffffffu >> ((4 - id_dest->width) * 8)); } - - rtl_update_ZFSF(s, s1, id_dest->width); - - // update CF - rtl_is_add_carry(s, s1, s1, s0); - 
rtl_is_add_carry(s, s0, s0, ddest); - rtl_or(s, s0, s0, s1); + rtl_is_add_carry(s, s2, s1, s0); + rtl_is_add_carry(s, s0, s0, dsrc1); + rtl_or(s, s0, s0, s2); rtl_set_CF(s, s0); - - // update OF - rtl_is_add_overflow(s, s0, s1, ddest, dsrc1, id_dest->width); - rtl_set_OF(s, s0); - +#endif + operand_write(s, id_dest, s1); print_asm_template2(adc); } static inline make_EHelper(sbb) { - // s0 = dest - src - rtl_sub(s, s0, ddest, dsrc1); - // s1 = s0 - CF - rtl_get_CF(s, s1); - rtl_sub(s, s1, s0, s1); - - operand_write(s, id_dest, s1); - - if (id_dest->width != 4) { - rtl_andi(s, s1, s1, 0xffffffffu >> ((4 - id_dest->width) * 8)); - } - +#ifdef LAZY_CC + rtl_lazy_setcc(s, s0, CC_B); // reading CC_B is to read CF +#else + rtl_get_CF(s, s0); +#endif + rtl_add(s, s0, dsrc1, s0); + rtl_sub(s, s1, ddest, s0); + +#ifdef LAZY_CC + rtl_set_lazycc(s, s1, ddest, dsrc1, LAZYCC_SBB, id_dest->width); +#else rtl_update_ZFSF(s, s1, id_dest->width); - - // update CF - rtl_is_sub_carry(s, s1, s1, s0); - rtl_is_sub_carry(s, s0, s0, ddest); - rtl_or(s, s0, s0, s1); + rtl_is_sub_overflow(s, s2, s1, ddest, dsrc1, id_dest->width); + rtl_set_OF(s, s2); + rtl_is_add_carry(s, s2, s0, dsrc1); + rtl_is_sub_carry(s, s0, ddest, s0); + rtl_or(s, s0, s0, s2); rtl_set_CF(s, s0); - - // update OF - rtl_is_sub_overflow(s, s0, s1, ddest, dsrc1, id_dest->width); - rtl_set_OF(s, s0); - +#endif + operand_write(s, id_dest, s1); print_asm_template2(sbb); } diff --git a/src/isa/x86/exec/cc.h b/src/isa/x86/exec/cc.h index 8ad5ffd7345472c96df1b0ded48bccefec6aff04..ab8720d1ae107c0d723825be6b978e1123b534f9 100644 --- a/src/isa/x86/exec/cc.h +++ b/src/isa/x86/exec/cc.h @@ -3,6 +3,26 @@ #include "../local-include/rtl.h" +enum { + CC_O, CC_NO, CC_B, CC_NB, + CC_E, CC_NE, CC_BE, CC_NBE, + CC_S, CC_NS, CC_P, CC_NP, + CC_L, CC_NL, CC_LE, CC_NLE +}; + +enum { + LAZYCC_ADD, + LAZYCC_SUB, + LAZYCC_INC, + LAZYCC_DEC, + LAZYCC_NEG, + LAZYCC_ADC, + LAZYCC_SBB, + LAZYCC_LOGIC, +}; + +#include "lazycc.h" + /* Condition Code */ 
static inline const char* get_cc_name(int subcode) { @@ -15,14 +35,8 @@ static inline const char* get_cc_name(int subcode) { return cc_name[subcode]; } -static inline void rtl_setcc(DecodeExecState *s, rtlreg_t* dest, uint8_t subcode) { - bool invert = subcode & 0x1; - enum { - CC_O, CC_NO, CC_B, CC_NB, - CC_E, CC_NE, CC_BE, CC_NBE, - CC_S, CC_NS, CC_P, CC_NP, - CC_L, CC_NL, CC_LE, CC_NLE - }; +static inline void rtl_setcc(DecodeExecState *s, rtlreg_t* dest, uint32_t subcode) { + uint32_t invert = subcode & 0x1; // TODO: Query EFLAGS to determine whether the condition code is satisfied. // dest <- ( cc is satisfied ? 1 : 0) diff --git a/src/isa/x86/exec/control.h b/src/isa/x86/exec/control.h index ebca58da6b22b54eddb7d562f3cbf7558ca27c4d..28225d42bfc8b8ac14f5bee84b9b025284364b22 100644 --- a/src/isa/x86/exec/control.h +++ b/src/isa/x86/exec/control.h @@ -10,8 +10,12 @@ static inline make_EHelper(jmp) { static inline make_EHelper(jcc) { // the target address is calculated at the decode stage uint32_t cc = s->opcode & 0xf; +#ifdef LAZY_CC + rtl_lazy_jcc(s, cc); +#else rtl_setcc(s, s0, cc); rtl_jrelop(s, RELOP_NE, s0, rz, s->jmp_pc); +#endif print_asm("j%s %x", get_cc_name(cc), s->jmp_pc); } diff --git a/src/isa/x86/exec/exec.c b/src/isa/x86/exec/exec.c index c09ee2f5b551d91a7771da9a907e575af3fb80c5..0daeb1a1cfd570c30c7646e543aa747a969a84dd 100644 --- a/src/isa/x86/exec/exec.c +++ b/src/isa/x86/exec/exec.c @@ -15,7 +15,7 @@ static inline void set_width(DecodeExecState *s, int width) { #define IDEX(idx, id, ex) IDEXW(idx, id, ex, 0) #define EXW(idx, ex, w) IDEXW(idx, empty, ex, w) #define EX(idx, ex) EXW(idx, ex, 0) -#define EMPTY(idx) //EX(idx, inv) +#define EMPTY(idx) EX(idx, inv) #define CASE_ENTRY(idx, id, ex, w) case idx: id(s); ex(s); break; @@ -30,8 +30,8 @@ static inline make_EHelper(gp1) { /* 0xc0, 0xc1, 0xd0, 0xd1, 0xd2, 0xd3 */ static inline make_EHelper(gp2) { switch (s->isa.ext_opcode) { - EX(0x00, rol) EMPTY(0x01) EMPTY(0x02) EMPTY(0x03) - EX(0x04, shl) 
EX (0x05, shr) EMPTY(0x06) EX (0x07, sar) + EX(0x00, rol) EX(0x01, ror) EMPTY(0x02) EMPTY(0x03) + EX(0x04, shl) EX(0x05, shr) EMPTY(0x06) EX (0x07, sar) } } @@ -99,10 +99,6 @@ static inline make_EHelper(2byte_esc) { } static inline void exec(DecodeExecState *s) { -#ifdef USE_KVM - extern void kvm_exec(void); - kvm_exec(); -#else uint8_t opcode; again: opcode = instr_fetch(&s->seq_pc, 1); @@ -114,13 +110,13 @@ IDEXW(0x00, G2E, add, 1) IDEX (0x01, G2E, add) IDEXW(0x02, E2G, add, 1) EMPTY(0x04) IDEX (0x05, I2a, add) IDEXW(0x08, G2E, or, 1) IDEX (0x09, G2E, or) IDEXW(0x0a, E2G, or, 1) IDEX (0x0b, E2G, or) IDEXW(0x0c, I2a, or, 1) IDEX (0x0d, I2a, or) EMPTY(0x0e) EX (0x0f, 2byte_esc) -EMPTY(0x10) IDEX (0x11, G2E, adc) EMPTY(0x12) IDEX (0x13, E2G, adc) +IDEXW(0x10, G2E, adc, 1) IDEX (0x11, G2E, adc) IDEXW(0x12, E2G, adc, 1) IDEX (0x13, E2G, adc) -EMPTY(0x18) IDEX (0x19, G2E, sbb) EMPTY(0x1a) IDEX (0x1b, E2G, sbb) +IDEXW(0x18, G2E, sbb, 1) IDEX (0x19, G2E, sbb) IDEXW(0x1a, E2G, sbb, 1) IDEX (0x1b, E2G, sbb) IDEXW(0x20, G2E, and, 1) IDEX (0x21, G2E, and) IDEXW(0x22, E2G, and, 1) IDEX (0x23, E2G, and) IDEXW(0x24, I2a, and, 1) IDEX (0x25, I2a, and) -EMPTY(0x28) IDEX (0x29, G2E, sub) EMPTY(0x2a) IDEX (0x2b, E2G, sub) +IDEXW(0x28, G2E, sub, 1) IDEX (0x29, G2E, sub) EMPTY(0x2a) IDEX (0x2b, E2G, sub) EMPTY(0x2c) IDEX (0x2d, I2a, sub) IDEXW(0x30, G2E, xor, 1) IDEX (0x31, G2E, xor) IDEXW(0x32, E2G, xor, 1) IDEX (0x33, E2G, xor) EMPTY(0x34) IDEX (0x35, I2a, xor) @@ -178,10 +174,15 @@ IDEXW(0xec, in_dx2a, in, 1) IDEX (0xed, in_dx2a, in) IDEXW(0xee, out_a2dx, ou case 0x66: s->isa.is_operand_size_16 = true; goto again; default: exec_inv(s); } -#endif } +//#define USE_KVM vaddr_t isa_exec_once() { +#ifdef USE_KVM + extern void kvm_exec(void); + kvm_exec(); + return 0; +#endif DecodeExecState s; s.is_jmp = 0; s.isa = (ISADecodeInfo) { 0 }; diff --git a/src/isa/x86/exec/lazycc.h b/src/isa/x86/exec/lazycc.h new file mode 100644 index 
0000000000000000000000000000000000000000..273d05227bf0114b8ffe1da490df03d1f3d0d0d4 --- /dev/null +++ b/src/isa/x86/exec/lazycc.h @@ -0,0 +1,191 @@ +#include +#include "cc.h" + +#ifdef LAZY_CC +static inline make_rtl(set_lazycc, const rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2, + uint32_t cc_op, uint32_t width) { + rtl_mv(s, &cpu.cc_dest, dest); + if (src1 != NULL) rtl_mv(s, &cpu.cc_src1, src1); + if (src2 != NULL) rtl_mv(s, &cpu.cc_src2, src2); + cpu.cc_op = cc_op; + cpu.cc_width = width; +} + +#define UNARY 0x100 // compare with cpu.cc_dest and rz +static const int cc2relop [] = { + [CC_O] = 0, [CC_NO] = 0, + [CC_B] = RELOP_LTU, [CC_NB] = RELOP_GEU, + [CC_E] = UNARY | RELOP_EQ, [CC_NE] = UNARY | RELOP_NE, + [CC_BE] = RELOP_LEU, [CC_NBE] = RELOP_GTU, + [CC_S] = UNARY | RELOP_LT, [CC_NS] = UNARY | RELOP_GE, + [CC_P] = 0, [CC_NP] = 0, + [CC_L] = RELOP_LT, [CC_NL] = RELOP_GE, + [CC_LE] = RELOP_LE, [CC_NLE] = RELOP_GT, +}; + +static inline make_rtl(lazy_jcc, uint32_t cc) { + int exception = (cpu.cc_op == LAZYCC_SUB) && (cc == CC_E || cc == CC_NE); + if ((cc2relop[cc] & UNARY) && !exception) { + uint32_t relop = cc2relop[cc] ^ UNARY; + rtlreg_t *p = &cpu.cc_dest; + if (cpu.cc_op == LAZYCC_SUB) { + // sub && (CC_S || CC_NS) + rtl_sub(s, s2, &cpu.cc_dest, &cpu.cc_src1); + p = s2; + } + int exception = (cpu.cc_op == LAZYCC_LOGIC) && (cc == CC_E || cc == CC_NE); + if (cpu.cc_width != 4 && !exception) { + rtl_shli(s, s2, p, 32 - cpu.cc_width * 8); + p = s2; + } + rtl_jrelop(s, relop, p, rz, s->jmp_pc); + return; + } + + switch (cpu.cc_op) { + case LAZYCC_DEC: + if (cc2relop[cc] != 0) { + rtl_jrelop(s, cc2relop[cc], &cpu.cc_dest, rz, s->jmp_pc); + return; + } + break; + case LAZYCC_SBB: // FIXME: should consider CF + if (cc == CC_B) { + rtl_sub(s, s0, &cpu.cc_src1, &cpu.cc_dest); + rtl_is_add_carry(s, s0, s0, &cpu.cc_src2); + rtl_is_sub_carry(s, s1, &cpu.cc_src1, &cpu.cc_dest); + rtl_or(s, s0, s0, s1); + rtl_jrelop(s, RELOP_NE, s0, rz, s->jmp_pc); + return; + 
} + break; + case LAZYCC_SUB: + if (cc2relop[cc] != 0) { + rtl_jrelop(s, cc2relop[cc] & ~UNARY, &cpu.cc_dest, &cpu.cc_src1, s->jmp_pc); + return; + } + break; + case LAZYCC_LOGIC: + if (cc == CC_LE) { + rtl_jrelop(s, cc2relop[cc], &cpu.cc_dest, rz, s->jmp_pc); + return; + } + break; + default: panic("unhandle cc_op = %d", cpu.cc_op); + } + + panic("unhandle cc_op = %d, cc = %d", cpu.cc_op, cc); +} + +static inline make_rtl(lazy_setcc, rtlreg_t *dest, uint32_t cc) { + int exception = (cpu.cc_op == LAZYCC_SUB) && (cc == CC_E || cc == CC_NE); + if ((cc2relop[cc] & UNARY) && !exception) { + uint32_t relop = cc2relop[cc] ^ UNARY; + rtlreg_t *p = &cpu.cc_dest; + if (cpu.cc_op == LAZYCC_SUB) { + // sub && (CC_S || CC_NS) + rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1); + p = dest; + } + int exception = (cpu.cc_op == LAZYCC_LOGIC) && (cc == CC_E || cc == CC_NE); + if (cpu.cc_width != 4 && !exception) { + rtl_shli(s, dest, p, 32 - cpu.cc_width * 8); + p = dest; + } + rtl_setrelop(s, relop, dest, p, rz); + return; + } + + switch (cpu.cc_op) { + case LAZYCC_ADD: + if (cc2relop[cc] != 0) { + rtlreg_t *p = &cpu.cc_dest; + if (cpu.cc_width != 4) { + rtl_andi(s, dest, &cpu.cc_dest, 0xffffffffu >> ((4 - cpu.cc_width) * 8)); + p = dest; + } + rtl_setrelop(s, cc2relop[cc], dest, p, &cpu.cc_src1); + return; + } + if (cc == CC_O) { + rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1); + rtl_is_add_overflow(s, dest, &cpu.cc_dest, &cpu.cc_src1, dest, cpu.cc_width); + return; + } + break; + case LAZYCC_SUB: + if (cc2relop[cc] != 0) { + rtl_setrelop(s, cc2relop[cc] & ~UNARY, dest, &cpu.cc_dest, &cpu.cc_src1); + return; + } + if (cc == CC_O) { + rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1); + rtl_is_sub_overflow(s, dest, dest, &cpu.cc_dest, &cpu.cc_src1, cpu.cc_width); + return; + } + break; + case LAZYCC_NEG: + if (cc == CC_B) { + rtl_setrelopi(s, RELOP_NE, dest, &cpu.cc_dest, 0); + return; + } + if (cc == CC_O) { + rtl_setrelopi(s, RELOP_EQ, dest, &cpu.cc_dest, -(0x1u << (cpu.cc_width * 8 - 
1))); + return; + } + break; + case LAZYCC_INC: + if (cc == CC_O) { + rtl_setrelopi(s, RELOP_EQ, dest, &cpu.cc_dest, 0x1u << (cpu.cc_width * 8 - 1)); + return; + } + break; + case LAZYCC_DEC: + if (cc == CC_O) { + rtl_addi(s, dest, &cpu.cc_dest, 1); + rtl_setrelopi(s, RELOP_EQ, dest, dest, 0x1u << (cpu.cc_width * 8 - 1)); + return; + } + break; + case LAZYCC_ADC: + if (cc == CC_B) { + rtlreg_t *p = &cpu.cc_dest; + if (cpu.cc_width != 4) { + rtl_andi(s, dest, &cpu.cc_dest, 0xffffffffu >> ((4 - cpu.cc_width) * 8)); + p = dest; + } + rtl_is_add_carry(s, t0, &cpu.cc_src1, &cpu.cc_src2); + rtl_is_add_carry(s, dest, p, &cpu.cc_src1); + rtl_or(s, dest, t0, dest); + return; + } + if (cc == CC_O) { + rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1); + rtl_is_add_overflow(s, dest, &cpu.cc_dest, dest, &cpu.cc_src2, cpu.cc_width); + return; + } + break; + case LAZYCC_SBB: + if (cc == CC_B) { + rtl_sub(s, s0, &cpu.cc_src1, &cpu.cc_dest); + rtl_is_add_carry(s, s0, s0, &cpu.cc_src2); + rtl_is_sub_carry(s, s1, &cpu.cc_src1, &cpu.cc_dest); + rtl_or(s, dest, s0, s1); + return; + } + if (cc == CC_O) { + rtl_is_sub_overflow(s, dest, &cpu.cc_dest, &cpu.cc_src1, &cpu.cc_src2, cpu.cc_width); + return; + } + break; + case LAZYCC_LOGIC: + if (cc == CC_E || cc == CC_NE || cc == CC_LE) { + rtl_setrelop(s, cc2relop[cc], dest, &cpu.cc_dest, rz); + return; + } + break; + default: panic("unhandle cc_op = %d", cpu.cc_op); + } + panic("unhandle cc_op = %d, cc = %d", cpu.cc_op, cc); +} +#endif diff --git a/src/isa/x86/exec/logic.h b/src/isa/x86/exec/logic.h index 04574bf1c9b1c2797cba7fc23f05b62aaee5d617..06c3f6858fe687749f50c3c9c351c078d1e59a29 100644 --- a/src/isa/x86/exec/logic.h +++ b/src/isa/x86/exec/logic.h @@ -3,9 +3,13 @@ // dest <- and result static inline void and_internal(DecodeExecState *s) { rtl_and(s, s0, ddest, dsrc1); +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_LOGIC, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); rtl_mv(s, &cpu.CF, rz); rtl_mv(s, &cpu.OF, 
rz); +#endif } static inline make_EHelper(test) { @@ -21,19 +25,27 @@ static inline make_EHelper(and) { static inline make_EHelper(xor) { rtl_xor(s, s0, ddest, dsrc1); - operand_write(s, id_dest, s0); +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_LOGIC, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); rtl_mv(s, &cpu.CF, rz); rtl_mv(s, &cpu.OF, rz); +#endif + operand_write(s, id_dest, s0); print_asm_template2(xor); } static inline make_EHelper(or) { rtl_or(s, s0, ddest, dsrc1); - operand_write(s, id_dest, s0); +#ifdef LAZY_CC + rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_LOGIC, id_dest->width); +#else rtl_update_ZFSF(s, s0, id_dest->width); rtl_mv(s, &cpu.CF, rz); rtl_mv(s, &cpu.OF, rz); +#endif + operand_write(s, id_dest, s0); print_asm_template2(or); } @@ -41,8 +53,10 @@ static inline make_EHelper(sar) { rtl_sext(s, s0, ddest, id_dest->width); rtl_sar(s, s0, s0, dsrc1); operand_write(s, id_dest, s0); +#ifndef LAZY_CC rtl_update_ZFSF(s, s0, id_dest->width); // unnecessary to update CF and OF in NEMU +#endif //difftest_skip_eflags(EFLAGS_MASK_CF | EFLAGS_MASK_OF); print_asm_template2(sar); } @@ -50,8 +64,10 @@ static inline make_EHelper(sar) { static inline make_EHelper(shl) { rtl_shl(s, s0, ddest, dsrc1); operand_write(s, id_dest, s0); +#ifndef LAZY_CC rtl_update_ZFSF(s, s0, id_dest->width); // unnecessary to update CF and OF in NEMU +#endif //difftest_skip_eflags(EFLAGS_MASK_CF | EFLAGS_MASK_OF | EFLAGS_MASK_ZF); print_asm_template2(shl); } @@ -59,8 +75,10 @@ static inline make_EHelper(shl) { static inline make_EHelper(shr) { rtl_shr(s, s0, ddest, dsrc1); operand_write(s, id_dest, s0); +#ifndef LAZY_CC rtl_update_ZFSF(s, s0, id_dest->width); // unnecessary to update CF and OF in NEMU +#endif //difftest_skip_eflags(EFLAGS_MASK_CF | EFLAGS_MASK_OF); print_asm_template2(shr); } @@ -78,10 +96,27 @@ static inline make_EHelper(rol) { print_asm_template2(rol); } +static inline make_EHelper(ror) { + rtl_shr(s, s0, ddest, dsrc1); + rtl_li(s, s1, 
id_dest->width * 8); + rtl_sub(s, s1, s1, dsrc1); + rtl_shl(s, s1, ddest, s1); + rtl_or(s, s1, s0, s1); + + operand_write(s, id_dest, s1); + // unnecessary to update eflags in NEMU + //difftest_skip_eflags(EFLAGS_MASK_ALL); + print_asm_template2(ror); +} + static inline make_EHelper(setcc) { uint32_t cc = s->opcode & 0xf; +#ifdef LAZY_CC + rtl_lazy_setcc(s, s0, cc); +#else rtl_setcc(s, s0, cc); +#endif operand_write(s, id_dest, s0); print_asm("set%s %s", get_cc_name(cc), id_dest->str); @@ -98,15 +133,19 @@ static inline make_EHelper(shld) { rtl_andi(s, dsrc1, dsrc1, 31); rtl_shl(s, s0, ddest, dsrc1); - rtl_li(s, s1, 32); + rtl_li(s, s1, 31); rtl_sub(s, s1, s1, dsrc1); + // shift twice to deal with dsrc1 = 0 rtl_shr(s, s1, dsrc2, s1); + rtl_shri(s, s1, s1, 1); rtl_or(s, s0, s0, s1); operand_write(s, id_dest, s0); +#ifndef LAZY_CC rtl_update_ZFSF(s, s0, id_dest->width); // unnecessary to update CF and OF in NEMU +#endif print_asm_template3(shld); } @@ -114,14 +153,18 @@ static inline make_EHelper(shrd) { rtl_andi(s, dsrc1, dsrc1, 31); rtl_shr(s, s0, ddest, dsrc1); - rtl_li(s, s1, 32); + rtl_li(s, s1, 31); rtl_sub(s, s1, s1, dsrc1); + // shift twice to deal with dsrc1 = 0 rtl_shl(s, s1, dsrc2, s1); + rtl_shli(s, s1, s1, 1); rtl_or(s, s0, s0, s1); operand_write(s, id_dest, s0); +#ifndef LAZY_CC rtl_update_ZFSF(s, s0, id_dest->width); // unnecessary to update CF and OF in NEMU +#endif print_asm_template3(shrd); } diff --git a/src/isa/x86/exec/system.h b/src/isa/x86/exec/system.h index f6c1cd936d7541a596f7624a5217486866355147..7ee663abccf29742c7d2f1ce585d9f39d93d66ee 100644 --- a/src/isa/x86/exec/system.h +++ b/src/isa/x86/exec/system.h @@ -30,7 +30,9 @@ static make_EHelper(int) { print_asm("int %s", id_dest->str); +#ifdef __DIFF_REF_QEMU__ difftest_skip_dut(1, 2); +#endif } static make_EHelper(iret) { diff --git a/src/isa/x86/kvm/kvm.c b/src/isa/x86/kvm/kvm.c index 7db61a8590f2fb17fd2ecf394b225f5eca13865a..2e2c7a04d18a06c52ce50558e75e5749f9a9f236 100644 --- 
a/src/isa/x86/kvm/kvm.c +++ b/src/isa/x86/kvm/kvm.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -137,9 +138,13 @@ int run_vm(struct vm *vm, struct vcpu *vcpu, size_t sz) { /* fall through */ default: - fprintf(stderr, "Got exit_reason %d," + if (ioctl(vcpu->fd, KVM_GET_REGS, ®s) < 0) { + perror("KVM_GET_REGS"); + assert(0); + } + fprintf(stderr, "Got exit_reason %d at pc = 0x%llx," " expected KVM_EXIT_HLT (%d)\n", - vcpu->kvm_run->exit_reason, KVM_EXIT_HLT); + vcpu->kvm_run->exit_reason, regs.rip, KVM_EXIT_HLT); assert(0); } } @@ -187,7 +192,7 @@ int run_protected_mode(struct vm *vm, struct vcpu *vcpu) { memset(®s, 0, sizeof(regs)); /* Clear all FLAGS bits, except bit 1 which is always set. */ regs.rflags = 2; - regs.rip = 0; + regs.rip = IMAGE_START; if (ioctl(vcpu->fd, KVM_SET_REGS, ®s) < 0) { perror("KVM_SET_REGS"); diff --git a/src/isa/x86/local-include/rtl.h b/src/isa/x86/local-include/rtl.h index c131551400a5e664d2002e303b51cab24738e81b..64450c14f0b93a18fc8e44e43f9ed4d5f0d48c4f 100644 --- a/src/isa/x86/local-include/rtl.h +++ b/src/isa/x86/local-include/rtl.h @@ -50,9 +50,9 @@ static inline make_rtl(is_sub_overflow, rtlreg_t* dest, } static inline make_rtl(is_sub_carry, rtlreg_t* dest, - const rtlreg_t* res, const rtlreg_t* src1) { + const rtlreg_t* src1, const rtlreg_t* src2) { // res = src1 - src2 - rtl_setrelop(s, RELOP_LTU, dest, src1, res); + rtl_setrelop(s, RELOP_LTU, dest, src1, src2); } static inline make_rtl(is_add_overflow, rtlreg_t* dest, @@ -64,7 +64,7 @@ static inline make_rtl(is_add_overflow, rtlreg_t* dest, static inline make_rtl(is_add_carry, rtlreg_t* dest, const rtlreg_t* res, const rtlreg_t* src1) { // res = src1 + src2 - rtl_is_sub_carry(s, dest, src1, res); + rtl_is_sub_carry(s, dest, res, src1); } #define make_rtl_setget_eflags(f) \ diff --git a/tools/kvm-diff/Makefile b/tools/kvm-diff/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ba629aa90f21aa6c28c28370a3718826818b4a01 --- 
/dev/null
+++ b/tools/kvm-diff/Makefile
@@ -0,0 +1,56 @@
+# Builds the KVM difftest reference, $(BINARY), as a shared object from every
+# C file found under src/.
+
+ifneq ($(MAKECMDGOALS),clean) # ignore check for make clean
+ISA ?= x86
+
+ifneq ($(ISA),x86) # ISA must be valid
+$(error Only support x86)
+endif
+
+# NEMU_HOME feeds both the include path and -DNEMU_HOME below
+ifeq ($(strip $(NEMU_HOME)),)
+$(error NEMU_HOME is not set)
+endif
+endif
+
+INC_DIR += ./include $(NEMU_HOME)/include
+BUILD_DIR ?= ./build
+OBJ_DIR ?= $(BUILD_DIR)/obj-$(ISA)
+BINARY ?= $(BUILD_DIR)/$(ISA)-kvm-so
+
+.DEFAULT_GOAL = app
+
+# Remove a half-written target when its recipe fails
+.DELETE_ON_ERROR:
+
+# Compilation flags
+CC = gcc
+LD = gcc
+INCLUDES = $(addprefix -I, $(INC_DIR))
+CFLAGS += -O2 -fPIC -MMD -Wall -Werror -DNEMU_HOME=$(NEMU_HOME) $(INCLUDES)
+
+# Files to be compiled (simple assignment: run find once, at parse time)
+SRCS := $(shell find src/ -name "*.c")
+OBJS := $(SRCS:src/%.c=$(OBJ_DIR)/%.o)
+
+# Compilation patterns
+$(OBJ_DIR)/%.o: src/%.c
+	@echo + CC $<
+	@mkdir -p $(dir $@)
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+# Dependencies (the .d files written by -MMD)
+-include $(OBJS:.o=.d)
+
+# Some convenient rules
+
+.PHONY: app clean
+app: $(BINARY)
+
+$(BINARY): $(OBJS)
+	@echo + LD $@
+	@$(LD) -O2 -rdynamic -shared -fPIC -o $@ $^
+
+clean:
+	-rm -rf $(BUILD_DIR)
diff --git a/tools/kvm-diff/include/paddr.h b/tools/kvm-diff/include/paddr.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3dcfa766ab2dd671df670f05061cd6132875fd0
--- /dev/null
+++ b/tools/kvm-diff/include/paddr.h
@@ -0,0 +1 @@
+// this is an empty file to avoid compile error
diff --git a/tools/kvm-diff/src/kvm.c b/tools/kvm-diff/src/kvm.c
new file mode 100644
index 0000000000000000000000000000000000000000..d04af1f273cc0e1d5cbe1565fccd1200d6d76160
--- /dev/null
+++ b/tools/kvm-diff/src/kvm.c
@@ -0,0 +1,352 @@
+// from NEMU
+// NOTE(review): the header names on the #include lines were lost when this
+// patch was extracted. The five system headers are restored from the calls
+// made below; the two project headers are a best guess -- confirm them
+// against the NEMU tree before applying.
+#include <memory/paddr.h> // presumably PMEM_SIZE and paddr_t -- TODO confirm
+#include <isa/x86.h> // presumably x86_CPU_state -- TODO confirm
+
+#include <errno.h> // errno, EINTR
+#include <fcntl.h> // open, O_RDWR
+#include <sys/ioctl.h> // ioctl
+#include <sys/mman.h> // mmap, madvise
+#include <linux/kvm.h> // KVM_* requests and structures
+
+/* CR0 bits */
+#define CR0_PE 1u
+#define CR0_PG (1u << 31)
+
+// File descriptors and host mappings of the reference VM
+struct vm {
+  int sys_fd;    // /dev/kvm
+  int fd;        // result of KVM_CREATE_VM
+  uint8_t *mem;  // host mapping backing memory slot 0 (guest physical 0)
+  uint8_t *mmio; // host mapping backing memory slot 1 (guest physical 0xa1000000)
+};
+
+// State of the reference VM's only virtual CPU
+struct vcpu {
+  int fd;                  // result of KVM_CREATE_VCPU
+  struct kvm_run *kvm_run; // run structure mmap'ed from the vcpu fd
+  int int_wp_state;        // STATE_IDLE or STATE_INT_INSTR
+  uint32_t entry;          // handler entry recorded when an int instruction is seen
+};
+
+enum {
+  STATE_IDLE, // if encounter an int instruction, then set watchpoint
+  STATE_INT_INSTR, // if hit the watchpoint, then delete the watchpoint
+};
+
+static struct vm vm;
+static struct vcpu vcpu;
+
+// Enable guest debugging with single-stepping; when watch is true, also arm
+// debug register 0 at watch_addr.
+// This should be called every time after KVM_SET_REGS.
+// It seems that KVM_SET_REGS will clean the state of single step.
+static void kvm_set_step_mode(bool watch, uint32_t watch_addr) {
+  struct kvm_guest_debug debug = {};
+  debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP;
+  debug.arch.debugreg[0] = watch_addr;
+  debug.arch.debugreg[7] = (watch ? 0x1 : 0x0); // watch instruction fetch at `watch_addr`
+  if (ioctl(vcpu.fd, KVM_SET_GUEST_DEBUG, &debug) < 0) {
+    perror("KVM_SET_GUEST_DEBUG");
+    assert(0);
+  }
+}
+
+// Read the general-purpose registers of the vcpu into r; asserts on failure.
+static inline void kvm_getregs(struct kvm_regs *r) {
+  if (ioctl(vcpu.fd, KVM_GET_REGS, r) < 0) {
+    perror("KVM_GET_REGS");
+    assert(0);
+  }
+}
+
+// Write r to the general-purpose registers of the vcpu, then re-arm
+// single-stepping (see kvm_set_step_mode); asserts on failure.
+static void kvm_setregs(const struct kvm_regs *r) {
+  if (ioctl(vcpu.fd, KVM_SET_REGS, r) < 0) {
+    perror("KVM_SET_REGS");
+    assert(0);
+  }
+  kvm_set_step_mode(false, 0);
+}
+
+// Read the special registers of the vcpu into r; asserts on failure.
+static void kvm_getsregs(struct kvm_sregs *r) {
+  if (ioctl(vcpu.fd, KVM_GET_SREGS, r) < 0) {
+    perror("KVM_GET_SREGS");
+    assert(0);
+  }
+}
+
+// Write r to the special registers of the vcpu; asserts on failure.
+static void kvm_setsregs(const struct kvm_sregs *r) {
+  if (ioctl(vcpu.fd, KVM_SET_SREGS, r) < 0) {
+    perror("KVM_SET_SREGS");
+    assert(0);
+  }
+}
+
+// Map mem_size bytes of anonymous host memory and register them as guest
+// memory slot `slot` at guest physical address base. Returns the host
+// address of the mapping; asserts on failure.
+static void* create_mem(int slot, uintptr_t base, size_t mem_size) {
+  void *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+      MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+  if (mem == MAP_FAILED) {
+    perror("mmap mem");
+    assert(0);
+  }
+
+  // the result of madvise is not checked
+  madvise(mem, mem_size, MADV_MERGEABLE);
+
+  struct kvm_userspace_memory_region memreg;
+  memreg.slot = slot;
+  memreg.flags = 0;
+  memreg.guest_phys_addr = base;
+  memreg.memory_size = mem_size;
+  memreg.userspace_addr = (unsigned long)mem;
+  if (ioctl(vm.fd, KVM_SET_USER_MEMORY_REGION, &memreg) < 0) {
+    perror("KVM_SET_USER_MEMORY_REGION");
+    assert(0);
+  }
+  return mem;
+}
+
+// Open /dev/kvm, check the API version, create the VM and its two memory slots.
+static void vm_init(size_t mem_size) {
+  int api_ver;
+
+  vm.sys_fd = open("/dev/kvm", O_RDWR);
+  if (vm.sys_fd < 0) {
+    perror("open /dev/kvm");
+    assert(0);
+  }
+
+  api_ver = ioctl(vm.sys_fd, KVM_GET_API_VERSION, 0);
+  if (api_ver < 0) {
+    perror("KVM_GET_API_VERSION");
+    assert(0);
+  }
+
+  if (api_ver != KVM_API_VERSION) {
+    fprintf(stderr, "Got KVM api version %d, expected %d\n",
+        api_ver, KVM_API_VERSION);
+    assert(0);
+  }
+
+  vm.fd = ioctl(vm.sys_fd, KVM_CREATE_VM, 0);
+  if (vm.fd < 0) {
+    perror("KVM_CREATE_VM");
+    assert(0);
+  }
+
+  if (ioctl(vm.fd, KVM_SET_TSS_ADDR, 0xfffbd000) < 0) {
+    perror("KVM_SET_TSS_ADDR");
+    assert(0);
+  }
+
+  vm.mem = create_mem(0, 0, mem_size);
+  vm.mmio = create_mem(1, 0xa1000000, 0x1000);
+}
+
+// Create the vcpu, map its kvm_run structure and request register syncing.
+static void vcpu_init(void) {
+  int vcpu_mmap_size;
+
+  vcpu.fd = ioctl(vm.fd, KVM_CREATE_VCPU, 0);
+  if (vcpu.fd < 0) {
+    perror("KVM_CREATE_VCPU");
+    assert(0);
+  }
+
+  vcpu_mmap_size = ioctl(vm.sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
+  if (vcpu_mmap_size <= 0) {
+    perror("KVM_GET_VCPU_MMAP_SIZE");
+    assert(0);
+  }
+
+  vcpu.kvm_run = mmap(NULL, vcpu_mmap_size, PROT_READ | PROT_WRITE,
+      MAP_SHARED, vcpu.fd, 0);
+  if (vcpu.kvm_run == MAP_FAILED) {
+    perror("mmap kvm_run");
+    assert(0);
+  }
+
+  vcpu.kvm_run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+  vcpu.int_wp_state = STATE_IDLE;
+}
+
+// Bootstrap code copied to guest physical 0x7c00 by run_protected_mode().
+static const uint8_t mbr[] = {
+  // start32:
+  0x0f, 0x01, 0x15, 0x28, 0x7c, 0x00, 0x00, // lgdtl 0x7c28
+  0xea, 0x0e, 0x7c, 0x00, 0x00, 0x08, 0x00, // ljmp $0x8, 0x7c0e
+
+  // here:
+  0xeb, 0xfe, // jmp here
+
+  // GDT
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00,
+  0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00,
+
+  // GDT descriptor
+  0x17, 0x00, 0x10, 0x7c, 0x00, 0x00
+};
+
+// Set CR0.PE and load code/data segments (base 0, limit 0xffffffff) into sregs.
+static void setup_protected_mode(struct kvm_sregs *sregs) {
+  struct kvm_segment seg = {
+    .base = 0,
+    .limit = 0xffffffff,
+    .selector = 1 << 3,
+    .present = 1,
+    .type = 11, /* Code: execute, read, accessed */
+    .dpl = 0,
+    .db = 1,
+    .s = 1, /* Code/data */
+    .l = 0,
+    .g = 1, /* 4KB granularity */
+  };
+
+  sregs->cr0 |= CR0_PE; /* enter protected mode */
+
+  sregs->cs = seg;
+
+  seg.type = 3; /* Data: read/write, accessed */
+  seg.selector = 2 << 3;
+  sregs->ds = sregs->es = sregs->fs = sregs->gs = sregs->ss = seg;
+}
+
+// Single-step the guest n times. While idle, an `int` instruction (opcode
+// 0xcd) at the reported pc arms a watchpoint on its handler entry; the
+// watchpoint is removed again on the next debug exit.
+static void kvm_exec(uint64_t n) {
+  for (; n > 0; n --) {
+    if (ioctl(vcpu.fd, KVM_RUN, 0) < 0) {
+      if (errno == EINTR) {
+        // interrupted: undo the upcoming n -- so that this step is retried
+        n ++;
+        continue;
+      }
+      perror("KVM_RUN");
+      assert(0);
+    }
+
+    if (vcpu.kvm_run->exit_reason != KVM_EXIT_DEBUG) {
+      // every step is expected to end in a debug exit, so report KVM_EXIT_DEBUG
+      fprintf(stderr, "Got exit_reason %d at pc = 0x%llx,"
+          " expected KVM_EXIT_DEBUG (%d)\n",
+          vcpu.kvm_run->exit_reason, vcpu.kvm_run->s.regs.regs.rip, KVM_EXIT_DEBUG);
+      assert(0);
+    } else {
+      switch (vcpu.int_wp_state) {
+        case STATE_IDLE:
+          ; uint32_t pc;
+          if (vcpu.kvm_run->s.regs.sregs.cr0 & CR0_PG) {
+            // paging is on: translate the linear pc before indexing vm.mem
+            struct kvm_translation t = { .linear_address = vcpu.kvm_run->debug.arch.pc };
+            int ret = ioctl(vcpu.fd, KVM_TRANSLATE, &t);
+            assert(ret == 0);
+            assert(t.valid);
+            pc = t.physical_address;
+          } else pc = vcpu.kvm_run->debug.arch.pc;
+          if (vm.mem[pc] == 0xcd) {
+            uint8_t nr = vm.mem[pc + 1];
+            uint32_t pgate = vcpu.kvm_run->s.regs.sregs.idt.base + nr * 8;
+            // assume code.base = 0
+            // widen each byte before shifting: a uint8_t promotes to int, and
+            // shifting a value >= 0x80 left by 24 would overflow it
+            uint32_t entry = (uint32_t)vm.mem[pgate] | ((uint32_t)vm.mem[pgate + 1] << 8) |
+              ((uint32_t)vm.mem[pgate + 6] << 16) | ((uint32_t)vm.mem[pgate + 7] << 24);
+            kvm_set_step_mode(true, entry);
+            vcpu.int_wp_state = STATE_INT_INSTR;
+            vcpu.entry = entry;
+          }
+          break;
+        case STATE_INT_INSTR:
+          Assert(vcpu.entry == vcpu.kvm_run->debug.arch.pc, "entry not match");
+          kvm_set_step_mode(false, 0);
+          vcpu.int_wp_state = STATE_IDLE;
+          break;
+      }
+      //Log("exception = %d, pc = %llx, dr6 = %llx, dr7 = %llx", vcpu.kvm_run->debug.arch.exception,
+      //    vcpu.kvm_run->debug.arch.pc, vcpu.kvm_run->debug.arch.dr6, vcpu.kvm_run->debug.arch.dr7);
+    }
+  }
+}
+
+// Put the vcpu in protected mode and run the bootstrap at 0x7c00.
+static void run_protected_mode(void) {
+  struct kvm_sregs sregs;
+  kvm_getsregs(&sregs);
+  setup_protected_mode(&sregs);
+  kvm_setsregs(&sregs);
+
+  struct kvm_regs regs;
+  memset(&regs, 0, sizeof(regs));
+  regs.rflags = 2;
+  regs.rip = 0x7c00;
+  // this will also set KVM_GUESTDBG_ENABLE
+  kvm_setregs(&regs);
+
+  memcpy(vm.mem + 0x7c00, mbr, sizeof(mbr));
+  // run enough instructions to load GDT
+  kvm_exec(10);
+}
+
+// Copy n bytes from src to guest physical address dest.
+void difftest_memcpy_from_dut(paddr_t dest, void *src, size_t n) {
+  memcpy(vm.mem + dest, src, n);
+}
+
+// Copy the eight general-purpose registers and pc of the reference into the
+// x86_CPU_state pointed to by r.
+void difftest_getregs(void *r) {
+  struct kvm_regs *ref = &(vcpu.kvm_run->s.regs.regs);
+  x86_CPU_state *x86 = r;
+  x86->eax = ref->rax;
+  x86->ebx = ref->rbx;
+  x86->ecx = ref->rcx;
+  x86->edx = ref->rdx;
+  x86->esp = ref->rsp;
+  x86->ebp = ref->rbp;
+  x86->esi = ref->rsi;
+  x86->edi = ref->rdi;
+  x86->pc = ref->rip;
+}
+
+// Load the eight general-purpose registers and pc from the x86_CPU_state
+// pointed to by r into the reference, and mark them dirty for the next KVM_RUN.
+void difftest_setregs(const void *r) {
+  struct kvm_regs *ref = &(vcpu.kvm_run->s.regs.regs);
+  const x86_CPU_state *x86 = r;
+  ref->rax = x86->eax;
+  ref->rbx = x86->ebx;
+  ref->rcx = x86->ecx;
+  ref->rdx = x86->edx;
+  ref->rsp = x86->esp;
+  ref->rbp = x86->ebp;
+  ref->rsi = x86->esi;
+  ref->rdi = x86->edi;
+  ref->rip = x86->pc;
+  ref->rflags |= (1 << 8); // bit 8 of EFLAGS is TF, the trap flag
+
+  vcpu.kvm_run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
+}
+
+// Single-step the reference n times.
+void difftest_exec(uint64_t n) {
+  kvm_exec(n);
+}
+
+// Bring up the reference VM; port is unused by this backend.
+void difftest_init(int port) {
+  vm_init(PMEM_SIZE);
+  vcpu_init();
+  run_protected_mode();
+}