提交 e8d983cf，作者：wangkaifan

Merge branch 'master' into reg_spilling

......@@ -26,6 +26,26 @@ SO_CFLAGS = -fPIC -D_SHARE=1
SO_LDLAGS = -shared -fPIC
endif
DIFF ?= kvm
ifneq ($(ISA),x86)
ifneq ($(DIFF),qemu)
DIFF = qemu
$(info KVM is only supported with ISA=x86, use QEMU instead)
endif
endif
ifeq ($(DIFF),qemu)
DIFF_REF_PATH = $(NEMU_HOME)/tools/qemu-diff
DIFF_REF_SO = $(DIFF_REF_PATH)/build/$(ISA)-qemu-so
CFLAGS += -D__DIFF_REF_QEMU__
else ifeq ($(DIFF),kvm)
DIFF_REF_PATH = $(NEMU_HOME)/tools/kvm-diff
DIFF_REF_SO = $(DIFF_REF_PATH)/build/$(ISA)-kvm-so
CFLAGS += -D__DIFF_REF_KVM__
else
$(error invalid DIFF. Supported: qemu kvm)
endif
OBJ_DIR ?= $(BUILD_DIR)/obj-$(ISA)-$(ENGINE)$(SO)
BINARY ?= $(BUILD_DIR)/$(ISA)-$(NAME)-$(ENGINE)$(SO)
......@@ -40,12 +60,6 @@ INCLUDES = $(addprefix -I, $(INC_DIR))
CFLAGS += -O2 -MMD -Wall -Werror -ggdb3 $(INCLUDES) \
-D__ISA__=$(ISA) -D__ISA_$(ISA)__ -D_ISA_H_=\"isa/$(ISA).h\"
QEMU_DIFF_PATH = $(NEMU_HOME)/tools/qemu-diff
QEMU_SO = $(QEMU_DIFF_PATH)/build/$(ISA)-qemu-so
$(QEMU_SO):
$(MAKE) -C $(QEMU_DIFF_PATH)
# Files to be compiled
SRCS = $(shell find src/ -name "*.c" | grep -v "isa\|engine")
SRCS += $(shell find src/isa/$(ISA) -name "*.c")
......@@ -64,11 +78,11 @@ $(OBJ_DIR)/%.o: src/%.c
# Some convenient rules
.PHONY: app run gdb clean run-env $(QEMU_SO)
.PHONY: app run gdb clean run-env $(DIFF_REF_SO)
app: $(BINARY)
override ARGS ?= --log=$(BUILD_DIR)/nemu-log.txt
override ARGS += --diff=$(QEMU_SO)
override ARGS += --diff=$(DIFF_REF_SO)
# Command to execute NEMU
IMG :=
......@@ -79,7 +93,7 @@ $(BINARY): $(OBJS)
@echo + LD $@
@$(LD) -O2 -rdynamic $(SO_LDLAGS) -o $@ $^ -lSDL2 -lreadline -ldl
run-env: $(BINARY) $(QEMU_SO)
run-env: $(BINARY) $(DIFF_REF_SO)
run: run-env
$(call git_commit, "run")
......@@ -89,6 +103,9 @@ gdb: run-env
$(call git_commit, "gdb")
gdb -s $(BINARY) --args $(NEMU_EXEC)
$(DIFF_REF_SO):
$(MAKE) -C $(DIFF_REF_PATH)
clean:
-rm -rf $(BUILD_DIR)
$(MAKE) -C tools/gen-expr clean
......
......@@ -26,7 +26,7 @@ typedef struct {
vaddr_t jmp_pc;
Operand src1, dest, src2;
int width;
rtlreg_t tmp_reg[3];
rtlreg_t tmp_reg[4];
ISADecodeInfo isa;
} DecodeExecState;
......
#ifndef __ISA_X86_H__
#define __ISA_X86_H__
#define LAZY_CC
#include <common.h>
// memory
......@@ -60,6 +62,12 @@ typedef struct {
rtlreg_t OF, CF, SF, ZF, IF;
#ifdef LAZY_CC
rtlreg_t cc_dest, cc_src1, cc_src2;
uint32_t cc_width;
uint32_t cc_op;
#endif
struct {
uint32_t limit :16;
uint32_t base :32;
......
......@@ -12,7 +12,8 @@
#define ddest (id_dest->preg)
#define s0 (&s->tmp_reg[0])
#define s1 (&s->tmp_reg[1])
#define t0 (&s->tmp_reg[2])
#define s2 (&s->tmp_reg[2])
#define t0 (&s->tmp_reg[3])
extern const rtlreg_t rzero;
#define rz (&rzero)
......
......@@ -17,15 +17,22 @@ uint32_t reg_ptr2idx(DecodeExecState *s, const rtlreg_t* dest) {
CASE(rz, 0)
CASE(s0, 2)
CASE(s1, 3)
CASE(t0, 4)
CASE(s2, 4)
CASE(t0, 5)
CASE(&id_src1->val, 7)
CASE(&id_src2->val, 8)
CASE(&id_dest->val, 9)
CASE(&s->isa.mbr, 10)
#ifdef LAZY_CC
CASE(&cpu.cc_dest, 13)
CASE(&cpu.cc_src1, 14)
CASE(&cpu.cc_src2, 15)
#else
CASE(&cpu.CF, 13)
CASE(&cpu.OF, 14)
CASE(&cpu.ZF, 15)
CASE(&cpu.SF, 1)
#endif
panic("bad ptr = %p", dest);
}
......
......@@ -13,14 +13,14 @@ void backend_exec_code(uint64_t pc, int nr_instr) {
backend_exec(nr_instr);
}
vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr) {
vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr, int npc_type, int nr_suffix) {
// copy code to rv64 interpreter to execute it
backend_memcpy_from_frontend(RV64_EXEC_PC - riscv64_PMEM_BASE, buf, sizeof(uint32_t) * nr_instr);
// if the basic block is end with a branch instruction,
// execute until the branch instruction
// see rtl_jrelop() at rtl-basic.c
int nr_exec = (tran_next_pc == NEXT_PC_BRANCH ? nr_instr - (5+2*suffix_inst) : nr_instr);
int nr_exec = (npc_type == NEXT_PC_BRANCH ? nr_instr - (5+2*nr_suffix) : nr_instr);
backend_exec_code(RV64_EXEC_PC, nr_exec);
riscv64_CPU_state r;
......@@ -32,9 +32,9 @@ vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr) {
backend_getregs(&r);
}
if (tran_next_pc == NEXT_PC_BRANCH) {
if (npc_type == NEXT_PC_BRANCH) {
// execute the branch instruction and load x86.pc to x30
backend_exec(suffix_inst+3);
backend_exec(nr_suffix+3);
backend_getregs(&r);
}
......
......@@ -391,8 +391,8 @@ make_rtl(jrelop, uint32_t relop, const rtlreg_t *src1, const rtlreg_t *src2, vad
case RELOP_NE: rv64_bne(rs1, rs2, offset); break;
case RELOP_LT: rv64_blt(rs1, rs2, offset); break;
case RELOP_GE: rv64_bge(rs1, rs2, offset); break;
case RELOP_LTU: rv64_bltu(rs1, rs2, offset); return;
case RELOP_GEU: rv64_bgeu(rs1, rs2, offset); return;
case RELOP_LTU: rv64_bltu(rs1, rs2, offset); break;
case RELOP_GEU: rv64_bgeu(rs1, rs2, offset); break;
case RELOP_LE: rv64_bge(rs2, rs1, offset); break;
case RELOP_GT: rv64_blt(rs2, rs1, offset); break;
......
......@@ -6,6 +6,8 @@
#include "tran.h"
#include "spill.h"
#define TOP_N 10
//#define DUMP_RV64
#define BUF_SIZE 13000 //8192
uint32_t trans_buffer[BUF_SIZE] = {};
......@@ -14,9 +16,73 @@ int tran_next_pc = NEXT_PC_SEQ;
static void clear_trans_buffer() { trans_buffer_index = 0; }
void asm_print(vaddr_t ori_pc, int instr_len, bool print_flag);
vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr);
vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr, int npc_type, int nr_suffix);
void guest_getregs(CPU_state *cpu);
// A translation block (TB): one guest basic block translated into RV64 code,
// cached in a singly-linked list headed by `head`.
typedef struct TB {
vaddr_t pc;              // guest PC at which this basic block starts (lookup key)
int npc_type;            // how the next PC is decided (NEXT_PC_SEQ / NEXT_PC_BRANCH / NEXT_PC_END)
vaddr_t npc;             // next guest PC recorded at translation time (used for the sequential case)
void *code;              // malloc'ed copy of the translated RV64 instructions
uint32_t nr_instr;       // number of translated host (RV64) instructions in `code`
uint32_t guest_nr_instr; // number of guest instructions this block covers
uint32_t hit_time;       // execution count; used to rank the hottest blocks
uint32_t nr_suffix;      // number of suffix instructions after a branch, passed to rv64_exec_trans_buffer()
struct TB *next;         // next TB in the list
} TB;
// Sentinel list head; freshly translated TBs are prepended after it.
static TB head = { .next = NULL };
// Look up the cached translation block that starts at guest address `pc`.
// Returns NULL when no block for that address has been translated yet.
static TB* find_tb(vaddr_t pc) {
  TB *cur = head.next;
  while (cur != NULL) {
    if (cur->pc == pc) break;
    cur = cur->next;
  }
  return cur;
}
// Return the index of the coldest entry (smallest hit_time) in top[0..TOP_N-1].
static int find_topn_min(TB **top) {
  int min_idx = 0;
  int i = 1;
  while (i < TOP_N) {
    if (top[i]->hit_time < top[min_idx]->hit_time) min_idx = i;
    i ++;
  }
  return min_idx;
}
// Collect the TOP_N hottest translation blocks (by hit_time) from the TB list
// and return them sorted in descending hit_time order.
// Requires the list to contain at least TOP_N entries (asserted below).
static TB** find_topn_tb() {
  static TB *top[TOP_N];
  int i;

  // Seed the candidate set with the first TOP_N blocks in the list.
  TB *p = head.next;
  for (i = 0; i < TOP_N; i ++) {
    Assert(p != NULL, "i = %d", i);
    top[i] = p;
    p = p->next;
  }

  // Scan the rest of the list, evicting the current coldest candidate
  // whenever a strictly hotter block is found.
  int min = find_topn_min(top);
  while (p != NULL) {
    if (p->hit_time > top[min]->hit_time) {
      top[min] = p;
      min = find_topn_min(top);
    }
    p = p->next;
  }

  // Selection-sort the candidates into descending hit_time order.
  for (i = 0; i < TOP_N; i ++) {
    int max = i;
    int j;
    for (j = i + 1; j < TOP_N; j ++) {
      if (top[j]->hit_time > top[max]->hit_time) max = j;
    }
    if (max != i) {
      TB *swap = top[i];
      top[i] = top[max];
      top[max] = swap;
    }
  }
  return top;
}
void write_ins(uint32_t ins) {
assert(trans_buffer_index < BUF_SIZE);
trans_buffer[trans_buffer_index++]=ins;
......@@ -29,47 +95,88 @@ void mainloop() {
nemu_state.state = NEMU_RUNNING;
uint64_t total_instr = 0;
while (1) {
__attribute__((unused)) vaddr_t ori_pc = cpu.pc;
__attribute__((unused)) vaddr_t seq_pc = isa_exec_once();
vaddr_t tb_start = cpu.pc;
TB *tb = find_tb(tb_start);
if (tb == NULL) {
clear_trans_buffer();
tran_next_pc = NEXT_PC_SEQ;
int guest_nr_instr = 0;
while (1) {
__attribute__((unused)) vaddr_t ori_pc = cpu.pc;
__attribute__((unused)) vaddr_t seq_pc = isa_exec_once();
guest_nr_instr ++;
#ifdef REG_SPILLING
spill_cleanall();
spill_cleanall();
#endif
if (nemu_state.state != NEMU_RUNNING) tran_next_pc = NEXT_PC_END;
if (nemu_state.state != NEMU_RUNNING) tran_next_pc = NEXT_PC_END;
#ifdef DEBUG
asm_print(ori_pc, seq_pc - ori_pc, true);
asm_print(ori_pc, seq_pc - ori_pc, true);
#endif
#ifndef DIFF_TEST
if (tran_next_pc != NEXT_PC_SEQ) {
#ifdef DIFF_TEST
if (true)
#else
if (tran_next_pc != NEXT_PC_SEQ)
#endif
vaddr_t next_pc = rv64_exec_trans_buffer(trans_buffer, trans_buffer_index);
total_instr += trans_buffer_index;
{
tb = malloc(sizeof(TB));
tb->pc = tb_start;
tb->nr_instr = trans_buffer_index;
tb->nr_suffix = suffix_inst;
tb->guest_nr_instr = guest_nr_instr;
tb->code = malloc(tb->nr_instr * 4);
memcpy(tb->code, trans_buffer, tb->nr_instr * 4);
tb->npc_type = tran_next_pc;
tb->npc = cpu.pc;
tb->hit_time = 0;
tb->next = head.next;
head.next = tb;
break;
}
}
}
if (tran_next_pc == NEXT_PC_END) {
// get cpu.eax and interpret `nemu_trap` again
guest_getregs(&cpu);
cpu.pc = ori_pc;
//Log("enter tb with pc = %x, nr_instr = %d", tb->pc, tb->nr_instr);
vaddr_t next_pc = rv64_exec_trans_buffer(tb->code, tb->nr_instr, tb->npc_type, tb->nr_suffix);
total_instr += tb->nr_instr;
tb->hit_time ++;
if (tb->npc_type == NEXT_PC_END) {
// get cpu.eax and interpret `nemu_trap` again
guest_getregs(&cpu);
cpu.pc = tb_start;
nemu_state.state = NEMU_RUNNING;
while (nemu_state.state == NEMU_RUNNING) {
isa_exec_once();
break;
}
if (tran_next_pc != NEXT_PC_SEQ) cpu.pc = next_pc;
// Log("new basic block pc = %x", cpu.pc);
clear_trans_buffer();
tran_next_pc = NEXT_PC_SEQ;
#ifndef DIFF_TEST
break;
}
#endif
if (tb->npc_type != NEXT_PC_SEQ) cpu.pc = next_pc;
else cpu.pc = tb->npc;
#ifdef DIFF_TEST
guest_getregs(&cpu);
difftest_step(ori_pc, cpu.pc);
difftest_step(tb_start, cpu.pc);
if (nemu_state.state == NEMU_ABORT) break;
#endif
}
// display the top-n hot basic block
TB **top = find_topn_tb();
int i;
for (i = 0; i < TOP_N; i ++) {
printf("%3d: pc = " FMT_WORD "(instr: %d -> %d), \thit time = %d\n",
i + 1, top[i]->pc, top[i]->guest_nr_instr, top[i]->nr_instr, top[i]->hit_time);
#ifdef DUMP_RV64
int j;
for (j = 0; j < top[i]->nr_instr; j ++) {
printf("\t.word 0x%08x\n", ((uint32_t *)top[i]->code)[j]);
}
#endif
}
switch (nemu_state.state) {
case NEMU_RUNNING: nemu_state.state = NEMU_STOP; break;
......
#include "cc.h"
static inline make_EHelper(add) {
// TODO();
rtl_add(s, s0, ddest, dsrc1);
operand_write(s, id_dest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s0, ddest, NULL, LAZYCC_ADD, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
if (id_dest->width != 4) {
rtl_andi(s, s0, s0, 0xffffffffu >> ((4 - id_dest->width) * 8));
}
......@@ -12,137 +14,131 @@ static inline make_EHelper(add) {
rtl_set_CF(s, s1);
rtl_is_add_overflow(s, s1, s0, ddest, dsrc1, id_dest->width);
rtl_set_OF(s, s1);
#endif
operand_write(s, id_dest, s0);
print_asm_template2(add);
}
// dest <- sub result
static inline void cmp_internal(DecodeExecState *s) {
rtl_sub(s, s0, ddest, dsrc1);
#ifdef LAZY_CC
rtl_set_lazycc(s, ddest, dsrc1, NULL, LAZYCC_SUB, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
if (id_dest->width != 4) {
rtl_andi(s, s0, s0, 0xffffffffu >> ((4 - id_dest->width) * 8));
}
rtl_is_sub_carry(s, s1, s0, ddest);
rtl_is_sub_carry(s, s1, ddest, dsrc1);
rtl_set_CF(s, s1);
rtl_is_sub_overflow(s, s1, s0, ddest, dsrc1, id_dest->width);
rtl_set_OF(s, s1);
#endif
}
static inline make_EHelper(sub) {
// TODO();
cmp_internal(s);
operand_write(s, id_dest, s0);
print_asm_template2(sub);
}
static inline make_EHelper(cmp) {
// TODO();
cmp_internal(s);
print_asm_template2(cmp);
}
static inline make_EHelper(inc) {
// TODO();
rtl_addi(s, s0, ddest, 1);
operand_write(s, id_dest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_INC, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
rtl_setrelopi(s, RELOP_EQ, s1, s0, 0x80000000);
rtl_setrelopi(s, RELOP_EQ, s1, s0, 0x1u << (id_dest->width * 8 - 1));
rtl_set_OF(s, s1);
#endif
operand_write(s, id_dest, s0);
print_asm_template1(inc);
}
static inline make_EHelper(dec) {
// TODO();
rtl_subi(s, s0, ddest, 1);
operand_write(s, id_dest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_DEC, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
rtl_setrelopi(s, RELOP_EQ, s1, s0, 0x7fffffff);
rtl_setrelopi(s, RELOP_EQ, s1, ddest, 0x1u << (id_dest->width * 8 - 1));
rtl_set_OF(s, s1);
#endif
operand_write(s, id_dest, s0);
print_asm_template1(dec);
}
static inline make_EHelper(neg) {
// TODO();
rtl_sub(s, s0, rz, ddest);
#ifdef LAZY_CC
rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_NEG, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
rtl_setrelopi(s, RELOP_NE, s1, ddest, 0);
rtl_set_CF(s, s1);
rtl_setrelopi(s, RELOP_EQ, s1, ddest, 0x80000000);
rtl_setrelopi(s, RELOP_EQ, s1, ddest, 0x1u << (id_dest->width * 8 - 1));
rtl_set_OF(s, s1);
#endif
operand_write(s, id_dest, s0);
print_asm_template1(neg);
}
static inline make_EHelper(adc) {
// s0 = dest + src
rtl_add(s, s0, ddest, dsrc1);
// s1 = s0 + CF
rtl_get_CF(s, s1);
rtl_add(s, s1, s0, s1);
operand_write(s, id_dest, s1);
#ifdef LAZY_CC
rtl_lazy_setcc(s, s0, CC_B); // reading CC_B is to read CF
#else
rtl_get_CF(s, s0);
#endif
rtl_add(s, s0, dsrc1, s0);
rtl_add(s, s1, ddest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s1, s0, dsrc1, LAZYCC_ADC, id_dest->width);
#else
rtl_update_ZFSF(s, s1, id_dest->width);
rtl_is_add_overflow(s, s2, s1, ddest, dsrc1, id_dest->width);
rtl_set_OF(s, s2);
if (id_dest->width != 4) {
rtl_andi(s, s1, s1, 0xffffffffu >> ((4 - id_dest->width) * 8));
}
rtl_update_ZFSF(s, s1, id_dest->width);
// update CF
rtl_is_add_carry(s, s1, s1, s0);
rtl_is_add_carry(s, s0, s0, ddest);
rtl_or(s, s0, s0, s1);
rtl_is_add_carry(s, s2, s1, s0);
rtl_is_add_carry(s, s0, s0, dsrc1);
rtl_or(s, s0, s0, s2);
rtl_set_CF(s, s0);
// update OF
rtl_is_add_overflow(s, s0, s1, ddest, dsrc1, id_dest->width);
rtl_set_OF(s, s0);
#endif
operand_write(s, id_dest, s1);
print_asm_template2(adc);
}
static inline make_EHelper(sbb) {
// s0 = dest - src
rtl_sub(s, s0, ddest, dsrc1);
// s1 = s0 - CF
rtl_get_CF(s, s1);
rtl_sub(s, s1, s0, s1);
operand_write(s, id_dest, s1);
if (id_dest->width != 4) {
rtl_andi(s, s1, s1, 0xffffffffu >> ((4 - id_dest->width) * 8));
}
#ifdef LAZY_CC
rtl_lazy_setcc(s, s0, CC_B); // reading CC_B is to read CF
#else
rtl_get_CF(s, s0);
#endif
rtl_add(s, s0, dsrc1, s0);
rtl_sub(s, s1, ddest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s1, ddest, dsrc1, LAZYCC_SBB, id_dest->width);
#else
rtl_update_ZFSF(s, s1, id_dest->width);
// update CF
rtl_is_sub_carry(s, s1, s1, s0);
rtl_is_sub_carry(s, s0, s0, ddest);
rtl_or(s, s0, s0, s1);
rtl_is_sub_overflow(s, s2, s1, ddest, dsrc1, id_dest->width);
rtl_set_OF(s, s2);
rtl_is_add_carry(s, s2, s0, dsrc1);
rtl_is_sub_carry(s, s0, ddest, s0);
rtl_or(s, s0, s0, s2);
rtl_set_CF(s, s0);
// update OF
rtl_is_sub_overflow(s, s0, s1, ddest, dsrc1, id_dest->width);
rtl_set_OF(s, s0);
#endif
operand_write(s, id_dest, s1);
print_asm_template2(sbb);
}
......
......@@ -3,6 +3,26 @@
#include "../local-include/rtl.h"
enum {
CC_O, CC_NO, CC_B, CC_NB,
CC_E, CC_NE, CC_BE, CC_NBE,
CC_S, CC_NS, CC_P, CC_NP,
CC_L, CC_NL, CC_LE, CC_NLE
};
enum {
LAZYCC_ADD,
LAZYCC_SUB,
LAZYCC_INC,
LAZYCC_DEC,
LAZYCC_NEG,
LAZYCC_ADC,
LAZYCC_SBB,
LAZYCC_LOGIC,
};
#include "lazycc.h"
/* Condition Code */
static inline const char* get_cc_name(int subcode) {
......@@ -15,14 +35,8 @@ static inline const char* get_cc_name(int subcode) {
return cc_name[subcode];
}
static inline void rtl_setcc(DecodeExecState *s, rtlreg_t* dest, uint8_t subcode) {
bool invert = subcode & 0x1;
enum {
CC_O, CC_NO, CC_B, CC_NB,
CC_E, CC_NE, CC_BE, CC_NBE,
CC_S, CC_NS, CC_P, CC_NP,
CC_L, CC_NL, CC_LE, CC_NLE
};
static inline void rtl_setcc(DecodeExecState *s, rtlreg_t* dest, uint32_t subcode) {
uint32_t invert = subcode & 0x1;
// TODO: Query EFLAGS to determine whether the condition code is satisfied.
// dest <- ( cc is satisfied ? 1 : 0)
......
......@@ -10,8 +10,12 @@ static inline make_EHelper(jmp) {
static inline make_EHelper(jcc) {
// the target address is calculated at the decode stage
uint32_t cc = s->opcode & 0xf;
#ifdef LAZY_CC
rtl_lazy_jcc(s, cc);
#else
rtl_setcc(s, s0, cc);
rtl_jrelop(s, RELOP_NE, s0, rz, s->jmp_pc);
#endif
print_asm("j%s %x", get_cc_name(cc), s->jmp_pc);
}
......
......@@ -15,7 +15,7 @@ static inline void set_width(DecodeExecState *s, int width) {
#define IDEX(idx, id, ex) IDEXW(idx, id, ex, 0)
#define EXW(idx, ex, w) IDEXW(idx, empty, ex, w)
#define EX(idx, ex) EXW(idx, ex, 0)
#define EMPTY(idx) //EX(idx, inv)
#define EMPTY(idx) EX(idx, inv)
#define CASE_ENTRY(idx, id, ex, w) case idx: id(s); ex(s); break;
......@@ -30,8 +30,8 @@ static inline make_EHelper(gp1) {
/* 0xc0, 0xc1, 0xd0, 0xd1, 0xd2, 0xd3 */
static inline make_EHelper(gp2) {
switch (s->isa.ext_opcode) {
EX(0x00, rol) EMPTY(0x01) EMPTY(0x02) EMPTY(0x03)
EX(0x04, shl) EX (0x05, shr) EMPTY(0x06) EX (0x07, sar)
EX(0x00, rol) EX(0x01, ror) EMPTY(0x02) EMPTY(0x03)
EX(0x04, shl) EX(0x05, shr) EMPTY(0x06) EX (0x07, sar)
}
}
......@@ -99,10 +99,6 @@ static inline make_EHelper(2byte_esc) {
}
static inline void exec(DecodeExecState *s) {
#ifdef USE_KVM
extern void kvm_exec(void);
kvm_exec();
#else
uint8_t opcode;
again:
opcode = instr_fetch(&s->seq_pc, 1);
......@@ -114,13 +110,13 @@ IDEXW(0x00, G2E, add, 1) IDEX (0x01, G2E, add) IDEXW(0x02, E2G, add, 1)
EMPTY(0x04) IDEX (0x05, I2a, add)
IDEXW(0x08, G2E, or, 1) IDEX (0x09, G2E, or) IDEXW(0x0a, E2G, or, 1) IDEX (0x0b, E2G, or)
IDEXW(0x0c, I2a, or, 1) IDEX (0x0d, I2a, or) EMPTY(0x0e) EX (0x0f, 2byte_esc)
EMPTY(0x10) IDEX (0x11, G2E, adc) EMPTY(0x12) IDEX (0x13, E2G, adc)
IDEXW(0x10, G2E, adc, 1) IDEX (0x11, G2E, adc) IDEXW(0x12, E2G, adc, 1) IDEX (0x13, E2G, adc)
EMPTY(0x18) IDEX (0x19, G2E, sbb) EMPTY(0x1a) IDEX (0x1b, E2G, sbb)
IDEXW(0x18, G2E, sbb, 1) IDEX (0x19, G2E, sbb) IDEXW(0x1a, E2G, sbb, 1) IDEX (0x1b, E2G, sbb)
IDEXW(0x20, G2E, and, 1) IDEX (0x21, G2E, and) IDEXW(0x22, E2G, and, 1) IDEX (0x23, E2G, and)
IDEXW(0x24, I2a, and, 1) IDEX (0x25, I2a, and)
EMPTY(0x28) IDEX (0x29, G2E, sub) EMPTY(0x2a) IDEX (0x2b, E2G, sub)
IDEXW(0x28, G2E, sub, 1) IDEX (0x29, G2E, sub) EMPTY(0x2a) IDEX (0x2b, E2G, sub)
EMPTY(0x2c) IDEX (0x2d, I2a, sub)
IDEXW(0x30, G2E, xor, 1) IDEX (0x31, G2E, xor) IDEXW(0x32, E2G, xor, 1) IDEX (0x33, E2G, xor)
EMPTY(0x34) IDEX (0x35, I2a, xor)
......@@ -178,10 +174,15 @@ IDEXW(0xec, in_dx2a, in, 1) IDEX (0xed, in_dx2a, in) IDEXW(0xee, out_a2dx, ou
case 0x66: s->isa.is_operand_size_16 = true; goto again;
default: exec_inv(s);
}
#endif
}
//#define USE_KVM
vaddr_t isa_exec_once() {
#ifdef USE_KVM
extern void kvm_exec(void);
kvm_exec();
return 0;
#endif
DecodeExecState s;
s.is_jmp = 0;
s.isa = (ISADecodeInfo) { 0 };
......
#include <cpu/exec.h>
#include "cc.h"
#ifdef LAZY_CC
// Record the operands of a flag-producing instruction instead of computing
// EFLAGS eagerly. The condition is materialized later by rtl_lazy_setcc() /
// rtl_lazy_jcc(). `dest` is the result value; `src1`/`src2` are the inputs
// (pass NULL for inputs the cc_op does not need).
static inline make_rtl(set_lazycc, const rtlreg_t *dest, const rtlreg_t *src1, const rtlreg_t *src2,
uint32_t cc_op, uint32_t width) {
rtl_mv(s, &cpu.cc_dest, dest);
if (src1 != NULL) rtl_mv(s, &cpu.cc_src1, src1);
if (src2 != NULL) rtl_mv(s, &cpu.cc_src2, src2);
cpu.cc_op = cc_op;     // which LAZYCC_* operation produced the flags
cpu.cc_width = width;  // operand width in bytes (cc_width * 8 bits, see users below)
}
#define UNARY 0x100 // compare with cpu.cc_dest and rz
// Map x86 condition codes to RTL relational operators.
// Entries tagged with UNARY test the saved result (cc_dest) against zero;
// untagged non-zero entries compare cc_dest with cc_src1.
// 0 means "no single relop"; those codes are handled case-by-case per cc_op.
static const int cc2relop [] = {
[CC_O] = 0, [CC_NO] = 0,
[CC_B] = RELOP_LTU, [CC_NB] = RELOP_GEU,
[CC_E] = UNARY | RELOP_EQ, [CC_NE] = UNARY | RELOP_NE,
[CC_BE] = RELOP_LEU, [CC_NBE] = RELOP_GTU,
[CC_S] = UNARY | RELOP_LT, [CC_NS] = UNARY | RELOP_GE,
[CC_P] = 0, [CC_NP] = 0,
[CC_L] = RELOP_LT, [CC_NL] = RELOP_GE,
[CC_LE] = RELOP_LE, [CC_NLE] = RELOP_GT,
};
// Emit a conditional jump to s->jmp_pc using the lazily-recorded flags
// (cpu.cc_op/cc_dest/cc_src1/cc_src2/cc_width) instead of concrete EFLAGS.
// Panics on cc / cc_op combinations that are not (yet) handled.
static inline make_rtl(lazy_jcc, uint32_t cc) {
// For SUB, E/NE must compare cc_dest with cc_src1 (handled in the switch),
// not test the saved result against zero.
int exception = (cpu.cc_op == LAZYCC_SUB) && (cc == CC_E || cc == CC_NE);
if ((cc2relop[cc] & UNARY) && !exception) {
uint32_t relop = cc2relop[cc] ^ UNARY;
rtlreg_t *p = &cpu.cc_dest;
if (cpu.cc_op == LAZYCC_SUB) {
// sub && (CC_S || CC_NS)
rtl_sub(s, s2, &cpu.cc_dest, &cpu.cc_src1);
p = s2;
}
// Narrow results need sign-relevant bits moved to the top before the
// compare-with-zero; for LOGIC E/NE the low bits alone are enough.
int exception = (cpu.cc_op == LAZYCC_LOGIC) && (cc == CC_E || cc == CC_NE);
if (cpu.cc_width != 4 && !exception) {
rtl_shli(s, s2, p, 32 - cpu.cc_width * 8);
p = s2;
}
rtl_jrelop(s, relop, p, rz, s->jmp_pc);
return;
}
switch (cpu.cc_op) {
case LAZYCC_DEC:
if (cc2relop[cc] != 0) {
rtl_jrelop(s, cc2relop[cc], &cpu.cc_dest, rz, s->jmp_pc);
return;
}
break;
case LAZYCC_SBB: // FIXME: should consider CF
if (cc == CC_B) {
// CF(sbb) = carry from (src + CF_in) or borrow from (src1 - dest).
rtl_sub(s, s0, &cpu.cc_src1, &cpu.cc_dest);
rtl_is_add_carry(s, s0, s0, &cpu.cc_src2);
rtl_is_sub_carry(s, s1, &cpu.cc_src1, &cpu.cc_dest);
rtl_or(s, s0, s0, s1);
rtl_jrelop(s, RELOP_NE, s0, rz, s->jmp_pc);
return;
}
break;
case LAZYCC_SUB:
if (cc2relop[cc] != 0) {
// cmp semantics: compare the two original operands directly.
rtl_jrelop(s, cc2relop[cc] & ~UNARY, &cpu.cc_dest, &cpu.cc_src1, s->jmp_pc);
return;
}
break;
case LAZYCC_LOGIC:
if (cc == CC_LE) {
rtl_jrelop(s, cc2relop[cc], &cpu.cc_dest, rz, s->jmp_pc);
return;
}
break;
default: panic("unhandle cc_op = %d", cpu.cc_op);
}
panic("unhandle cc_op = %d, cc = %d", cpu.cc_op, cc);
}
// Materialize condition code `cc` into `dest` (0 or 1) from the lazily
// recorded flag state. Mirrors rtl_lazy_jcc() but writes a value instead of
// emitting a branch. Panics on unhandled cc / cc_op combinations.
static inline make_rtl(lazy_setcc, rtlreg_t *dest, uint32_t cc) {
// For SUB, E/NE compare cc_dest with cc_src1 in the switch below.
int exception = (cpu.cc_op == LAZYCC_SUB) && (cc == CC_E || cc == CC_NE);
if ((cc2relop[cc] & UNARY) && !exception) {
uint32_t relop = cc2relop[cc] ^ UNARY;
rtlreg_t *p = &cpu.cc_dest;
if (cpu.cc_op == LAZYCC_SUB) {
// sub && (CC_S || CC_NS)
rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1);
p = dest;
}
// Shift narrow results so the sign bit lands in bit 31 before comparing
// with zero; unnecessary for LOGIC E/NE where only (non)zero matters.
int exception = (cpu.cc_op == LAZYCC_LOGIC) && (cc == CC_E || cc == CC_NE);
if (cpu.cc_width != 4 && !exception) {
rtl_shli(s, dest, p, 32 - cpu.cc_width * 8);
p = dest;
}
rtl_setrelop(s, relop, dest, p, rz);
return;
}
switch (cpu.cc_op) {
case LAZYCC_ADD:
if (cc2relop[cc] != 0) {
// Unsigned compares of result vs src1 recover CF/CF-derived codes.
rtlreg_t *p = &cpu.cc_dest;
if (cpu.cc_width != 4) {
rtl_andi(s, dest, &cpu.cc_dest, 0xffffffffu >> ((4 - cpu.cc_width) * 8));
p = dest;
}
rtl_setrelop(s, cc2relop[cc], dest, p, &cpu.cc_src1);
return;
}
if (cc == CC_O) {
// Reconstruct the other addend, then run the add-overflow check.
rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1);
rtl_is_add_overflow(s, dest, &cpu.cc_dest, &cpu.cc_src1, dest, cpu.cc_width);
return;
}
break;
case LAZYCC_SUB:
if (cc2relop[cc] != 0) {
rtl_setrelop(s, cc2relop[cc] & ~UNARY, dest, &cpu.cc_dest, &cpu.cc_src1);
return;
}
if (cc == CC_O) {
rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1);
rtl_is_sub_overflow(s, dest, dest, &cpu.cc_dest, &cpu.cc_src1, cpu.cc_width);
return;
}
break;
case LAZYCC_NEG:
if (cc == CC_B) {
// neg sets CF iff the operand was non-zero.
rtl_setrelopi(s, RELOP_NE, dest, &cpu.cc_dest, 0);
return;
}
if (cc == CC_O) {
// neg overflows only for the minimum signed value of the width.
rtl_setrelopi(s, RELOP_EQ, dest, &cpu.cc_dest, -(0x1u << (cpu.cc_width * 8 - 1)));
return;
}
break;
case LAZYCC_INC:
if (cc == CC_O) {
// inc overflows when the result wrapped to the minimum signed value.
rtl_setrelopi(s, RELOP_EQ, dest, &cpu.cc_dest, 0x1u << (cpu.cc_width * 8 - 1));
return;
}
break;
case LAZYCC_DEC:
if (cc == CC_O) {
// dec overflows when the pre-decrement value was the minimum signed value.
rtl_addi(s, dest, &cpu.cc_dest, 1);
rtl_setrelopi(s, RELOP_EQ, dest, dest, 0x1u << (cpu.cc_width * 8 - 1));
return;
}
break;
case LAZYCC_ADC:
if (cc == CC_B) {
// CF(adc) = carry from (src1 + src2) or carry from (result truncation).
rtlreg_t *p = &cpu.cc_dest;
if (cpu.cc_width != 4) {
rtl_andi(s, dest, &cpu.cc_dest, 0xffffffffu >> ((4 - cpu.cc_width) * 8));
p = dest;
}
rtl_is_add_carry(s, t0, &cpu.cc_src1, &cpu.cc_src2);
rtl_is_add_carry(s, dest, p, &cpu.cc_src1);
rtl_or(s, dest, t0, dest);
return;
}
if (cc == CC_O) {
rtl_sub(s, dest, &cpu.cc_dest, &cpu.cc_src1);
rtl_is_add_overflow(s, dest, &cpu.cc_dest, dest, &cpu.cc_src2, cpu.cc_width);
return;
}
break;
case LAZYCC_SBB:
if (cc == CC_B) {
rtl_sub(s, s0, &cpu.cc_src1, &cpu.cc_dest);
rtl_is_add_carry(s, s0, s0, &cpu.cc_src2);
rtl_is_sub_carry(s, s1, &cpu.cc_src1, &cpu.cc_dest);
rtl_or(s, dest, s0, s1);
return;
}
if (cc == CC_O) {
rtl_is_sub_overflow(s, dest, &cpu.cc_dest, &cpu.cc_src1, &cpu.cc_src2, cpu.cc_width);
return;
}
break;
case LAZYCC_LOGIC:
if (cc == CC_E || cc == CC_NE || cc == CC_LE) {
rtl_setrelop(s, cc2relop[cc], dest, &cpu.cc_dest, rz);
return;
}
break;
default: panic("unhandle cc_op = %d", cpu.cc_op);
}
panic("unhandle cc_op = %d, cc = %d", cpu.cc_op, cc);
}
#endif
......@@ -3,9 +3,13 @@
// Shared body of test/and: s0 <- ddest & dsrc1, then record flags.
// dest <- and result
static inline void and_internal(DecodeExecState *s) {
rtl_and(s, s0, ddest, dsrc1);
#ifdef LAZY_CC
// Lazy mode: just remember the result; flags are derived on demand.
rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_LOGIC, id_dest->width);
#else
// Eager mode: logic ops clear CF and OF and update ZF/SF from the result.
rtl_update_ZFSF(s, s0, id_dest->width);
rtl_mv(s, &cpu.CF, rz);
rtl_mv(s, &cpu.OF, rz);
#endif
}
static inline make_EHelper(test) {
......@@ -21,19 +25,27 @@ static inline make_EHelper(and) {
static inline make_EHelper(xor) {
rtl_xor(s, s0, ddest, dsrc1);
operand_write(s, id_dest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_LOGIC, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
rtl_mv(s, &cpu.CF, rz);
rtl_mv(s, &cpu.OF, rz);
#endif
operand_write(s, id_dest, s0);
print_asm_template2(xor);
}
static inline make_EHelper(or) {
rtl_or(s, s0, ddest, dsrc1);
operand_write(s, id_dest, s0);
#ifdef LAZY_CC
rtl_set_lazycc(s, s0, NULL, NULL, LAZYCC_LOGIC, id_dest->width);
#else
rtl_update_ZFSF(s, s0, id_dest->width);
rtl_mv(s, &cpu.CF, rz);
rtl_mv(s, &cpu.OF, rz);
#endif
operand_write(s, id_dest, s0);
print_asm_template2(or);
}
......@@ -41,8 +53,10 @@ static inline make_EHelper(sar) {
rtl_sext(s, s0, ddest, id_dest->width);
rtl_sar(s, s0, s0, dsrc1);
operand_write(s, id_dest, s0);
#ifndef LAZY_CC
rtl_update_ZFSF(s, s0, id_dest->width);
// unnecessary to update CF and OF in NEMU
#endif
//difftest_skip_eflags(EFLAGS_MASK_CF | EFLAGS_MASK_OF);
print_asm_template2(sar);
}
......@@ -50,8 +64,10 @@ static inline make_EHelper(sar) {
static inline make_EHelper(shl) {
rtl_shl(s, s0, ddest, dsrc1);
operand_write(s, id_dest, s0);
#ifndef LAZY_CC
rtl_update_ZFSF(s, s0, id_dest->width);
// unnecessary to update CF and OF in NEMU
#endif
//difftest_skip_eflags(EFLAGS_MASK_CF | EFLAGS_MASK_OF | EFLAGS_MASK_ZF);
print_asm_template2(shl);
}
......@@ -59,8 +75,10 @@ static inline make_EHelper(shl) {
static inline make_EHelper(shr) {
rtl_shr(s, s0, ddest, dsrc1);
operand_write(s, id_dest, s0);
#ifndef LAZY_CC
rtl_update_ZFSF(s, s0, id_dest->width);
// unnecessary to update CF and OF in NEMU
#endif
//difftest_skip_eflags(EFLAGS_MASK_CF | EFLAGS_MASK_OF);
print_asm_template2(shr);
}
......@@ -78,10 +96,27 @@ static inline make_EHelper(rol) {
print_asm_template2(rol);
}
// ror: rotate ddest right by dsrc1 bits;
// result = (dest >> n) | (dest << (width*8 - n)).
// NOTE(review): when the count is 0 this shifts left by width*8 — confirm
// rtl_shl's semantics for a full-width shift (the decoder may mask the count).
static inline make_EHelper(ror) {
rtl_shr(s, s0, ddest, dsrc1);      // low part: dest >> n
rtl_li(s, s1, id_dest->width * 8);
rtl_sub(s, s1, s1, dsrc1);         // width*8 - n
rtl_shl(s, s1, ddest, s1);         // high part: dest << (width*8 - n)
rtl_or(s, s1, s0, s1);
operand_write(s, id_dest, s1);
// unnecessary to update eflags in NEMU
//difftest_skip_eflags(EFLAGS_MASK_ALL);
print_asm_template2(ror);
}
static inline make_EHelper(setcc) {
uint32_t cc = s->opcode & 0xf;
#ifdef LAZY_CC
rtl_lazy_setcc(s, s0, cc);
#else
rtl_setcc(s, s0, cc);
#endif
operand_write(s, id_dest, s0);
print_asm("set%s %s", get_cc_name(cc), id_dest->str);
......@@ -98,15 +133,19 @@ static inline make_EHelper(shld) {
rtl_andi(s, dsrc1, dsrc1, 31);
rtl_shl(s, s0, ddest, dsrc1);
rtl_li(s, s1, 32);
rtl_li(s, s1, 31);
rtl_sub(s, s1, s1, dsrc1);
// shift twice to deal with dsrc1 = 0
rtl_shr(s, s1, dsrc2, s1);
rtl_shri(s, s1, s1, 1);
rtl_or(s, s0, s0, s1);
operand_write(s, id_dest, s0);
#ifndef LAZY_CC
rtl_update_ZFSF(s, s0, id_dest->width);
// unnecessary to update CF and OF in NEMU
#endif
print_asm_template3(shld);
}
......@@ -114,14 +153,18 @@ static inline make_EHelper(shrd) {
rtl_andi(s, dsrc1, dsrc1, 31);
rtl_shr(s, s0, ddest, dsrc1);
rtl_li(s, s1, 32);
rtl_li(s, s1, 31);
rtl_sub(s, s1, s1, dsrc1);
// shift twice to deal with dsrc1 = 0
rtl_shl(s, s1, dsrc2, s1);
rtl_shli(s, s1, s1, 1);
rtl_or(s, s0, s0, s1);
operand_write(s, id_dest, s0);
#ifndef LAZY_CC
rtl_update_ZFSF(s, s0, id_dest->width);
// unnecessary to update CF and OF in NEMU
#endif
print_asm_template3(shrd);
}
......@@ -30,7 +30,9 @@ static make_EHelper(int) {
print_asm("int %s", id_dest->str);
#ifdef __DIFF_REF_QEMU__
difftest_skip_dut(1, 2);
#endif
}
static make_EHelper(iret) {
......
#include <monitor/monitor.h>
#include <memory/vaddr.h>
#include <memory/paddr.h>
#include <isa.h>
#include <fcntl.h>
#include <errno.h>
......@@ -137,9 +138,13 @@ int run_vm(struct vm *vm, struct vcpu *vcpu, size_t sz) {
/* fall through */
default:
fprintf(stderr, "Got exit_reason %d,"
if (ioctl(vcpu->fd, KVM_GET_REGS, &regs) < 0) {
perror("KVM_GET_REGS");
assert(0);
}
fprintf(stderr, "Got exit_reason %d at pc = 0x%llx,"
" expected KVM_EXIT_HLT (%d)\n",
vcpu->kvm_run->exit_reason, KVM_EXIT_HLT);
vcpu->kvm_run->exit_reason, regs.rip, KVM_EXIT_HLT);
assert(0);
}
}
......@@ -187,7 +192,7 @@ int run_protected_mode(struct vm *vm, struct vcpu *vcpu) {
memset(&regs, 0, sizeof(regs));
/* Clear all FLAGS bits, except bit 1 which is always set. */
regs.rflags = 2;
regs.rip = 0;
regs.rip = IMAGE_START;
if (ioctl(vcpu->fd, KVM_SET_REGS, &regs) < 0) {
perror("KVM_SET_REGS");
......
......@@ -50,9 +50,9 @@ static inline make_rtl(is_sub_overflow, rtlreg_t* dest,
}
static inline make_rtl(is_sub_carry, rtlreg_t* dest,
const rtlreg_t* res, const rtlreg_t* src1) {
const rtlreg_t* src1, const rtlreg_t* src2) {
// res = src1 - src2
rtl_setrelop(s, RELOP_LTU, dest, src1, res);
rtl_setrelop(s, RELOP_LTU, dest, src1, src2);
}
static inline make_rtl(is_add_overflow, rtlreg_t* dest,
......@@ -64,7 +64,7 @@ static inline make_rtl(is_add_overflow, rtlreg_t* dest,
static inline make_rtl(is_add_carry, rtlreg_t* dest,
const rtlreg_t* res, const rtlreg_t* src1) {
// res = src1 + src2
rtl_is_sub_carry(s, dest, src1, res);
rtl_is_sub_carry(s, dest, res, src1);
}
#define make_rtl_setget_eflags(f) \
......
# Build the x86 KVM difftest reference as a shared object ($(ISA)-kvm-so).
# The ISA check is skipped for `make clean`, which needs no ISA.
ifneq ($(MAKECMDGOALS),clean) # ignore check for make clean
ISA ?= x86
ifneq ($(ISA),x86) # ISA must be valid
$(error Only support x86)
endif
endif

INC_DIR += ./include $(NEMU_HOME)/include
BUILD_DIR ?= ./build
OBJ_DIR ?= $(BUILD_DIR)/obj-$(ISA)
BINARY ?= $(BUILD_DIR)/$(ISA)-kvm-so

.DEFAULT_GOAL = app

# Compilation flags
CC = gcc
LD = gcc
# ':=' expands once at parse time instead of on every reference.
INCLUDES := $(addprefix -I, $(INC_DIR))
CFLAGS += -O2 -fPIC -MMD -Wall -Werror -DNEMU_HOME=$(NEMU_HOME) $(INCLUDES)

# Files to be compiled
# ':=' so `find` runs once, not on every expansion of SRCS/OBJS.
SRCS := $(shell find src/ -name "*.c")
OBJS := $(SRCS:src/%.c=$(OBJ_DIR)/%.o)

# Compilation patterns
$(OBJ_DIR)/%.o: src/%.c
	@echo + CC $<
	@mkdir -p $(dir $@)
	$(CC) $(CFLAGS) -c -o $@ $<

# Dependencies (.d files generated by -MMD; absent on the first build, hence '-include')
-include $(OBJS:.o=.d)

# Some convenient rules
.PHONY: app clean

app: $(BINARY)

$(BINARY): $(OBJS)
	@echo + LD $@
	@$(LD) -O2 -rdynamic -shared -fPIC -o $@ $^

clean:
	-rm -rf $(BUILD_DIR)
// this is an empty file to avoid compile error
// from NEMU
#include <memory/paddr.h>
#include <isa/x86.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>
/* CR0 bits */
#define CR0_PE 1u          // protection enable (protected mode)
#define CR0_PG (1u << 31)  // paging enable
// State for the single KVM virtual machine.
struct vm {
int sys_fd;    // fd of /dev/kvm
int fd;        // VM fd returned by KVM_CREATE_VM
uint8_t *mem;  // host mapping of guest RAM (memory slot 0)
uint8_t *mmio; // host mapping of the MMIO window (memory slot 1)
};
// State for the single vCPU of the VM.
struct vcpu {
int fd;                  // vCPU fd returned by KVM_CREATE_VCPU
struct kvm_run *kvm_run; // run structure mmap'ed from the vCPU fd
int int_wp_state;        // watchpoint state machine, see STATE_* below
uint32_t entry;          // NOTE(review): not assigned in the code visible here — confirm its use
};
// States for stepping over `int` instructions with a hardware watchpoint.
enum {
STATE_IDLE, // if encounter an int instruction, then set watchpoint
STATE_INT_INSTR, // if hit the watchpoint, then delete the watchpoint
};
static struct vm vm;
static struct vcpu vcpu;
// Enable guest single-step debugging; when `watch` is true, additionally arm
// hardware breakpoint 0 (DR0/DR7) at `watch_addr`.
// This should be called everytime after KVM_SET_REGS.
// It seems that KVM_SET_REGS will clean the state of single step.
static void kvm_set_step_mode(bool watch, uint32_t watch_addr) {
struct kvm_guest_debug debug = {};
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[0] = watch_addr;          // DR0: address to watch
debug.arch.debugreg[7] = (watch ? 0x1 : 0x0); // watch instruction fetch at `watch_addr`
if (ioctl(vcpu.fd, KVM_SET_GUEST_DEBUG, &debug) < 0) {
perror("KVM_SET_GUEST_DEBUG");
assert(0);
}
}
// Read the vCPU's general-purpose registers into *r; aborts on ioctl failure.
static inline void kvm_getregs(struct kvm_regs *r) {
  int ret = ioctl(vcpu.fd, KVM_GET_REGS, r);
  if (ret < 0) {
    perror("KVM_GET_REGS");
    assert(0);
  }
}
// Write the vCPU's general-purpose registers from *r, then re-arm single-step
// mode, since KVM_SET_REGS appears to clear the guest-debug state.
static void kvm_setregs(const struct kvm_regs *r) {
  int ret = ioctl(vcpu.fd, KVM_SET_REGS, r);
  if (ret < 0) {
    perror("KVM_SET_REGS");
    assert(0);
  }
  kvm_set_step_mode(false, 0);
}
// Read the vCPU's special registers (segment/control) into *r; aborts on failure.
static void kvm_getsregs(struct kvm_sregs *r) {
  int ret = ioctl(vcpu.fd, KVM_GET_SREGS, r);
  if (ret < 0) {
    perror("KVM_GET_SREGS");
    assert(0);
  }
}
// Write the vCPU's special registers (segment/control) from *r; aborts on failure.
static void kvm_setsregs(const struct kvm_sregs *r) {
  int ret = ioctl(vcpu.fd, KVM_SET_SREGS, r);
  if (ret < 0) {
    perror("KVM_SET_SREGS");
    assert(0);
  }
}
// Map `mem_size` bytes of anonymous host memory and register it with KVM as
// guest-physical memory slot `slot` starting at guest address `base`.
// Returns the host pointer to the mapping; aborts on any failure.
static void* create_mem(int slot, uintptr_t base, size_t mem_size) {
void *mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
if (mem == MAP_FAILED) {
perror("mmap mem");
assert(0);
}
// Allow same-content page merging (KSM) to reduce host memory usage.
madvise(mem, mem_size, MADV_MERGEABLE);
struct kvm_userspace_memory_region memreg;
memreg.slot = slot;
memreg.flags = 0;
memreg.guest_phys_addr = base;
memreg.memory_size = mem_size;
memreg.userspace_addr = (unsigned long)mem;
if (ioctl(vm.fd, KVM_SET_USER_MEMORY_REGION, &memreg) < 0) {
perror("KVM_SET_USER_MEMORY_REGION");
assert(0);
}
return mem;
}
// Open /dev/kvm, verify the API version, create a VM, and set up guest memory:
// slot 0 = `mem_size` bytes of RAM at guest physical address 0,
// slot 1 = a 4KB MMIO window at 0xa1000000.
// Aborts on any failure.
static void vm_init(size_t mem_size) {
int api_ver;
vm.sys_fd = open("/dev/kvm", O_RDWR);
if (vm.sys_fd < 0) {
perror("open /dev/kvm");
assert(0);
}
api_ver = ioctl(vm.sys_fd, KVM_GET_API_VERSION, 0);
if (api_ver < 0) {
perror("KVM_GET_API_VERSION");
assert(0);
}
if (api_ver != KVM_API_VERSION) {
fprintf(stderr, "Got KVM api version %d, expected %d\n",
api_ver, KVM_API_VERSION);
assert(0);
}
vm.fd = ioctl(vm.sys_fd, KVM_CREATE_VM, 0);
if (vm.fd < 0) {
perror("KVM_CREATE_VM");
assert(0);
}
// TSS region required by KVM on some CPUs; placed out of the guest's way.
if (ioctl(vm.fd, KVM_SET_TSS_ADDR, 0xfffbd000) < 0) {
perror("KVM_SET_TSS_ADDR");
assert(0);
}
vm.mem = create_mem(0, 0, mem_size);
vm.mmio = create_mem(1, 0xa1000000, 0x1000);
}
// Create vCPU 0, map its shared kvm_run communication area, and enable
// register mirroring so GPRs/sregs are synced into kvm_run on each exit.
// Aborts on any failure.
static void vcpu_init() {
  vcpu.fd = ioctl(vm.fd, KVM_CREATE_VCPU, 0);
  if (vcpu.fd < 0) {
    perror("KVM_CREATE_VCPU");
    assert(0);
  }

  int run_size = ioctl(vm.sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
  if (run_size <= 0) {
    perror("KVM_GET_VCPU_MMAP_SIZE");
    assert(0);
  }
  vcpu.kvm_run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
      MAP_SHARED, vcpu.fd, 0);
  if (vcpu.kvm_run == MAP_FAILED) {
    perror("mmap kvm_run");
    assert(0);
  }

  // have KVM copy GPRs and special registers into kvm_run->s.regs on
  // every exit, so we can read them without extra ioctls
  vcpu.kvm_run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
  vcpu.int_wp_state = STATE_IDLE;
}
// Hand-assembled boot stub copied to guest 0x7c00 by run_protected_mode().
// It loads the flat GDT embedded below (descriptor at 0x7c28 points to the
// table at 0x7c10), far-jumps to reload CS with selector 0x08, then spins.
static const uint8_t mbr[] = {
  // start32:
  0x0f, 0x01, 0x15, 0x28, 0x7c, 0x00, 0x00, // lgdtl 0x7c28
  0xea, 0x0e, 0x7c, 0x00, 0x00, 0x08, 0x00, // ljmp $0x8, 0x7c0e
  // here:
  0xeb, 0xfe, // jmp here
  // GDT: null descriptor, flat 4GiB code (0x08), flat 4GiB data (0x10)
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00,
  0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00,
  // GDT descriptor: limit 0x17 (3 entries), base 0x7c10
  0x17, 0x00, 0x10, 0x7c, 0x00, 0x00
};
// Flip the vCPU's special registers into 32-bit flat protected mode:
// set CR0.PE and install flat 4GiB code/data segments matching the GDT
// in the boot stub (code selector 0x08, data selector 0x10).
static void setup_protected_mode(struct kvm_sregs *sregs) {
  // flat code segment: execute/read/accessed, 32-bit, 4KB granularity
  struct kvm_segment code = {
    .base = 0,
    .limit = 0xffffffff,
    .selector = 1 << 3,
    .present = 1,
    .type = 11, /* Code: execute, read, accessed */
    .dpl = 0,
    .db = 1,
    .s = 1, /* Code/data */
    .l = 0,
    .g = 1, /* 4KB granularity */
  };
  // data segment differs only in type and selector
  struct kvm_segment data = code;
  data.type = 3; /* Data: read/write, accessed */
  data.selector = 2 << 3;

  sregs->cr0 |= CR0_PE; /* enter protected mode */
  sregs->cs = code;
  sregs->ds = data;
  sregs->es = data;
  sregs->fs = data;
  sregs->gs = data;
  sregs->ss = data;
}
// Execute n guest instructions, one KVM_RUN (single-step debug exit) per
// instruction.  A two-state machine watches for software interrupts:
// when the instruction about to run is `int nr` (opcode 0xcd), we read the
// gate's handler entry from the guest IDT and verify on the next exit that
// the guest actually landed there (see STATE_INT_INSTR below).
static void kvm_exec(uint64_t n) {
  for (; n > 0; n --) {
    if (ioctl(vcpu.fd, KVM_RUN, 0) < 0) {
      if (errno == EINTR) {
        // interrupted by a signal before the step completed: this
        // iteration did not execute an instruction, so undo the
        // loop decrement and retry
        n ++;
        continue;
      }
      perror("KVM_RUN");
      assert(0);
    }
    // in single-step mode every exit must be a debug exit; anything
    // else (HLT, MMIO, ...) is unexpected here
    if (vcpu.kvm_run->exit_reason != KVM_EXIT_DEBUG) {
      fprintf(stderr, "Got exit_reason %d at pc = 0x%llx,"
          " expected KVM_EXIT_HLT (%d)\n",
          vcpu.kvm_run->exit_reason, vcpu.kvm_run->s.regs.regs.rip, KVM_EXIT_HLT);
      assert(0);
    } else {
      switch (vcpu.int_wp_state) {
        case STATE_IDLE:
          // translate the PC to a guest-physical address when paging
          // is on, so we can inspect the opcode bytes in vm.mem
          ; uint32_t pc;
          if (vcpu.kvm_run->s.regs.sregs.cr0 & CR0_PG) {
            struct kvm_translation t = { .linear_address = vcpu.kvm_run->debug.arch.pc };
            int ret = ioctl(vcpu.fd, KVM_TRANSLATE, &t);
            assert(ret == 0);
            assert(t.valid);
            pc = t.physical_address;
          } else pc = vcpu.kvm_run->debug.arch.pc;
          if (vm.mem[pc] == 0xcd) {  // opcode 0xcd = `int imm8`
            uint8_t nr = vm.mem[pc + 1];
            uint32_t pgate = vcpu.kvm_run->s.regs.sregs.idt.base + nr * 8;
            // assume code.base = 0
            // handler offset: low 16 bits in gate bytes 0-1, high 16 in bytes 6-7
            uint32_t entry = vm.mem[pgate] | (vm.mem[pgate + 1] << 8) |
              (vm.mem[pgate + 6] << 16) | (vm.mem[pgate + 7] << 24);
            kvm_set_step_mode(true, entry);
            vcpu.int_wp_state = STATE_INT_INSTR;
            vcpu.entry = entry;
          }
          break;
        case STATE_INT_INSTR:
          // we just stepped over the `int` instruction: the guest must
          // now be at the handler entry recorded above
          Assert(vcpu.entry == vcpu.kvm_run->debug.arch.pc, "entry not match");
          kvm_set_step_mode(false, 0);
          vcpu.int_wp_state = STATE_IDLE;
          break;
      }
      //Log("exception = %d, pc = %llx, dr6 = %llx, dr7 = %llx", vcpu.kvm_run->debug.arch.exception,
      //    vcpu.kvm_run->debug.arch.pc, vcpu.kvm_run->debug.arch.dr6, vcpu.kvm_run->debug.arch.dr7);
    }
  }
}
// Bring the freshly created vCPU into 32-bit protected mode: install flat
// segments, point RIP at the boot stub loaded at 0x7c00, and step through
// enough instructions for the stub to load its GDT.
static void run_protected_mode() {
  struct kvm_sregs segs;
  kvm_getsregs(&segs);
  setup_protected_mode(&segs);
  kvm_setsregs(&segs);

  struct kvm_regs gpr;
  memset(&gpr, 0, sizeof(gpr));
  gpr.rflags = 2;    // bit 1 of EFLAGS is reserved and must be 1
  gpr.rip = 0x7c00;  // where the boot stub is copied below
  // this will also set KVM_GUESTDBG_ENABLE
  kvm_setregs(&gpr);

  memcpy(vm.mem + 0x7c00, mbr, sizeof(mbr));
  // run enough instructions to load GDT
  kvm_exec(10);
}
// Copy n bytes from the DUT's buffer into the reference's guest memory
// at guest-physical address dest (difftest API entry point).
void difftest_memcpy_from_dut(paddr_t dest, void *src, size_t n) {
  void *host_dest = vm.mem + dest;
  memcpy(host_dest, src, n);
}
// Export the reference's register state (mirrored in kvm_run->s.regs by
// the sync-regs mechanism) into the DUT's x86_CPU_state layout.
void difftest_getregs(void *r) {
  x86_CPU_state *dut = r;
  struct kvm_regs *kr = &(vcpu.kvm_run->s.regs.regs);
  dut->eax = kr->rax;
  dut->ecx = kr->rcx;
  dut->edx = kr->rdx;
  dut->ebx = kr->rbx;
  dut->esp = kr->rsp;
  dut->ebp = kr->rbp;
  dut->esi = kr->rsi;
  dut->edi = kr->rdi;
  dut->pc  = kr->rip;
}
// Overwrite the reference's register state with the DUT's, via the
// sync-regs area; the new values take effect on the next KVM_RUN.
void difftest_setregs(const void *r) {
  const x86_CPU_state *dut = r;
  struct kvm_regs *kr = &(vcpu.kvm_run->s.regs.regs);
  kr->rax = dut->eax;
  kr->rcx = dut->ecx;
  kr->rdx = dut->edx;
  kr->rbx = dut->ebx;
  kr->rsp = dut->esp;
  kr->rbp = dut->ebp;
  kr->rsi = dut->esi;
  kr->rdi = dut->edi;
  kr->rip = dut->pc;
  // keep the trap flag (EFLAGS bit 8) set so the guest keeps single-stepping
  kr->rflags |= (1 << 8);
  // tell KVM to load the synced GPRs back into the vCPU on next entry
  vcpu.kvm_run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
}
// Step the reference by n guest instructions (difftest API entry point).
void difftest_exec(uint64_t n) {
  kvm_exec(n);
}
// Initialize the KVM-based reference: create the VM and vCPU, then enter
// protected mode.  `port` is unused here; it is part of the common
// difftest interface shared with other reference implementations.
void difftest_init(int port) {
  vm_init(PMEM_SIZE);
  vcpu_init();
  run_protected_mode();
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册