提交 7b5cfd3b 编写于 作者: W wangkaifan

Fix problem concerning lazy spilling

上级 ca1b4beb
......@@ -68,10 +68,6 @@ uint32_t reg_ptr2idx(DecodeExecState *s, const rtlreg_t* dest) {
// if not mapped, spill out one tmp_reg and remap
idx = spill_out_and_remap(s, tmp_idx);
// for (int i = 0; i < TMP_REG_NUM; i++) {
// printf("[%d] %d %d\n", i, tmp_regs[i].map_ptr, tmp_regs[i].used);
// }
return idx;
}
......@@ -81,7 +77,7 @@ void guest_getregs(CPU_state *mips32) {
int i;
for (i = 0; i < 32; i ++) {
switch (i) {
case 28: case 1: case 25: case 26: case 27: continue;
case 28: case 1: case TMP_REG_ADDR: case TMP_REG_1: case TMP_REG_2: continue;
}
mips32->gpr[i]._32 = r.gpr[i]._64;
}
......@@ -95,7 +91,7 @@ void guest_setregs(const CPU_state *mips32) {
int i;
for (i = 0; i < 32; i ++) {
switch (i) {
case 28: case 1: case 25: case 26: case 27: continue;
case 28: case 1: case TMP_REG_ADDR: case TMP_REG_1: case TMP_REG_2: continue;
}
r.gpr[i]._64 = mips32->gpr[i]._32;
}
......
#include <isa/riscv64.h>
#include "../tran.h"
#include "../spill.h"
#define RV64_EXEC_PC (riscv64_PMEM_BASE + BBL_MAX_SIZE) // skip bbl
......@@ -19,7 +20,7 @@ vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr) {
// if the basic block ends with a branch instruction,
// execute until the branch instruction
// see rtl_jrelop() at rtl-basic.c
int nr_exec = (tran_next_pc == NEXT_PC_BRANCH ? nr_instr - 5 : nr_instr);
int nr_exec = (tran_next_pc == NEXT_PC_BRANCH ? nr_instr - (5+2*suffix_inst) : nr_instr);
backend_exec_code(RV64_EXEC_PC, nr_exec);
riscv64_CPU_state r;
......@@ -33,9 +34,13 @@ vaddr_t rv64_exec_trans_buffer(void *buf, int nr_instr) {
if (tran_next_pc == NEXT_PC_BRANCH) {
// execute the branch instruction and load x86.pc to x30
backend_exec(3);
backend_exec(suffix_inst+3);
backend_getregs(&r);
}
#ifdef REG_SPILLING
tmp_regs_reset();
#endif
return r.gpr[tmp0]._64;
}
......@@ -358,22 +358,28 @@ make_rtl(host_sm, void *addr, const rtlreg_t *src1, int len) {
// we use tmp0 to store x86.pc of the next basic block

// Translate an unconditional jump to a constant target: emit code that
// leaves `target` in tmp0 and mark the translated block as ending in a jump.
make_rtl(j, vaddr_t target) {
#ifdef REG_SPILLING
// Emit the write-back of the temp registers before the block ends.
spill_out_all();
#endif
// NOTE(review): when load_imm_big() returns false the fallback addiw reads
// tmp0 as its own source operand; confirm that this (rather than a zero
// register) is intended for small targets.
if (!load_imm_big(tmp0, target)) rv64_addiw(tmp0, tmp0, target & 0xfff);
tran_next_pc = NEXT_PC_JMP;
}
// Translate an indirect jump: emit code that puts the value of the register
// mapped to `target` into tmp0 (addi with immediate 0) and mark the
// translated block as ending in a jump.
make_rtl(jr, rtlreg_t *target) {
// NOTE(review): this unguarded call and the REG_SPILLING-guarded call below
// look like the before/after versions of the same change shown together;
// confirm which one belongs to the current code.
spill_out_all();
rv64_addi(tmp0, reg_ptr2idx(s, target), 0);
#ifdef REG_SPILLING
// Spill after `target` has been read through reg_ptr2idx().
spill_out_all();
#endif
tran_next_pc = NEXT_PC_JMP;
}
make_rtl(jrelop, uint32_t relop, const rtlreg_t *src1, const rtlreg_t *src2, vaddr_t target) {
spill_out_all();
uint32_t rs1 = reg_ptr2idx(s, src1);
uint32_t rs2 = reg_ptr2idx(s, src2);
uint32_t offset = 12; // branch two instructions
#ifdef REG_SPILLING
cal_suffix_inst();
#endif
uint32_t offset = 12 + 4*suffix_inst; // branch two instructions
extern int trans_buffer_index;
int old_idx = trans_buffer_index;
......@@ -397,13 +403,19 @@ make_rtl(jrelop, uint32_t relop, const rtlreg_t *src1, const rtlreg_t *src2, vad
// generate instructions to load the not-taken target
load_imm_no_opt(tmp0, s->seq_pc); // only two instructions
#ifdef REG_SPILLING
spill_out_all();
#endif
// generate instructions to load the taken target
load_imm_no_opt(tmp0, target); // only two instructions
#ifdef REG_SPILLING
spill_out_all();
#endif
tran_next_pc = NEXT_PC_BRANCH;
int new_idx = trans_buffer_index;
Assert(new_idx - old_idx == 5, "if this condition is broken, "
Assert(new_idx - old_idx == suffix_inst*2+5, "if this condition is broken, "
"you should also modify rv64_exec_trans_buffer() in exec.c");
}
......
......@@ -7,25 +7,23 @@
Tmp_reg tmp_regs[TMP_REG_NUM];
Tmp_reg spill_tmp_reg;
uint32_t suffix_inst = 0;
static inline bool load_imm_big(uint32_t r, const uint32_t imm) {
static inline void load_imm_no_opt(uint32_t r, const sword_t imm) {
RV_IMM rv_imm = { .val = imm };
uint32_t lui_imm = rv_imm.imm_31_12 + (rv_imm.imm_11_0 >> 11);
if (lui_imm == 0) return false;
else {
rv64_lui(r, lui_imm);
if (rv_imm.imm_11_0 != 0) rv64_addiw(r, r, rv_imm.imm_11_0);
return true;
}
rv64_lui(r, lui_imm);
rv64_addiw(r, r, rv_imm.imm_11_0);
}
void tmp_regs_init() {
suffix_inst = 0;
if (TMP_REG_NUM == 2) {
tmp_regs[0].idx = TMP_REG_1;
tmp_regs[0].map_ptr = 3;
tmp_regs[0].map_ptr = 0;
tmp_regs[0].used = 0;
tmp_regs[1].idx = TMP_REG_2;
tmp_regs[1].map_ptr = 4;
tmp_regs[1].map_ptr = 0;
tmp_regs[1].used = 0;
} else {
panic("Other TMP_REG_NUM!\n");
......@@ -34,6 +32,17 @@ void tmp_regs_init() {
spill_tmp_reg.map_ptr = 2;
}
// Forget every temp-register mapping: both slots get map_ptr = 0 and
// used = 0. The idx fields are left untouched. Only the two-slot
// configuration is supported; any other TMP_REG_NUM panics.
void tmp_regs_reset() {
  if (TMP_REG_NUM != 2) {
    panic("Other TMP_REG_NUM!\n");
  } else {
    for (int slot = 0; slot < 2; slot++) {
      tmp_regs[slot].map_ptr = 0;
      tmp_regs[slot].used = 0;
    }
  }
}
uint32_t check_tmp_reg(uint32_t tmp_idx) {
for (int i = 0; i < TMP_REG_NUM; i++) {
if (tmp_regs[i].map_ptr == tmp_idx) {
......@@ -59,11 +68,11 @@ uint32_t spill_out_and_remap(DecodeExecState *s, uint32_t tmp_idx) {
}
addr = SCRATCHPAD_BASE_ADDR + 4 * (tmp_regs[ptr].map_ptr);
load_imm_big(spill_tmp_reg.idx, addr);
load_imm_no_opt(spill_tmp_reg.idx, addr);
rv64_sw(tmp_regs[ptr].idx, spill_tmp_reg.idx, 0);
addr = SCRATCHPAD_BASE_ADDR + 4 * (tmp_idx);
load_imm_big(spill_tmp_reg.idx, addr);
load_imm_no_opt(spill_tmp_reg.idx, addr);
rv64_lw(tmp_regs[ptr].idx, spill_tmp_reg.idx, 0);
tmp_regs[ptr].map_ptr = tmp_idx;
......@@ -72,21 +81,23 @@ uint32_t spill_out_and_remap(DecodeExecState *s, uint32_t tmp_idx) {
return tmp_regs[ptr].idx;
}
void spill_out_all() {
// Recompute suffix_inst: three instructions are counted for every temp
// register whose map_ptr is non-zero (i.e. that currently holds a mapping).
void cal_suffix_inst() {
  uint32_t pending = 0;
  for (int slot = 0; slot < TMP_REG_NUM; slot++) {
    pending += (tmp_regs[slot].map_ptr != 0) ? 3 : 0;
  }
  suffix_inst = pending;
}
// Emit code that stores temp registers to their scratchpad slots, addressed
// as SCRATCHPAD_BASE_ADDR + 4 * map_ptr, using spill_tmp_reg to hold the
// address.
// NOTE(review): the loop body below appears to contain two versions of the
// same logic shown together (an unconditional store/remap/reload sequence
// followed by a store guarded on map_ptr != 0); confirm which statements
// belong to the current code. The "0/3/6 inst" remark only matches the
// guarded form.
void spill_out_all() { // can be 0/3/6 inst
uint32_t addr;
for (int i = 0; i < TMP_REG_NUM; i++) {
// Unconditional form: store to the current slot, remap to slot 3+i and
// reload from it, then clear the used flag.
addr = SCRATCHPAD_BASE_ADDR + 4 * (tmp_regs[i].map_ptr);
//printf("used: %d, %x\n", tmp_regs[i].used, addr);
load_imm_big(spill_tmp_reg.idx, addr);
rv64_sw(tmp_regs[i].idx, spill_tmp_reg.idx, 0);
tmp_regs[i].map_ptr = 3+i;
addr = SCRATCHPAD_BASE_ADDR + 4 * (tmp_regs[i].map_ptr);
load_imm_big(spill_tmp_reg.idx, addr);
rv64_lw(tmp_regs[i].idx, spill_tmp_reg.idx, 0);
tmp_regs[i].used = 0;
// Guarded form: only a register with a non-zero map_ptr is written back;
// the mapping itself is left in place.
if (tmp_regs[i].map_ptr != 0) {
addr = SCRATCHPAD_BASE_ADDR + 4 * (tmp_regs[i].map_ptr);
load_imm_no_opt(spill_tmp_reg.idx, addr);
rv64_sw(tmp_regs[i].idx, spill_tmp_reg.idx, 0);
}
}
}
......@@ -97,4 +108,10 @@ void spill_clean(uint32_t tmp_idx) {
return;
}
}
}
// Clear the `used` flag of every temp register; mappings (map_ptr) are kept.
void spill_cleanall() {
  int slot = 0;
  while (slot < TMP_REG_NUM) {
    tmp_regs[slot].used = 0;
    slot++;
  }
}
\ No newline at end of file
......@@ -23,11 +23,15 @@ typedef struct {
extern Tmp_reg tmp_regs[TMP_REG_NUM];
extern Tmp_reg spill_tmp_reg;
extern uint32_t suffix_inst;
void tmp_regs_init();
void tmp_regs_reset();
uint32_t check_tmp_reg(uint32_t);
uint32_t spill_out_and_remap(DecodeExecState*, uint32_t);
void spill_clean(uint32_t);
void spill_cleanall();
void cal_suffix_inst();
void spill_out_all();
#endif
\ No newline at end of file
......@@ -19,7 +19,6 @@ void guest_getregs(CPU_state *cpu);
// Append one encoded instruction word to trans_buffer and advance
// trans_buffer_index. Asserts that the buffer still has room.
void write_ins(uint32_t ins) {
  assert(trans_buffer_index < BUF_SIZE);
  trans_buffer[trans_buffer_index] = ins;
  trans_buffer_index += 1;
}
......@@ -33,11 +32,8 @@ void mainloop() {
__attribute__((unused)) vaddr_t ori_pc = cpu.pc;
__attribute__((unused)) vaddr_t seq_pc = isa_exec_once();
#ifdef REG_SPILLING
for (int i = 0; i < TMP_REG_NUM; i++) {
tmp_regs[i].used = 0;
}
spill_cleanall();
#endif
//printf("PC: %x\n", seq_pc);
if (nemu_state.state != NEMU_RUNNING) tran_next_pc = NEXT_PC_END;
......@@ -47,9 +43,6 @@ void mainloop() {
#ifndef DIFF_TEST
if (tran_next_pc != NEXT_PC_SEQ) {
#endif
#ifdef REG_SPILLING
tmp_regs_init();
#endif
vaddr_t next_pc = rv64_exec_trans_buffer(trans_buffer, trans_buffer_index);
total_instr += trans_buffer_index;
......
......@@ -176,7 +176,6 @@ static inline void exec(DecodeExecState *s) {
// in-page instructions, fetch 4 byte and
// see whether it is an RVC instruction later
s->isa.instr.val = instr_fetch(&s->seq_pc, 4);
//printf("[instr]%x\n", s->isa.instr.val);
}
check_mem_ex();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册