提交 7cbb1d28 作者: Yinan Xu

Merge remote-tracking branch 'origin/master' into dev-icache

......@@ -21,7 +21,7 @@ jobs:
echo ::set-env name=NOOP_HOME::$GITHUB_WORKSPACE
- name: Build EMU
run:
make ./build/emu SIM_ARGS=--disable-log NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j20
make ./build/emu SIM_ARGS=--disable-log EMU_THREADS=16 NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j20
cputest:
runs-on: self-hosted
......
......@@ -62,12 +62,14 @@ EMU_VFILES = $(shell find $(EMU_VSRC_DIR) -name "*.v" -or -name "*.sv")
EMU_CXXFLAGS = -std=c++11 -static -Wall -I$(EMU_CSRC_DIR)
EMU_CXXFLAGS += -DVERILATOR -Wno-maybe-uninitialized
EMU_LDFLAGS = -lpthread -lSDL2 -ldl
EMU_THREADS = 1
VERILATOR_FLAGS = --top-module $(SIM_TOP) \
+define+VERILATOR=1 \
+define+PRINTF_COND=1 \
+define+RANDOMIZE_REG_INIT \
+define+RANDOMIZE_MEM_INIT \
--threads $(EMU_THREADS) --threads-dpi none\
--assert \
--savable \
--stats-vars \
......@@ -123,7 +125,7 @@ EMU_FLAGS = -s $(SEED) -b $(B) -e $(E) $(SNAPSHOT_OPTION) $(WAVEFORM)
emu: $(EMU)
ifeq ($(REMOTE),localhost)
@$(EMU) -i $(IMAGE) $(EMU_FLAGS)
@numactl -m 0 -N 0 -- $(EMU) -i $(IMAGE) $(EMU_FLAGS)
else
ssh -tt $(REMOTE) "cd $(REMOTE_PRJ_HOME) && export NOOP_HOME=$(REMOTE_PREFIX)/$(NOOP_HOME) && $(EMU) -i $(REMOTE_PREFIX)/$(IMAGE) $(EMU_FLAGS)"
endif
......
......@@ -99,10 +99,12 @@ class IssueQueue
v && isSameType && (src===uop.pdest)
}
//TODO: opt this, do bypass select in 'select' stage not 'issue' stage
val bypassData = RegNext(io.bypassData)
def doBypass(src: UInt, srcType: UInt): (Bool, UInt) = {
val hitVec = io.bypassData.map(p => (p.valid, p.bits.uop)).
val hitVec = bypassData.map(p => (p.valid, p.bits.uop)).
map(wbUop => writeBackHit(src, srcType, wbUop))
val data = ParallelMux(hitVec.zip(io.bypassData.map(_.bits.data)))
val data = ParallelMux(hitVec.zip(bypassData.map(_.bits.data)))
(ParallelOR(hitVec).asBool(), data)
}
......@@ -136,13 +138,17 @@ class IssueQueue
}
}
// 1. wake up
for(i <- 0 until qsize){
uopQueue(i) := wakeUp(uopQueue(i))
}
// 2. select
for(i <- 0 until qsize){
val newUop = wakeUp(uopQueue(i))
uopQueue(i) := newUop
readyVec(i) := uopIsRdy(newUop)
readyVec(i) := uopIsRdy(uopQueue(i))
}
// select
val selectedIdxRegOH = Wire(UInt(qsize.W))
val selectMask = WireInit(VecInit(
(0 until qsize).map(i =>
......@@ -178,7 +184,7 @@ class IssueQueue
}
// (fake) deq to Load/Store unit
io.deq.valid := (stateQueue(selectedIdxReg)===s_valid) && readyVec(idxQueue(selectedIdxReg)) && selReg
io.deq.valid := (stateQueue(selectedIdxReg)===s_valid) && selReg
io.deq.bits.uop := uopQueue(idxQueue(selectedIdxReg))
val src1Bypass = doBypass(io.deq.bits.uop.psrc1, io.deq.bits.uop.ctrl.src1Type)
......
......@@ -74,6 +74,16 @@ class LoadUnit extends XSModule {
l2_out.ready := (l2_dcache && io.dcache.req.ready) || l2_mmio || l2_dtlb_miss
io.ldin.ready := l2_out.ready
// exception check
val addrAligned = LookupTree(io.ldin.bits.uop.ctrl.fuOpType(1,0), List(
"b00".U -> true.B, //b
"b01".U -> (l2_out.bits.vaddr(0) === 0.U), //h
"b10".U -> (l2_out.bits.vaddr(1,0) === 0.U), //w
"b11".U -> (l2_out.bits.vaddr(2,0) === 0.U) //d
))
l2_out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
l2_out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld
// send result to dcache
// never send tlb missed or MMIO reqs to dcache
io.dcache.req.valid := l2_dcache
......
......@@ -12,7 +12,7 @@ class LsRoqEntry extends XSBundle {
val op = UInt(6.W)
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val exception = UInt(8.W)
val exception = UInt(16.W) // TODO: opt size
val mmio = Bool()
val fwdMask = Vec(8, Bool())
val fwdData = Vec(8, UInt(8.W))
......@@ -107,7 +107,7 @@ class Lsroq extends XSModule {
(0 until LoadPipelineWidth).map(i => {
when(io.loadIn(i).fire()) {
when(io.loadIn(i).bits.miss) {
XSInfo(io.loadIn(i).valid, "load miss write to lsroq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x\n",
XSInfo(io.loadIn(i).valid, "load miss write to lsroq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
io.loadIn(i).bits.uop.lsroqIdx,
io.loadIn(i).bits.uop.cf.pc,
io.loadIn(i).bits.vaddr,
......@@ -117,10 +117,11 @@ class Lsroq extends XSModule {
io.loadIn(i).bits.forwardData.asUInt,
io.loadIn(i).bits.forwardMask.asUInt,
io.loadIn(i).bits.mmio,
io.loadIn(i).bits.rollback
)
}.otherwise {
XSInfo(io.loadIn(i).valid, "load hit write to cbd idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x\n",
io.loadIn(i).bits.rollback,
io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
)
}.otherwise {
XSInfo(io.loadIn(i).valid, "load hit write to cbd idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
io.loadIn(i).bits.uop.lsroqIdx,
io.loadIn(i).bits.uop.cf.pc,
io.loadIn(i).bits.vaddr,
......@@ -130,36 +131,39 @@ class Lsroq extends XSModule {
io.loadIn(i).bits.forwardData.asUInt,
io.loadIn(i).bits.forwardMask.asUInt,
io.loadIn(i).bits.mmio,
io.loadIn(i).bits.rollback
)
io.loadIn(i).bits.rollback,
io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
)
}
valid(io.loadIn(i).bits.uop.lsroqIdx) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(io.loadIn(i).bits.uop.lsroqIdx) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
// allocated(io.loadIn(i).bits.uop.lsroqIdx) := io.loadIn(i).bits.miss // if hit, lsroq entry can be recycled
data(io.loadIn(i).bits.uop.lsroqIdx).paddr := io.loadIn(i).bits.paddr
data(io.loadIn(i).bits.uop.lsroqIdx).mask := io.loadIn(i).bits.mask
data(io.loadIn(i).bits.uop.lsroqIdx).data := io.loadIn(i).bits.data // for mmio / misc / debug
data(io.loadIn(i).bits.uop.lsroqIdx).mmio := io.loadIn(i).bits.mmio
data(io.loadIn(i).bits.uop.lsroqIdx).fwdMask := io.loadIn(i).bits.forwardMask
data(io.loadIn(i).bits.uop.lsroqIdx).fwdData := io.loadIn(i).bits.forwardData
data(io.loadIn(i).bits.uop.lsroqIdx).exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
miss(io.loadIn(i).bits.uop.lsroqIdx) := io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
store(io.loadIn(i).bits.uop.lsroqIdx) := false.B
pending(io.loadIn(i).bits.uop.lsroqIdx) := io.loadIn(i).bits.mmio
}
valid(io.loadIn(i).bits.uop.lsroqIdx) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(io.loadIn(i).bits.uop.lsroqIdx) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
// allocated(io.loadIn(i).bits.uop.lsroqIdx) := io.loadIn(i).bits.miss // if hit, lsroq entry can be recycled
data(io.loadIn(i).bits.uop.lsroqIdx).paddr := io.loadIn(i).bits.paddr
data(io.loadIn(i).bits.uop.lsroqIdx).mask := io.loadIn(i).bits.mask
data(io.loadIn(i).bits.uop.lsroqIdx).data := io.loadIn(i).bits.data // for mmio / misc / debug
data(io.loadIn(i).bits.uop.lsroqIdx).mmio := io.loadIn(i).bits.mmio
data(io.loadIn(i).bits.uop.lsroqIdx).fwdMask := io.loadIn(i).bits.forwardMask
data(io.loadIn(i).bits.uop.lsroqIdx).fwdData := io.loadIn(i).bits.forwardData
miss(io.loadIn(i).bits.uop.lsroqIdx) := io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
store(io.loadIn(i).bits.uop.lsroqIdx) := false.B
pending(io.loadIn(i).bits.uop.lsroqIdx) := io.loadIn(i).bits.mmio
}
})
// writeback store
(0 until StorePipelineWidth).map(i => {
when(io.storeIn(i).fire()) {
valid(io.storeIn(i).bits.uop.lsroqIdx) := !io.storeIn(i).bits.mmio
data(io.storeIn(i).bits.uop.lsroqIdx).paddr := io.storeIn(i).bits.paddr
data(io.storeIn(i).bits.uop.lsroqIdx).mask := io.storeIn(i).bits.mask
data(io.storeIn(i).bits.uop.lsroqIdx).data := io.storeIn(i).bits.data
data(io.storeIn(i).bits.uop.lsroqIdx).mmio := io.storeIn(i).bits.mmio
miss(io.storeIn(i).bits.uop.lsroqIdx) := io.storeIn(i).bits.miss
store(io.storeIn(i).bits.uop.lsroqIdx) := true.B
pending(io.storeIn(i).bits.uop.lsroqIdx) := io.storeIn(i).bits.mmio
XSInfo("store write to lsroq idx %d pc 0x%x vaddr %x paddr %x data %x miss %x mmio %x roll %x\n",
})
// writeback store
(0 until StorePipelineWidth).map(i => {
when(io.storeIn(i).fire()) {
valid(io.storeIn(i).bits.uop.lsroqIdx) := !io.storeIn(i).bits.mmio
data(io.storeIn(i).bits.uop.lsroqIdx).paddr := io.storeIn(i).bits.paddr
data(io.storeIn(i).bits.uop.lsroqIdx).mask := io.storeIn(i).bits.mask
data(io.storeIn(i).bits.uop.lsroqIdx).data := io.storeIn(i).bits.data
data(io.storeIn(i).bits.uop.lsroqIdx).mmio := io.storeIn(i).bits.mmio
data(io.storeIn(i).bits.uop.lsroqIdx).exception := io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
miss(io.storeIn(i).bits.uop.lsroqIdx) := io.storeIn(i).bits.miss
store(io.storeIn(i).bits.uop.lsroqIdx) := true.B
pending(io.storeIn(i).bits.uop.lsroqIdx) := io.storeIn(i).bits.mmio
XSInfo("store write to lsroq idx %d pc 0x%x vaddr %x paddr %x data %x miss %x mmio %x roll %x exc %x\n",
io.storeIn(i).bits.uop.lsroqIdx(InnerLsroqIdxWidth - 1, 0),
io.storeIn(i).bits.uop.cf.pc,
io.storeIn(i).bits.vaddr,
......@@ -167,9 +171,10 @@ class Lsroq extends XSModule {
io.storeIn(i).bits.data,
io.storeIn(i).bits.miss,
io.storeIn(i).bits.mmio,
io.storeIn(i).bits.rollback
)
}
io.storeIn(i).bits.rollback,
io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
)
}
})
// cache miss request
......@@ -267,6 +272,7 @@ class Lsroq extends XSModule {
LSUOpType.ldu -> ZeroExt(rdataSel(63, 0), XLEN)
))
io.ldout(i).bits.uop := uop(loadWbSel(i))
io.ldout(i).bits.uop.cf.exceptionVec := data(loadWbSel(i)).exception.asBools
io.ldout(i).bits.data := rdataPartialLoad
io.ldout(i).bits.redirectValid := false.B
io.ldout(i).bits.redirect := DontCare
......@@ -301,6 +307,7 @@ class Lsroq extends XSModule {
(0 until StorePipelineWidth).map(i => {
io.stout(i).bits.uop := uop(storeWbSel(i))
io.stout(i).bits.uop.cf.exceptionVec := data(storeWbSel(i)).exception.asBools
io.stout(i).bits.data := data(storeWbSel(i)).data
io.stout(i).bits.redirectValid := false.B
io.stout(i).bits.redirect := DontCare
......
......@@ -70,6 +70,16 @@ class StoreUnit extends XSModule {
s2_out.valid := io.stin.valid && !io.dtlb.resp.bits.miss && !s2_out.bits.uop.needFlush(io.redirect)
io.stin.ready := s2_out.ready
// exception check
val addrAligned = LookupTree(io.stin.bits.uop.ctrl.fuOpType(1,0), List(
"b00".U -> true.B, //b
"b01".U -> (s2_out.bits.vaddr(0) === 0.U), //h
"b10".U -> (s2_out.bits.vaddr(1,0) === 0.U), //w
"b11".U -> (s2_out.bits.vaddr(2,0) === 0.U) //d
))
s2_out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
s2_out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlb.resp.bits.excp.pf.st
PipelineConnect(s2_out, s3_in, true.B, false.B)
//-------------------------------------------------------
// ST Pipeline Stage 3
......
......@@ -158,7 +158,9 @@ int difftest_step(DiffState *s) {
ref_difftest_getregs(&ref_r);
ref_r[DIFFTEST_THIS_PC] += selectBit(s->isRVC, i) ? 2 : 4;
if(selectBit(s->wen, i)){
ref_r[s->wdst[i]] = s->wdata[i];
if(s->wdst[i] != 0){
ref_r[s->wdst[i]] = s->wdata[i];
}
}
ref_difftest_setregs(ref_r);
}else{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册