提交 b0cf5de6 编写于 作者: Z Zihao Yu

Merge branch 'master' into merge-master

......@@ -48,8 +48,8 @@ dhrystone:
xj:
$(MAKE) -C $(NANOS_HOME) $(ARCH) run
# xjnemu:
# $(MAKE) -C $(NANOS_HOME) ARCH=riscv64-nemu run
xjnemu:
$(MAKE) -C $(NANOS_HOME) ARCH=riscv64-nemu run
rttos:
$(MAKE) -C $(RTTOS_HOME)/bsp/riscv64-noop run
......@@ -60,6 +60,14 @@ rttos-debug:
freertos:
$(MAKE) -C $(FREERTOS_HOME)/Demo/riscv64-noop noop_run
# Run the `noop` target of the Makefile located in $(XV6_HOME).
xv6:
$(MAKE) -C $(XV6_HOME) noop
# Same as `xv6`, but merges stderr into stdout and uses tee to both show the
# output on the terminal and save a copy in ./xv6.log.
# (tee takes the log file as an argument; redirecting tee's stdout with `>`
# would send everything to the file and leave the terminal silent.)
xv6-debug:
	$(MAKE) -C $(XV6_HOME) noop 2>&1 | tee xv6.log
# Run the `noop` target of the Makefile located in $(BBL_LINUX_HOME).
linux:
$(MAKE) -C $(BBL_LINUX_HOME) noop
# ------------------------------------------------------------------
# get disassembled test src
# ------------------------------------------------------------------
......@@ -70,3 +78,6 @@ disassemble-rttos:
disassemble-freertos:
cp $(FREERTOS_HOME)/Demo/riscv64-noop/build/FreeRTOS-simple.elf.txt ./d-freertos.log
# Copy $(XV6_HOME)/build/code.txt into the current directory as d-xv6.log.
# NOTE(review): code.txt is presumably the disassembly produced by the xv6 build — confirm.
disassemble-xv6:
cp $(XV6_HOME)/build/code.txt ./d-xv6.log
......@@ -222,22 +222,22 @@ proc create_hier_cell_hier_clkrst { parentCell nameHier } {
set_property -dict [ list \
CONFIG.CLKIN1_JITTER_PS {100.0} \
CONFIG.CLKOUT1_DRIVES {BUFG} \
CONFIG.CLKOUT1_JITTER {94.863} \
CONFIG.CLKOUT1_PHASE_ERROR {87.181} \
CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {300} \
CONFIG.CLKOUT1_JITTER {114.831} \
CONFIG.CLKOUT1_PHASE_ERROR {98.576} \
CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {200} \
CONFIG.CLKOUT2_DRIVES {BUFG} \
CONFIG.CLKOUT2_JITTER {115.833} \
CONFIG.CLKOUT2_PHASE_ERROR {87.181} \
CONFIG.CLKOUT2_JITTER {130.959} \
CONFIG.CLKOUT2_PHASE_ERROR {98.576} \
CONFIG.CLKOUT2_REQUESTED_OUT_FREQ {100.000} \
CONFIG.CLKOUT2_USED {true} \
CONFIG.CLKOUT3_DRIVES {BUFG} \
CONFIG.CLKOUT3_JITTER {139.035} \
CONFIG.CLKOUT3_PHASE_ERROR {87.181} \
CONFIG.CLKOUT3_JITTER {159.373} \
CONFIG.CLKOUT3_PHASE_ERROR {98.576} \
CONFIG.CLKOUT3_REQUESTED_OUT_FREQ {40} \
CONFIG.CLKOUT3_USED {true} \
CONFIG.CLKOUT4_DRIVES {Buffer} \
CONFIG.CLKOUT4_JITTER {151.083} \
CONFIG.CLKOUT4_PHASE_ERROR {87.181} \
CONFIG.CLKOUT4_JITTER {172.734} \
CONFIG.CLKOUT4_PHASE_ERROR {98.576} \
CONFIG.CLKOUT4_REQUESTED_OUT_FREQ {27} \
CONFIG.CLKOUT4_USED {true} \
CONFIG.CLKOUT5_DRIVES {Buffer} \
......@@ -247,13 +247,13 @@ proc create_hier_cell_hier_clkrst { parentCell nameHier } {
CONFIG.CLK_OUT2_PORT {uncoreclk} \
CONFIG.CLK_OUT3_PORT {clk50} \
CONFIG.CLK_OUT4_PORT {clk27} \
CONFIG.MMCM_CLKFBOUT_MULT_F {12.000} \
CONFIG.MMCM_CLKFBOUT_MULT_F {10.000} \
CONFIG.MMCM_CLKIN1_PERIOD {10.000} \
CONFIG.MMCM_CLKIN2_PERIOD {10.000} \
CONFIG.MMCM_CLKOUT0_DIVIDE_F {4.000} \
CONFIG.MMCM_CLKOUT1_DIVIDE {12} \
CONFIG.MMCM_CLKOUT2_DIVIDE {30} \
CONFIG.MMCM_CLKOUT3_DIVIDE {44} \
CONFIG.MMCM_CLKOUT0_DIVIDE_F {5.000} \
CONFIG.MMCM_CLKOUT1_DIVIDE {10} \
CONFIG.MMCM_CLKOUT2_DIVIDE {25} \
CONFIG.MMCM_CLKOUT3_DIVIDE {37} \
CONFIG.MMCM_COMPENSATION {AUTO} \
CONFIG.MMCM_DIVCLK_DIVIDE {1} \
CONFIG.NUM_OUT_CLKS {4} \
......
......@@ -11,49 +11,49 @@
#################9134 setting##############################
# FPGA pin to hdmi signal
set_property PACKAGE_PIN A1 [get_ports hdmi_clk]
set_property PACKAGE_PIN C8 [get_ports {hdmi_rgb[0]}]
set_property PACKAGE_PIN A3 [get_ports {hdmi_rgb[1]}]
set_property PACKAGE_PIN B1 [get_ports {hdmi_rgb[2]}]
set_property PACKAGE_PIN B3 [get_ports {hdmi_rgb[3]}]
set_property PACKAGE_PIN C1 [get_ports {hdmi_rgb[4]}]
set_property PACKAGE_PIN B6 [get_ports {hdmi_rgb[5]}]
set_property PACKAGE_PIN A2 [get_ports {hdmi_rgb[6]}]
set_property PACKAGE_PIN C6 [get_ports {hdmi_rgb[7]}]
set_property PACKAGE_PIN F3 [get_ports {hdmi_rgb[8]}]
set_property PACKAGE_PIN D6 [get_ports {hdmi_rgb[9]}]
set_property PACKAGE_PIN D1 [get_ports {hdmi_rgb[10]}]
set_property PACKAGE_PIN D7 [get_ports {hdmi_rgb[11]}]
set_property PACKAGE_PIN E1 [get_ports {hdmi_rgb[12]}]
set_property PACKAGE_PIN F5 [get_ports {hdmi_rgb[13]}]
set_property PACKAGE_PIN F6 [get_ports {hdmi_rgb[14]}]
set_property PACKAGE_PIN F7 [get_ports {hdmi_rgb[15]}]
set_property PACKAGE_PIN G6 [get_ports {hdmi_rgb[16]}]
set_property PACKAGE_PIN G5 [get_ports {hdmi_rgb[17]}]
set_property PACKAGE_PIN G8 [get_ports {hdmi_rgb[18]}]
set_property PACKAGE_PIN F1 [get_ports {hdmi_rgb[19]}]
set_property PACKAGE_PIN E2 [get_ports {hdmi_rgb[20]}]
set_property PACKAGE_PIN G1 [get_ports {hdmi_rgb[21]}]
set_property PACKAGE_PIN F2 [get_ports {hdmi_rgb[22]}]
set_property PACKAGE_PIN D4 [get_ports {hdmi_rgb[23]}]
set_property PACKAGE_PIN B4 [get_ports hdmi_videovalid]
set_property PACKAGE_PIN B8 [get_ports hdmi_hsync]
set_property PACKAGE_PIN G3 [get_ports hdmi_nreset]
set_property PACKAGE_PIN A4 [get_ports hdmi_vsync]
set_property PACKAGE_PIN E4 [get_ports hdmi_scl]
set_property PACKAGE_PIN E3 [get_ports hdmi_sda]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_clk]
set_property IOSTANDARD LVCMOS15 [get_ports {hdmi_rgb[*]}]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_videovalid]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_hsync]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_nreset]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_vsync]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_scl]
set_property IOSTANDARD LVCMOS15 [get_ports hdmi_sda]
set_property SLEW FAST [get_ports {hdmi_rgb[*]}]
set_property SLEW FAST [get_ports hdmi_videovalid]
set_property SLEW FAST [get_ports hdmi_hsync]
set_property SLEW FAST [get_ports hdmi_vsync]
## FPGA pin to hdmi signal
#set_property PACKAGE_PIN A1 [get_ports hdmi_clk]
#set_property PACKAGE_PIN C8 [get_ports {hdmi_rgb[0]}]
#set_property PACKAGE_PIN A3 [get_ports {hdmi_rgb[1]}]
#set_property PACKAGE_PIN B1 [get_ports {hdmi_rgb[2]}]
#set_property PACKAGE_PIN B3 [get_ports {hdmi_rgb[3]}]
#set_property PACKAGE_PIN C1 [get_ports {hdmi_rgb[4]}]
#set_property PACKAGE_PIN B6 [get_ports {hdmi_rgb[5]}]
#set_property PACKAGE_PIN A2 [get_ports {hdmi_rgb[6]}]
#set_property PACKAGE_PIN C6 [get_ports {hdmi_rgb[7]}]
#set_property PACKAGE_PIN F3 [get_ports {hdmi_rgb[8]}]
#set_property PACKAGE_PIN D6 [get_ports {hdmi_rgb[9]}]
#set_property PACKAGE_PIN D1 [get_ports {hdmi_rgb[10]}]
#set_property PACKAGE_PIN D7 [get_ports {hdmi_rgb[11]}]
#set_property PACKAGE_PIN E1 [get_ports {hdmi_rgb[12]}]
#set_property PACKAGE_PIN F5 [get_ports {hdmi_rgb[13]}]
#set_property PACKAGE_PIN F6 [get_ports {hdmi_rgb[14]}]
#set_property PACKAGE_PIN F7 [get_ports {hdmi_rgb[15]}]
#set_property PACKAGE_PIN G6 [get_ports {hdmi_rgb[16]}]
#set_property PACKAGE_PIN G5 [get_ports {hdmi_rgb[17]}]
#set_property PACKAGE_PIN G8 [get_ports {hdmi_rgb[18]}]
#set_property PACKAGE_PIN F1 [get_ports {hdmi_rgb[19]}]
#set_property PACKAGE_PIN E2 [get_ports {hdmi_rgb[20]}]
#set_property PACKAGE_PIN G1 [get_ports {hdmi_rgb[21]}]
#set_property PACKAGE_PIN F2 [get_ports {hdmi_rgb[22]}]
#set_property PACKAGE_PIN D4 [get_ports {hdmi_rgb[23]}]
#set_property PACKAGE_PIN B4 [get_ports hdmi_videovalid]
#set_property PACKAGE_PIN B8 [get_ports hdmi_hsync]
#set_property PACKAGE_PIN G3 [get_ports hdmi_nreset]
#set_property PACKAGE_PIN A4 [get_ports hdmi_vsync]
#set_property PACKAGE_PIN E4 [get_ports hdmi_scl]
#set_property PACKAGE_PIN E3 [get_ports hdmi_sda]
#
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_clk]
#set_property IOSTANDARD LVCMOS15 [get_ports {hdmi_rgb[*]}]
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_videovalid]
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_hsync]
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_nreset]
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_vsync]
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_scl]
#set_property IOSTANDARD LVCMOS15 [get_ports hdmi_sda]
#
#set_property SLEW FAST [get_ports {hdmi_rgb[*]}]
#set_property SLEW FAST [get_ports hdmi_videovalid]
#set_property SLEW FAST [get_ports hdmi_hsync]
#set_property SLEW FAST [get_ports hdmi_vsync]
`include "axi.vh"
module system_top (
inout hdmi_scl,
inout hdmi_sda,
output hdmi_nreset,
output hdmi_clk,
output hdmi_hsync,
output hdmi_vsync,
output hdmi_videovalid,
output [23:0] hdmi_rgb
//inout hdmi_scl,
//inout hdmi_sda,
//output hdmi_nreset,
//output hdmi_clk,
//output hdmi_hsync,
//output hdmi_vsync,
//output hdmi_videovalid,
//output [23:0] hdmi_rgb
//output [7:0] led
);
......@@ -64,10 +64,10 @@ module system_top (
.uart_txd(noop_uart_tx),
.uart_rxd(noop_uart_rx),
.VGA_rgb(hdmi_rgb),
.VGA_hsync(hdmi_hsync),
.VGA_vsync(hdmi_vsync),
.VGA_videovalid(hdmi_videovalid),
//.VGA_rgb(hdmi_rgb),
//.VGA_hsync(hdmi_hsync),
//.VGA_vsync(hdmi_vsync),
//.VGA_videovalid(hdmi_videovalid),
.coreclk(coreclk),
.corerstn(corerstn_sync[1]),
......@@ -77,14 +77,14 @@ module system_top (
.uncorerstn(uncorerstn)
);
i2c_config hdmi_i2c_config(
.rst(!uncorerstn),
.clk(clk27),
.i2c_scl(hdmi_scl),
.i2c_sda(hdmi_sda)
);
//i2c_config hdmi_i2c_config(
// .rst(!uncorerstn),
// .clk(clk27),
// .i2c_scl(hdmi_scl),
// .i2c_sda(hdmi_sda)
//);
assign hdmi_nreset = uncorerstn;
assign hdmi_clk = clk50;
//assign hdmi_nreset = uncorerstn;
//assign hdmi_clk = clk50;
endmodule
......@@ -19,7 +19,7 @@ object AXI4Parameters extends HasNOOPParameter {
// These are not fixed:
val idBits = 1
val addrBits = AddrBits
val addrBits = PAddrBits
val dataBits = DataBits
val userBits = 1
......
......@@ -61,10 +61,10 @@ class SimpleBusCrossbar1toN(addressSpace: List[(Long, Long)]) extends Module {
}
}
class SimpleBusCrossbarNto1(n: Int) extends Module {
class SimpleBusCrossbarNto1(n: Int, userBits:Int = 0) extends Module {
val io = IO(new Bundle {
val in = Flipped(Vec(n, new SimpleBusUC))
val out = new SimpleBusUC
val in = Flipped(Vec(n, new SimpleBusUC(userBits)))
val out = new SimpleBusUC(userBits)
})
val s_idle :: s_readResp :: s_writeResp :: Nil = Enum(3)
......
......@@ -22,14 +22,15 @@ object SimpleBusCmd {
// resp
def readLast = "b0110".U
def writeResp = "b0101".U
def probeHit = "b1100".U
def probeMiss = "b1000".U
def apply() = UInt(4.W)
}
class SimpleBusReqBundle(val userBits: Int = 0) extends SimpleBusBundle {
val addr = Output(UInt(64.W))
class SimpleBusReqBundle(val userBits: Int = 0, val addrBits: Int = 32) extends SimpleBusBundle {
val addr = Output(UInt(addrBits.W))
val size = Output(UInt(3.W))
val cmd = Output(SimpleBusCmd())
val wmask = Output(UInt((DataBits / 8).W))
......@@ -70,12 +71,13 @@ class SimpleBusRespBundle(val userBits: Int = 0) extends SimpleBusBundle {
def isReadLast() = cmd === SimpleBusCmd.readLast
def isProbeHit() = cmd === SimpleBusCmd.probeHit
def isProbeMiss() = cmd === SimpleBusCmd.probeMiss
def isWriteResp() = cmd === SimpleBusCmd.writeResp
def isPrefetch() = cmd === SimpleBusCmd.prefetch
}
// Uncache
class SimpleBusUC(val userBits: Int = 0) extends SimpleBusBundle {
val req = Decoupled(new SimpleBusReqBundle(userBits))
class SimpleBusUC(val userBits: Int = 0, val addrBits: Int = 32) extends SimpleBusBundle {
val req = Decoupled(new SimpleBusReqBundle(userBits, addrBits))
val resp = Flipped(Decoupled(new SimpleBusRespBundle(userBits)))
def isWrite() = req.valid && req.bits.isWrite()
......
......@@ -26,7 +26,7 @@ abstract class AXI4SlaveModule[T <: AXI4Lite, B <: Data](_type :T = new AXI4, _e
val beatCnt = Counter(256)
val len = HoldUnless(axi4.ar.bits.len, axi4.ar.fire())
val burst = HoldUnless(axi4.ar.bits.burst, axi4.ar.fire())
val wrapAddr = axi4.ar.bits.addr & ~(axi4.ar.bits.len.asTypeOf(UInt(AddrBits.W)) << axi4.ar.bits.size)
val wrapAddr = axi4.ar.bits.addr & ~(axi4.ar.bits.len.asTypeOf(UInt(PAddrBits.W)) << axi4.ar.bits.size)
raddr := HoldUnless(wrapAddr, axi4.ar.fire())
axi4.r.bits.last := (c.value === len)
when (ren) {
......
......@@ -11,10 +11,10 @@ class TimerIO extends Bundle {
}
class AXI4Timer(sim: Boolean = false) extends AXI4SlaveModule(new AXI4Lite, new TimerIO) {
val mtime = RegInit(0.U(64.W)) // unit: ms
val mtime = RegInit(0.U(64.W)) // unit: us
val mtimecmp = RegInit(0.U(64.W))
val clk = (if (!sim) 40000 /* 40MHz / 1000 */ else 10000)
val clk = (if (!sim) 40 /* 40MHz / 1000000 */ else 10000)
val tick = Counter(true.B, clk)._2
when (tick) { mtime := mtime + 1.U }
......
......@@ -7,13 +7,14 @@ import chisel3.util.experimental.BoringUtils
import utils._
class TableAddr(val idxBits: Int) extends NOOPBundle {
def tagBits = AddrBits - 2 - idxBits
def tagBits = VAddrBits - 2 - idxBits
//val res = UInt((AddrBits - VAddrBits).W)
val tag = UInt(tagBits.W)
val idx = UInt(idxBits.W)
val pad = UInt(2.W)//TODO
def fromUInt(x: UInt) = x.asTypeOf(UInt(AddrBits.W)).asTypeOf(this)
def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this)
def getTag(x: UInt) = fromUInt(x).tag
def getIdx(x: UInt) = fromUInt(x).idx
}
......@@ -29,9 +30,9 @@ object BTBtype {
class BPUUpdateReq extends NOOPBundle {
val valid = Output(Bool())
val pc = Output(UInt(AddrBits.W))
val pc = Output(UInt(VAddrBits.W))
val isMissPredict = Output(Bool())
val actualTarget = Output(UInt(AddrBits.W))
val actualTarget = Output(UInt(VAddrBits.W))
val actualTaken = Output(Bool()) // for branch
val fuOpType = Output(FuOpType())
val btbType = Output(BTBtype())
......@@ -40,7 +41,7 @@ class BPUUpdateReq extends NOOPBundle {
class BPU1 extends NOOPModule {
val io = IO(new Bundle {
val in = new Bundle { val pc = Flipped(Valid((UInt(AddrBits.W)))) }
val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) }
val out = new RedirectIO
val flush = Input(Bool())
val brIdx = Output(UInt(3.W))
......@@ -55,25 +56,35 @@ class BPU1 extends NOOPModule {
def btbEntry() = new Bundle {
val tag = UInt(btbAddr.tagBits.W)
val _type = UInt(2.W)
val target = UInt(AddrBits.W)
val target = UInt(VAddrBits.W)
val brIdx = UInt(3.W)
val valid = Bool()
}
val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true))
// flush BTB when executing fence.i
val flushBTB = WireInit(false.B)
val flushTLB = WireInit(false.B)
BoringUtils.addSink(flushBTB, "MOUFlushICache")
btb.reset := reset.asBool || flushBTB
BoringUtils.addSink(flushTLB, "MOUFlushTLB")
btb.reset := reset.asBool || (flushBTB || flushTLB)
Debug(false) {
when (reset.asBool || (flushBTB || flushTLB)) {
printf("[BPU-RESET] %d bpu-reset flushBTB:%d flushTLB:%d\n", GTimer(), flushBTB, flushTLB)
}
}
btb.io.r.req.valid := io.in.pc.valid
btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits)
val btbRead = Wire(btbEntry())
btbRead := btb.io.r.resp.data(0)
// since there is one cycle latency to read SyncReadMem,
// we should latch the input pc for one cycle
val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid)
val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.ready, init = false.B) && !(pcLatch(1) && btbRead.brIdx(0))
val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.fire(), init = false.B) && !(pcLatch(1) && btbRead.brIdx(0)) && btbRead.valid
// btbHit will ignore pc(1,0). pc(1,0) is used to build brIdx
// !(pcLatch(1) && btbRead.brIdx(0)) is used to deal with the following case:
// -------------------------------------------------
......@@ -84,14 +95,15 @@ class BPU1 extends NOOPModule {
io.lateJump := lateJump
// val lateJumpLatch = RegNext(lateJump)
// val lateJumpTarget = RegEnable(btbRead.target, lateJump)
Debug(){
// printf("[BTBHT] lateJump %x lateJumpLatch %x lateJumpTarget %x\n", lateJump, lateJumpLatch, lateJumpTarget)
when(btbHit){
printf("[BTBHT] pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x\n", pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush)
// printf("[BTBHT] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(lateJump, Fill(2, io.out.valid)))
Debug(false){
//printf("[BTBHT] lateJump %x lateJumpLatch %x lateJumpTarget %x\n", lateJump, lateJumpLatch, lateJumpTarget)
when(btbHit){
printf("[BTBHT1] %d pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x type:%x\n", GTimer(), pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush,btbRead._type)
printf("[BTBHT2] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(lateJump, Fill(2, io.out.valid)))
printf("[BTBHT5] btbReqValid:%d btbReqSetIdx:%x\n",btb.io.r.req.valid, btb.io.r.req.bits.setIdx)
}
}
// PHT
val pht = Mem(NRbtb, UInt(2.W))
val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid)
......@@ -99,7 +111,7 @@ class BPU1 extends NOOPModule {
// RAS
val NRras = 16
val ras = Mem(NRras, UInt(AddrBits.W))
val ras = Mem(NRras, UInt(VAddrBits.W))
// val raBrIdxs = Mem(NRras, UInt(2.W))
val sp = Counter(NRras)
val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid)
......@@ -110,16 +122,26 @@ class BPU1 extends NOOPModule {
val btbWrite = WireInit(0.U.asTypeOf(btbEntry()))
BoringUtils.addSink(req, "bpuUpdateReq")
Debug(){
when(req.valid){
printf("[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType)
}
Debug(false){
when(req.valid){
printf("[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType)
}
}
//val fflag = req.btbType===3.U && btb.io.w.req.valid && btb.io.w.req.bits.setIdx==="hc9".U
//when(fflag && GTimer()>2888000.U) {
// printf("%d\n", GTimer())
// printf("[BTBHT6] btbWrite.type is BTBtype.R/RET!!! Inpc:%x btbWrite.brIdx:%x setIdx:%x\n", io.in.pc.bits, btbWrite.brIdx, btb.io.w.req.bits.setIdx)
// printf("[BTBHT6] tag:%x target:%x _type:%x bridx:%x\n", btbWrite.tag,btbWrite.target,btbWrite._type,btbWrite.brIdx)
// printf(p"[BTBHT6] req:${req} \n")
//}
//printf("[BTBHT5] tag: target:%x type:%d brIdx:%d\n", req.actualTarget, req.btbType, Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1)))
btbWrite.tag := btbAddr.getTag(req.pc)
btbWrite.target := req.actualTarget
btbWrite._type := req.btbType
btbWrite.brIdx := Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1))
btbWrite.valid := true.B
// NOTE: We only update BTB at a miss prediction.
// If a miss prediction is found, the pipeline will be flushed
// in the next cycle. Therefore it is safe to use single-port
......@@ -130,6 +152,16 @@ class BPU1 extends NOOPModule {
btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc)
btb.io.w.req.bits.data := btbWrite
//Debug(true) {
//when (btb.io.w.req.valid && btbWrite.tag === btbAddr.getTag("hffffffff803541a4".U)) {
// printf("[BTBWrite] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx)
//}
//}
//when (GTimer() > 77437484.U && btb.io.w.req.valid) {
// printf("[BTBWrite-ALL] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx)
//}
val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc)))
val reqLatch = RegNext(req)
when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) {
......@@ -138,9 +170,9 @@ class BPU1 extends NOOPModule {
val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U))
when (wen) {
pht.write(btbAddr.getIdx(reqLatch.pc), newCnt)
Debug(){
printf("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt)
}
//Debug(){
//printf("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt)
//}
}
}
when (req.valid) {
......@@ -150,7 +182,10 @@ class BPU1 extends NOOPModule {
sp.value := sp.value + 1.U
}
.elsewhen (req.fuOpType === ALUOpType.ret) {
sp.value := sp.value - 1.U
when(sp.value === 0.U) {
//printf("ATTTTT: sp.value is 0.U\n") //TODO: sp.value may equal to 0.U
}
sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) //TODO: sp.value may less than 0.U
}
}
......@@ -158,7 +193,7 @@ class BPU1 extends NOOPModule {
// io.out.target := Mux(lateJumpLatch && !flush, lateJumpTarget, Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target))
// io.out.brIdx := btbRead.brIdx & Fill(3, io.out.valid)
io.brIdx := btbRead.brIdx & Cat(true.B, lateJump, Fill(2, io.out.valid))
io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B)
io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B && rasTarget=/=0.U) //TODO: add rasTarget=/=0.U, need fix
// io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !lateJump || lateJumpLatch && !flush && !lateJump
// Note:
// btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !lateJump : normal branch predict
......
......@@ -24,23 +24,23 @@ class DataSrcIO extends NOOPBundle {
}
class RedirectIO extends NOOPBundle {
val target = Output(UInt(AddrBits.W))
val target = Output(UInt(VAddrBits.W))
// val brIdx = Output(UInt(3.W)) // for RVC
val valid = Output(Bool())
}
// class IRIDCtrlFlowIO extends NOOPBundle {
// val instr = Output(UInt(64.W))
// val pc = Output(UInt(AddrBits.W))
// val pnpc = Output(UInt(AddrBits.W))
// val pc = Output(UInt(VAddrBits.W))
// val pnpc = Output(UInt(VAddrBits.W))
// val brIdx = Output(UInt(3.W))
// val redirect = new RedirectIO
// }
class CtrlFlowIO extends NOOPBundle {
val instr = Output(UInt(64.W))
val pc = Output(UInt(AddrBits.W))
val pnpc = Output(UInt(AddrBits.W))
val pc = Output(UInt(VAddrBits.W))
val pnpc = Output(UInt(VAddrBits.W))
val redirect = new RedirectIO
val exceptionVec = Output(Vec(16, Bool()))
val intrVec = Output(Vec(12, Bool()))
......@@ -80,3 +80,42 @@ class ForwardIO extends NOOPBundle {
val wb = new WriteBackIO
val fuType = Output(FuType())
}
// MMU side-band bundle: three control inputs, two fault flags and an address as outputs.
// MemMMUIO instantiates it twice, as `imem` and `dmem`.
class MMUIO extends NOOPBundle {
// val ptev = Output(Bool())
// val pteu = Output(Bool())
// val ptex = Output(Bool())
// val valid = Output(Bool())
// val isStore = Output(Bool())
// 2-bit input; presumably the current privilege level — confirm against the driver.
// (The field name keeps its original spelling because other code refers to it.)
val priviledgeMode = Input(UInt(2.W))
// NOTE(review): presumably the SUM and MXR bits of the status CSR — confirm.
val status_sum = Input(Bool())
val status_mxr = Input(Bool())
// Fault flags for loads and stores; "PF" presumably stands for page fault.
val loadPF = Output(Bool())
val storePF = Output(Bool())
// VAddrBits-wide address output. NOTE(review): presumably the faulting virtual address — confirm.
val addr = Output(UInt(VAddrBits.W))
// True when either loadPF or storePF is asserted.
def isPF() = loadPF || storePF
}
// Pairs two MMUIO ports, `imem` and `dmem`
// (presumably the instruction-side and data-side memory paths — confirm).
class MemMMUIO extends NOOPBundle {
val imem = new MMUIO
val dmem = new MMUIO
}
// Bundle carrying the satp value and an sfence request (valid, asid, vaddr), all as outputs.
class TLBExuIO extends NOOPBundle {
// XLEN-wide satp value, driven by access().
val satp = Output(UInt(XLEN.W))
val sfence = new Bundle {
val valid = Output(Bool())
// 9-bit field; access() fills it from the low 9 bits of src2.
val asid = Output(UInt(9.W))
val vaddr = Output(UInt(XLEN.W))
}
// Drives every field of this bundle:
//   sfence.valid <- valid, sfence.vaddr <- src1, sfence.asid <- src2(8,0), satp <- satp.
// `func` is accepted but never read.
def access(valid: Bool, src1: UInt, src2: UInt, func: UInt, satp: UInt) = { // func is unused here: only sfence.vma is handled
this.sfence.valid := valid
this.sfence.vaddr := src1
this.sfence.asid := src2(8,0)
this.satp := satp
}
}
\ No newline at end of file
......@@ -21,7 +21,7 @@ case class CacheConfig (
sealed trait HasCacheConst {
implicit val cacheConfig: CacheConfig
val AddrBits: Int
val PAddrBits: Int
val XLEN: Int
val cacheName = cacheConfig.name
......@@ -41,9 +41,9 @@ sealed trait HasCacheConst {
val OffsetBits = log2Up(LineSize)
val IndexBits = log2Up(Sets)
val WordIndexBits = log2Up(LineBeats)
val TagBits = AddrBits - OffsetBits - IndexBits
val TagBits = PAddrBits - OffsetBits - IndexBits
val debug = false
val debug = true
def addrBundle = new Bundle {
val tag = UInt(TagBits.W)
......@@ -104,8 +104,10 @@ sealed class CacheStage1(implicit val cacheConfig: CacheConfig) extends CacheMod
if (ro) when (io.in.fire()) { assert(!io.in.bits.isWrite()) }
Debug(){
when(io.in.fire()){
printf("[L1$] " +name+" cache stage1, addr in: %x, user: %x\n", io.in.bits.addr, io.in.bits.user.getOrElse(0.U))
if (debug) {
when(io.in.fire()){
printf("[L1$] " +name+" cache stage1, addr in: %x, user: %x\n", io.in.bits.addr, io.in.bits.user.getOrElse(0.U))
}
}
}
......@@ -118,9 +120,11 @@ sealed class CacheStage1(implicit val cacheConfig: CacheConfig) extends CacheMod
io.out.valid := io.in.valid && io.metaReadBus.req.ready && io.dataReadBus.req.ready
io.in.ready := (!io.in.valid || io.out.fire()) && io.metaReadBus.req.ready && io.dataReadBus.req.ready
Debug(debug) {
printf("%d: [" + cacheName + " stage1]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, cmd = %x, dataReadBus.req.valid = %d\n",
GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, io.in.bits.addr, io.in.bits.cmd, io.dataReadBus.req.valid)
Debug() {
if (debug) {
printf("%d: [" + cacheName + " stage1]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, cmd = %x, dataReadBus.req.valid = %d\n",
GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, io.in.bits.addr, io.in.bits.cmd, io.dataReadBus.req.valid)
}
}
}
......@@ -162,7 +166,15 @@ sealed class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheMod
val hitVec = VecInit(metaWay.map(m => m.valid && (m.tag === addr.tag) && io.in.valid)).asUInt
val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U
val waymask = Mux(io.out.bits.hit, hitVec, victimWaymask)
val invalidVec = VecInit(metaWay.map(m => !m.valid)).asUInt
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = Mux(invalidVec >= 8.U, "b1000".U,
Mux(invalidVec >= 4.U, "b0100".U,
Mux(invalidVec >= 2.U, "b0010".U, "b0001".U)))
// val waymask = Mux(io.out.bits.hit, hitVec, victimWaymask)
val waymask = Mux(io.out.bits.hit, hitVec, Mux(hasInvalidWay, refillInvalidWaymask, victimWaymask))
assert(!(io.in.valid && PopCount(waymask) > 1.U))
io.out.bits.metas := metaWay
......@@ -178,16 +190,17 @@ sealed class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheMod
when (isForwardData) { isForwardDataReg := true.B }
when (io.in.fire() || !io.in.valid) { isForwardDataReg := false.B }
val forwardDataReg = RegEnable(io.dataWriteBus.req.bits, isForwardData)
io.out.bits.isForwardData := isForwardDataReg
io.out.bits.forwardData := forwardDataReg
io.out.bits.isForwardData := isForwardDataReg || isForwardData
io.out.bits.forwardData := Mux(isForwardData, io.dataWriteBus.req.bits, forwardDataReg)
io.out.bits.req <> req
io.out.valid := io.in.valid
io.in.ready := !io.in.valid || io.out.fire()
Debug(debug) {
printf("%d: [" + cacheName + " stage2]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, waymask = %d\n",
GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, req.addr, waymask)
Debug() {
if (debug) {
printf("%d: [" + cacheName + " S2]: isFD:%d isFDreg:%d inFire:%d invalid:%d \n", GTimer(), isForwardData, isForwardDataReg, io.in.fire(), io.in.valid)
}
}
}
......@@ -246,6 +259,7 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
val s_idle :: s_memReadReq :: s_memReadResp :: s_memWriteReq :: s_memWriteResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: s_release :: Nil = Enum(9)
val state = RegInit(s_idle)
val needFlush = RegInit(false.B)
when (io.flush && (state =/= s_idle)) { needFlush := true.B }
when (io.out.fire() && needFlush) { needFlush := false.B }
......@@ -267,8 +281,8 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
}
// critical word first read
val raddr = (if (XLEN == 64) Cat(req.addr(AddrBits-1,3), 0.U(3.W))
else Cat(req.addr(AddrBits-1,2), 0.U(2.W)))
val raddr = (if (XLEN == 64) Cat(req.addr(PAddrBits-1,3), 0.U(3.W))
else Cat(req.addr(PAddrBits-1,2), 0.U(2.W)))
// dirty block addr
val waddr = Cat(meta.tag, addr.index, 0.U(OffsetBits.W))
val cmd = Mux(state === s_memReadReq, SimpleBusCmd.readBurst,
......@@ -389,7 +403,7 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
}
} else {
io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand)
io.out.bits.cmd := SimpleBusCmd.readLast
io.out.bits.cmd := Mux(io.in.bits.req.isRead(), SimpleBusCmd.readLast, Mux(io.in.bits.req.isWrite(), SimpleBusCmd.writeResp, DontCare))//DontCare, added by lemover
}
io.out.bits.user.zip(req.user).map { case (o,i) => o := i }
......@@ -406,15 +420,23 @@ sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheMod
Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire))
)
io.in.ready := io.out.ready && (state === s_idle) && !miss && !probe
io.in.ready := io.out.ready && (state === s_idle && !hitReadBurst) && !miss && !probe
io.dataReadRespToL1 := hitReadBurst && (state === s_idle && io.out.ready || state === s_release && state2 === s2_dataOK)
assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid))
assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid))
assert(!(!ro.B && io.flush), "only allow to flush icache")
Debug(debug) {
printf("%d: [" + cacheName + " stage3]: in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, state = %d, addr = %x, mem.req.valid = %d, mem.req.ready = %d\n\n",
GTimer(), io.in.ready, io.in.valid, io.out.valid, io.out.ready, state, req.addr, io.mem.req.valid, io.mem.req.ready)
Debug() {
if (debug) {
printf("%d: [" + cacheName + " S3]: in.ready = %d, in.valid = %d, hit = %x, state = %d, addr = %x cmd:%d probe:%d isFinish:%d\n",
GTimer(), io.in.ready, io.in.valid, hit, state, req.addr, req.cmd, probe, io.isFinish)
printf("%d: [" + cacheName + " S3]: out.valid:%d rdata:%x cmd:%d user:%x \n",
GTimer(), io.out.valid, io.out.bits.rdata, io.out.bits.cmd, io.out.bits.user.getOrElse(0.U))
printf("%d: [" + cacheName + " S3]: DHW: (%d, %d), data:%x MHW:(%d, %d)\n",
GTimer(), dataHitWriteBus.req.valid, dataHitWriteBus.req.ready, dataHitWriteBus.req.bits.data.asUInt, metaHitWriteBus.req.valid, metaHitWriteBus.req.ready)
printf("%d: [" + cacheName + " S3]: useFD:%d isFD:%d FD:%x DreadArray:%x dataRead:%x inwaymask:%x FDwaymask:%x \n",
GTimer(), useForwardData, io.in.bits.isForwardData, io.in.bits.forwardData.data.data, dataReadArray, dataRead, io.in.bits.waymask, io.in.bits.forwardData.waymask.getOrElse("b1".U))
}
}
}
......@@ -424,6 +446,7 @@ class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule {
val flush = Input(UInt(2.W))
val out = new SimpleBusC
val mmio = new SimpleBusUC
val empty = Output(Bool())
})
// cpu pipeline
......@@ -444,14 +467,20 @@ class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule {
arb.io.in(hasCohInt + 0) <> io.in.req
s1.io.in <> arb.io.out
/*
val s2BlockByPrefetch = if (cacheLevel == 2) {
s2.io.out.valid && s3.io.in.valid && s3.io.in.bits.req.isPrefetch() && !s3.io.in.ready
} else { false.B }
*/
PipelineConnect(s1.io.out, s2.io.in, s2.io.out.fire(), io.flush(0))
PipelineConnect(s2.io.out, s3.io.in, s3.io.isFinish, io.flush(1) || s2.io.out.bits.mmio && s2.io.out.bits.req.isPrefetch())
PipelineConnect(s2.io.out, s3.io.in, s3.io.isFinish, io.flush(1) || s2.io.out.bits.mmio && s2.io.out.bits.req.isPrefetch()/* || s2BlockByPrefetch*/)
io.in.resp <> s3.io.out
s3.io.flush := io.flush(1)
io.out.mem <> s3.io.mem
io.mmio <> s3.io.mmio
io.empty := !s2.io.in.valid && !s3.io.in.valid
io.in.resp.valid := Mux(s3.io.out.bits.isPrefetch(), false.B, s3.io.out.valid || s3.io.dataReadRespToL1)
io.in.resp.valid := Mux(s3.io.out.valid && s3.io.out.bits.isPrefetch(), false.B, s3.io.out.valid || s3.io.dataReadRespToL1)
if (hasCoh) {
val cohReq = io.out.coh.req.bits
......@@ -483,24 +512,29 @@ class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule {
BoringUtils.addSource(s3.io.in.valid && s3.io.in.bits.hit, "perfCntCondM" + cacheName + "Hit")
Debug(debug) {
io.in.dump(cacheName + ".in")
printf("%d: s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)\n",
GTimer(), s1.io.in.valid, s1.io.in.ready, s2.io.in.valid, s2.io.in.ready, s3.io.in.valid, s3.io.in.ready)
when (s1.io.in.valid) { printf(p"[${cacheName}.S1]: ${s1.io.in.bits}\n") }
when (s2.io.in.valid) { printf(p"[${cacheName}.S2]: ${s2.io.in.bits.req}\n") }
when (s3.io.in.valid) { printf(p"[${cacheName}.S3]: ${s3.io.in.bits.req}\n") }
s3.io.mem.dump(cacheName + ".mem")
Debug() {
if (debug) {
when(true.B) {
io.in.dump(cacheName + ".in")
printf("%d:" + cacheName + "InReq(%d, %d) InResp(%d, %d) \n", GTimer(), io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready)
printf("%d:" + cacheName + " {IN s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)} {OUT s1:(%d,%d), s2:(%d,%d), s3:(%d,%d)}\n",
GTimer(), s1.io.in.valid, s1.io.in.ready, s2.io.in.valid, s2.io.in.ready, s3.io.in.valid, s3.io.in.ready, s1.io.out.valid, s1.io.out.ready, s2.io.out.valid, s2.io.out.ready, s3.io.out.valid, s3.io.out.ready)
when (s1.io.in.valid) { printf("%d ", GTimer()) ; printf(p"[${cacheName}.S1]: ${s1.io.in.bits}\n") }
when (s2.io.in.valid) { printf("%d ", GTimer()) ; printf(p"[${cacheName}.S2]: ${s2.io.in.bits.req}\n") }
when (s3.io.in.valid) { printf("%d ", GTimer()) ; printf(p"[${cacheName}.S3]: ${s3.io.in.bits.req}\n") }
//s3.io.mem.dump(cacheName + ".mem")
}}
}
}
object Cache {
def apply(in: SimpleBusUC, mmio: Seq[SimpleBusUC], flush: UInt, enable: Boolean = true)(implicit cacheConfig: CacheConfig) = {
def apply(in: SimpleBusUC, mmio: Seq[SimpleBusUC], flush: UInt, empty: Bool, enable: Boolean = true)(implicit cacheConfig: CacheConfig) = {
if (enable) {
val cache = Module(new Cache)
cache.io.flush := flush
cache.io.in <> in
mmio(0) <> cache.io.mmio
empty := cache.io.empty
cache.io.out
} else {
val addrspace = List(AddressSpace.dram) ++ AddressSpace.mmio
......@@ -511,6 +545,7 @@ object Cache {
(mmio zip xbar.io.out.drop(1)) foreach { case (mmio_in, xbar_out) =>
mmio_in <> xbar_out
}
empty := false.B
busC
}
}
......
......@@ -4,13 +4,15 @@ import chisel3._
import chisel3.util._
trait HasInstrType {
def InstrN = "b000".U
def InstrI = "b100".U
def InstrR = "b101".U
def InstrS = "b010".U
def InstrB = "b001".U
def InstrU = "b110".U
def InstrJ = "b111".U
def InstrN = "b0000".U
def InstrI = "b0100".U
def InstrR = "b0101".U
def InstrS = "b0010".U
def InstrB = "b0001".U
def InstrU = "b0110".U
def InstrJ = "b0111".U
def InstrA = "b1110".U
def InstrSA = "b1111".U // Atom Inst: SC
// Register-file write enable is read straight from bit 2 of the
// instruction-type code: the I/R/U/J (and A/SA) encodings above have that bit
// set, while N/S/B have it clear.
def isrfWen(instrType : UInt): Bool = instrType(2)
}
......@@ -56,6 +58,7 @@ object Instructions extends HasInstrType with HasNOOPParameter {
(if (HasMExtension) RVMInstr.table else Nil) ++
(if (HasCExtension) RVCInstr.table else Nil) ++
Priviledged.table ++
RVAInstr.table ++
RVZicsrInstr.table ++ RVZifenceiInstr.table
}
......
......@@ -12,8 +12,9 @@ class EXU(implicit val p: NOOPConfig) extends NOOPModule {
val in = Flipped(Decoupled(new DecodeIO))
val out = Decoupled(new CommitIO)
val flush = Input(Bool())
val dmem = new SimpleBusUC
val dmem = new SimpleBusUC(addrBits = VAddrBits)
val forward = new ForwardIO
val memMMU = Flipped(new MemMMUIO)
})
val src1 = io.in.bits.data.src1
......@@ -31,8 +32,10 @@ class EXU(implicit val p: NOOPConfig) extends NOOPModule {
alu.io.out.ready := true.B
val lsu = Module(new LSU)
val lsuOut = lsu.access(valid = fuValids(FuType.lsu), src1 = src1, src2 = io.in.bits.data.imm, func = fuOpType)
val lsuTlbPF = WireInit(false.B)
val lsuOut = lsu.access(valid = fuValids(FuType.lsu), src1 = src1, src2 = io.in.bits.data.imm, func = fuOpType, dtlbPF = lsuTlbPF)
lsu.io.wdata := src2
lsu.io.instr := io.in.bits.cf.instr
io.out.bits.isMMIO := lsu.io.isMMIO
io.dmem <> lsu.io.dmem
lsu.io.out.ready := true.B
......@@ -48,15 +51,18 @@ class EXU(implicit val p: NOOPConfig) extends NOOPModule {
io.out.bits.intrNO := csr.io.intrNO
csr.io.out.ready := true.B
csr.io.imemMMU <> io.memMMU.imem
csr.io.dmemMMU <> io.memMMU.dmem
val mou = Module(new MOU)
// mou does not write register
mou.access(valid = fuValids(FuType.mou), src1 = src1, src2 = src2, func = fuOpType)
mou.io.cfIn := io.in.bits.cf
mou.io.out.ready := true.B
io.out.bits.decode := DontCare
(io.out.bits.decode.ctrl, io.in.bits.ctrl) match { case (o, i) =>
o.rfWen := i.rfWen
o.rfWen := i.rfWen && (!lsuTlbPF || !fuValids(FuType.lsu)) && !(csr.io.wenFix && fuValids(FuType.csr))
o.rfDest := i.rfDest
o.fuType := i.fuType
}
......@@ -66,6 +72,12 @@ class EXU(implicit val p: NOOPConfig) extends NOOPModule {
io.out.bits.decode.cf.redirect <>
Mux(mou.io.redirect.valid, mou.io.redirect,
Mux(csr.io.redirect.valid, csr.io.redirect, alu.io.redirect))
Debug(){
//when(mou.io.redirect.valid || csr.io.redirect.valid || alu.io.redirect.valid){
printf("[REDIRECT] inValid:%d mou %x csr %x alu %x \n", io.in.valid, mou.io.redirect.valid, csr.io.redirect.valid, alu.io.redirect.valid)
printf("[REDIRECT] flush: %d mou %x csr %x alu %x\n", io.flush, mou.io.redirect.target, csr.io.redirect.target, alu.io.redirect.target)
//}
}
// FIXME: should handle io.out.ready == false
io.out.valid := io.in.valid && MuxLookup(fuType, true.B, List(
......
......@@ -36,19 +36,19 @@ class IDU1 extends NOOPModule with HasInstrType {
val rvcSpecial = pcOffset === 6.U && !isRVC && !brIdx(2)
val rvcSpecialJump = pcOffset === 6.U && !isRVC && brIdx(2)
val pnpcIsSeq = brIdx(3)
// val pnpcIsSeqRight = io.in.bits.pnpc === (Cat(io.in.bits.pc(AddrBits-1,2), 0.U(2.W)) + 4.U) // TODO: add a new user bit bpRight to do this
// val pnpcIsSeqRight = io.in.bits.pnpc === (Cat(io.in.bits.pc(VAddrBits-1,2), 0.U(2.W)) + 4.U) // TODO: add a new user bit bpRight to do this
// assert(pnpcIsSeq === pnpcIsSeqRight)
val flushIFU = (state === s_idle || state === s_extra) && rvcSpecial && io.in.valid && !pnpcIsSeq
when(flushIFU){printf("flushIFU at pc %x offset %x\n", io.in.bits.pc, pcOffset)}
when(flushIFU){printf("flushIFU at pc %x offset %x timer:%d\n", io.in.bits.pc, pcOffset, GTimer())}
assert(!flushIFU)
val loadNextInstline = (state === s_idle || state === s_extra) && (rvcSpecial || rvcSpecialJump) && io.in.valid && pnpcIsSeq
// val loadNextInstline =false.B
val pcOut = WireInit(0.U(AddrBits.W))
val pnpcOut = WireInit(0.U(AddrBits.W))
val specialPCR = Reg(UInt(AddrBits.W)) // reg for full inst that cross 2 inst line
val specialNPCR = Reg(UInt(AddrBits.W)) // reg for pnc for full inst jump that cross 2 inst line
val pcOut = WireInit(0.U(VAddrBits.W))
val pnpcOut = WireInit(0.U(VAddrBits.W))
val specialPCR = Reg(UInt(VAddrBits.W)) // reg for full inst that cross 2 inst line
val specialNPCR = Reg(UInt(VAddrBits.W)) // reg for pnc for full inst jump that cross 2 inst line
val specialInstR = Reg(UInt(16.W))
val redirectPC = Cat(io.in.bits.pc(31,3), 0.U(3.W))+"b1010".U // IDU can got get full inst from a single inst line
val redirectPC = Cat(io.in.bits.pc(VAddrBits-1,3), 0.U(3.W))+"b1010".U // IDU can got get full inst from a single inst line
val rvcForceLoadNext = (pcOffset === 2.U && !isRVC && io.in.bits.pnpc(2,0) === 4.U && !brIdx(1))
//------------------------------------------------------
// rvcForceLoadNext is used to deal with:
......@@ -107,7 +107,7 @@ class IDU1 extends NOOPModule with HasInstrType {
is(s_extra){//get 16 aligned inst, pc controled by this FSM
canGo := rvcFinish || rvcNext
canIn := rvcFinish || rvcForceLoadNext
pcOut := Cat(io.in.bits.pc(31,3), pcOffsetR(2,0))
pcOut := Cat(io.in.bits.pc(VAddrBits-1,3), pcOffsetR(2,0))
pnpcOut := Mux(rvcFinish, io.in.bits.pnpc, Mux(isRVC, pcOut+2.U, pcOut+4.U))
when(io.out.fire() && rvcFinish){state := s_idle}
when(io.out.fire() && rvcNext){
......@@ -181,5 +181,6 @@ class IDU1 extends NOOPModule with HasInstrType {
io.out.valid := io.in.valid && canGo
io.in.ready := (!io.in.valid || (io.out.fire() && canIn) || loadNextInstline)
// io.out.bits.cf <> io.in.bits
io.out.bits.exceptionVec := io.in.bits.exceptionVec/*.map(_ := false.B)*/ //Fix by zhangzifei from false.B
}
......@@ -32,6 +32,7 @@ class IDU2 extends NOOPModule with HasInstrType {
InstrI -> (SrcType.reg, SrcType.imm),
InstrR -> (SrcType.reg, SrcType.reg),
InstrS -> (SrcType.reg, SrcType.reg),
InstrSA-> (SrcType.reg, SrcType.reg),
InstrB -> (SrcType.reg, SrcType.reg),
InstrU -> (SrcType.pc , SrcType.imm),
InstrJ -> (SrcType.pc , SrcType.imm),
......@@ -81,6 +82,7 @@ class IDU2 extends NOOPModule with HasInstrType {
val imm = LookupTree(instrType, List(
InstrI -> SignExt(instr(31, 20), XLEN),
InstrS -> SignExt(Cat(instr(31, 25), instr(11, 7)), XLEN),
InstrSA -> SignExt(Cat(instr(31, 25), instr(11, 7)), XLEN),
InstrB -> SignExt(Cat(instr(31), instr(7), instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN),
InstrU -> SignExt(Cat(instr(31, 12), 0.U(12.W)), XLEN),//fixed
InstrJ -> SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN)
......@@ -125,6 +127,10 @@ class IDU2 extends NOOPModule with HasInstrType {
io.in.ready := !io.in.valid || io.out.fire() && !hasIntr
io.out.bits.cf <> io.in.bits
Debug(){
when(io.out.fire()){printf("[IDU] issue: pc %x npc %x instr %x\n", io.out.bits.cf.pc, io.out.bits.cf.pnpc, io.out.bits.cf.instr)}
}
val intrVec = WireInit(0.U(12.W))
BoringUtils.addSink(intrVec, "intrVecIDU")
io.out.bits.cf.intrVec.zip(intrVec.asBools).map{ case(x, y) => x := y }
......@@ -132,7 +138,7 @@ class IDU2 extends NOOPModule with HasInstrType {
io.out.bits.cf.exceptionVec.map(_ := false.B)
io.out.bits.cf.exceptionVec(illegalInstr) := (instrType === InstrN && !hasIntr) && io.in.valid
io.out.bits.cf.exceptionVec(ecallM) := (instr === Priviledged.ECALL) && io.in.valid
io.out.bits.cf.exceptionVec(instrPageFault) := io.in.bits.exceptionVec(instrPageFault)
io.out.bits.ctrl.isNoopTrap := (instr === NOOPTrap.TRAP) && io.in.valid
}
......
......@@ -14,17 +14,18 @@ trait HasResetVector {
class IFU extends NOOPModule with HasResetVector {
val io = IO(new Bundle {
val imem = new SimpleBusUC(userBits = AddrBits*2 + 4)
// val pc = Input(UInt(AddrBits.W))
val imem = new SimpleBusUC(userBits = VAddrBits*2 + 4, addrBits = VAddrBits)
// val pc = Input(UInt(VAddrBits.W))
val out = Decoupled(new CtrlFlowIO)
val redirect = Flipped(new RedirectIO)
val flushVec = Output(UInt(4.W))
val bpFlush = Output(Bool())
val ipf = Input(Bool())
})
// pc
val pc = RegInit(resetVector.U(AddrBits.W))
val pc = RegInit(resetVector.U(VAddrBits.W))
val pcUpdate = io.redirect.valid || io.imem.req.fire()
val snpc = Mux(pc(1), pc + 2.U, pc + 4.U) // sequential next pc
......@@ -45,6 +46,10 @@ class IFU extends NOOPModule with HasResetVector {
val pbrIdx = bp1.io.brIdx
val npc = Mux(io.redirect.valid, io.redirect.target, Mux(lateJumpLatch, lateJumpTarget, Mux(bp1.io.out.valid, pnpc, snpc)))
val npcIsSeq = Mux(io.redirect.valid , false.B, Mux(lateJumpLatch, false.B, Mux(lateJump, true.B, Mux(bp1.io.out.valid, false.B, true.B))))
// Debug(){
// printf("[NPC] %x %x %x %x %x %x\n",lateJumpLatch, lateJumpTarget, lateJump, bp1.io.out.valid, pnpc, snpc)
// }
// val npc = Mux(io.redirect.valid, io.redirect.target, Mux(io.redirectRVC.valid, io.redirectRVC.target, snpc))
val brIdx = Wire(UInt(4.W))
// brIdx(0) -> branch at pc offset 0 (mod 4)
......@@ -66,25 +71,29 @@ class IFU extends NOOPModule with HasResetVector {
// printf("[IF1] pc=%x\n", pc)
}
Debug(){
when(pcUpdate) {
printf("[IFUPC] pc:%x pcUpdate:%d npc:%x RedValid:%d RedTarget:%x LJL:%d LJTarget:%x LJ:%d snpc:%x bpValid:%d pnpn:%x \n",pc, pcUpdate, npc, io.redirect.valid,io.redirect.target,lateJumpLatch,lateJumpTarget,lateJump,snpc,bp1.io.out.valid,pnpc)
//printf(p"[IFUIN] redirect: ${io.redirect} \n")
}
}
io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U)
io.bpFlush := false.B
io.imem.req.bits.apply(addr = Cat(pc(AddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W))
size = "b11".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc, pc))
io.imem.req.bits.apply(addr = Cat(pc(VAddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W))
size = "b11".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc(VAddrBits-1, 0), pc(VAddrBits-1, 0)))
io.imem.req.valid := io.out.ready
//TODO: add ctrlFlow.exceptionVec
io.imem.resp.ready := io.out.ready || io.flushVec(0)
Debug(){
when(io.imem.req.fire()){
printf("[IFI] pc=%x user=%x %x %x %x\n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, pbrIdx, brIdx)
}
}
io.out.bits := DontCare
//inst path only uses 32bit inst, get the right inst according to pc(2)
Debug(){
when(io.imem.req.fire()){
printf("[IFI] pc=%x user=%x %x %x %x \n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, pbrIdx, brIdx)
}
when (io.out.fire()) {
printf("[IFO] pc=%x inst=%x\n", io.out.bits.pc, io.out.bits.instr)
}
......@@ -94,10 +103,11 @@ class IFU extends NOOPModule with HasResetVector {
// else io.imem.resp.bits.rdata)
io.out.bits.instr := io.imem.resp.bits.rdata
io.imem.resp.bits.user.map{ case x =>
io.out.bits.pc := x(AddrBits-1,0)
io.out.bits.pnpc := x(AddrBits*2-1,AddrBits)
io.out.bits.brIdx := x(AddrBits*2 + 3, AddrBits*2)
io.out.bits.pc := x(VAddrBits-1,0)
io.out.bits.pnpc := x(VAddrBits*2-1,VAddrBits)
io.out.bits.brIdx := x(VAddrBits*2 + 3, VAddrBits*2)
}
io.out.bits.exceptionVec(instrPageFault) := io.ipf
io.out.valid := io.imem.resp.valid && !io.flushVec(0)
BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall")
......
......@@ -64,8 +64,8 @@ class ISU(implicit val p: NOOPConfig) extends NOOPModule with HasRegFileParamete
val rf = new RegFile
io.out.bits.data.src1 := Mux1H(List(
(io.in.bits.ctrl.src1Type === SrcType.pc) -> io.in.bits.cf.pc,
src1ForwardNextCycle -> io.forward.wb.rfData,
(io.in.bits.ctrl.src1Type === SrcType.pc) -> SignExt(io.in.bits.cf.pc, AddrBits),
src1ForwardNextCycle -> io.forward .wb.rfData,
(src1Forward && !src1ForwardNextCycle) -> io.wb.rfData,
((io.in.bits.ctrl.src1Type =/= SrcType.pc) && !src1ForwardNextCycle && !src1Forward) -> rf.read(rfSrc1)
))
......
......@@ -15,8 +15,11 @@ trait HasNOOPParameter {
val HasDiv = true
val HasIcache = true
val HasDcache = true
val AddrBits = 32
val AddrBytes = AddrBits / 8
val EnableStoreQueue = false
val AddrBits = 64 // AddrBits is used in some cases
val VAddrBits = 39 // VAddrBits is Virtual Memory addr bits
val PAddrBits = 32 // PAddrBits is Phyical Memory addr bits
val AddrBytes = AddrBits / 8 // unused
val DataBits = XLEN
val DataBytes = DataBits / 8
}
......@@ -44,7 +47,6 @@ class NOOP(implicit val p: NOOPConfig) extends NOOPModule {
val imem = new SimpleBusC
val dmem = new SimpleBusC
val mmio = new SimpleBusUC
val prefetchReq = Decoupled(new SimpleBusReqBundle)
val frontend = Flipped(new SimpleBusUC)
})
......@@ -76,12 +78,20 @@ class NOOP(implicit val p: NOOPConfig) extends NOOPModule {
ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready,
idu1.io.in.valid, idu1.io.in.ready, idu2.io.in.valid, idu2.io.in.ready, isu.io.in.valid, isu.io.in.ready,
exu.io.in.valid, exu.io.in.ready, wbu.io.in.valid, wbu.io.in.ready)
when (ifu.io.out.valid) { printf("IFU: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr, ifu.io.out.bits.pnpc) }
when (ifu.io.out.valid) { printf("IFU: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr, ifu.io.out.bits.pnpc)} ;
when (idu1.io.in.valid) { printf("ID1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu1.io.in.bits.pc, idu1.io.in.bits.instr, idu1.io.in.bits.pnpc) }
when (idu2.io.in.valid) { printf("ID2: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu2.io.in.bits.pc, idu2.io.in.bits.instr, idu2.io.in.bits.pnpc) }
when (isu.io.in.valid) { printf("ISU: pc = 0x%x, pnpc = 0x%x\n", isu.io.in.bits.cf.pc, isu.io.in.bits.cf.pnpc) }
when (exu.io.in.valid) { printf("EXU: pc = 0x%x, pnpc = 0x%x\n", exu.io.in.bits.cf.pc, exu.io.in.bits.cf.pnpc) }
when (wbu.io.in.valid) { printf("WBU: pc = 0x%x\n", wbu.io.in.bits.decode.cf.pc) }
when (isu.io.in.valid) { printf("ISU: pc = 0x%x, pnpc = 0x%x\n", isu.io.in.bits.cf.pc, isu.io.in.bits.cf.pnpc)} ;
when (exu.io.in.valid) { printf("EXU: pc = 0x%x, pnpc = 0x%x\n", exu.io.in.bits.cf.pc, exu.io.in.bits.cf.pnpc)} ;
when (wbu.io.in.valid) { printf("WBU: pc = 0x%x rfWen:%d rfDest:%d rfData:%x Futype:%x\n", wbu.io.in.bits.decode.cf.pc, wbu.io.in.bits.decode.ctrl.rfWen, wbu.io.in.bits.decode.ctrl.rfDest, wbu.io.wb.rfData, wbu.io.in.bits.decode.ctrl.fuType )}
// when (io.in.valid) { printf("TIMER: %d WBU: pc = 0x%x wen %x wdata %x mmio %x intrNO %x\n", GTimer(), io.in.bits.decode.cf.pc, io.wb.rfWen, io.wb.rfData, io.in.bits.isMMIO, io.in.bits.intrNO) }
// printf(p"IFUO: redirectIO:${ifu.io.out.bits.redirect}\n") ; printf("IFUO: exceptionVec: %x\n", ifu.io.out.bits.exceptionVec.asUInt)}
// printf(p"IDUO: redirectIO:${idu.io.out.bits.cf.redirect} redirectIOC:${idu.io.redirect}\n") ; printf("IDUO: exceptionVec:%x\n", idu.io.out.bits.cf.exceptionVec.asUInt)}
// printf(p"ISUO: ${isu.io.out.bits.cf.redirect}\n") ; printf("ISUO: exceptionVec:%x\n", isu.io.out.bits.cf.exceptionVec.asUInt)}
when (exu.io.out.bits.decode.cf.redirect.valid) { printf("EXUO: redirect valid:%d target:%x\n", exu.io.out.bits.decode.cf.redirect.valid, exu.io.out.bits.decode.cf.redirect.target) }
// when (wbu.io.in.valid) { printf("WBU: pc = 0x%x rfWen:%d rfDest:%d rfData:%x Futype:%x commits(0):%x commits(1):%x commits(3):%x\n", wbu.io.in.bits.decode.cf.pc, wbu.io.in.bits.decode.ctrl.rfWen, wbu.io.in.bits.decode.ctrl.rfDest, wbu.io.wb.rfData, wbu.io.in.bits.decode.ctrl.fuType, wbu.io.in.bits.commits(0), wbu.io.in.bits.commits(1), wbu.io.in.bits.commits(3)) }
}
isu.io.wb <> wbu.io.wb
......@@ -89,16 +99,20 @@ class NOOP(implicit val p: NOOPConfig) extends NOOPModule {
// forward
isu.io.forward <> exu.io.forward
val mmioXbar = Module(new SimpleBusCrossbarNto1(if (HasDcache) 2 else 3))
val dmemXbar = Module(new SimpleBusCrossbarNto1(4))
val itlb = TLB(in = ifu.io.imem, mem = dmemXbar.io.in(1), flush = ifu.io.flushVec(0) | ifu.io.bpFlush, csrMMU = exu.io.memMMU.imem)(TLBConfig(name = "itlb", userBits = VAddrBits*2 + 4, totalEntry = 4))
ifu.io.ipf := itlb.io.ipf
io.imem <> Cache(in = itlb.io.out, mmio = mmioXbar.io.in.take(1), flush = Fill(2, ifu.io.flushVec(0) | ifu.io.bpFlush), empty = itlb.io.cacheEmpty)(
CacheConfig(ro = true, name = "icache", userBits = VAddrBits*2 + 4))
val dtlb = TLB(in = exu.io.dmem, mem = dmemXbar.io.in(2), flush = false.B, csrMMU = exu.io.memMMU.dmem)(TLBConfig(name = "dtlb", totalEntry = 64))
dmemXbar.io.in(0) <> dtlb.io.out
io.dmem <> Cache(in = dmemXbar.io.out, mmio = mmioXbar.io.in.drop(1), flush = "b00".U, empty = dtlb.io.cacheEmpty, enable = HasDcache)(CacheConfig(ro = false, name = "dcache"))
// Make DMA access through L1 DCache to keep coherence
val dmemXbar = Module(new SimpleBusCrossbarNto1(2))
dmemXbar.io.in(0) <> exu.io.dmem
dmemXbar.io.in(1) <> io.frontend
dmemXbar.io.in(3) <> io.frontend
val mmioXbar = Module(new SimpleBusCrossbarNto1(if (HasDcache) 2 else 3))
io.imem <> Cache(ifu.io.imem, mmioXbar.io.in.take(1), Fill(2, ifu.io.flushVec(0) | ifu.io.bpFlush))(
CacheConfig(ro = true, name = "icache", userBits = AddrBits*2 + 4)) // userBits = AddrBits + BrIdxBits
io.dmem <> Cache(dmemXbar.io.out, mmioXbar.io.in.drop(1), "b00".U, enable = HasDcache)(CacheConfig(ro = false, name = "dcache"))
io.prefetchReq.bits := exu.io.dmem.req.bits
io.prefetchReq.valid := exu.io.dmem.req.valid
io.mmio <> mmioXbar.io.out
}
此差异已折叠。
......@@ -5,7 +5,7 @@ import chisel3.util._
import chisel3.util.experimental.BoringUtils
import utils._
class WBU(implicit val p: NOOPConfig) extends Module {
class WBU(implicit val p: NOOPConfig) extends NOOPModule{
val io = IO(new Bundle {
val in = Flipped(Decoupled(new CommitIO))
val wb = new WriteBackIO
......@@ -20,12 +20,15 @@ class WBU(implicit val p: NOOPConfig) extends Module {
io.redirect := io.in.bits.decode.cf.redirect
io.redirect.valid := io.in.bits.decode.cf.redirect.valid && io.in.valid
// when (io.in.valid) { printf("TIMER: %d WBU: pc = 0x%x wen %x wdata %x mmio %x intrNO %x\n", GTimer(), io.in.bits.decode.cf.pc, io.wb.rfWen, io.wb.rfData, io.in.bits.isMMIO, io.in.bits.intrNO) }
Debug(){
when (io.in.valid) { printf("[COMMIT] TIMER: %d WBU: pc = 0x%x inst %x wen %x wdata %x mmio %x intrNO %x\n", GTimer(), io.in.bits.decode.cf.pc, io.in.bits.decode.cf.instr, io.wb.rfWen, io.wb.rfData, io.in.bits.isMMIO, io.in.bits.intrNO) }
}
BoringUtils.addSource(io.in.valid, "perfCntCondMinstret")
if (!p.FPGAPlatform) {
BoringUtils.addSource(RegNext(io.in.valid), "difftestCommit")
BoringUtils.addSource(RegNext(io.in.bits.decode.cf.pc), "difftestThisPC")
BoringUtils.addSource(RegNext(SignExt(io.in.bits.decode.cf.pc, AddrBits)), "difftestThisPC")
BoringUtils.addSource(RegNext(io.in.bits.decode.cf.instr), "difftestThisINST")
BoringUtils.addSource(RegNext(io.in.bits.isMMIO), "difftestIsMMIO")
BoringUtils.addSource(RegNext(io.in.bits.decode.cf.instr(1,0)=/="b11".U), "difftestIsRVC")
BoringUtils.addSource(RegNext(io.in.bits.intrNO), "difftestIntrNO")
......
......@@ -99,7 +99,7 @@ class ALU extends NOOPModule {
val isBru = ALUOpType.isBru(func)
// val pcPlus2 = ALUOpType.pcPlus2(func)
val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func)
val target = Mux(isBranch, io.cfIn.pc + io.offset, adderRes)(AddrBits-1,0)
val target = Mux(isBranch, io.cfIn.pc + io.offset, adderRes)(VAddrBits-1,0)
val predictWrong = (io.redirect.target =/= io.cfIn.pnpc)
val isRVC = (io.cfIn.instr(1,0) =/= "b11".U)
io.redirect.target := Mux(!taken && isBranch, Mux(isRVC, io.cfIn.pc + 2.U, io.cfIn.pc + 4.U), target)
......@@ -107,21 +107,31 @@ class ALU extends NOOPModule {
io.redirect.valid := valid && isBru && predictWrong
// may be can be moved to ISU to calculate pc + 4
// this is actually for jal and jalr to write pc + 4/2 to rd
io.out.bits := Mux(isBru, Mux(!isRVC, io.cfIn.pc + 4.U, io.cfIn.pc + 2.U), aluRes)
io.out.bits := Mux(isBru, Mux(!isRVC, SignExt(io.cfIn.pc, AddrBits) + 4.U, SignExt(io.cfIn.pc, AddrBits) + 2.U), aluRes)
// when(pcPlus2 && isBru){
// printf("CJALR %x %x \n ", io.cfIn.instr, io.cfIn.pc)
// }
Debug(){
when(valid && isBru){
printf("[BRU] tgt %x, npc: %x, pdwrong: %x\n", io.redirect.target, io.cfIn.pnpc, predictWrong)
printf("[BRU] tgt %x, valid:%d, npc: %x, pdwrong: %x\n", io.redirect.target, io.redirect.valid, io.cfIn.pnpc, predictWrong)
printf("[BRU] taken:%d addrRes:%x src1:%x src2:%x func:%x\n", taken, adderRes, src1, src2, func)
}
}
Debug(){
Debug(false){
when(valid && isBru){
printf("[BPW] pc %x tgt %x, npc: %x, pdwrong: %x type: %x%x%x%x\n", io.cfIn.pc, io.redirect.target, io.cfIn.pnpc, predictWrong, isBranch, (func === ALUOpType.jal || func === ALUOpType.call), func === ALUOpType.jalr, func === ALUOpType.ret)
}
when(true.B) {
printf("[ALUIN0] valid:%d isBru:%d isBranch:%d \n", valid, isBru, isBranch)
printf("[ALUIN1] pc %x instr %x tgt %x, npc: %x, pdwrong: %x type: %x%x%x%x\n", io.cfIn.pc, io.cfIn.instr, io.redirect.target, io.cfIn.pnpc, predictWrong, isBranch, (func === ALUOpType.jal || func === ALUOpType.call), func === ALUOpType.jalr, func === ALUOpType.ret)
printf("[ALUIN2] func:%b ", func)
printf(" bpuUpdateReq: valid:%d pc:%x isMissPredict:%d actualTarget:%x actualTaken:%x fuOpType:%x btbType:%x isRVC:%d \n", valid && isBru, io.cfIn.pc, predictWrong, target, taken, func, LookupTree(func, RV32I_BRUInstr.bruFuncTobtbTypeTable), isRVC)
printf("[ALUIN3]tgt %x, npc: %x, pdwrong: %x\n", io.redirect.target, io.cfIn.pnpc, predictWrong)
printf("[ALUIN4]taken:%d addrRes:%x src1:%x src2:%x func:%x\n", taken, adderRes, src1, src2, func)
}
}
io.in.ready := true.B
......
此差异已折叠。
......@@ -7,29 +7,325 @@ import utils._
import bus.simplebus._
object LSUOpType {
def lb = "b0000".U
def lh = "b0001".U
def lw = "b0010".U
def ld = "b0011".U
def lbu = "b0100".U
def lhu = "b0101".U
def lwu = "b0110".U
def sb = "b1000".U
def sh = "b1001".U
def sw = "b1010".U
def sd = "b1011".U
def lb = "b000000".U
def lh = "b000001".U
def lw = "b000010".U
def ld = "b000011".U
def lbu = "b000100".U
def lhu = "b000101".U
def lwu = "b000110".U
def sb = "b001000".U
def sh = "b001001".U
def sw = "b001010".U
def sd = "b001011".U
def lr = "b100000".U
def sc = "b100001".U
def amoswap = "b100010".U
def amoadd = "b100011".U
def amoxor = "b100100".U
def amoand = "b100101".U
def amoor = "b100110".U
def amomin = "b110111".U
def amomax = "b110000".U
def amominu = "b110001".U
def amomaxu = "b110010".U
// Plain stores (sb/sh/sw/sd) are the encodings with bit 3 set.
def isStore(func: UInt): Bool = func(3)
// Atomic operations (lr, sc and the amo* group) are the encodings with bit 5 set.
def isAtom(func: UInt): Bool = func(5)
// A load is any operation that is neither a store nor an atomic.
def isLoad(func: UInt): Bool = !isStore(func) & !isAtom(func)
// Exact-match tests for load-reserved and store-conditional.
def isLR(func: UInt): Bool = func === lr
def isSC(func: UInt): Bool = func === sc
// An AMO is an atomic operation other than lr/sc (i.e. a read-modify-write).
def isAMO(func: UInt): Bool = isAtom(func) && !isLR(func) && !isSC(func)
// Width code for word-sized atomics (presumably the instruction's funct3
// value; no user of this constant is visible here -- confirm).
// The "b" radix prefix is required: Chisel treats the first character of a
// string literal as the radix, so the bare "010" is rejected ("Invalid base 0").
def atomW = "b010".U
// Width code for doubleword-sized atomics (presumably the instruction's funct3
// value; no user of this constant is visible here -- confirm).
// The "b" radix prefix is required: Chisel treats the first character of a
// string literal as the radix, so the bare "011" is rejected ("Invalid base 0").
def atomD = "b011".U
}
class LSUIO extends FunctionUnitIO {
val wdata = Input(UInt(XLEN.W))
val dmem = new SimpleBusUC
val instr = Input(UInt(32.W)) // Atom insts need aq rl funct3 bit from instr
val dmem = new SimpleBusUC(addrBits = VAddrBits)
val isMMIO = Output(Bool())
val dtlbPF = Output(Bool())
}
// One buffered request in the LSU store queue. The LSU fills an entry with the
// operands of the incoming request (src1, src2, io.wdata, func) and replays
// them into the execution unit when the entry is dequeued.
class StoreQueueEntry extends NOOPBundle{
// First operand of the request as received by the LSU.
val src1 = UInt(XLEN.W)
// Second operand of the request as received by the LSU.
val src2 = UInt(XLEN.W)
// Data to be written to memory.
val wdata = UInt(XLEN.W)
// LSUOpType operation code (6-bit encoding).
val func = UInt(6.W)
}
// Combinational ALU for the "modify" step of an AMO (read-modify-write)
// instruction.
//
// Inputs:
//   src1     - value loaded from memory
//   src2     - value from the register operand
//   func     - LSUOpType code selecting the operation; codes not listed in the
//              lookup table fall back to the adder result
//   isWordOp - when set, the low 32 bits of the result are sign-extended to XLEN
// Output:
//   result   - value to be written back to memory
class AtomALU extends NOOPModule {
  val io = IO(new NOOPBundle{
    val src1 = Input(UInt(XLEN.W))
    val src2 = Input(UInt(XLEN.W))
    val func = Input(UInt(6.W))
    val isWordOp = Input(Bool())
    val result = Output(UInt(XLEN.W))
  })

  // src1: load result
  // src2: reg result
  val src1 = io.src1
  val src2 = io.src2
  val func = io.func

  // A single adder serves both amoadd and the min/max comparisons: for every
  // function other than amoadd it computes src1 - src2 (src1 + ~src2 + 1).
  val isAdderSub = (func =/= LSUOpType.amoadd)
  // `+&` keeps the carry, so adderRes is XLEN+1 bits wide and bit XLEN is the
  // carry-out of the addition/subtraction.
  val adderRes = (src1 +& (src2 ^ Fill(XLEN, isAdderSub))) + isAdderSub
  val xorRes = src1 ^ src2
  // No carry-out from src1 - src2 means src1 < src2 (unsigned).
  val sltu = !adderRes(XLEN)
  // Signed less-than: same as unsigned unless the sign bits differ.
  val slt = xorRes(XLEN-1) ^ sltu
  // NOTE(review): the comparisons always look at all XLEN bits, also for
  // word-sized AMOs. That is only correct if both operands are sign-extended
  // from 32 bits; src2 appears to be the raw register value -- confirm.

  val res = LookupTreeDefault(func(5, 0), adderRes, List(
    LSUOpType.amoswap -> src2,
    LSUOpType.amoadd  -> adderRes,
    LSUOpType.amoxor  -> xorRes,
    LSUOpType.amoand  -> (src1 & src2),
    LSUOpType.amoor   -> (src1 | src2),
    LSUOpType.amomin  -> Mux(slt(0), src1, src2),
    LSUOpType.amomax  -> Mux(slt(0), src2, src1),
    LSUOpType.amominu -> Mux(sltu(0), src1, src2),
    LSUOpType.amomaxu -> Mux(sltu(0), src2, src1)
  ))

  // Extend to XLEN (the width of io.result) rather than a hard-coded 64 so the
  // expression stays consistent with the port widths declared above.
  io.result := Mux(io.isWordOp, SignExt(res(31,0), XLEN), res)
}
// Load/store unit front-end.
//
// Wraps an LSExecUnit (which performs the individual memory accesses) with a
// control FSM that sequences multi-step atomic instructions:
//   * plain loads/stores are passed straight through in s_idle
//     (when the store queue is disabled),
//   * LR / SC are issued as a single load / store from s_lr / s_sc,
//   * AMOs run load (s_amo_l) -> compute (s_amo_a) -> store (s_amo_s).
// A data-TLB page fault (signal "DTLBPF") aborts whatever is in flight,
// returns the FSM to s_idle and completes the instruction.
class LSU extends NOOPModule {
  val io = IO(new LSUIO)
  val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func)

  // Drives the request inputs, exposes the page-fault flag through `dtlbPF`
  // and returns the unit's result wire.
  def access(valid: Bool, src1: UInt, src2: UInt, func: UInt, dtlbPF: Bool): UInt = {
    this.valid := valid
    this.src1 := src1
    this.src2 := src2
    this.func := func
    dtlbPF := io.dtlbPF
    io.out.bits
  }

  val lsExecUnit = Module(new LSExecUnit)
  lsExecUnit.io.instr := DontCare
  io.dtlbPF := lsExecUnit.io.dtlbPF

  // Request classification
  val storeReq = valid & LSUOpType.isStore(func)
  val loadReq  = valid & LSUOpType.isLoad(func)
  val atomReq  = valid & LSUOpType.isAtom(func)
  val amoReq   = valid & LSUOpType.isAMO(func)
  val lrReq    = valid & LSUOpType.isLR(func)
  val scReq    = valid & LSUOpType.isSC(func)
  BoringUtils.addSource(amoReq, "ISAMO")

  // Fields taken from the raw instruction word
  val aq = io.instr(26)
  val rl = io.instr(25)
  val funct3 = io.instr(14, 12)

  // Access width of the atomic: bit 0 of funct3 selects doubleword over word.
  val atomWidthW = !funct3(0)
  val atomWidthD = funct3(0)

  // Atom LR/SC Control Bits
  // setLr/setLrVal/setLrAddr are exported; lr/lrAddr are imported through
  // BoringUtils (presumably held in the CSR unit -- the other end of these
  // connections is not visible here).
  val setLr = Wire(Bool())
  val setLrVal = Wire(Bool())
  val setLrAddr = Wire(UInt(AddrBits.W))
  val lr = WireInit(Bool(), false.B)
  val lrAddr = WireInit(UInt(AddrBits.W), DontCare)
  BoringUtils.addSource(setLr, "set_lr")
  BoringUtils.addSource(setLrVal, "set_lr_val")
  BoringUtils.addSource(setLrAddr, "set_lr_addr")
  BoringUtils.addSink(lr, "lr")
  BoringUtils.addSink(lrAddr, "lr_addr")

  // An SC must fail when there is no valid reservation OR the reservation is
  // for a different address. Checking the address alone would let an SC
  // succeed against a stale lrAddr after the reservation has been cleared
  // (e.g. a second SC following a successful one).
  val scInvalid = (src1 =/= lrAddr || !lr) && scReq

  // PF signal from TLB
  val dtlbFinish = WireInit(false.B)
  val dtlbPF = WireInit(false.B)
  val dtlbEnable = WireInit(false.B)
  BoringUtils.addSink(dtlbFinish, "DTLBFINISH")
  BoringUtils.addSink(dtlbPF, "DTLBPF")
  BoringUtils.addSink(dtlbEnable, "DTLBENABLE")

  // LSU control FSM state
  val s_idle :: s_load :: s_lr :: s_sc :: s_amo_l :: s_amo_a :: s_amo_s :: Nil = Enum(7)

  // LSU control FSM
  val state = RegInit(s_idle)
  // atomMemReg: loaded value, later overwritten with the ALU result to store.
  // atomRegReg: loaded (old) value, returned as the AMO's result.
  val atomMemReg = Reg(UInt(XLEN.W))
  val atomRegReg = Reg(UInt(XLEN.W))
  val atomALU = Module(new AtomALU)
  atomALU.io.src1 := atomMemReg
  atomALU.io.src2 := io.wdata
  atomALU.io.func := func
  atomALU.io.isWordOp := atomWidthW

  // StoreQueue
  // TODO: inst fence needs storeQueue to be finished
  val enableStoreQueue = EnableStoreQueue // StoreQueue is disabled for page fault detection
  val storeQueue = Module(new Queue(new StoreQueueEntry, 4))
  storeQueue.io.enq.valid := state === s_idle && storeReq
  storeQueue.io.enq.bits.src1 := src1
  storeQueue.io.enq.bits.src2 := src2
  storeQueue.io.enq.bits.wdata := io.wdata
  storeQueue.io.enq.bits.func := func
  storeQueue.io.deq.ready := lsExecUnit.io.out.fire()

  // Defaults; every FSM state below overrides what it needs.
  lsExecUnit.io.in.valid := false.B
  lsExecUnit.io.out.ready := DontCare
  lsExecUnit.io.in.bits.src1 := DontCare
  lsExecUnit.io.in.bits.src2 := DontCare
  lsExecUnit.io.in.bits.func := DontCare
  lsExecUnit.io.wdata := DontCare
  io.out.valid := false.B
  io.in.ready := false.B

  switch (state) {
    is(s_idle){
      if(enableStoreQueue){
        // Pending queued stores take priority over the incoming request.
        lsExecUnit.io.in.valid := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.valid, io.in.valid)
        lsExecUnit.io.out.ready := io.out.ready
        lsExecUnit.io.in.bits.src1 := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.src1, src1)
        lsExecUnit.io.in.bits.src2 := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.src2, src2)
        lsExecUnit.io.in.bits.func := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.func, func)
        lsExecUnit.io.wdata := Mux(storeQueue.io.deq.valid, storeQueue.io.deq.bits.wdata, io.wdata)
        io.in.ready := Mux(storeReq, storeQueue.io.enq.ready, false.B) || scInvalid
        io.out.valid := Mux(storeReq, storeQueue.io.enq.ready, false.B) || scInvalid
      }else{
        // Non-atomic requests go straight to the execution unit; a failing SC
        // completes immediately without touching memory.
        lsExecUnit.io.in.valid := io.in.valid && !atomReq
        lsExecUnit.io.out.ready := io.out.ready
        lsExecUnit.io.in.bits.src1 := src1
        lsExecUnit.io.in.bits.src2 := src2
        lsExecUnit.io.in.bits.func := func
        lsExecUnit.io.wdata := io.wdata
        io.in.ready := lsExecUnit.io.out.fire() || scInvalid
        io.out.valid := lsExecUnit.io.out.valid || scInvalid
      }

      // when(storeReq){
      //   state := s_idle
      // }
      if(enableStoreQueue){
        when(loadReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_load)}
        when(amoReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_amo_l)}
        when(lrReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_lr)}
        when(scReq){state := Mux(storeQueue.io.deq.valid, s_idle, s_sc)}
      }else{
        when(amoReq){state := s_amo_l}
        when(lrReq){state := s_lr}
        when(scReq){state := Mux(scInvalid, s_idle, s_sc)}
      }
    }

    is(s_load){
      lsExecUnit.io.in.valid := true.B
      lsExecUnit.io.out.ready := io.out.ready
      lsExecUnit.io.in.bits.src1 := src1
      lsExecUnit.io.in.bits.src2 := src2
      lsExecUnit.io.in.bits.func := func
      lsExecUnit.io.wdata := DontCare
      io.in.ready := lsExecUnit.io.out.fire()
      io.out.valid := lsExecUnit.io.out.valid
      when(lsExecUnit.io.out.fire()){state := s_idle}//load finished
    }

    is(s_amo_l){
      // AMO step 1: read the current memory value at address src1.
      lsExecUnit.io.in.valid := true.B
      lsExecUnit.io.out.ready := true.B
      lsExecUnit.io.in.bits.src1 := src1
      lsExecUnit.io.in.bits.src2 := 0.U
      lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.ld, LSUOpType.lw)
      lsExecUnit.io.wdata := DontCare
      io.in.ready := false.B
      io.out.valid := false.B
      when(lsExecUnit.io.out.fire()){
        state := s_amo_a;
        Debug(){printf("[AMO-L] lsExecUnit.io.out.bits %x addr %x src2 %x\n", lsExecUnit.io.out.bits, lsExecUnit.io.in.bits.src1, io.wdata)}
      }
      atomMemReg := lsExecUnit.io.out.bits
      atomRegReg := lsExecUnit.io.out.bits
    }

    is(s_amo_a){
      // AMO step 2: one cycle to latch the ALU result as the value to store.
      lsExecUnit.io.in.valid := false.B
      lsExecUnit.io.out.ready := false.B
      lsExecUnit.io.in.bits.src1 := DontCare
      lsExecUnit.io.in.bits.src2 := DontCare
      lsExecUnit.io.in.bits.func := DontCare
      lsExecUnit.io.wdata := DontCare
      io.in.ready := false.B
      io.out.valid := false.B
      state := s_amo_s
      atomMemReg := atomALU.io.result
      Debug(){printf("[AMO-A] src1 %x src2 %x res %x\n", atomMemReg, io.wdata, atomALU.io.result)}
    }

    is(s_amo_s){
      // AMO step 3: write the computed value back; the instruction completes
      // when this store completes.
      lsExecUnit.io.in.valid := true.B
      lsExecUnit.io.out.ready := io.out.ready
      lsExecUnit.io.in.bits.src1 := src1
      lsExecUnit.io.in.bits.src2 := 0.U
      lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.sd, LSUOpType.sw)
      lsExecUnit.io.wdata := atomMemReg
      io.in.ready := lsExecUnit.io.out.fire()
      io.out.valid := lsExecUnit.io.out.fire()
      when(lsExecUnit.io.out.fire()){
        state := s_idle;
        Debug(){printf("[AMO-S] atomRegReg %x addr %x\n", atomRegReg, lsExecUnit.io.in.bits.src1)}
      }
    }

    is(s_lr){
      lsExecUnit.io.in.valid := true.B
      lsExecUnit.io.out.ready := io.out.ready
      lsExecUnit.io.in.bits.src1 := src1
      lsExecUnit.io.in.bits.src2 := 0.U
      lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.ld, LSUOpType.lw)
      lsExecUnit.io.wdata := DontCare
      io.in.ready := lsExecUnit.io.out.fire()
      io.out.valid := lsExecUnit.io.out.fire()
      when(lsExecUnit.io.out.fire()){
        state := s_idle;
        Debug(){printf("[LR]\n")}
      }
    }

    is(s_sc){
      lsExecUnit.io.in.valid := true.B
      lsExecUnit.io.out.ready := io.out.ready
      lsExecUnit.io.in.bits.src1 := src1
      lsExecUnit.io.in.bits.src2 := 0.U
      lsExecUnit.io.in.bits.func := Mux(atomWidthD, LSUOpType.sd, LSUOpType.sw)
      lsExecUnit.io.wdata := io.wdata
      io.in.ready := lsExecUnit.io.out.fire()
      io.out.valid := lsExecUnit.io.out.fire()
      when(lsExecUnit.io.out.fire()){
        state := s_idle;
        Debug(){printf("[SC] \n")}
      }
    }
  }

  // A page fault overrides the FSM: abort to idle and complete the instruction.
  // This must stay after the switch so that it wins by last-connect semantics.
  when(dtlbPF){
    state := s_idle
    io.out.valid := true.B
    io.in.ready := true.B
  }

  // controled by FSM
  // io.in.ready := lsExecUnit.io.in.ready
  // lsExecUnit.io.wdata := io.wdata
  // io.out.valid := lsExecUnit.io.out.valid

  //Set LR/SC bits
  // LR sets the reservation (value true, address src1); SC always clears it.
  setLr := io.out.fire() && (lrReq || scReq)
  setLrVal := lrReq
  setLrAddr := src1

  io.dmem <> lsExecUnit.io.dmem
  // Result: SC returns its failure flag, an AMO in its store step returns the
  // old memory value, everything else returns the execution unit's output.
  io.out.bits := Mux(scReq, scInvalid, Mux(state === s_amo_s, atomRegReg, lsExecUnit.io.out.bits))

  // Atomics use src1 as the address directly; loads/stores add the offset in src2.
  val addr = Mux(atomReq, src1, src1 + src2)
  io.isMMIO := AddressSpace.isMMIO(addr) && io.out.valid
  // io.isMMIO := lsExecUnit.io.isMMIO
}
class LSExecUnit extends NOOPModule {
val io = IO(new LSUIO)
val (valid, src1, src2, func) = (io.in.valid, io.in.bits.src1, io.in.bits.src2, io.in.bits.func)
def access(valid: Bool, src1: UInt, src2: UInt, func: UInt): UInt = {
......@@ -63,23 +359,50 @@ class LSU extends NOOPModule {
val isStore = valid && LSUOpType.isStore(func)
val partialLoad = !isStore && (func =/= LSUOpType.ld)
val s_idle :: s_wait_resp :: s_partialLoad :: Nil = Enum(3)
val s_idle :: s_wait_tlb :: s_wait_resp :: s_partialLoad :: Nil = Enum(4)
val state = RegInit(s_idle)
val dtlbFinish = WireInit(false.B)
val dtlbPF = WireInit(false.B)
val dtlbEnable = WireInit(false.B)
BoringUtils.addSink(dtlbFinish, "DTLBFINISH")
BoringUtils.addSink(dtlbPF, "DTLBPF")
BoringUtils.addSink(dtlbEnable, "DTLBENABLE")
io.dtlbPF := dtlbPF
switch (state) {
is (s_idle) { when (dmem.req.fire()) { state := s_wait_resp } }
is (s_idle) {
when (dmem.req.fire() && dtlbEnable) { state := s_wait_tlb }
when (dmem.req.fire() && !dtlbEnable) { state := s_wait_resp }
//when (dmem.req.fire()) { state := Mux(isStore, s_partialLoad, s_wait_resp) }
}
is (s_wait_tlb) {
when (dtlbFinish && dtlbPF ) { state := s_idle }
when (dtlbFinish && !dtlbPF) { state := s_wait_resp/*Mux(isStore, s_partialLoad, s_wait_resp) */}
}
is (s_wait_resp) { when (dmem.resp.fire()) { state := Mux(partialLoad, s_partialLoad, s_idle) } }
is (s_partialLoad) { state := s_idle }
}
Debug(){
//when (dmem.req.fire()){
printf("[LSU] IN(%d, %d) OUT(%d, %d) addr %x, size %x, wdata_raw %x, isStore %x \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, addr, func(1,0), io.wdata, isStore)
printf("[LSU] dtlbFinish:%d dtlbEnable:%d dtlbPF:%d state:%d addr:%x dmemReqFire:%d dmemRespFire:%d dmemRdata:%x \n",dtlbFinish, dtlbEnable, dtlbPF, state, dmem.req.bits.addr, dmem.req.fire(), dmem.resp.fire(), dmem.resp.bits.rdata)
//}
//when (dtlbFinish && dtlbEnable) {
printf("[LSU] dtlbFinish:%d dtlbEnable:%d dtlbPF:%d state:%d addr:%x dmemReqFire:%d dmemRespFire:%d dmemRdata:%x \n",dtlbFinish, dtlbEnable, dtlbPF, state, dmem.req.bits.addr, dmem.req.fire(), dmem.resp.fire(), dmem.resp.bits.rdata)
//}
}
val size = func(1,0)
dmem.req.bits.apply(addr = addr, size = size, wdata = genWdata(io.wdata, size),
dmem.req.bits.apply(addr = addr(VAddrBits-1, 0), size = size, wdata = genWdata(io.wdata, size),
wmask = genWmask(addr, size), cmd = Mux(isStore, SimpleBusCmd.write, SimpleBusCmd.read))
dmem.req.valid := valid && (state === s_idle)
dmem.resp.ready := true.B
io.out.valid := Mux(partialLoad, state === s_partialLoad, dmem.resp.fire() && (state === s_wait_resp))
io.in.ready := (state === s_idle)
io.out.valid := Mux( dtlbPF, true.B, Mux(partialLoad, state === s_partialLoad, dmem.resp.fire() && (state === s_wait_resp)))
io.in.ready := (state === s_idle) || dtlbPF
val rdata = dmem.resp.bits.rdata
val rdataLatch = RegNext(rdata)
......@@ -103,10 +426,16 @@ class LSU extends NOOPModule {
))
io.out.bits := Mux(partialLoad, rdataPartialLoad, rdata)
io.isMMIO := AddressSpace.isMMIO(addr) && io.out.valid
BoringUtils.addSource(dmem.isRead() && dmem.req.fire(), "perfCntCondMloadInstr")
BoringUtils.addSource(BoolStopWatch(dmem.isRead(), dmem.resp.fire()), "perfCntCondMloadStall")
BoringUtils.addSource(BoolStopWatch(dmem.isWrite(), dmem.resp.fire()), "perfCntCondMstoreStall")
BoringUtils.addSource(io.isMMIO, "perfCntCondMmmioInstr")
Debug() {
when (dmem.req.fire() && (addr === "h80104708".U || genWdata(io.wdata, size)(31,0) === "h80000218".U)){
//printf("[LSUBP] time %d, addr %x, size %x, wdata_raw %x, wdata %x, isStore %x \n", GTimer(), addr, func(1,0), io.wdata, genWdata(io.wdata, size), isStore)
}
}
}
......@@ -8,8 +8,9 @@ import utils._
// memory order unit
object MOUOpType {
def fence = "b0".U
def fencei = "b1".U
def fence = "b00".U
def fencei = "b01".U
def sfence_vma = "b10".U
}
class MOUIO extends FunctionUnitIO {
......@@ -39,6 +40,14 @@ class MOU extends NOOPModule {
}
}
val flushTLB = valid && (func === MOUOpType.sfence_vma)
BoringUtils.addSource(flushTLB, "MOUFlushTLB")
Debug(false) {
when (flushTLB) {
printf("[MOU] Flush TLB at %x\n", io.cfIn.pc)
}
}
io.out.bits := 0.U
io.in.ready := true.B
io.out.valid := valid
......
......@@ -7,10 +7,18 @@ object Priviledged extends HasInstrType {
def ECALL = BitPat("b000000000000_00000_000_00000_1110011")
def MRET = BitPat("b001100000010_00000_000_00000_1110011")
def SRET = BitPat("b000100000010_00000_000_00000_1110011")
def SFANCE_VMA = BitPat("b0001001_?????_?????_000_00000_1110011")
def FENCE = BitPat("b????????????_?????_000_?????_0001111")
def WFI = BitPat("b0001000_00101_00000_000_00000_1110011")
val table = Array(
ECALL -> List(InstrI, FuType.csr, CSROpType.jmp),
MRET -> List(InstrI, FuType.csr, CSROpType.jmp),
SRET -> List(InstrI, FuType.csr, CSROpType.jmp)
SRET -> List(InstrI, FuType.csr, CSROpType.jmp),
SFANCE_VMA -> List(InstrR, FuType.mou, MOUOpType.sfence_vma),
FENCE -> List(InstrS, FuType.alu, ALUOpType.add), // nop InstrS -> !wen
WFI -> List(InstrI, FuType.alu, ALUOpType.add) // nop
// FENCE -> List(InstrB, FuType.mou, MOUOpType.fencei)
)
}
package noop
import chisel3._
import chisel3.util._
// Decode patterns and decode-table entries for the atomic instructions
// (RISC-V "A" extension: LR/SC and the AMO* read-modify-write operations).
//
// Every pattern below is a 32-bit BitPat, written MSB first with underscores
// separating the fields as 5 | 2 | 5 | 5 | 3 | 5 | 7 bits. In the RISC-V
// encoding these are funct5 | aq,rl | rs2 | rs1 | funct3 | rd | opcode.
// All patterns share the opcode 0101111; the two bits after funct5 are
// always wildcards here.
object RVAInstr extends HasInstrType {
// Note: use instr(14,12) to distinguish D/W inst
// Earlier width-agnostic LR/SC patterns, kept for reference only:
// def LR = BitPat("b00010??00000_?????_???_?????_0101111")
// def SC = BitPat("b00011??00000_?????_???_?????_0101111")
// LR/SC are split per width: funct3 = 011 for the _D form, 010 for the _W form.
// LR additionally requires the rs2 field to be 00000; SC leaves it free.
def LR_D = BitPat("b00010_??_00000_?????_011_?????_0101111")
def SC_D = BitPat("b00011_??_?????_?????_011_?????_0101111")
def LR_W = BitPat("b00010_??_00000_?????_010_?????_0101111")
def SC_W = BitPat("b00011_??_?????_?????_010_?????_0101111")
// AMO patterns use funct3 = 01?, so one pattern matches both funct3 values
// 010 and 011; only funct5 differs between operations.
def AMOSWAP = BitPat("b00001_??_?????_?????_01?_?????_0101111")
def AMOADD = BitPat("b00000_??_?????_?????_01?_?????_0101111")
def AMOXOR = BitPat("b00100_??_?????_?????_01?_?????_0101111")
def AMOAND = BitPat("b01100_??_?????_?????_01?_?????_0101111")
def AMOOR = BitPat("b01000_??_?????_?????_01?_?????_0101111")
def AMOMIN = BitPat("b10000_??_?????_?????_01?_?????_0101111")
def AMOMAX = BitPat("b10100_??_?????_?????_01?_?????_0101111")
def AMOMINU = BitPat("b11000_??_?????_?????_01?_?????_0101111")
def AMOMAXU = BitPat("b11100_??_?????_?????_01?_?????_0101111")
// funct3 === 010 or 011
// Decode table: pattern -> List(instruction type, function unit, operation).
// Every entry is dispatched to FuType.lsu. The _D and _W variants of LR/SC
// map to the same LSUOpType, so the table itself carries no width
// information (see the note about instr(14,12) at the top of this object).
// LR uses InstrI, SC uses InstrSA and the AMOs use InstrR; these instruction
// types are defined outside this object.
val table = Array(
// LR -> List(InstrI, FuType.lsu, LSUOpType.lr),
LR_D -> List(InstrI, FuType.lsu, LSUOpType.lr),
LR_W -> List(InstrI, FuType.lsu, LSUOpType.lr),
// SC -> List(InstrS, FuType.lsu, LSUOpType.sc),
SC_D -> List(InstrSA, FuType.lsu, LSUOpType.sc),
SC_W -> List(InstrSA, FuType.lsu, LSUOpType.sc),
AMOSWAP -> List(InstrR, FuType.lsu, LSUOpType.amoswap),
AMOADD -> List(InstrR, FuType.lsu, LSUOpType.amoadd),
AMOXOR -> List(InstrR, FuType.lsu, LSUOpType.amoxor),
AMOAND -> List(InstrR, FuType.lsu, LSUOpType.amoand),
AMOOR -> List(InstrR, FuType.lsu, LSUOpType.amoor),
AMOMIN -> List(InstrR, FuType.lsu, LSUOpType.amomin),
AMOMAX -> List(InstrR, FuType.lsu, LSUOpType.amomax),
AMOMINU -> List(InstrR, FuType.lsu, LSUOpType.amominu),
AMOMAXU -> List(InstrR, FuType.lsu, LSUOpType.amomaxu)
)
}
......@@ -55,7 +55,7 @@ object RVCInstr extends HasInstrType with HasRVCConst {
// RVC 00
// def C_XX = BitPat("b????????????????_???_?_10_987_65_432_10")
def C_ILLEGAL = BitPat("b????????????????_000_0_00_000_00_000_00")
def C_ILLEGAL = BitPat("b0000000000000000_000_0_00_000_00_000_00")
def C_ADDI4SPN = BitPat("b????????????????_000_?_??_???_??_???_00")
def C_FLD = BitPat("b????????????????_001_?_??_???_??_???_00")
// def C_LQ = BitPat("b????????????????_001_?_??_???_??_???_00")
......@@ -119,7 +119,7 @@ object RVCInstr extends HasInstrType with HasRVCConst {
// def is_C_ADDI4SPN(op: UInt) = op(12,5) =/= 0.U
val table = Array(
// C_ILLEGAL -> List(InstrI, FuType.alu, ALUOpType.add),
C_ILLEGAL -> List(InstrN, FuType.csr, CSROpType.jmp),
C_ADDI4SPN -> List(InstrI, FuType.alu, ALUOpType.add),
// C_FLD -> List(InstrI, FuType.alu, ALUOpType.add),
C_LW -> List(InstrI, FuType.lsu, LSUOpType.lw),
......
......@@ -3,6 +3,7 @@ package system
import noop.{NOOP, NOOPConfig, HasNOOPParameter, Cache, CacheConfig}
import bus.axi4.{AXI4, AXI4Lite}
import bus.simplebus._
import utils._
import chisel3._
import chisel3.util._
......@@ -17,19 +18,30 @@ class Prefetcher extends Module with HasPrefetcherParameter {
val in = Flipped(Decoupled(new SimpleBusReqBundle))
val out = Decoupled(new SimpleBusReqBundle)
})
/*
io.in.ready := !io.in.valid || io.out.fire()
val lastReq = RegEnable(io.in.bits, io.in.fire())
val lastAddr = lastReq.addr
io.out.bits := lastReq
io.out.bits.cmd := SimpleBusCmd.prefetch
io.out.bits.addr := lastAddr + Cat(Cat(0.U((TagBits + IndexBits - 1).W), 1.U(1.W)), 0.U(OffsetBits.W))
io.out.valid := io.in.valid
*/
io.out.bits := io.in.bits
io.out.bits.cmd := SimpleBusCmd.prefetch
// io.out.bits.addr := io.in.bits.addr + Cat(Cat(0.U((TagBits + IndexBits - 1).W), 1.U(1.W)), 0.U(OffsetBits.W))
io.out.bits.addr := io.in.bits.addr + 64.U(32.W)
io.out.valid := io.in.valid
io.in.ready := !io.in.valid || io.out.fire()
val getNewReq = RegInit(false.B)
val prefetchReq = RegNext(io.in.bits)
prefetchReq.cmd := SimpleBusCmd.prefetch
prefetchReq.addr := io.in.bits.addr + XLEN.U
val lastReqAddr = (RegEnable(io.in.bits.addr, io.in.fire()))
val thisReqAddr = io.in.bits.addr
val lineMask = Cat(Fill(AddrBits - 6, 1.U(1.W)), 0.U(6.W))
val neqAddr = (thisReqAddr & lineMask) =/= (lastReqAddr & lineMask)
when (!getNewReq) {
io.out.bits <> io.in.bits
io.out.valid := io.in.valid
io.in.ready := !io.in.valid || io.out.fire()
getNewReq := io.in.fire() && io.in.bits.isBurst() && neqAddr
}.otherwise {
io.out.bits <> prefetchReq
io.out.valid := true.B
io.in.ready := false.B
getNewReq := !io.out.fire()
}
Debug() {
printf("%d: [Prefetcher]: in(%d,%d), out(%d,%d), in.bits.addr = %x\n",
GTimer(), io.in.valid, io.in.ready, io.out.valid, io.out.ready, io.in.bits.addr)
}
}
......@@ -11,7 +11,7 @@ import chisel3.util.experimental.BoringUtils
trait HasSoCParameter {
val EnableILA = false
val HasL2cache = true
val HasPrefetch = false
val HasPrefetch = true
}
class ILABundle extends Bundle {
......@@ -49,16 +49,14 @@ class NOOPSoC(implicit val p: NOOPConfig) extends Module with HasSoCParameter {
val l2cacheOut = Wire(new SimpleBusC)
val l2cacheIn = if (HasPrefetch) {
val prefetcher = Module(new Prefetcher)
prefetcher.io.in <> noop.io.prefetchReq
val l2cacheIn = Wire(new SimpleBusUC)
val l2cacheInReqArb = Module(new Arbiter(chiselTypeOf(noop.io.prefetchReq.bits), 2))
l2cacheInReqArb.io.in(0) <> xbar.io.out.req
l2cacheInReqArb.io.in(1) <> prefetcher.io.out
l2cacheIn.req <> l2cacheInReqArb.io.out
prefetcher.io.in <> xbar.io.out.req
l2cacheIn.req <> prefetcher.io.out
xbar.io.out.resp <> l2cacheIn.resp
l2cacheIn
} else xbar.io.out
l2cacheOut <> Cache(in = l2cacheIn, mmio = 0.U.asTypeOf(new SimpleBusUC) :: Nil, flush = "b00".U, enable = true)(
val l2Empty = Wire(Bool())
l2cacheOut <> Cache(in = l2cacheIn, mmio = 0.U.asTypeOf(new SimpleBusUC) :: Nil, flush = "b00".U, empty = l2Empty, enable = true)(
CacheConfig(name = "l2cache", totalSize = 128, cacheLevel = 2))
io.mem <> l2cacheOut.mem.toAXI4()
l2cacheOut.coh.resp.ready := true.B
......@@ -67,11 +65,7 @@ class NOOPSoC(implicit val p: NOOPConfig) extends Module with HasSoCParameter {
} else {
io.mem <> xbar.io.out.toAXI4()
}
if (!HasPrefetch) {
noop.io.prefetchReq.ready := true.B
}
noop.io.imem.coh.resp.ready := true.B
noop.io.imem.coh.req.valid := false.B
noop.io.imem.coh.req.bits := DontCare
......
......@@ -7,7 +7,7 @@ import noop.NOOPConfig
object Debug {
def apply(flag: Boolean = NOOPConfig().EnableDebug, cond: Bool = true.B)(body: => Unit): Any =
if (flag) { when (cond && GTimer() > 739200.U) { body } }
if (flag) { when (cond && GTimer() > 100.U) { body } }
}
object ShowType {
......
......@@ -17,3 +17,27 @@ object RegMap {
def generate(mapping: Map[Int, (UInt, UInt => UInt)], addr: UInt, rdata: UInt,
wen: Bool, wdata: UInt, wmask: UInt):Unit = generate(mapping, addr, rdata, addr, wen, wdata, wmask)
}
// Register-map helper in which every entry carries a write mask, a write
// function and a read mask in addition to the register itself.
// A mapping entry has the shape: addr -> (reg, wmask, wfn, rmask).
object MaskedRegMap { // TODO: add read mask
// Sentinel to pass as `wfn`: generate() emits no write logic when wfn is null.
def Unwritable = null
// Identity write function: the masked write data is stored unchanged.
def NoSideEffect: UInt => UInt = (x=>x)
// 64-bit all-ones mask (default for both wmask and rmask in apply()).
def WritableMask = Fill(64, true.B)
// 64-bit all-zero mask.
def UnwritableMask = 0.U(64.W)
// Builds one mapping entry (addr, (reg, wmask, wfn, rmask)); the result can
// be used as a key/value pair of the Map expected by generate()/isIllegalAddr().
def apply(addr: Int, reg: UInt, wmask: UInt = WritableMask, wfn: UInt => UInt = (x => x), rmask: UInt = WritableMask) = (addr, (reg, wmask, wfn, rmask))
// Generates read and write logic for `mapping`, with separate read and
// write addresses.
//   rdata is driven with `reg & rmask` of the entry selected by raddr
//   (selection is done by LookupTree, defined elsewhere).
//   For each entry that has a write function, when `wen` is set and `waddr`
//   equals the entry address, the register is updated with
//   wfn(MaskData(reg, wdata, wmask)). MaskData is defined elsewhere;
//   presumably it merges wdata into reg under wmask -- verify.
def generate(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt)], raddr: UInt, rdata: UInt,
waddr: UInt, wen: Bool, wdata: UInt):Unit = {
val chiselMapping = mapping.map { case (a, (r, wm, w, rm)) => (a.U, r, wm, w, rm) }
rdata := LookupTree(raddr, chiselMapping.map { case (a, r, wm, w, rm) => (a, r & rm) })
chiselMapping.map { case (a, r, wm, w, rm) =>
// The `if` is a Scala condition, decided while the circuit is being
// generated; only the `when` becomes hardware.
// NOTE(review): UnwritableMask is a `def` that builds a new 0.U(64.W) on
// every call -- confirm that `wm != UnwritableMask` really detects a zero
// mask here and is not just an object-identity comparison.
if (w != null && wm != UnwritableMask) when (wen && waddr === a) { r := w(MaskData(r, wdata, wm)) }
}
}
// Returns a Bool built with LookupTreeDefault: false.B is supplied for every
// address present in `mapping`, and true.B is the default value, i.e. the
// result is intended to flag an `addr` that matches no entry.
def isIllegalAddr(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt)], addr: UInt):Bool = {
val illegalAddr = Wire(Bool())
val chiselMapping = mapping.map { case (a, (r, wm, w, rm)) => (a.U, r, wm, w, rm) }
illegalAddr := LookupTreeDefault(addr, true.B, chiselMapping.map { case (a, r, wm, w, rm) => (a, false.B) })
illegalAddr
}
// Convenience overload: the same `addr` is used for both reading and writing.
def generate(mapping: Map[Int, (UInt, UInt, UInt => UInt, UInt)], addr: UInt, rdata: UInt,
wen: Bool, wdata: UInt):Unit = generate(mapping, addr, rdata, addr, wen, wdata)
}
......@@ -8,6 +8,8 @@
# error Please define REF_SO to the path of NEMU shared object file
#endif
#define printCSR(x) printf(""#x": 0x%016lx ", x)
void (*ref_difftest_memcpy_from_dut)(paddr_t dest, void *src, size_t n) = NULL;
void (*ref_difftest_getregs)(void *c) = NULL;
void (*ref_difftest_setregs)(const void *c) = NULL;
......@@ -67,10 +69,28 @@ void init_difftest(uint64_t *reg) {
ref_difftest_setregs(reg);
}
int difftest_step(uint64_t *reg_scala, uint64_t this_pc, int isMMIO, int isRVC, uint64_t intrNO) {
int difftest_step(
uint64_t *reg_scala,
uint64_t this_pc,
int this_inst,
int isMMIO,
int isRVC,
uint64_t intrNO,
int priviledgeMode,
uint64_t mstatus,
uint64_t sstatus,
uint64_t mepc,
uint64_t sepc,
uint64_t mcause,
uint64_t scause
) {
#define DEBUG_RETIRE_TRACE_SIZE 16
uint64_t ref_r[33];
static uint64_t nemu_pc = 0x80000000;
static uint64_t pc_retire_queue[8] = {0};
static uint64_t pc_retire_queue[DEBUG_RETIRE_TRACE_SIZE] = {0};
static int inst_retire_queue[DEBUG_RETIRE_TRACE_SIZE] = {0};
static int pc_retire_pointer = 7;
if (isMMIO) {
......@@ -80,8 +100,9 @@ int difftest_step(uint64_t *reg_scala, uint64_t this_pc, int isMMIO, int isRVC,
nemu_pc += isRVC ? 2 : 4;
// to skip the checking of an instruction, just copy the reg state to reference design
ref_difftest_setregs(reg_scala);
pc_retire_pointer = (pc_retire_pointer+1) % 8;
pc_retire_queue[pc_retire_pointer] = nemu_pc;
pc_retire_pointer = (pc_retire_pointer+1) % DEBUG_RETIRE_TRACE_SIZE;
pc_retire_queue[pc_retire_pointer] = this_pc;
inst_retire_queue[pc_retire_pointer] = this_inst;
return 0;
}
......@@ -93,24 +114,37 @@ int difftest_step(uint64_t *reg_scala, uint64_t this_pc, int isMMIO, int isRVC,
ref_difftest_getregs(&ref_r);
pc_retire_pointer = (pc_retire_pointer+1) % 8;
pc_retire_queue[pc_retire_pointer] = nemu_pc;
pc_retire_pointer = (pc_retire_pointer+1) % DEBUG_RETIRE_TRACE_SIZE;
pc_retire_queue[pc_retire_pointer] = this_pc;
inst_retire_queue[pc_retire_pointer] = this_inst;
uint64_t temp = ref_r[32];
ref_r[32] = nemu_pc;
nemu_pc = temp;
if (memcmp(reg_scala, ref_r, sizeof(ref_r)) != 0) {
printf("=========Retire Trace=========\n");
printf("\n==============Retire Trace==============\n");
int j;
for(j = 0; j < 8; j++){
printf("retire pc [%x]: %lx %s\n", j, pc_retire_queue[j], (j==pc_retire_pointer)?"<--":"");
for(j = 0; j < DEBUG_RETIRE_TRACE_SIZE; j++){
printf("retire trace [%x]: pc %010lx inst %08x %s\n", j, pc_retire_queue[j], inst_retire_queue[j], (j==pc_retire_pointer)?"<--":"");
}
printf("\n============== Reg Diff ==============\n");
ref_isa_reg_display();
printf("\n============== Csr Diff ==============\n");
printCSR(priviledgeMode);
puts("");
printCSR(mstatus);
printCSR(mcause);
printCSR(mepc);
puts("");
printCSR(sstatus);
printCSR(scause);
printCSR(sepc);
puts("");
int i;
for (i = 0; i < 33; i ++) {
if (reg_scala[i] != ref_r[i]) {
printf("x%2d different at pc = 0x%08lx, right= 0x%016lx, wrong = 0x%016lx\n",
printf("x%2d different at pc = 0x%010lx, right= 0x%016lx, wrong = 0x%016lx\n",
i, this_pc, ref_r[i], reg_scala[i]);
}
}
......
......@@ -107,7 +107,7 @@ class Emulator {
uint32_t lasttime = 0;
uint64_t lastcommit = n;
int hascommit = 0;
const int stuck_limit = 200;
const int stuck_limit = 500;
#if VM_TRACE
Verilated::traceEverOn(true); // Verilator must compute traced signals
......@@ -136,9 +136,36 @@ class Emulator {
uint64_t reg[33];
read_emu_regs(reg);
extern int difftest_step(uint64_t *reg_scala, uint64_t this_pc, int isMMIO, int isRVC, uint64_t intrNO);
if (difftest_step(reg, dut_ptr->io_difftest_thisPC, dut_ptr->io_difftest_isMMIO,
dut_ptr->io_difftest_isRVC, dut_ptr->io_difftest_intrNO)) {
extern int difftest_step(
uint64_t *reg_scala,
uint64_t this_pc,
int this_inst,
int isMMIO,
int isRVC,
uint64_t intrNO,
int priviledgeMode,
uint64_t mstatus,
uint64_t sstatus,
uint64_t mepc,
uint64_t sepc,
uint64_t mcause,
uint64_t scause
);
if (difftest_step(
reg,
dut_ptr->io_difftest_thisPC,
dut_ptr->io_difftest_thisINST,
dut_ptr->io_difftest_isMMIO,
dut_ptr->io_difftest_isRVC,
dut_ptr->io_difftest_intrNO,
dut_ptr->io_difftest_priviledgeMode,
dut_ptr->io_difftest_mstatus,
dut_ptr->io_difftest_sstatus,
dut_ptr->io_difftest_mepc,
dut_ptr->io_difftest_sepc,
dut_ptr->io_difftest_mcause,
dut_ptr->io_difftest_scause
)) {
#if VM_TRACE
tfp->close();
#endif
......
......@@ -8,6 +8,67 @@ static long img_size = 0;
void* get_img_start() { return &ram[0]; }
long get_img_size() { return img_size; }
// Builds a fixed three-level Sv39 page table and copies it into the last
// pages of the simulated RAM (`ram` / RAMSIZE are defined elsewhere in this
// file; the figures in the comments assume RAM is 128MB located at
// 0x80000000 - 0x88000000 -- TODO confirm against the RAMSIZE definition).
//
// Resulting identity mappings (virtual address == physical address):
//   0x40000000 - 0x4fffffff  MMIO  : pdde[1] -> pdemmio[0..127] -> ptemmio
//   0x80000000 - 0x87ffffff  RAM   : pdde[2] -> pde[0..PTENUM-1] -> pte
//
// Placement, in 4KB pages counted back from the end of RAM:
//   ptemmio (128 pages) | pdemmio (1) | pdde (1) | pde (1) | pte (PTENUM)
//
// Entry encoding: ((physical address & 0xfffff000) >> 2) puts the page number
// at bit 10 and up; the low bits are flags. 0x1 (valid only) marks a pointer
// to the next level, 0xf (valid + read + write + execute) marks a leaf.
//
// Entries that are never assigned are explicitly zeroed, so they are copied
// into RAM as invalid entries rather than as leftover stack contents.
void addpageSv39() {
  //three layers
  //addr range: 0x0000000080000000 - 0x0000000088000000 for 128MB from 2GB - 2GB128MB
  //the first layer: one entry for 1GB. (512GB in total by 512 entries). need the 2nd entry
  //the second layer: one entry for 2MB. (1GB in total by 512 entries). need the 0th-63rd entries
  //the third layer: one entry for 4KB (2MB in total by 512 entries). need 64 tables, all entries used
#define PAGESIZE (4 * 1024) // 4KB = 2^12B
#define ENTRYNUM (PAGESIZE / 8) //512 2^9
#define PTEVOLUME (PAGESIZE * ENTRYNUM) // 2MB
#define PTENUM (RAMSIZE / PTEVOLUME) // 128MB / 2MB = 64
#define PDDENUM 1
#define PDENUM 1
#define PDDEADDR (0x88000000 - (PAGESIZE * (PTENUM + 2))) //0x88000000 - 0x1000*66
#define PDEADDR (0x88000000 - (PAGESIZE * (PTENUM + 1))) //0x88000000 - 0x1000*65
#define PTEADDR(i) (0x88000000 - (PAGESIZE * PTENUM) + (PAGESIZE * (i))) //0x88000000 - 0x1000*64
#define PTEMMIONUM 128
#define PDEMMIONUM 1
  uint64_t pdde[ENTRYNUM];
  uint64_t pde[ENTRYNUM];
  uint64_t pte[PTENUM][ENTRYNUM];

  //special addr for mmio 0x40000000 - 0x4fffffff
  uint64_t pdemmio[ENTRYNUM];
  uint64_t ptemmio[PTEMMIONUM][ENTRYNUM];

  // Only a few entries of these three tables are assigned below, but each one
  // is copied to RAM as a full page: clear them first so that every
  // unassigned entry is 0 (valid bit clear). pte and ptemmio are filled
  // completely by their loops and need no clearing.
  memset(pdde, 0, sizeof(pdde));
  memset(pde, 0, sizeof(pde));
  memset(pdemmio, 0, sizeof(pdemmio));

  // Top-level entry 1 (covers 0x40000000) -> pdemmio, stored one page below pdde.
  pdde[1] = (((PDDEADDR-PAGESIZE*1) & 0xfffff000) >> 2) | 0x1;

  // Second-level MMIO entries -> the matching ptemmio page.
  for(int i = 0; i < PTEMMIONUM; i++) {
    pdemmio[i] = (((PDDEADDR-PAGESIZE*(PTEMMIONUM+PDEMMIONUM-i)) & 0xfffff000) >> 2) | 0x1;
  }

  // Leaf MMIO entries: identity-map 4KB pages starting at 0x40000000.
  for(int outidx = 0; outidx < PTEMMIONUM; outidx++) {
    for(int inidx = 0; inidx < ENTRYNUM; inidx++) {
      ptemmio[outidx][inidx] = (((0x40000000 + outidx*PTEVOLUME + inidx*PAGESIZE) & 0xfffff000) >> 2) | 0xf;
    }
  }

  //0x80000000 - 0x87ffffff
  // Top-level entry 2 (covers 0x80000000) -> pde.
  pdde[2] = ((PDEADDR & 0xfffff000) >> 2) | 0x1;
  //pdde[2] = ((0x80000000&0xc0000000) >> 2) | 0xf;

  // Second-level RAM entries -> the matching pte page.
  for(int i = 0; i < PTENUM ;i++) {
    pde[i] = ((PTEADDR(i)&0xfffff000)>>2) | 0x1;
    //pde[i] = (((0x8000000+i*2*1024*1024)&0xffe00000)>>2) | 0xf;
  }

  // Leaf RAM entries: identity-map 4KB pages starting at 0x80000000.
  for(int outidx = 0; outidx < PTENUM; outidx++ ) {
    for(int inidx = 0; inidx < ENTRYNUM; inidx++ ) {
      pte[outidx][inidx] = (((0x80000000 + outidx*PTEVOLUME + inidx*PAGESIZE) & 0xfffff000)>>2) | 0xf;
    }
  }

  // Copy the tables to the end of RAM, lowest address first.
  memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM)),ptemmio, PAGESIZE*PTEMMIONUM);
  memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM)), pdemmio, PAGESIZE*PDEMMIONUM);
  memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM)), pdde, PAGESIZE*PDDENUM);
  memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDENUM)), pde, PAGESIZE*PDENUM);
  memcpy((char *)ram+(RAMSIZE-PAGESIZE*PTENUM), pte, PAGESIZE*PTENUM);
}
void init_ram(const char *img, const char *mainargs) {
assert(img != NULL);
FILE *fp = fopen(img, "rb");
......@@ -26,6 +87,9 @@ void init_ram(const char *img, const char *mainargs) {
assert(ret == 1);
fclose(fp);
//new add
addpageSv39();
//new end
if (mainargs != NULL) {
strcpy((char *)ram + MAINARGS_START, mainargs);
}
......
......@@ -21,7 +21,7 @@ static int uart_dequeue(void) {
f = (f + 1) % QUEUE_SIZE;
} else {
// generate a random key every 1s for pal
k = "uiojkl"[rand()% 6];
k = -1;//"uiojkl"[rand()% 6];
}
return k;
}
......@@ -31,11 +31,11 @@ extern "C" void uart_getc(uint8_t *ch) {
static uint32_t lasttime = 0;
uint32_t now = uptime();
*ch = 0;
if (now - lasttime > 30000) {
lasttime = now;
*ch = -1;
// if (now - lasttime > 3000) {
// lasttime = now;
*ch = uart_dequeue();
}
// }
}
void uart_putc(char c) {
......@@ -47,7 +47,18 @@ static void preset_input() {
char init_cmd[128] = "2" // choose PAL
"jjjjjjjkkkkkk" // walk to enemy
;
char *buf = init_cmd;
char busybox_cmd[128] =
"ls\n"
"echo 123\n"
"cd /root/benchmark\n"
"ls\n"
"./stream\n"
"ls\n"
"cd /root/redis\n"
"ls\n"
"ifconfig -a\n"
"./redis-server\n";
char *buf = busybox_cmd;
int i;
for (i = 0; i < strlen(buf); i ++) {
uart_enqueue(buf[i]);
......
......@@ -20,6 +20,7 @@ extern "C" void put_pixel(uint32_t pixel) {
}
extern "C" void vmem_sync(void) {
return;
SDL_UpdateTexture(texture, NULL, vmem, SCREEN_W * sizeof(uint32_t));
SDL_RenderClear(renderer);
SDL_RenderCopy(renderer, texture, NULL, NULL);
......@@ -27,6 +28,7 @@ extern "C" void vmem_sync(void) {
}
void init_sdl() {
return;
SDL_Init(SDL_INIT_VIDEO);
SDL_CreateWindowAndRenderer(SCREEN_W, SCREEN_H, 0, &window, &renderer);
SDL_SetWindowTitle(window, "NOOP");
......
......@@ -14,9 +14,18 @@ class DiffTestIO extends Bundle {
val r = Output(Vec(32, UInt(64.W)))
val commit = Output(Bool())
val thisPC = Output(UInt(64.W))
val thisINST = Output(UInt(32.W))
val isMMIO = Output(Bool())
val isRVC = Output(Bool())
val intrNO = Output(UInt(64.W))
val priviledgeMode = Output(UInt(2.W))
val mstatus = Output(UInt(64.W))
val sstatus = Output(UInt(64.W))
val mepc = Output(UInt(64.W))
val sepc = Output(UInt(64.W))
val mcause = Output(UInt(64.W))
val scause = Output(UInt(64.W))
}
class NOOPSimTop extends Module {
......@@ -38,7 +47,7 @@ class NOOPSimTop extends Module {
mem.io.in <> memdelay.io.out
mmio.io.rw <> soc.io.mmio
soc.io.mtip := mmio.io.mtip
soc.io.mtip := false.B//mmio.io.mtip
// soc.io.meip := Counter(true.B, 9973)._2 // use prime here to not overlapped by mtip
soc.io.meip := false.B // use prime here to not overlapped by mtip
......@@ -46,10 +55,18 @@ class NOOPSimTop extends Module {
val difftest = WireInit(0.U.asTypeOf(new DiffTestIO))
BoringUtils.addSink(difftest.commit, "difftestCommit")
BoringUtils.addSink(difftest.thisPC, "difftestThisPC")
BoringUtils.addSink(difftest.thisINST, "difftestThisINST")
BoringUtils.addSink(difftest.isMMIO, "difftestIsMMIO")
BoringUtils.addSink(difftest.isRVC, "difftestIsRVC")
BoringUtils.addSink(difftest.intrNO, "difftestIntrNO")
BoringUtils.addSink(difftest.r, "difftestRegs")
BoringUtils.addSink(difftest.priviledgeMode, "difftestMode")
BoringUtils.addSink(difftest.mstatus, "difftestMstatus")
BoringUtils.addSink(difftest.sstatus, "difftestSstatus")
BoringUtils.addSink(difftest.mepc, "difftestMepc")
BoringUtils.addSink(difftest.sepc, "difftestSepc")
BoringUtils.addSink(difftest.mcause, "difftestMcause")
BoringUtils.addSink(difftest.scause, "difftestScause")
io.difftest := difftest
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册