提交 6486b069 编写于 作者: Z Zihao Yu

noop,IFU: add RAS, but it does not work well with small functions

* Some small functions return quickly, so `ret` is executed before the
  correct return address has been pushed into the RAS.
上级 4b33ac8f
......@@ -29,12 +29,12 @@ class DataPathIO extends Bundle {
// Bundle carrying a fetched instruction together with its pc and
// branch-prediction information; every field is declared as an Output.
// NOTE(review): this text comes from a diff view whose +/- markers were lost.
// `isBranchTaken` and `npc` are probably the removed and the added version of
// the same line rather than two coexisting fields -- confirm against the repo.
class PcInstrIO extends Bundle {
// 32-bit instruction word
val instr = Output(UInt(32.W))
// 32-bit pc value
val pc = Output(UInt(32.W))
// one-bit taken flag (presumably the predictor's taken/not-taken guess -- TODO confirm)
val isBranchTaken = Output(Bool())
// 32-bit next-pc value (presumably the pc chosen to be fetched next -- TODO confirm)
val npc = Output(UInt(32.W))
}
// Bundle grouping a pc, branch-prediction information, and the control-path
// and data-path sub-bundles.
// NOTE(review): this text comes from a diff view whose +/- markers were lost.
// `isBranchTaken` and `npc` are probably the removed and the added version of
// the same line rather than two coexisting fields -- confirm against the repo.
class PcCtrlDataIO extends Bundle {
// 32-bit pc value
val pc = Output(UInt(32.W))
// one-bit taken flag (presumably the predictor's taken/not-taken guess -- TODO confirm)
val isBranchTaken = Output(Bool())
// 32-bit next-pc value (presumably the pc chosen to be fetched next -- TODO confirm)
val npc = Output(UInt(32.W))
// control-path signals; CtrlPathIO is declared outside this view
val ctrl = new CtrlPathIO
// data-path signals; DataPathIO is declared outside this view
val data = new DataPathIO
}
......
......@@ -41,7 +41,7 @@ class EXU extends Module with HasFuType {
val bruOut = bru.access(valid = fuValids(FuBru), src1 = src1, src2 = src2, func = fuOpType)
bru.io.pc := io.in.bits.pc
bru.io.offset := io.in.bits.data.imm
bru.io.predictTaken := io.in.bits.isBranchTaken
bru.io.npc := io.in.bits.npc
io.out.bits.br <> Mux(io.csrjmp.isTaken, io.csrjmp, bru.io.branch)
bru.io.out.ready := true.B
io.bpu1Update := bru.io
......
......@@ -47,8 +47,17 @@ class IDU extends Module with HasDecodeConst {
InstrJ -> Cat(Fill(12, instr(31)), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W))
))
when (fuType === FuBru) {
when (io.out.bits.ctrl.rfDest === 1.U && fuOpType === BruJal) {
io.out.bits.ctrl.fuOpType := BruCall
}
when (io.out.bits.ctrl.rfSrc1 === 1.U && fuOpType === BruJalr) {
io.out.bits.ctrl.fuOpType := BruRet
}
}
io.out.bits.pc := io.in.bits.pc
io.out.bits.isBranchTaken := io.in.bits.isBranchTaken
io.out.bits.npc := io.in.bits.npc
io.out.bits.ctrl.isInvOpcode := (instrType === InstrN) && io.in.valid
io.out.bits.ctrl.isNoopTrap := (instr === NOOPTrap.TRAP) && io.in.valid
......
......@@ -55,9 +55,23 @@ class BPU1 extends Module with HasBRUOpType {
val jtbHit = jtbRead.tag === io.pc.asTypeOf(jtbAddr).tag
val jtbTarget = io.pc + Cat(Fill(12, jtbRead.offset(19)), jtbRead.offset)
// RAS
// store pc table
val NRrasPctb = 64
val rasPcAddr = new TableAddr(log2Up(NRjtb))
val rasPcTable = Mem(NRrasPctb, UInt(32.W))
val rasPcTableHit = rasPcTable.read(io.pc.asTypeOf(rasPcAddr).idx) === io.pc
val NRras = 16
val ras = Mem(NRras, UInt(32.W))
val sp = Counter(NRras)
val rasTarget = ras.read(sp.value)
// update
when (io.update.in.valid) {
when (io.update.in.bits.func === BruJal) {
when (io.update.in.bits.func === BruJal || io.update.in.bits.func === BruCall) {
val jtbWrite = Wire(jtbEntry)
jtbWrite.tag := io.update.pc.asTypeOf(jtbAddr).tag
jtbWrite.offset := io.update.offset(19, 0)
......@@ -70,11 +84,20 @@ class BPU1 extends Module with HasBRUOpType {
btbWrite.isTaken := btbWrite.offset(11) // static prediction
btb.write(io.update.pc.asTypeOf(btbAddr).idx, btbWrite)
}
when (io.update.in.bits.func === BruCall) {
ras.write(sp.value + 1.U, io.update.pc + 4.U)
sp.value := sp.value + 1.U
}
when (io.update.in.bits.func === BruRet) {
sp.value := sp.value - 1.U
rasPcTable.write(io.update.pc.asTypeOf(rasPcAddr).idx, io.update.pc)
}
}
io.out.target := Mux(jtbHit, jtbTarget, btbTarget)
io.out.isTaken := jtbHit || btbTaken
assert(!(jtbHit && btbHit), "should not both hit in BTB and JBT")
io.out.target := Mux(jtbHit, jtbTarget, Mux(rasPcTableHit, rasTarget, btbTarget))
io.out.isTaken := jtbHit || btbTaken || rasPcTableHit
assert(jtbHit + btbHit + rasPcTableHit <= 1.U, "should not both hit in BTB and JBT")
}
class BPU2 extends Module {
......@@ -104,7 +127,7 @@ class BPU2 extends Module {
class IFU extends Module with HasResetVector {
val io = IO(new Bundle {
val imem = new SimpleBus(userBits = 1)
val imem = new SimpleBus(userBits = 32)
val pc = Input(UInt(32.W))
val out = Decoupled(new PcInstrIO)
val br = Flipped(new BranchIO)
......@@ -125,9 +148,10 @@ class IFU extends Module with HasResetVector {
bp2.io.in.bits := io.out.bits
bp2.io.in.valid := io.imem.resp.fire()
pc := Mux(io.br.isTaken, io.br.target,
Mux(bp1.io.out.isTaken && io.imem.req.fire(), bp1.io.out.target,
Mux(io.imem.req.fire(), pc + 4.U, pc)))//)
val npc = Mux(io.br.isTaken, io.br.target, Mux(bp1.io.out.isTaken, bp1.io.out.target, pc + 4.U))
when (io.br.isTaken || io.imem.req.fire()) {
pc := npc
}
io.flushVec := Mux(io.br.isTaken, "b1111".U, 0.U)
io.bpFlush := false.B
......@@ -137,12 +161,12 @@ class IFU extends Module with HasResetVector {
io.imem.req.bits.addr := pc
io.imem.req.bits.size := "b10".U
io.imem.req.bits.wen := false.B
io.imem.req.bits.user.map(_ := bp1.io.out.isTaken)
io.imem.req.bits.user.map(_ := npc)
io.imem.resp.ready := io.out.ready || io.flushVec(0)
io.out.valid := io.imem.resp.valid && !io.flushVec(0)
io.out.bits.instr := io.imem.resp.bits.rdata
io.imem.resp.bits.user.map(io.out.bits.isBranchTaken := _)
io.imem.resp.bits.user.map(io.out.bits.npc := _)
io.out.bits.pc := io.pc
......
......@@ -84,7 +84,7 @@ class ISU extends Module with HasSrcType with HasFuType {
o.isNoopTrap := i.isNoopTrap
}
io.out.bits.pc := io.in.bits.pc
io.out.bits.isBranchTaken := io.in.bits.isBranchTaken
io.out.bits.npc := io.in.bits.npc
io.out.bits.ctrl.isSrc1Forward := src1ForwardNextCycle
io.out.bits.ctrl.isSrc2Forward := src2ForwardNextCycle
......
......@@ -42,7 +42,7 @@ class NOOP(hasPerfCnt: Boolean = false) extends Module with NOOPConfig with HasC
val icacheHit = WireInit(false.B)
io.imem <> (if (HasIcache) {
val icache = Module(new Cache(ro = true, name = "icache", userBits = 1))
val icache = Module(new Cache(ro = true, name = "icache", userBits = 32))
icacheHit := icache.io.hit
icache.io.in <> ifu.io.imem
icache.io.flush := Fill(2, ifu.io.flushVec(0) | ifu.io.bpFlush)
......
......@@ -17,6 +17,10 @@ trait HasBRUOpType {
def BruBltu = "b0110".U
def BruBgeu = "b0111".U
// for RAS
def BruCall = "b1100".U
def BruRet = "b1101".U
def isBranch(func: UInt) = !func(3)
}
......@@ -46,8 +50,8 @@ object BRUInstr extends HasDecodeConst {
// IO of the branch unit: the generic FunctionUnitIO ports plus the
// branch-specific inputs and the resulting BranchIO.
// NOTE(review): this text comes from a diff view whose +/- markers were lost.
// `npc` and `predictTaken` are probably the added and the removed line of the
// same change rather than two coexisting ports -- confirm against the repo.
class BRUIO extends FunctionUnitIO {
// 32-bit pc input; used as the base of pc-relative targets
val pc = Input(UInt(32.W))
// 32-bit next-pc input; the BRU compares its computed target against this
// value to decide whether to assert branch.isTaken
val npc = Input(UInt(32.W))
// 32-bit offset input; added to pc or src1 to form the branch target
val offset = Input(UInt(32.W))
// one-bit prediction input; xor-ed with the actual outcome in the older
// version of the BRU logic shown in this diff
val predictTaken = Input(Bool())
// branch result (target and isTaken); BranchIO is declared outside this view
val branch = new BranchIO
}
......@@ -72,13 +76,16 @@ class BRU extends Module with HasBRUOpType {
BruBge -> ((src1.asSInt >= src2.asSInt), io.offset(31)),
BruBltu -> ((src1 < src2), io.offset(31)),
BruBgeu -> ((src1 >= src2), io.offset(31)),
BruCall -> (true.B, true.B),
BruRet -> (true.B, false.B),
BruJal -> (true.B, true.B),
BruJalr -> (true.B, false.B)
)
val actual = LookupTree(func, false.B, table.map(x => (x._1, x._2._1)))
val predict = io.predictTaken
io.branch.target := Mux(func === BruJalr, src1 + io.offset, io.pc + Mux(actual, io.offset, 4.U))
io.branch.isTaken := valid && xorBool(actual, predict)
val taken = LookupTree(func, false.B, table.map(x => (x._1, x._2._1)))
io.branch.target := Mux(func === BruJalr || func === BruRet,
src1 + io.offset, io.pc + Mux(taken, io.offset, 4.U))
// with branch predictor, this is actually to fix the wrong prediction
io.branch.isTaken := valid && (io.branch.target =/= io.npc)
io.out.bits := io.pc + 4.U
io.in.ready := true.B
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册