Commit f01197f4 authored by Yinan Xu

Merge remote-tracking branch 'origin/master' into debian-gogogo

......@@ -49,3 +49,6 @@ jobs:
- name: Run microbench
run: |
make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run 2> /dev/null
- name: Run coremark
run: |
make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run 2> /dev/null
......@@ -229,49 +229,67 @@ class MemBlock
assert(!(fenceFlush && atomicsFlush))
sbuffer.io.flush.valid := fenceFlush || atomicsFlush
// TODO: make 0/1 configurable
// AtomicsUnit
// AtomicsUnit will override other control signals,
// AtomicsUnit: AtomicsUnit will override other control signals,
// as atomics insts (LR/SC/AMO) will block the pipeline
val st0_atomics = reservationStations(2).io.deq.valid && reservationStations(2).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(3).io.deq.valid && reservationStations(3).io.deq.bits.uop.ctrl.fuType === FuType.mou
// amo should always go through store issue queue 0
assert(!st1_atomics)
val s_normal :: s_atomics_0 :: s_atomics_1 :: Nil = Enum(3)
val state = RegInit(s_normal)
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
val atomic_rs0 = exuParameters.LduCnt + 0
val atomic_rs1 = exuParameters.LduCnt + 1
val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType === FuType.mou
// dispatch 0 takes priority
atomicsUnit.io.in.valid := st0_atomics
atomicsUnit.io.in.bits := reservationStations(2).io.deq.bits
when (st0_atomics) {
reservationStations(0).io.deq.ready := atomicsUnit.io.in.ready
reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
}
when(atomicsUnit.io.dtlb.req.valid) {
dtlb.io.requestor(0) <> atomicsUnit.io.dtlb
// take load unit 0's tlb port
// make sure not to disturb loadUnit
assert(!loadUnits(0).io.dtlb.req.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
state := s_atomics_0
assert(!st1_atomics)
}
when (st1_atomics) {
reservationStations(atomic_rs1).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(1).io.stin.valid := false.B
when(atomicsUnit.io.tlbFeedback.valid) {
assert(!storeUnits(0).io.tlbFeedback.valid)
atomicsUnit.io.tlbFeedback <> reservationStations(exuParameters.LduCnt + 0).io.feedback
state := s_atomics_1
assert(!st0_atomics)
}
when (atomicsUnit.io.out.valid) {
assert(state === s_atomics_0 || state === s_atomics_1)
state := s_normal
}
atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
atomicsUnit.io.dcache <> io.dcache.atomics
atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
// for atomicsUnit, it uses loadUnit(0)'s TLB port
when (state === s_atomics_0 || state === s_atomics_1) {
atomicsUnit.io.dtlb <> dtlb.io.requestor(0)
when(atomicsUnit.io.out.valid){
// take load unit 0's write back port
assert(!loadUnits(0).io.ldout.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
loadUnits(0).io.ldout.ready := false.B
// make sure there are no in-flight uops in the load unit
assert(!loadUnits(0).io.dtlb.req.valid)
assert(!loadUnits(0).io.ldout.valid)
}
when (state === s_atomics_0) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs0).io.feedback
assert(!storeUnits(0).io.tlbFeedback.valid)
}
when (state === s_atomics_1) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs1).io.feedback
assert(!storeUnits(1).io.tlbFeedback.valid)
}
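
The hunk above generalizes atomics issue from hard-coded reservation-station indices to a small state machine: an atomic uop may come from either store issue port, and while the unit is busy (s_atomics_0/s_atomics_1) it borrows loadUnit(0)'s TLB and writeback ports and steers TLB feedback back to the issuing reservation station. Below is a minimal, self-contained sketch of the same arbitration idea, with hypothetical IO names (not the XiangShan interfaces):

import chisel3._
import chisel3.util._

// Minimal sketch, assuming hypothetical IO: two issue ports feed one
// atomics unit; `state` remembers which port is being served so completion
// can be routed back to it. Port 0 has priority, as in the code above.
class AtomicsArbSketch extends Module {
  val io = IO(new Bundle {
    val in      = Vec(2, Flipped(Decoupled(UInt(64.W)))) // two store-issue ports
    val out     = Decoupled(UInt(64.W))                  // to the atomics unit
    val done    = Input(Bool())                          // atomic op finished
    val serving = Output(Valid(UInt(1.W)))               // which port is in flight
  })
  val s_normal :: s_port0 :: s_port1 :: Nil = Enum(3)
  val state = RegInit(s_normal)
  val idle  = state === s_normal
  io.out.valid   := idle && (io.in(0).valid || io.in(1).valid)
  io.out.bits    := Mux(io.in(0).valid, io.in(0).bits, io.in(1).bits)
  io.in(0).ready := idle && io.out.ready
  io.in(1).ready := idle && io.out.ready && !io.in(0).valid
  when (io.out.fire()) { state := Mux(io.in(0).valid, s_port0, s_port1) }
  when (io.done)       { state := s_normal }
  io.serving.valid := !idle
  io.serving.bits  := (state === s_port1).asUInt
}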
lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx
......
......@@ -273,65 +273,81 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val entry = Reg(Vec(TlbEntrySize, new TlbEntry))
val g = VecInit(entry.map(_.perm.g)).asUInt // TODO: check whether a reverse is needed
val entryHitVec = widthMapSeq{i => VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/))) }
val hitVec = widthMapSeq{ i => (v.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
val pfHitVec = widthMapSeq{ i => (pf.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
val pfArray = widthMap{ i => ParallelOR(pfHitVec(i)).asBool && valid(i) && vmEnable }
val hit = widthMap{ i => ParallelOR(hitVec(i)).asBool && valid(i) && vmEnable && ~pfArray(i) }
val miss = widthMap{ i => !hit(i) && valid(i) && vmEnable && ~pfArray(i) }
val hitppn = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.ppn)) }
val hitPerm = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.perm)) }
val hitLevel= widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.level)) }
val multiHit = {
val hitSum = widthMap{ i => PopCount(hitVec(i)) }
val pfHitSum = widthMap{ i => PopCount(pfHitVec(i)) }
ParallelOR(widthMap{ i => !(hitSum(i)===0.U || hitSum(i)===1.U) || !(pfHitSum(i)===0.U || pfHitSum(i)===1.U)})
}
def TLBRead(i: Int) = {
val entryHitVec = VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
val reqAddrReg = if (isDtlb) RegNext(reqAddr(i)) else reqAddr(i)
val cmdReg = if (isDtlb) RegNext(cmd(i)) else cmd(i)
val validReg = if (isDtlb) RegNext(valid(i)) else valid(i)
val entryHitVecReg = if (isDtlb) RegNext(entryHitVec) else entryHitVec
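
The if (isDtlb) RegNext(...) pattern above adds one pipeline stage to the DTLB (the entry array is read in the request cycle, hit/permission checks happen the next cycle) while the ITLB stays combinational. A small helper condensing the idiom, sketched under the assumption that all staged signals share the same plain RegNext timing (hypothetical name, not in the real code):

// Optionally register a signal, selected at elaboration time.
def pipe[T <: Data](x: T, staged: Boolean): T = if (staged) RegNext(x) else x
// e.g. val cmdReg = pipe(cmd(i), isDtlb)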
val hitVec = (v.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfHitVec = (pf.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfArray = ParallelOR(pfHitVec).asBool && validReg && vmEnable
val hit = ParallelOR(hitVec).asBool && validReg && vmEnable && ~pfArray
val miss = !hit && validReg && vmEnable && ~pfArray
val hitppn = ParallelMux(hitVec zip entry.map(_.ppn))
val hitPerm = ParallelMux(hitVec zip entry.map(_.perm))
val hitLevel= ParallelMux(hitVec zip entry.map(_.level))
val multiHit = {
val hitSum = PopCount(hitVec)
val pfHitSum = PopCount(pfHitVec)
!(hitSum===0.U || hitSum===1.U) || !(pfHitSum===0.U || pfHitSum===1.U)
}
// resp // TODO: A/D bits are not handled yet
for(i <- 0 until Width) {
val paddr = LookupTreeDefault(hitLevel(i), Cat(hitppn(i), reqAddr(i).off), List(
0.U -> Cat(hitppn(i)(ppnLen - 1, 2*vpnnLen), reqAddr(i).vpn(2*vpnnLen - 1, 0), reqAddr(i).off),
1.U -> Cat(hitppn(i)(ppnLen - 1, vpnnLen), reqAddr(i).vpn(vpnnLen - 1, 0), reqAddr(i).off),
2.U -> Cat(hitppn(i), reqAddr(i).off)
// resp // TODO: A/D bits are not handled yet
val paddr = LookupTreeDefault(hitLevel, Cat(hitppn, reqAddrReg.off), List(
0.U -> Cat(hitppn(ppnLen - 1, 2*vpnnLen), reqAddrReg.vpn(2*vpnnLen - 1, 0), reqAddrReg.off),
1.U -> Cat(hitppn(ppnLen - 1, vpnnLen), reqAddrReg.vpn(vpnnLen - 1, 0), reqAddrReg.off),
2.U -> Cat(hitppn, reqAddrReg.off)
))
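
The three LookupTree cases above splice VPN bits into the PPN for superpages: a level-0 entry maps a 1 GiB region (the low 2*vpnnLen VPN bits pass through), level 1 a 2 MiB region, level 2 a 4 KiB page. A pure-Scala width check, assuming the usual Sv39 parameters (vpnnLen = 9, ppnLen = 44, 12-bit page offset), which may differ from the actual config:

// All three concatenations must yield the same physical-address width.
val vpnnLen = 9; val ppnLen = 44; val offLen = 12
val w0 = (ppnLen - 2 * vpnnLen) + 2 * vpnnLen + offLen // 1 GiB superpage
val w1 = (ppnLen - vpnnLen) + vpnnLen + offLen         // 2 MiB superpage
val w2 = ppnLen + offLen                               // 4 KiB page
assert(w0 == w1 && w1 == w2)                           // all equal 56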
val vaddr = SignExt(req(i).bits.vaddr, PAddrBits)
req(i).ready := resp(i).ready
resp(i).valid := valid(i)
resp(i).bits.paddr := Mux(vmEnable, paddr, SignExt(req(i).bits.vaddr, PAddrBits))
resp(i).bits.miss := miss(i)
resp(i).valid := validReg
resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr)
resp(i).bits.miss := miss
val perm = hitPerm(i) // NOTE: given the excp, the out module chooses which one to use?
val update = false.B && hit(i) && (!hitPerm(i).a || !hitPerm(i).d && TlbCmd.isWrite(cmd(i))) // update A/D through exception
val perm = hitPerm // NOTE: given the excp, the out module chooses which one to use?
val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception
val modeCheck = !(mode === ModeU && !perm.u || mode === ModeS && perm.u && (!priv.sum || ifecth))
val ldPf = (pfArray(i) && TlbCmd.isRead(cmd(i)) && true.B /*!isAMO*/) || hit(i) && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmd(i)) && true.B/*!isAMO*/) // TODO: handle isAMO
val stPf = (pfArray(i) && TlbCmd.isWrite(cmd(i)) || false.B /*isAMO*/ ) || hit(i) && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmd(i)) || false.B/*TODO isAMO. */)
val instrPf = (pfArray(i) && TlbCmd.isExec(cmd(i))) || hit(i) && !(modeCheck && perm.x) && TlbCmd.isExec(cmd(i))
val ldPf = (pfArray && TlbCmd.isRead(cmdReg) && true.B /*!isAMO*/) || hit && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmdReg) && true.B/*!isAMO*/) // TODO: handle isAMO
val stPf = (pfArray && TlbCmd.isWrite(cmdReg) || false.B /*isAMO*/ ) || hit && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmdReg) || false.B/*TODO isAMO. */)
val instrPf = (pfArray && TlbCmd.isExec(cmdReg)) || hit && !(modeCheck && perm.x) && TlbCmd.isExec(cmdReg)
resp(i).bits.excp.pf.ld := ldPf || update
resp(i).bits.excp.pf.st := stPf || update
resp(i).bits.excp.pf.instr := instrPf || update
(hit, miss, pfHitVec, multiHit)
}
val readResult = (0 until Width).map(TLBRead(_))
val hitVec = readResult.map(res => res._1)
val missVec = readResult.map(res => res._2)
val pfHitVecVec = readResult.map(res => res._3)
val multiHitVec = readResult.map(res => res._4)
val hasMissReq = Cat(missVec).orR
// ptw
val state_idle :: state_wait :: Nil = Enum(2)
val state = RegInit(state_idle)
ptw <> DontCare // TODO: need check it
ptw.req.valid := ParallelOR(miss).asBool && state===state_idle && !sfence.valid
ptw.req.valid := hasMissReq && state===state_idle && !sfence.valid
ptw.resp.ready := state===state_wait
// val ptwReqSeq = Wire(Seq.fill(Width)(new comBundle()))
val ptwReqSeq = Seq.fill(Width)(Wire(new comBundle()))
for (i <- 0 until Width) {
ptwReqSeq(i).valid := valid(i) && miss(i)
ptwReqSeq(i).roqIdx := req(i).bits.roqIdx
ptwReqSeq(i).bits.vpn := reqAddr(i).vpn
ptwReqSeq(i).valid := ((if (isDtlb) RegNext(valid(i)) else valid(i)) && missVec(i))
ptwReqSeq(i).roqIdx := (if (isDtlb) RegNext(req(i).bits.roqIdx) else req(i).bits.roqIdx)
ptwReqSeq(i).bits.vpn := (if (isDtlb) RegNext(reqAddr(i).vpn) else reqAddr(i).vpn)
}
ptw.req.bits := Compare(ptwReqSeq).bits
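
Compare(ptwReqSeq) decides which missing request the page-table walker serves; it favors the oldest request by roqIdx, so the instruction closest to commit is refilled first. A hedged sketch of that reduction (naive age compare, chisel3 types assumed in scope; the real code uses the project's comBundle/Compare, which handles roqIdx flag wrap):

// Pairwise-reduce to the oldest valid request. `_2` stands in for roqIdx;
// a real implementation must compare ages wrap-aware (cf. isAfter).
def oldest(a: (Bool, UInt), b: (Bool, UInt)): (Bool, UInt) = {
  val pickA = a._1 && (!b._1 || a._2 <= b._2)
  (a._1 || b._1, Mux(pickA, a._2, b._2))
}
// val winner = reqs.reduce(oldest)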
switch (state) {
is (state_idle) {
when (ParallelOR(miss).asBool && ptw.req.fire()) {
when (hasMissReq && ptw.req.fire()) {
state := state_wait
}
assert(!ptw.resp.valid)
......@@ -345,7 +361,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
}
// reset pf when pf hit
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVec(i)).asUInt, 0.U) })
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVecVec(i)).asUInt, 0.U) })
val pfHitRefill = ParallelOR(pfHitReset.asBools)
// refill
......@@ -409,15 +425,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
ExcitingUtils.addSource(valid(1)/* && vmEnable*/, "perfCntDtlbReqCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/, "perfCntDtlbReqCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/, "perfCntDtlbReqCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && miss(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && miss(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && miss(3), "perfCntDtlbMissCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && missVec(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && missVec(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && missVec(3), "perfCntDtlbMissCnt3", Perf)
}
if (!env.FPGAPlatform && !isDtlb) {
ExcitingUtils.addSource(valid(0)/* && vmEnable*/, "perfCntItlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntItlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntItlbMissCnt0", Perf)
}
// Log
......@@ -428,7 +444,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
XSDebug(sfence.valid, p"Sfence: ${sfence}\n")
XSDebug(ParallelOR(valid)|| ptw.resp.valid, p"CSR: ${csr}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hit).asUInt)} miss:${Binary(VecInit(miss).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n")
XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n")
......@@ -437,7 +453,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// assert((hit(i)&pfArray(i))===false.B, "hit(%d):%d pfArray(%d):%d v:0x%x pf:0x%x", i.U, hit(i), i.U, pfArray(i), v, pf)
// }
// for(i <- 0 until Width) {
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVec:0x${Hexadecimal(VecInit(pfHitVec(i)).asUInt)}\n")
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVecVec:0x${Hexadecimal(VecInit(pfHitVecVec(i)).asUInt)}\n")
// }
// for(i <- 0 until TlbEntrySize) {
// XSDebug(multiHit, p"entry(${i.U}): v:${v(i)} ${entry(i)}\n")
......
......@@ -41,7 +41,7 @@ trait HasICacheParameters extends HasL1CacheParameters {
// icache Queue
val groupAlign = log2Up(cacheParams.blockBytes)
def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
//ECC encoding
def encRowBits = cacheParams.dataCode.width(rowBits)
def encTagBits = cacheParams.tagCode.width(tagBits)
......@@ -178,7 +178,7 @@ class ICacheMetaArray extends ICachArray
val metaArray = Module(new SRAMTemplate(UInt(encTagBits.W), set=nSets, way=nWays, shouldReset = true))
//read
//read
metaArray.io.r.req.valid := io.read.valid
io.read.ready := metaArray.io.r.req.ready
io.write.ready := DontCare
......@@ -206,7 +206,7 @@ class ICacheDataArray extends ICachArray
val dataArray = List.fill(blockWords){ Module(new SRAMTemplate(UInt(encRowBits.W), set=nSets, way = nWays))}
//read
//read
//do ECC decoding after the way is chosen
for(b <- 0 until blockWords){
dataArray(b).io.r.req.valid := io.read.valid
......@@ -225,8 +225,8 @@ class ICacheDataArray extends ICachArray
for(b <- 0 until blockWords){
dataArray(b).io.w.req.valid := io.write.valid
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
data=write_data_encoded(b),
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
data=write_data_encoded(b),
waymask=write.waymask)
}
......@@ -273,7 +273,7 @@ class ICache extends ICacheModule
val metaArray = Module(new ICacheMetaArray)
val dataArray = Module(new ICacheDataArray)
// 256-bit valid
val validArray = RegInit(0.U((nSets * nWays).W))
val validArray = RegInit(0.U((nSets * nWays).W))
//----------------------------
// Stage 1
......@@ -283,7 +283,7 @@ class ICache extends ICacheModule
s1_req_mask := io.req.bits.mask
s2_ready := WireInit(false.B)
s1_fire := s1_valid && (s2_ready || io.flush(0))
// SRAM(Meta and Data) read request
val s1_idx = get_idx(s1_req_pc)
......@@ -294,8 +294,8 @@ class ICache extends ICacheModule
XSDebug("[Stage 1] v : r : f (%d %d %d) request pc: 0x%x mask: %b\n",s1_valid,s2_ready,s1_fire,s1_req_pc,s1_req_mask)
XSDebug("[Stage 1] index: %d\n",s1_idx)
//----------------------------
// Stage 2
//----------------------------
......@@ -325,9 +325,9 @@ class ICache extends ICacheModule
val invalidVec = ~validMeta
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = PriorityMask(invalidVec)
val waymask = Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))
s2_hit := ParallelOR(hitVec) || s2_tlb_resp.excp.pf.instr || s2_access_fault
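
Way selection above follows a fixed priority: on a hit use the hit way; on a miss prefer any still-invalid way; only when the set is full fall back to the replacement policy's victim. The same priority as a standalone sketch (the original's PriorityMask returns the one-hot lowest invalid way; PriorityEncoderOH is used here under that assumption, chisel3.util in scope):

// hitVec/validVec are per-way bits; victim is the replacement policy's choice.
def selectWay(hit: Bool, hitVec: UInt, validVec: UInt, victim: UInt): UInt = {
  val invalidVec = ~validVec
  Mux(hit, hitVec, Mux(invalidVec.orR, PriorityEncoderOH(invalidVec), victim))
}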
s2_ready := s2_fire || !s2_valid || io.flush(0)
......@@ -336,8 +336,8 @@ class ICache extends ICacheModule
XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
XSDebug("[Stage 2] tag: %x hit:%d\n",s2_tag,s2_hit)
XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
//----------------------------
// Stage 3
//----------------------------
......@@ -351,16 +351,16 @@ class ICache extends ICacheModule
val s3_access_fault = RegEnable(s2_access_fault,init=false.B,enable=s2_fire)
when(io.flush(1)) { s3_valid := false.B }
.elsewhen(s2_fire) { s3_valid := s2_valid }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
val refillDataReg = Reg(Vec(refillCycles,UInt(beatBits.W)))
// icache hit
// icache hit
// data ECC encoding
// simply cut the hit cacheline
val dataHitWay = VecInit(s3_data.map(b => Mux1H(s3_wayMask,b).asUInt))
val outPacket = Wire(UInt((FetchWidth * 32).W))
val dataHitWayDecoded = VecInit(
(0 until blockWords).map{r =>
val dataHitWayDecoded = VecInit(
(0 until blockWords).map{r =>
val row = dataHitWay.asTypeOf(Vec(blockWords,UInt(encRowBits.W)))(r)
val decodedRow = cacheParams.dataCode.decode(row)
assert(!(s3_valid && s3_hit && decodedRow.uncorrectable))
......@@ -368,7 +368,7 @@ class ICache extends ICacheModule
}
)
outPacket := cutHelper(dataHitWay,s3_req_pc(5,1).asUInt,s3_req_mask.asUInt)
//ICache MissQueue
val icacheMissQueue = Module(new IcacheMissQueue)
val blocking = RegInit(false.B)
......@@ -394,9 +394,9 @@ class ICache extends ICacheModule
//refill write
val metaWriteReq = icacheMissQueue.io.meta_write.bits
icacheMissQueue.io.meta_write.ready := true.B
metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag,
idx=metaWriteReq.meta_write_idx,
metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag,
idx=metaWriteReq.meta_write_idx,
waymask=metaWriteReq.meta_write_waymask)
val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool())))
......@@ -408,7 +408,7 @@ class ICache extends ICacheModule
//data
icacheMissQueue.io.refill.ready := true.B
val refillReq = icacheMissQueue.io.refill.bits
dataArray.io.write.valid := icacheMissQueue.io.refill.valid
dataArray.io.write.valid := icacheMissQueue.io.refill.valid
dataArray.io.write.bits.apply(data=refillReq.refill_data,
idx=refillReq.refill_idx,
waymask=refillReq.refill_waymask)
......@@ -440,7 +440,7 @@ class ICache extends ICacheModule
//----------------------------
//icache request
io.req.ready := metaArray.io.read.ready && dataArray.io.read.ready && s2_ready
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || icacheMissQueue.io.resp.valid)
io.resp.bits.data := Mux((s3_valid && s3_hit),outPacket,refillDataOut)
......@@ -456,7 +456,7 @@ class ICache extends ICacheModule
io.tlb.req.bits.cmd := TlbCmd.exec
io.tlb.req.bits.roqIdx := DontCare
io.tlb.req.bits.debug.pc := s2_req_pc
//To L1 plus
io.mem_acquire <> icacheMissQueue.io.mem_acquire
icacheMissQueue.io.mem_grant <> io.mem_grant
......
......@@ -128,7 +128,7 @@ class LoadPipe extends DCacheModule
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
val resp = Wire(ValidIO(new DCacheWordResp))
......
......@@ -11,7 +11,7 @@ import chisel3.experimental.chiselName
trait HasBPUParameter extends HasXSParameter {
val BPUDebug = true
val EnableCFICommitLog = true
val EnbaleCFIPredLog = false
val EnbaleCFIPredLog = true
val EnableBPUTimeRecord = EnableCFICommitLog || EnbaleCFIPredLog
}
......@@ -368,6 +368,9 @@ class BPUStage3 extends BPUStage {
XSDebug(io.inFire && s3IO.predecode.mask(i), "predecode(%d): brType:%d, br:%d, jal:%d, jalr:%d, call:%d, ret:%d, RVC:%d, excType:%d\n",
i.U, p.brType, p.isBr, p.isJal, p.isJalr, p.isCall, p.isRet, p.isRVC, p.excType)
}
XSDebug(p"brs:${Binary(brs)} jals:${Binary(jals)} jalrs:${Binary(jalrs)} calls:${Binary(calls)} rets:${Binary(rets)} rvcs:${Binary(RVCs)}\n")
XSDebug(p"callIdx:${callIdx} retIdx:${retIdx}\n")
XSDebug(p"brPred:${Binary(brPred)} loopRes:${Binary(loopRes)} prevHalfTaken:${prevHalfTaken} brTakens:${Binary(brTakens)}\n")
}
if (EnbaleCFIPredLog) {
......@@ -560,14 +563,6 @@ class BPU extends BaseBPU {
s2.io.debug_hist := s2_hist
s3.io.debug_hist := s3_hist
// val s1_histPtr = RegEnable(io.in.histPtr, enable=s1_fire)
// val s2_histPtr = RegEnable(s1_histPtr, enable=s2_fire)
// val s3_histPtr = RegEnable(s2_histPtr, enable=s3_fire)
// s1.io.debug_histPtr := s1_histPtr
// s2.io.debug_histPtr := s2_histPtr
// s3.io.debug_histPtr := s3_histPtr
//**********************Stage 2****************************//
tage.io.flush := io.flush(1) // TODO: fix this
tage.io.pc.valid := s2_fire
......
......@@ -35,8 +35,8 @@ class BIM extends BasePredictor with BimParams {
val bimAddr = new TableAddr(log2Up(BimSize), BimBanks)
val bankAlignedPC = bankAligned(io.pc.bits)
val pcLatch = RegEnable(bankAlignedPC, io.pc.valid)
val if1_bankAlignedPC = bankAligned(io.pc.bits)
val if2_pc = RegEnable(if1_bankAlignedPC, io.pc.valid)
val bim = List.fill(BimBanks) {
Module(new SRAMTemplate(UInt(2.W), set = nRows, shouldReset = false, holdRead = true))
......@@ -48,34 +48,34 @@ class BIM extends BasePredictor with BimParams {
when (resetRow === (nRows-1).U) { doing_reset := false.B }
// "bank" here refers to a cache bank
val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
val if1_startsAtOddBank = bankInGroup(if1_bankAlignedPC)(0)
val realMask = Mux(startsAtOddBank,
val if1_realMask = Mux(if1_startsAtOddBank,
Cat(io.inMask(bankWidth-1,0), io.inMask(PredictWidth-1, bankWidth)),
io.inMask)
val isInNextRow = VecInit((0 until BimBanks).map(i => Mux(startsAtOddBank, (i < bankWidth).B, false.B)))
val if1_isInNextRow = VecInit((0 until BimBanks).map(i => Mux(if1_startsAtOddBank, (i < bankWidth).B, false.B)))
val baseRow = bimAddr.getBankIdx(bankAlignedPC)
val if1_baseRow = bimAddr.getBankIdx(if1_bankAlignedPC)
val realRow = VecInit((0 until BimBanks).map(b => Mux(isInNextRow(b), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow)))
val if1_realRow = VecInit((0 until BimBanks).map(b => Mux(if1_isInNextRow(b), (if1_baseRow+1.U)(log2Up(nRows)-1, 0), if1_baseRow)))
val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid)))
val if2_realRow = VecInit(if1_realRow.map(RegEnable(_, enable=io.pc.valid)))
for (b <- 0 until BimBanks) {
bim(b).io.r.req.valid := realMask(b) && io.pc.valid
bim(b).io.r.req.bits.setIdx := realRow(b)
bim(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
bim(b).io.r.req.bits.setIdx := if1_realRow(b)
}
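
The if1_/if2_ renames make each signal's pipeline stage explicit: if1_* values address the SRAMs, if2_* values are the registered copies consumed one cycle later alongside the read data. The underlying banking trick: a fetch group starting in the odd cache bank takes its first half from the odd banks of row N and wraps its second half into the even banks of row N+1, so the request mask is rotated and the wrapped banks use an incremented row. A sketch of the rotation (PredictWidth = 16, bankWidth = 8 assumed, chisel3.util.Cat in scope):

// Rotate the per-slot mask when the fetch group starts at the odd bank,
// mirroring if1_realMask above.
def rotatedMask(mask: UInt, startsOdd: Bool, pw: Int, bw: Int): UInt =
  Mux(startsOdd, Cat(mask(bw - 1, 0), mask(pw - 1, bw)), mask)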
val bimRead = VecInit(bim.map(_.io.r.resp.data(0)))
val if2_bimRead = VecInit(bim.map(_.io.r.resp.data(0)))
val startsAtOddBankLatch = bankInGroup(pcLatch)(0)
val if2_startsAtOddBank = bankInGroup(if2_pc)(0)
for (b <- 0 until BimBanks) {
val realBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
val ctr = bimRead(realBank)
val realBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if2_startsAtOddBank, (b-bankWidth).U, b.U))
val ctr = if2_bimRead(realBank)
io.resp.ctrs(b) := ctr
io.meta.ctrs(b) := ctr
}
......
......@@ -72,9 +72,9 @@ class BTB extends BasePredictor with BTBParams{
override val io = IO(new BTBIO)
val btbAddr = new TableAddr(log2Up(BtbSize/BtbWays), BtbBanks)
val bankAlignedPC = bankAligned(io.pc.bits)
val if1_bankAlignedPC = bankAligned(io.pc.bits)
val pcLatch = RegEnable(bankAlignedPC, io.pc.valid)
val if2_pc = RegEnable(if1_bankAlignedPC, io.pc.valid)
val data = List.fill(BtbWays) {
List.fill(BtbBanks) {
......@@ -91,61 +91,61 @@ class BTB extends BasePredictor with BTBParams{
// BTB read requests
// "bank" here refers to a cache bank
val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
val if1_startsAtOddBank = bankInGroup(if1_bankAlignedPC)(0)
val baseBank = btbAddr.getBank(bankAlignedPC)
val if1_baseBank = btbAddr.getBank(if1_bankAlignedPC)
val realMask = Mux(startsAtOddBank,
val if1_realMask = Mux(if1_startsAtOddBank,
Cat(io.inMask(bankWidth-1,0), io.inMask(PredictWidth-1, bankWidth)),
io.inMask)
val realMaskLatch = RegEnable(realMask, io.pc.valid)
val if2_realMask = RegEnable(if1_realMask, io.pc.valid)
val isInNextRow = VecInit((0 until BtbBanks).map(i => Mux(startsAtOddBank, (i < bankWidth).B, false.B)))
val if1_isInNextRow = VecInit((0 until BtbBanks).map(i => Mux(if1_startsAtOddBank, (i < bankWidth).B, false.B)))
val baseRow = btbAddr.getBankIdx(bankAlignedPC)
val if1_baseRow = btbAddr.getBankIdx(if1_bankAlignedPC)
val nextRowStartsUp = baseRow.andR
val if1_nextRowStartsUp = if1_baseRow.andR
val realRow = VecInit((0 until BtbBanks).map(b => Mux(isInNextRow(b), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow)))
val if1_realRow = VecInit((0 until BtbBanks).map(b => Mux(if1_isInNextRow(b), (if1_baseRow+1.U)(log2Up(nRows)-1, 0), if1_baseRow)))
val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid)))
val if2_realRow = VecInit(if1_realRow.map(RegEnable(_, enable=io.pc.valid)))
for (w <- 0 until BtbWays) {
for (b <- 0 until BtbBanks) {
meta(w)(b).io.r.req.valid := realMask(b) && io.pc.valid
meta(w)(b).io.r.req.bits.setIdx := realRow(b)
data(w)(b).io.r.req.valid := realMask(b) && io.pc.valid
data(w)(b).io.r.req.bits.setIdx := realRow(b)
meta(w)(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
meta(w)(b).io.r.req.bits.setIdx := if1_realRow(b)
data(w)(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
data(w)(b).io.r.req.bits.setIdx := if1_realRow(b)
}
}
for (b <- 0 to 1) {
edata(b).io.r.req.valid := io.pc.valid
val row = if (b == 0) { Mux(startsAtOddBank, realRow(bankWidth), realRow(0)) }
else { Mux(startsAtOddBank, realRow(0), realRow(bankWidth))}
val row = if (b == 0) { Mux(if1_startsAtOddBank, if1_realRow(bankWidth), if1_realRow(0)) }
else { Mux(if1_startsAtOddBank, if1_realRow(0), if1_realRow(bankWidth))}
edata(b).io.r.req.bits.setIdx := row
}
// Entries read from SRAM
val metaRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => meta(w)(b).io.r.resp.data(0)))))
val dataRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => data(w)(b).io.r.resp.data(0)))))
val edataRead = VecInit((0 to 1).map(i => edata(i).io.r.resp.data(0)))
val if2_metaRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => meta(w)(b).io.r.resp.data(0)))))
val if2_dataRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => data(w)(b).io.r.resp.data(0)))))
val if2_edataRead = VecInit((0 to 1).map(i => edata(i).io.r.resp.data(0)))
val baseBankLatch = btbAddr.getBank(pcLatch)
val startsAtOddBankLatch = bankInGroup(pcLatch)(0)
val baseTag = btbAddr.getTag(pcLatch)
val if2_baseBank = btbAddr.getBank(if2_pc)
val if2_startsAtOddBank = bankInGroup(if2_pc)(0)
val if2_baseTag = btbAddr.getTag(if2_pc)
val tagIncremented = VecInit((0 until BtbBanks).map(b => RegEnable(isInNextRow(b.U) && nextRowStartsUp, io.pc.valid)))
val realTags = VecInit((0 until BtbBanks).map(b => Mux(tagIncremented(b), baseTag + 1.U, baseTag)))
val if2_tagIncremented = VecInit((0 until BtbBanks).map(b => RegEnable(if1_isInNextRow(b.U) && if1_nextRowStartsUp, io.pc.valid)))
val if2_realTags = VecInit((0 until BtbBanks).map(b => Mux(if2_tagIncremented(b), if2_baseTag + 1.U, if2_baseTag)))
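
if2_tagIncremented covers the corner case where the wrapped banks' row index overflows (if1_baseRow is all ones, i.e. if1_nextRowStartsUp): the next row then lives under the next tag, so those banks must match baseTag + 1. Condensed as a per-bank helper (hypothetical name):

// Expected tag for a bank, mirroring if2_realTags above: banks that wrapped
// into the next row match baseTag + 1 when the row index overflowed.
def expectedTag(baseTag: UInt, inNextRow: Bool, rowOverflow: Bool): UInt =
  Mux(inNextRow && rowOverflow, baseTag + 1.U, baseTag)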
val totalHits = VecInit((0 until BtbBanks).map( b =>
val if2_totalHits = VecInit((0 until BtbBanks).map( b =>
VecInit((0 until BtbWays).map( w =>
// This should correspond to the real mask from last valid cycle!
metaRead(w)(b).tag === realTags(b) && metaRead(w)(b).valid && realMaskLatch(b)
if2_metaRead(w)(b).tag === if2_realTags(b) && if2_metaRead(w)(b).valid && if2_realMask(b)
))
))
val bankHits = VecInit(totalHits.map(_.reduce(_||_)))
val bankHitWays = VecInit(totalHits.map(PriorityEncoder(_)))
val if2_bankHits = VecInit(if2_totalHits.map(_.reduce(_||_)))
val if2_bankHitWays = VecInit(if2_totalHits.map(PriorityEncoder(_)))
def allocWay(valids: UInt, meta_tags: UInt, req_tag: UInt) = {
......@@ -167,30 +167,30 @@ class BTB extends BasePredictor with BTBParams{
}
}
val allocWays = VecInit((0 until BtbBanks).map(b =>
allocWay(VecInit(metaRead.map(w => w(b).valid)).asUInt,
VecInit(metaRead.map(w => w(b).tag)).asUInt,
realTags(b))))
allocWay(VecInit(if2_metaRead.map(w => w(b).valid)).asUInt,
VecInit(if2_metaRead.map(w => w(b).tag)).asUInt,
if2_realTags(b))))
val writeWay = VecInit((0 until BtbBanks).map(
b => Mux(bankHits(b), bankHitWays(b), allocWays(b))
b => Mux(if2_bankHits(b), if2_bankHitWays(b), allocWays(b))
))
for (b <- 0 until BtbBanks) {
val realBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
val meta_entry = metaRead(bankHitWays(realBank))(realBank)
val data_entry = dataRead(bankHitWays(realBank))(realBank)
val edataBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, 1.U, 0.U)
else Mux(startsAtOddBankLatch, 0.U, 1.U))
val realBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if2_startsAtOddBank, (b-bankWidth).U, b.U))
val meta_entry = if2_metaRead(if2_bankHitWays(realBank))(realBank)
val data_entry = if2_dataRead(if2_bankHitWays(realBank))(realBank)
val edataBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, 1.U, 0.U)
else Mux(if2_startsAtOddBank, 0.U, 1.U))
// Use real pc to calculate the target
io.resp.targets(b) := Mux(data_entry.extended, edataRead(edataBank), (pcLatch.asSInt + (b << 1).S + data_entry.offset).asUInt)
io.resp.hits(b) := bankHits(realBank)
io.resp.targets(b) := Mux(data_entry.extended, if2_edataRead(edataBank), (if2_pc.asSInt + (b << 1).S + data_entry.offset).asUInt)
io.resp.hits(b) := if2_bankHits(realBank)
io.resp.types(b) := meta_entry.btbType
io.resp.isRVC(b) := meta_entry.isRVC
io.meta.writeWay(b) := writeWay(realBank)
io.meta.hitJal(b) := bankHits(realBank) && meta_entry.btbType === BTBtype.J
io.meta.hitJal(b) := if2_bankHits(realBank) && meta_entry.btbType === BTBtype.J
}
def pdInfoToBTBtype(pd: PreDecodeInfo) = {
......@@ -244,35 +244,35 @@ class BTB extends BasePredictor with BTBParams{
XSDebug("isInNextRow: ")
(0 until BtbBanks).foreach(i => {
XSDebug(false, true.B, "%d ", isInNextRow(i))
XSDebug(false, true.B, "%d ", if1_isInNextRow(i))
if (i == BtbBanks-1) { XSDebug(false, true.B, "\n") }
})
val validLatch = RegNext(io.pc.valid)
XSDebug(io.pc.valid, "read: pc=0x%x, baseBank=%d, realMask=%b\n", bankAlignedPC, baseBank, realMask)
XSDebug(io.pc.valid, "read: pc=0x%x, baseBank=%d, realMask=%b\n", if1_bankAlignedPC, if1_baseBank, if1_realMask)
XSDebug(validLatch, "read_resp: pc=0x%x, readIdx=%d-------------------------------\n",
pcLatch, btbAddr.getIdx(pcLatch))
if2_pc, btbAddr.getIdx(if2_pc))
if (debug_verbose) {
for (i <- 0 until BtbBanks){
for (j <- 0 until BtbWays) {
XSDebug(validLatch, "read_resp[w=%d][b=%d][r=%d] is valid(%d) mask(%d), tag=0x%x, offset=0x%x, type=%d, isExtend=%d, isRVC=%d\n",
j.U, i.U, realRowLatch(i), metaRead(j)(i).valid, realMaskLatch(i), metaRead(j)(i).tag, dataRead(j)(i).offset, metaRead(j)(i).btbType, dataRead(j)(i).extended, metaRead(j)(i).isRVC)
j.U, i.U, if2_realRow(i), if2_metaRead(j)(i).valid, if2_realMask(i), if2_metaRead(j)(i).tag, if2_dataRead(j)(i).offset, if2_metaRead(j)(i).btbType, if2_dataRead(j)(i).extended, if2_metaRead(j)(i).isRVC)
}
}
}
// e.g.: baseBank == 5 => (5, 6, ..., 15, 0, 1, 2, 3, 4)
val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (baseBankLatch +& b.U)(log2Up(BtbBanks)-1,0)))
val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (if2_baseBank +& b.U)(log2Up(BtbBanks)-1,0)))
for (i <- 0 until BtbBanks) {
val idx = bankIdxInOrder(i)
XSDebug(validLatch && bankHits(bankIdxInOrder(i)), "resp(%d): bank(%d) hits, tgt=%x, isRVC=%d, type=%d\n",
XSDebug(validLatch && if2_bankHits(bankIdxInOrder(i)), "resp(%d): bank(%d) hits, tgt=%x, isRVC=%d, type=%d\n",
i.U, idx, io.resp.targets(i), io.resp.isRVC(i), io.resp.types(i))
}
XSDebug(updateValid, "update_req: cycle=%d, pc=0x%x, target=0x%x, misPred=%d, offset=%x, extended=%d, way=%d, bank=%d, row=0x%x\n",
u.brInfo.debug_btb_cycle, u.pc, new_target, u.isMisPred, new_offset, new_extended, updateWay, updateBankIdx, updateRow)
for (i <- 0 until BtbBanks) {
// Conflict when not hit and allocating a valid entry
val conflict = metaRead(allocWays(i))(i).valid && !bankHits(i)
val conflict = if2_metaRead(allocWays(i))(i).valid && !if2_bankHits(i)
XSDebug(conflict, "bank(%d) is trying to allocate a valid way(%d)\n", i.U, allocWays(i))
// There is another circumstance when a branch is on its way to update while another
// branch chose the same way to update; then, after the first branch is written in,
......
......@@ -121,7 +121,7 @@ class IFU extends XSModule with HasIFUConst
// val if2_newPtr, if3_newPtr, if4_newPtr = Wire(UInt(log2Up(ExtHistoryLength).W))
val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory)
val final_gh = RegInit(0.U.asTypeOf(new GlobalHistory))
......@@ -149,7 +149,7 @@ class IFU extends XSModule with HasIFUConst
}
val if2_bp = bpu.io.out(0)
// val if2_GHInfo = wrapGHInfo(if2_bp, if2_predHist)
// if taken, bp_redirect should be true
// when taken on the first half of an RVI instruction, we suppress this redirect signal
......@@ -159,14 +159,6 @@ class IFU extends XSModule with HasIFUConst
}
if2_predicted_gh := if2_gh.update(if2_bp.hasNotTakenBrs, if2_bp.takenOnBr)
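
Each fetch stage carries its own speculative copy of the global history; update folds that stage's prediction into it, and final_gh is committed only when IF4 fires. A simplified sketch of such an update under the assumption that one bit is shifted in per predicted packet, 1 for a taken branch and 0 when the packet held only not-taken branches (the real GlobalHistory.update may differ in detail; chisel3.util.Cat in scope):

def ghUpdate(gh: UInt, hasNotTakenBrs: Bool, takenOnBr: Bool, len: Int): UInt =
  Mux(takenOnBr,      Cat(gh(len - 2, 0), 1.U(1.W)),        // taken branch: shift in 1
  Mux(hasNotTakenBrs, Cat(gh(len - 2, 0), 0.U(1.W)), gh))   // only not-taken: shift in 0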
// when (if2_fire && if2_GHInfo.shifted) {
// val if2_newPtr = if2_GHInfo.newPtr()
// updatePtr := true.B
// newPtr := if2_newPtr
// extHist(if2_newPtr) := if2_GHInfo.takenOnBr.asUInt
// }
//********************** IF3 ****************************//
val if3_valid = RegInit(init = false.B)
......@@ -192,9 +184,9 @@ class IFU extends XSModule with HasIFUConst
// val if4_prevHalfInstr = Wire(new PrevHalfInstr)
// 32-bit instr crosses 2 pages, and the higher 16-bit triggers page fault
val crossPageIPF = WireInit(false.B)
val if3_pendingPrevHalfInstr = if3_prevHalfInstr.valid
// the previous half of an RVI instruction waits until it meets its second half
val if3_prevHalfInstrMet = if3_pendingPrevHalfInstr && (if3_prevHalfInstr.pc + 2.U) === if3_pc && if3_valid
// set to invalid once consumed or redirect from backend
......@@ -230,7 +222,7 @@ class IFU extends XSModule with HasIFUConst
// GHInfo from the last prediction does not correspond to this packet
// if3_ghInfoNotIdenticalRedirect
)
val if3_target = WireInit(snpc(if3_pc))
/* when (prevHalfMetRedirect) {
......@@ -263,14 +255,14 @@ class IFU extends XSModule with HasIFUConst
val if4_mask = RegEnable(icacheResp.mask, if3_fire)
val if4_snpc = Mux(inLoop, if4_pc + (PopCount(if4_mask) << 1), snpc(if4_pc))
val if4_predHist = RegEnable(if3_predHist, enable=if3_fire)
// wait until prevHalfInstr is written into the reg
if4_ready := (if4_fire && !hasPrevHalfInstrReq || !if4_valid || if4_flush) && GTimer() > 500.U
when (if4_flush) { if4_valid := false.B }
.elsewhen (if3_fire) { if4_valid := true.B }
.elsewhen (if4_fire) { if4_valid := false.B }
val if4_bp = Wire(new BranchPrediction)
if4_bp := bpu.io.out(2)
if4_bp.takens := bpu.io.out(2).takens & if4_mask
......@@ -294,7 +286,7 @@ class IFU extends XSModule with HasIFUConst
if4_bp.targets(i) := if4_jal_tgts(i)
}
}
// we need this to tell the BPU the prediction of the previous half,
// because the prediction is associated with the start of each inst
val if4_prevHalfInstr = RegInit(0.U.asTypeOf(new PrevHalfInstr))
......@@ -361,11 +353,6 @@ class IFU extends XSModule with HasIFUConst
when (if4_redirect) {
if1_npc := if4_target
}
// val if4_newPtr = if4_GHInfo.newPtr()
// updatePtr := true.B
// newPtr := if4_newPtr
// extHist(if4_newPtr) := if4_GHInfo.takenOnBr.asUInt
// }
when (if4_fire) {
final_gh := if4_predicted_gh
......@@ -442,7 +429,7 @@ class IFU extends XSModule with HasIFUConst
bpu.io.predecode.mask := if4_pd.mask
bpu.io.predecode.lastHalf := if4_pd.lastHalf
bpu.io.predecode.pd := if4_pd.pd
bpu.io.predecode.hasLastHalfRVI := if4_pc =/= if4_pd.pc(0)
bpu.io.predecode.hasLastHalfRVI := if4_prevHalfInstrMet
bpu.io.realMask := if4_mask
bpu.io.prevHalf := if4_prevHalfInstr
......
......@@ -50,6 +50,7 @@ class RAS extends BasePredictor
}
override val io = IO(new RASIO)
override val debug = true
@chiselName
class RASStack(val rasSize: Int) extends XSModule {
......@@ -66,6 +67,11 @@ class RAS extends BasePredictor
val copy_out_mem = Output(Vec(rasSize, rasEntry()))
val copy_out_sp = Output(UInt(log2Up(rasSize).W))
})
val debugIO = IO(new Bundle{
val write_entry = Output(rasEntry())
val alloc_new = Output(Bool())
val sp = Output(UInt(log2Up(rasSize).W))
})
@chiselName
class Stack(val size: Int) extends XSModule {
val io = IO(new Bundle {
......@@ -98,9 +104,13 @@ class RAS extends BasePredictor
val alloc_new = io.new_addr =/= top_addr
stack.wen := io.push_valid || io.pop_valid && top_ctr =/= 1.U
stack.wIdx := Mux(io.pop_valid && top_ctr =/= 1.U, sp - 1.U, Mux(alloc_new, sp, sp - 1.U))
stack.wdata := Mux(io.pop_valid && top_ctr =/= 1.U,
RASEntry(top_addr, top_ctr - 1.U),
Mux(alloc_new, RASEntry(io.new_addr, 1.U), RASEntry(top_addr, top_ctr + 1.U)))
val write_addr = Mux(io.pop_valid && top_ctr =/= 1.U, top_addr, io.new_addr)
val write_ctr = Mux(io.pop_valid && top_ctr =/= 1.U, top_ctr - 1.U, Mux(alloc_new, 1.U, top_ctr + 1.U))
val write_entry = RASEntry(write_addr, write_ctr)
stack.wdata := write_entry
debugIO.write_entry := write_entry
debugIO.alloc_new := alloc_new
debugIO.sp := sp
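
The refactor above names the written entry (write_addr/write_ctr/write_entry) so debugIO can expose exactly what goes into the stack. Functionally the RAS compresses recursion with a counter: pushing an address equal to the current top increments its ctr in place (alloc_new is false), and popping an entry with ctr > 1 decrements it without moving sp. A pure-Scala behavioural model of just that policy (software only, sp wrap and recovery ignored):

final case class Entry(addr: Long, ctr: Int)
final class CountedRas(size: Int) {
  private val stack = new scala.collection.mutable.ArrayBuffer[Entry]()
  def push(addr: Long): Unit = stack.lastOption match {
    case Some(top) if top.addr == addr =>                   // same return address:
      stack(stack.size - 1) = top.copy(ctr = top.ctr + 1)   // bump the counter
    case _ if stack.size < size =>                          // alloc_new
      stack += Entry(addr, 1)
    case _ => ()                                            // full: drop (simplified)
  }
  def pop(): Option[Long] = stack.lastOption.map { top =>
    if (top.ctr > 1) stack(stack.size - 1) = top.copy(ctr = top.ctr - 1)
    else stack.remove(stack.size - 1)
    top.addr
  }
}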
when (io.push_valid && alloc_new) {
sp := sp + 1.U
......@@ -138,7 +148,9 @@ class RAS extends BasePredictor
// val commit_ras = Reg(Vec(RasSize, rasEntry()))
// val commit_sp = RegInit(0.U(log2Up(RasSize).W))
val spec_ras = Module(new RASStack(RasSize)).io
val spec = Module(new RASStack(RasSize))
val spec_ras = spec.io
val spec_push = WireInit(false.B)
val spec_pop = WireInit(false.B)
......@@ -153,7 +165,8 @@ class RAS extends BasePredictor
spec_push := !spec_is_full && io.callIdx.valid && io.pc.valid
spec_pop := !spec_is_empty && io.is_ret && io.pc.valid
val commit_ras = Module(new RASStack(RasSize)).io
val commit = Module(new RASStack(RasSize))
val commit_ras = commit.io
val commit_push = WireInit(false.B)
val commit_pop = WireInit(false.B)
......@@ -179,7 +192,7 @@ class RAS extends BasePredictor
spec_ras.copy_valid := copy_next
spec_ras.copy_in_mem := commit_ras.copy_out_mem
spec_ras.copy_in_sp := commit_ras.copy_out_sp
commit_ras.copy_valid := DontCare
commit_ras.copy_valid := false.B
commit_ras.copy_in_mem := DontCare
commit_ras.copy_in_sp := DontCare
......@@ -189,26 +202,28 @@ class RAS extends BasePredictor
io.branchInfo.rasToqAddr := DontCare
if (BPUDebug && debug) {
// XSDebug("----------------RAS(spec)----------------\n")
// XSDebug(" index addr ctr \n")
// for(i <- 0 until RasSize){
// XSDebug(" (%d) 0x%x %d",i.U,spec_ras(i).retAddr,spec_ras(i).ctr)
// when(i.U === spec_sp){XSDebug(false,true.B," <----sp")}
// XSDebug(false,true.B,"\n")
// }
// XSDebug("----------------RAS(commit)----------------\n")
// XSDebug(" index addr ctr \n")
// for(i <- 0 until RasSize){
// XSDebug(" (%d) 0x%x %d",i.U,commit_ras(i).retAddr,commit_ras(i).ctr)
// when(i.U === commit_sp){XSDebug(false,true.B," <----sp")}
// XSDebug(false,true.B,"\n")
// }
// XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_ras_write.retAddr,spec_ras_write.ctr,sepc_alloc_new,spec_sp.asUInt)
// XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
// XSDebug(commit_push, "(commit_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",commit_ras_write.retAddr,commit_ras_write.ctr,sepc_alloc_new,commit_sp.asUInt)
// XSDebug(commit_pop, "(commit_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
// XSDebug("copyValid:%d copyNext:%d \n",copy_valid,copy_next)
val spec_debug = spec.debugIO
val commit_debug = commit.debugIO
XSDebug("----------------RAS(spec)----------------\n")
XSDebug(" index addr ctr \n")
for(i <- 0 until RasSize){
XSDebug(" (%d) 0x%x %d",i.U,spec_ras.copy_out_mem(i).retAddr,spec_ras.copy_out_mem(i).ctr)
when(i.U === spec_ras.copy_out_sp){XSDebug(false,true.B," <----sp")}
XSDebug(false,true.B,"\n")
}
XSDebug("----------------RAS(commit)----------------\n")
XSDebug(" index addr ctr \n")
for(i <- 0 until RasSize){
XSDebug(" (%d) 0x%x %d",i.U,commit_ras.copy_out_mem(i).retAddr,commit_ras.copy_out_mem(i).ctr)
when(i.U === commit_ras.copy_out_sp){XSDebug(false,true.B," <----sp")}
XSDebug(false,true.B,"\n")
}
XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_new_addr,spec_debug.write_entry.ctr,spec_debug.alloc_new,spec_debug.sp.asUInt)
XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
XSDebug(commit_push, "(commit_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",commit_new_addr,commit_debug.write_entry.ctr,commit_debug.alloc_new,commit_debug.sp.asUInt)
XSDebug(commit_pop, "(commit_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
XSDebug("copyValid:%d copyNext:%d \n",copy_valid,copy_next)
}
......
......@@ -121,26 +121,26 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val tageEntrySz = 1 + tagLen + TageCtrBits
val bankAlignedPC = bankAligned(io.req.bits.pc)
val if2_bankAlignedPC = bankAligned(io.req.bits.pc)
// "bank" here refers to a cache bank
val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
val if2_startsAtOddBank = bankInGroup(if2_bankAlignedPC)(0)
// use real address to index
// val unhashed_idxes = VecInit((0 until TageBanks).map(b => ((io.req.bits.pc >> 1.U) + b.U) >> log2Up(TageBanks).U))
val unhashed_idx = Wire(Vec(2, UInt((log2Ceil(nRows)+tagLen).W)))
val if2_unhashed_idx = Wire(Vec(2, UInt((log2Ceil(nRows)+tagLen).W)))
// the first bank idx always corresponds to the pc
unhashed_idx(0) := io.req.bits.pc >> (1+log2Ceil(TageBanks))
if2_unhashed_idx(0) := io.req.bits.pc >> (1+log2Ceil(TageBanks))
// when the pc is at an odd bank, the second bank is at the next idx
unhashed_idx(1) := unhashed_idx(0) + startsAtOddBank
if2_unhashed_idx(1) := if2_unhashed_idx(0) + if2_startsAtOddBank
// val idxes_and_tags = (0 until TageBanks).map(b => compute_tag_and_hash(unhashed_idxes(b.U), io.req.bits.hist))
// val (idx, tag) = compute_tag_and_hash(unhashed_idx, io.req.bits.hist)
val idxes_and_tags = unhashed_idx.map(compute_tag_and_hash(_, io.req.bits.hist))
// val idxes = VecInit(idxes_and_tags.map(_._1))
// val tags = VecInit(idxes_and_tags.map(_._2))
// val idxes_and_tags = (0 until TageBanks).map(b => compute_tag_and_hash(if2_unhashed_idxes(b.U), io.req.bits.hist))
// val (idx, tag) = compute_tag_and_hash(if2_unhashed_idx, io.req.bits.hist)
val if2_idxes_and_tags = if2_unhashed_idx.map(compute_tag_and_hash(_, io.req.bits.hist))
// val idxes = VecInit(if2_idxes_and_tags.map(_._1))
// val tags = VecInit(if2_idxes_and_tags.map(_._2))
val idxes_latch = RegEnable(VecInit(idxes_and_tags.map(_._1)), io.req.valid)
val tags_latch = RegEnable(VecInit(idxes_and_tags.map(_._2)), io.req.valid)
// and_tags_latch = RegEnable(idxes_and_tags, enable=io.req.valid)
val if3_idxes = RegEnable(VecInit(if2_idxes_and_tags.map(_._1)), io.req.valid)
val if3_tags = RegEnable(VecInit(if2_idxes_and_tags.map(_._2)), io.req.valid)
// and_if3_tags = RegEnable(if2_idxes_and_tags, enable=io.req.valid)
// val idxLatch = RegEnable(idx, enable=io.req.valid)
// val tagLatch = RegEnable(tag, enable=io.req.valid)
......@@ -175,59 +175,59 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val lo_us = List.fill(TageBanks)(Module(new HL_Bank(nRows)))
val table = List.fill(TageBanks)(Module(new SRAMTemplate(new TageEntry, set=nRows, shouldReset=false, holdRead=true, singlePort=false)))
val hi_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val lo_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val table_r = WireInit(0.U.asTypeOf(Vec(TageBanks, new TageEntry)))
val if3_hi_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val if3_lo_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val if3_table_r = WireInit(0.U.asTypeOf(Vec(TageBanks, new TageEntry)))
val baseBank = io.req.bits.pc(log2Up(TageBanks), 1)
val baseBankLatch = RegEnable(baseBank, enable=io.req.valid)
val if2_baseBank = io.req.bits.pc(log2Up(TageBanks), 1)
val if3_baseBank = RegEnable(if2_baseBank, enable=io.req.valid)
val bankIdxInOrder = VecInit((0 until TageBanks).map(b => (baseBankLatch +& b.U)(log2Up(TageBanks)-1, 0)))
val if3_bankIdxInOrder = VecInit((0 until TageBanks).map(b => (if3_baseBank +& b.U)(log2Up(TageBanks)-1, 0)))
val realMask = Mux(startsAtOddBank,
val if2_realMask = Mux(if2_startsAtOddBank,
Cat(io.req.bits.mask(bankWidth-1,0), io.req.bits.mask(PredictWidth-1, bankWidth)),
io.req.bits.mask)
val maskLatch = RegEnable(realMask, enable=io.req.valid)
val if3_realMask = RegEnable(if2_realMask, enable=io.req.valid)
(0 until TageBanks).map(
b => {
val idxes = VecInit(idxes_and_tags.map(_._1))
val idx = (if (b < bankWidth) Mux(startsAtOddBank, idxes(1), idxes(0))
else Mux(startsAtOddBank, idxes(0), idxes(1)))
hi_us(b).io.r.req.valid := io.req.valid && realMask(b)
val idxes = VecInit(if2_idxes_and_tags.map(_._1))
val idx = (if (b < bankWidth) Mux(if2_startsAtOddBank, idxes(1), idxes(0))
else Mux(if2_startsAtOddBank, idxes(0), idxes(1)))
hi_us(b).io.r.req.valid := io.req.valid && if2_realMask(b)
hi_us(b).io.r.req.bits.setIdx := idx
lo_us(b).io.r.req.valid := io.req.valid && realMask(b)
lo_us(b).io.r.req.valid := io.req.valid && if2_realMask(b)
lo_us(b).io.r.req.bits.setIdx := idx
table(b).reset := reset.asBool
table(b).io.r.req.valid := io.req.valid && realMask(b)
table(b).io.r.req.valid := io.req.valid && if2_realMask(b)
table(b).io.r.req.bits.setIdx := idx
hi_us_r(b) := hi_us(b).io.r.resp.data
lo_us_r(b) := lo_us(b).io.r.resp.data
table_r(b) := table(b).io.r.resp.data(0)
if3_hi_us_r(b) := hi_us(b).io.r.resp.data
if3_lo_us_r(b) := lo_us(b).io.r.resp.data
if3_table_r(b) := table(b).io.r.resp.data(0)
}
)
val startsAtOddBankLatch = RegEnable(startsAtOddBank, io.req.valid)
val if3_startsAtOddBank = RegEnable(if2_startsAtOddBank, io.req.valid)
val req_rhits = VecInit((0 until TageBanks).map(b => {
val tag = (if (b < bankWidth) Mux(startsAtOddBank, tags_latch(1), tags_latch(0))
else Mux(startsAtOddBank, tags_latch(0), tags_latch(1)))
val bank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
table_r(bank).valid && table_r(bank).tag === tag
val if3_req_rhits = VecInit((0 until TageBanks).map(b => {
val tag = (if (b < bankWidth) Mux(if3_startsAtOddBank, if3_tags(1), if3_tags(0))
else Mux(if3_startsAtOddBank, if3_tags(0), if3_tags(1)))
val bank = (if (b < bankWidth) Mux(if3_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if3_startsAtOddBank, (b-bankWidth).U, b.U))
if3_table_r(bank).valid && if3_table_r(bank).tag === tag
}))
(0 until TageBanks).map(b => {
val bank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
io.resp(b).valid := req_rhits(b) && maskLatch(b)
io.resp(b).bits.ctr := table_r(bank).ctr
io.resp(b).bits.u := Cat(hi_us_r(bank),lo_us_r(bank))
val bank = (if (b < bankWidth) Mux(if3_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if3_startsAtOddBank, (b-bankWidth).U, b.U))
io.resp(b).valid := if3_req_rhits(b) && if3_realMask(b)
io.resp(b).bits.ctr := if3_table_r(bank).ctr
io.resp(b).bits.u := Cat(if3_hi_us_r(bank),if3_lo_us_r(bank))
})
......@@ -292,7 +292,7 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
// when (RegNext(wrbypass_rhit)) {
// for (b <- 0 until TageBanks) {
// when (RegNext(wrbypass_rctr_hits(b.U + baseBank))) {
// io.resp(b).bits.ctr := rhit_ctrs(bankIdxInOrder(b))
// io.resp(b).bits.ctr := rhit_ctrs(if3_bankIdxInOrder(b))
// }
// }
// }
......@@ -335,17 +335,17 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val u = io.update
val b = PriorityEncoder(u.mask)
val ub = PriorityEncoder(u.uMask)
val idx = idxes_and_tags.map(_._1)
val tag = idxes_and_tags.map(_._2)
val idx = if2_idxes_and_tags.map(_._1)
val tag = if2_idxes_and_tags.map(_._2)
XSDebug(io.req.valid, "tableReq: pc=0x%x, hist=%x, idx=(%d,%d), tag=(%x,%x), baseBank=%d, mask=%b, realMask=%b\n",
io.req.bits.pc, io.req.bits.hist, idx(0), idx(1), tag(0), tag(1), baseBank, io.req.bits.mask, realMask)
io.req.bits.pc, io.req.bits.hist, idx(0), idx(1), tag(0), tag(1), if2_baseBank, io.req.bits.mask, if2_realMask)
for (i <- 0 until TageBanks) {
XSDebug(RegNext(io.req.valid) && req_rhits(i), "TageTableResp[%d]: idx=(%d,%d), hit:%d, ctr:%d, u:%d\n",
i.U, idxes_latch(0), idxes_latch(1), req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
XSDebug(RegNext(io.req.valid) && if3_req_rhits(i), "TageTableResp[%d]: idx=(%d,%d), hit:%d, ctr:%d, u:%d\n",
i.U, if3_idxes(0), if3_idxes(1), if3_req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
}
XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", req_rhits.asUInt, maskLatch)
XSDebug(RegNext(io.req.valid) && !req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", if3_req_rhits.asUInt, if3_realMask)
XSDebug(RegNext(io.req.valid) && !if3_req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
XSDebug(io.update.mask.reduce(_||_), "update Table: pc:%x, fetchIdx:%d, hist:%x, bank:%d, taken:%d, alloc:%d, oldCtr:%d\n",
u.pc, u.fetchIdx, u.hist, b, u.taken(b), u.alloc(b), u.oldCtr(b))
......@@ -435,12 +435,12 @@ class Tage extends BaseTage {
override val debug = true
// Keep the table responses to process in s3
val resps = VecInit(tables.map(t => RegEnable(t.io.resp, enable=io.s3Fire)))
val scResps = VecInit(scTables.map(t => RegEnable(t.io.resp, enable=io.s3Fire)))
val if4_resps = RegEnable(VecInit(tables.map(t => t.io.resp)), enable=io.s3Fire)
val if4_scResps = RegEnable(VecInit(scTables.map(t => t.io.resp)), enable=io.s3Fire)
// val flushLatch = RegNext(io.flush)
val s2_bim = RegEnable(io.bim, enable=io.pc.valid) // actually it is s2Fire
val s3_bim = RegEnable(s2_bim, enable=io.s3Fire)
val if3_bim = RegEnable(io.bim, enable=io.pc.valid) // actually it is s2Fire
val if4_bim = RegEnable(if3_bim, enable=io.s3Fire)
val debug_pc_s2 = RegEnable(io.pc.bits, enable=io.pc.valid)
val debug_pc_s3 = RegEnable(debug_pc_s2, enable=io.s3Fire)
......@@ -482,37 +482,37 @@ class Tage extends BaseTage {
// access tag tables and output meta info
for (w <- 0 until TageBanks) {
val tageTaken = WireInit(s3_bim.ctrs(w)(1).asBool)
var altPred = s3_bim.ctrs(w)(1)
val finalAltPred = WireInit(s3_bim.ctrs(w)(1))
var provided = false.B
var provider = 0.U
io.resp.takens(w) := s3_bim.ctrs(w)(1)
val if4_tageTaken = WireInit(if4_bim.ctrs(w)(1).asBool)
var if4_altPred = if4_bim.ctrs(w)(1)
val if4_finalAltPred = WireInit(if4_bim.ctrs(w)(1))
var if4_provided = false.B
var if4_provider = 0.U
io.resp.takens(w) := if4_bim.ctrs(w)(1)
for (i <- 0 until TageNTables) {
val hit = resps(i)(w).valid
val ctr = resps(i)(w).bits.ctr
val hit = if4_resps(i)(w).valid
val ctr = if4_resps(i)(w).bits.ctr
when (hit) {
io.resp.takens(w) := Mux(ctr === 3.U || ctr === 4.U, altPred, ctr(2)) // Use altpred on weak taken
tageTaken := Mux(ctr === 3.U || ctr === 4.U, altPred, ctr(2))
finalAltPred := altPred
io.resp.takens(w) := Mux(ctr === 3.U || ctr === 4.U, if4_altPred, ctr(2)) // Use altpred on weak taken
if4_tageTaken := Mux(ctr === 3.U || ctr === 4.U, if4_altPred, ctr(2))
if4_finalAltPred := if4_altPred
}
provided = provided || hit // Once hit then provide
provider = Mux(hit, i.U, provider) // Use the last hit as provider
altPred = Mux(hit, ctr(2), altPred) // Save current pred as potential altpred
if4_provided = if4_provided || hit // Once hit then provide
if4_provider = Mux(hit, i.U, if4_provider) // Use the last hit as provider
if4_altPred = Mux(hit, ctr(2), if4_altPred) // Save current pred as potential altpred
}
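
The var/Mux chain above is the canonical TAGE provider scan: tables are visited from shortest to longest history, the last hit becomes the provider, the previous best becomes the alternate prediction, and a weak provider counter (3 or 4 in the 3-bit 0..7 range) defers to that alternate. The same scan written as a fold, with hypothetical hits/ctrs inputs (chisel3._ assumed in scope):

final case class Scan(taken: Bool, alt: Bool, provided: Bool, provider: UInt)
// val res = (0 until TageNTables).foldLeft(
//     Scan(bimTaken, bimTaken, false.B, 0.U)) { case (s, i) =>
//   val weak = ctrs(i) === 3.U || ctrs(i) === 4.U
//   Scan(taken    = Mux(hits(i), Mux(weak, s.alt, ctrs(i)(2)), s.taken),
//        alt      = Mux(hits(i), ctrs(i)(2), s.alt),
//        provided = s.provided || hits(i),
//        provider = Mux(hits(i), i.U, s.provider))
// }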
io.resp.hits(w) := provided
io.meta(w).provider.valid := provided
io.meta(w).provider.bits := provider
io.meta(w).altDiffers := finalAltPred =/= io.resp.takens(w)
io.meta(w).providerU := resps(provider)(w).bits.u
io.meta(w).providerCtr := resps(provider)(w).bits.ctr
io.meta(w).taken := tageTaken
io.resp.hits(w) := if4_provided
io.meta(w).provider.valid := if4_provided
io.meta(w).provider.bits := if4_provider
io.meta(w).altDiffers := if4_finalAltPred =/= io.resp.takens(w)
io.meta(w).providerU := if4_resps(if4_provider)(w).bits.u
io.meta(w).providerCtr := if4_resps(if4_provider)(w).bits.ctr
io.meta(w).taken := if4_tageTaken
// Create a mask of tables which did not hit our query, contain useless entries,
// and use a longer history than the provider
val allocatableSlots = (VecInit(resps.map(r => !r(w).valid && r(w).bits.u === 0.U)).asUInt &
~(LowerMask(UIntToOH(provider), TageNTables) & Fill(TageNTables, provided.asUInt))
val allocatableSlots = (VecInit(if4_resps.map(r => !r(w).valid && r(w).bits.u === 0.U)).asUInt &
~(LowerMask(UIntToOH(if4_provider), TageNTables) & Fill(TageNTables, if4_provided.asUInt))
)
val allocLFSR = LFSR64()(TageNTables - 1, 0)
val firstEntry = PriorityEncoder(allocatableSlots)
......@@ -525,12 +525,12 @@ class Tage extends BaseTage {
scMeta := DontCare
val scTableSums = VecInit(
(0 to 1) map { i => {
// val providerCtr = resps(provider)(w).bits.ctr.zext()
// val providerCtr = if4_resps(if4_provider)(w).bits.ctr.zext()
// val pvdrCtrCentered = (((providerCtr - 4.S) << 1) + 1.S) << 3
// sum += pvdrCtrCentered
if (EnableSC) {
(0 until SCNTables) map { j =>
scTables(j).getCenteredValue(scResps(j)(w).ctr(i))
scTables(j).getCenteredValue(if4_scResps(j)(w).ctr(i))
} reduce (_+_) // TODO: rewrite with adder tree
}
else 0.S
......@@ -539,21 +539,21 @@ class Tage extends BaseTage {
)
if (EnableSC) {
scMeta.tageTaken := tageTaken
scMeta.scUsed := provided
scMeta.scPred := tageTaken
scMeta.tageTaken := if4_tageTaken
scMeta.scUsed := if4_provided
scMeta.scPred := if4_tageTaken
scMeta.sumAbs := 0.U
when (provided) {
val providerCtr = resps(provider)(w).bits.ctr.zext()
when (if4_provided) {
val providerCtr = if4_resps(if4_provider)(w).bits.ctr.zext()
val pvdrCtrCentered = ((((providerCtr - 4.S) << 1).asSInt + 1.S) << 3).asSInt
val totalSum = scTableSums(tageTaken.asUInt) + pvdrCtrCentered
val totalSum = scTableSums(if4_tageTaken.asUInt) + pvdrCtrCentered
val sumAbs = totalSum.abs().asUInt
val sumBelowThreshold = totalSum.abs.asUInt < useThreshold
val scPred = totalSum >= 0.S
scMeta.sumAbs := sumAbs
scMeta.ctrs := VecInit(scResps.map(r => r(w).ctr(tageTaken.asUInt)))
scMeta.ctrs := VecInit(if4_scResps.map(r => r(w).ctr(if4_tageTaken.asUInt)))
for (i <- 0 until SCNTables) {
XSDebug(RegNext(io.s3Fire), p"SCTable(${i.U})(${w.U}): ctr:(${scResps(i)(w).ctr(0)},${scResps(i)(w).ctr(1)})\n")
XSDebug(RegNext(io.s3Fire), p"SCTable(${i.U})(${w.U}): ctr:(${if4_scResps(i)(w).ctr(0)},${if4_scResps(i)(w).ctr(1)})\n")
}
XSDebug(RegNext(io.s3Fire), p"SC(${w.U}): pvdCtr(${providerCtr}), pvdCentred(${pvdrCtrCentered}), totalSum(${totalSum}), abs(${sumAbs}) useThres(${useThreshold}), scPred(${scPred})\n")
// Use prediction from Statistical Corrector
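
The centering above maps the provider's 3-bit counter c in 0..7 to the signed odd value ((2*(c-4)+1) << 3), giving -56, -40, ..., +56 in steps of 16; it never equals zero, so the final decision is simply the sign of totalSum. A pure-Scala worked check:

(0 to 7).map(c => (2 * (c - 4) + 1) << 3)
// -> Vector(-56, -40, -24, -8, 8, 24, 40, 56); scPred is totalSum >= 0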
......@@ -664,7 +664,7 @@ class Tage extends BaseTage {
XSDebug(RegNext(io.s3Fire), "s3FireOnLastCycle: resp: pc=%x, hist=%x, hits=%b, takens=%b\n",
debug_pc_s3, debug_hist_s3, io.resp.hits.asUInt, io.resp.takens.asUInt)
for (i <- 0 until TageNTables) {
XSDebug(RegNext(io.s3Fire), "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b\n", i.U, VecInit(resps(i).map(_.valid)).asUInt, Cat(resps(i).map(_.bits.ctr)), Cat(resps(i).map(_.bits.u)))
XSDebug(RegNext(io.s3Fire), "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b\n", i.U, VecInit(if4_resps(i).map(_.valid)).asUInt, Cat(if4_resps(i).map(_.bits.ctr)), Cat(if4_resps(i).map(_.bits.u)))
}
XSDebug(io.update.valid, "update: pc=%x, fetchpc=%x, cycle=%d, hist=%x, taken:%d, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n",
u.pc, u.pc - (bri.fetchIdx << 1.U), bri.debug_tage_cycle, updateHist, u.taken, u.isMisPred, bri.bimCtr, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits)
......
......@@ -70,9 +70,9 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.LOAD)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).lqIdx.value)
val tailMask = (((1.U((LoadQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0)
val headMask = (((1.U((LoadQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0)
val enqDeqMask1 = tailMask ^ headMask
val deqMask = UIntToMask(deqPtr, LoadQueueSize)
val enqMask = UIntToMask(enqPtr, LoadQueueSize)
val enqDeqMask1 = deqMask ^ enqMask
val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
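For readers new to the helper: UIntToMask(ptr, size) is assumed to compute exactly what the deleted inline expression computed, a lower mask with bits [ptr-1:0] set.
// Assumed equivalent of the old inline form:
def UIntToMask(ptr: UInt, size: Int): UInt =
  ((1.U((size + 1).W) << ptr).asUInt - 1.U)(size - 1, 0)
// e.g. UIntToMask(3.U, 8) gives b00000111
XOR-ing enqMask with deqMask then marks the occupied entries when both pointers carry the same wrap flag, and the complement marks them otherwise.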
// Enqueue at dispatch
......@@ -172,7 +172,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
allocated(i) && miss(i) && !inflight
})
val missRefillSel = getFirstOne(missRefillSelVec, tailMask)
val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
io.dcache.req.valid := missRefillSelVec.asUInt.orR
io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
......@@ -307,7 +307,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
// allocatedMask: deqPtr can advance to the next 1-bit in this mask
val allocatedMask = VecInit((0 until LoadQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
// find the first allocated entry starting from deqPtr
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, deqPtrExt.flag)
val nextTail1 = getFirstOneWithFlag(allocatedMask, deqMask, deqPtrExt.flag)
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, enqPtrExt)
deqPtrExt := nextTail
......@@ -319,9 +319,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
}
})
// rollback check
val rollback = Wire(Vec(StorePipelineWidth, Valid(new Redirect)))
def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
val length = mask.length
val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
......@@ -372,91 +369,88 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
})
// store backward query and rollback
// val needCheck = Seq.fill(8)(WireInit(true.B))
(0 until StorePipelineWidth).foreach(i => {
rollback(i) := DontCare
when(io.storeIn(i).valid) {
val startIndex = io.storeIn(i).bits.uop.lqIdx.value
val lqIdxMask = ((1.U((LoadQueueSize + 1).W) << startIndex).asUInt - 1.U)(LoadQueueSize - 1, 0)
val xorMask = lqIdxMask ^ headMask
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if a load already in the LQ needs to be rolled back
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
}))
val lqViolation = lqViolationVec.asUInt().orR()
val lqViolationIndex = getFirstOne(lqViolationVec, lqIdxMask)
val lqViolationUop = uop(lqViolationIndex)
XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")
// when a load and a store write back to the ROQ in the same cycle, check if rollback is needed
val wbViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
io.loadIn(j).valid &&
isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
}))
val wbViolation = wbViolationVec.asUInt().orR()
val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop))
XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")
// check if rollback is needed for load in l1
val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
io.forward(j).valid && // L4 valid
isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.forward(j).mask).orR
}))
val l1Violation = l1ViolationVec.asUInt().orR()
val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop))
XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")
val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)
rollback(i).valid := Cat(rollbackValidVec).orR
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val oneAfterZero = mask(1)(0)
val rollbackUop = Mux(oneAfterZero && mask(2)(0),
rollbackUopVec(0),
Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))
rollback(i).bits.roqIdx := rollbackUop.roqIdx - 1.U
rollback(i).bits.isReplay := true.B
rollback(i).bits.isMisPred := false.B
rollback(i).bits.isException := false.B
rollback(i).bits.isFlushPipe := false.B
rollback(i).bits.target := rollbackUop.cf.pc
rollback(i).bits.brTag := rollbackUop.brTag
XSDebug(
l1Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
)
XSDebug(
lqViolation,
"need rollback (ld wb before store) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
)
XSDebug(
wbViolation,
"need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
)
}.otherwise {
rollback(i).valid := false.B
}
})
def detectRollback(i: Int) = {
val startIndex = io.storeIn(i).bits.uop.lqIdx.value
val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
val xorMask = lqIdxMask ^ enqMask
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if a load already in the LQ needs to be rolled back
val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
})))
val lqViolation = lqViolationVec.asUInt().orR()
val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
val lqViolationUop = uop(lqViolationIndex)
// lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
// lqViolationUop.lqIdx.value := lqViolationIndex
XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")
// when a load and a store write back to the ROQ in the same cycle, check if rollback is needed
val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
io.loadIn(j).valid &&
isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
})))
val wbViolation = wbViolationVec.asUInt().orR()
val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")
// check if rollback is needed for load in l1
val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
io.forward(j).valid && // L1 valid
isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.forward(j).mask).orR
})))
val l1Violation = l1ViolationVec.asUInt().orR()
val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.forward.map(_.uop))))
XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")
val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val oneAfterZero = mask(1)(0)
val rollbackUop = Mux(oneAfterZero && mask(2)(0),
rollbackUopVec(0),
Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))
XSDebug(
l1Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
)
XSDebug(
lqViolation,
"need rollback (ld wb before store) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
)
XSDebug(
wbViolation,
"need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
)
(RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
}
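// Reviewer note: the violation vectors above are wrapped in RegNext, so the
// rollback decision is produced one cycle after io.storeIn fires (hence
// RegNext(io.storeIn(i).valid) in the return value), presumably trading a
// cycle of rollback latency for a shorter critical path.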
def rollbackSel(a: Valid[Redirect], b: Valid[Redirect]): ValidIO[Redirect] = {
// rollback check
val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
for (i <- 0 until StorePipelineWidth) {
val detectedRollback = detectRollback(i)
rollback(i).valid := detectedRollback._1
rollback(i).bits := detectedRollback._2
}
def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
Mux(
a.valid,
Mux(
......@@ -468,7 +462,21 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
)
}
io.rollback := ParallelOperation(rollback, rollbackSel)
val rollbackSelected = ParallelOperation(rollback, rollbackSel)
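ParallelOperation is assumed to be a tree reduction over a Seq; a minimal sketch of the shape (not the verified utility source):
// Log-depth reduction instead of a linear fold.
def ParallelOperation[T](xs: Seq[T], op: (T, T) => T): T = {
  require(xs.nonEmpty)
  if (xs.length == 1) xs.head
  else {
    val (left, right) = xs.splitAt(xs.length / 2)
    op(ParallelOperation(left, op), ParallelOperation(right, op))
  }
}
Here it reduces the per-store-pipeline rollback candidates pairwise with rollbackSel.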
val lastCycleRedirect = RegNext(io.brqRedirect)
io.rollback := DontCare
// Note that we use roqIdx - 1.U to flush the load instruction itself.
// Thus, even if last cycle's roqIdx equals this cycle's roqIdx, the redirect still fires.
io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx))
io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U
io.rollback.bits.isReplay := true.B
io.rollback.bits.isMisPred := false.B
io.rollback.bits.isException := false.B
io.rollback.bits.isFlushPipe := false.B
io.rollback.bits.target := rollbackSelected.bits.cf.pc
io.rollback.bits.brTag := rollbackSelected.bits.brTag
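// Worked example of the suppression condition (reviewer reading): a redirect
// with bits.roqIdx = N flushes everything strictly after N. If last cycle's
// brqRedirect carried N = 10 and this cycle selects a violating load at
// roqIdx 10, isAfter(10, 10) is false, so io.rollback still fires, and
// bits.roqIdx = 9 ensures the load at 10 itself is flushed. A selected load
// at roqIdx 12 is strictly after 10, already covered by last cycle's flush,
// and therefore filtered out.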
// Memory mapped IO / other uncached operations
......@@ -496,7 +504,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
io.uncache.resp.ready := true.B
when(io.uncache.req.fire()){
when (io.uncache.req.fire()) {
pending(deqPtr) := false.B
}
......
......@@ -63,8 +63,8 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val storeCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.STORE)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).sqIdx.value)
val tailMask = (((1.U((StoreQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0)
val headMask = (((1.U((StoreQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0)
val tailMask = UIntToMask(deqPtr, StoreQueueSize)
val headMask = UIntToMask(enqPtr, StoreQueueSize)
val enqDeqMask1 = tailMask ^ headMask
val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
......@@ -228,7 +228,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// i.e. forward1 covers the target entries with the same flag bit, and forward2 the others
val differentFlag = deqPtrExt.flag =/= io.forward(i).sqIdx.flag
val forwardMask = ((1.U((StoreQueueSize + 1).W)) << io.forward(i).sqIdx.value).asUInt - 1.U
val forwardMask = UIntToMask(io.forward(i).sqIdx.value, StoreQueueSize)
val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B)))
for (j <- 0 until StoreQueueSize) {
storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked
......
......@@ -64,7 +64,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
// we send feedback right after we receive the request
// also, we always treat an AMO as a TLB hit,
// since we keep polling the TLB by ourselves afterwards
io.tlbFeedback.valid := RegNext(io.in.fire())
io.tlbFeedback.valid := RegNext(RegNext(io.in.valid))
io.tlbFeedback.bits.hit := true.B
io.tlbFeedback.bits.roqIdx := in.uop.roqIdx
......
......@@ -21,35 +21,23 @@ class LoadUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcacheReq = DecoupledIO(new DCacheLoadReq)
})
val s0_uop = io.in.bits.uop
val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
val s0_paddr = io.dtlbResp.bits.paddr
val s0_tlb_miss = io.dtlbResp.bits.miss
val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))
// query DTLB
io.dtlbReq.valid := io.out.valid
io.dtlbReq.valid := io.in.valid
io.dtlbReq.bits.vaddr := s0_vaddr
io.dtlbReq.bits.cmd := TlbCmd.read
io.dtlbReq.bits.roqIdx := s0_uop.roqIdx
io.dtlbReq.bits.debug.pc := s0_uop.cf.pc
io.dtlbResp.ready := io.out.ready // TODO: check: should this be io.out.fire()?
// feedback tlb result to RS
// Note: can be moved to s1
io.tlbFeedback.valid := io.out.valid
io.tlbFeedback.bits.hit := !s0_tlb_miss
io.tlbFeedback.bits.roqIdx := s0_uop.roqIdx
// query DCache
io.dcacheReq.valid := io.in.valid && !s0_uop.roqIdx.needFlush(io.redirect)
io.dcacheReq.valid := io.in.valid
io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD
io.dcacheReq.bits.addr := s0_vaddr
io.dcacheReq.bits.mask := s0_mask
......@@ -72,21 +60,18 @@ class LoadUnit_S0 extends XSModule {
"b11".U -> (s0_vaddr(2, 0) === 0.U) //d
))
io.out.valid := io.dcacheReq.fire() && // dcache may not accept load request
!io.in.bits.uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && io.dcacheReq.ready
io.out.bits := DontCare
io.out.bits.vaddr := s0_vaddr
io.out.bits.paddr := s0_paddr
io.out.bits.tlbMiss := io.dtlbResp.bits.miss
io.out.bits.mask := s0_mask
io.out.bits.uop := s0_uop
io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.in.ready := io.out.fire()
io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready)
XSDebug(io.dcacheReq.fire(), "[DCACHE LOAD REQ] pc %x vaddr %x paddr will be %x\n",
s0_uop.cf.pc, s0_vaddr, s0_paddr
XSDebug(io.dcacheReq.fire(),
p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, vaddr ${Hexadecimal(s0_vaddr)}\n"
)
}
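// Reviewer note on the rewritten handshake: S0 now asserts in.ready only
// when both consumers can accept the beat (io.out.ready && io.dcacheReq.ready)
// and stays ready when idle (!io.in.valid), so a dcache-busy cycle stalls the
// uop at the reservation station instead of dropping it. Redirect filtering
// moved out of S0 into the PipelineConnect flush (see the LoadUnit hunk below).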
......@@ -97,20 +82,28 @@ class LoadUnit_S1 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val s1_paddr = Output(UInt(PAddrBits.W))
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcachePAddr = Output(UInt(PAddrBits.W))
val sbuffer = new LoadForwardQueryIO
val lsq = new LoadForwardQueryIO
})
val s1_uop = io.in.bits.uop
val s1_paddr = io.in.bits.paddr
val s1_tlb_miss = io.in.bits.tlbMiss
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_mask = io.in.bits.mask
io.out.bits := io.in.bits // forwardXX field will be updated in s1
io.s1_paddr := s1_paddr
io.dtlbResp.ready := true.B
// feedback tlb result to RS
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx
io.dcachePAddr := s1_paddr
// load forward query datapath
io.sbuffer.valid := io.in.valid
......@@ -127,15 +120,13 @@ class LoadUnit_S1 extends XSModule {
io.lsq.mask := s1_mask
io.lsq.pc := s1_uop.cf.pc // FIXME: remove it
io.out.bits.forwardMask := io.sbuffer.forwardMask
io.out.bits.forwardData := io.sbuffer.forwardData
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && !s1_tlb_miss
io.out.bits.paddr := s1_paddr
io.out.bits.mmio := s1_mmio
io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.in.ready := io.out.ready || !io.in.valid
io.in.ready := !io.in.valid || io.out.ready
}
......@@ -146,9 +137,9 @@ class LoadUnit_S2 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
val sbuffer = new LoadForwardQueryIO
})
val s2_uop = io.in.bits.uop
......@@ -197,7 +188,7 @@ class LoadUnit_S2 extends XSModule {
// TODO: ECC check
io.out.valid := io.in.valid // && !s2_uop.needFlush(io.redirect) will cause comb. loop
io.out.valid := io.in.valid
// The instruction will be canceled in the store queue / LSQ,
// so out.valid in the load / store units does not need to handle flushes
io.out.bits := io.in.bits
......@@ -208,10 +199,16 @@ class LoadUnit_S2 extends XSModule {
io.in.ready := io.out.ready || !io.in.valid
// merge forward result
// lsq has higher priority than sbuffer
io.lsq := DontCare
io.sbuffer := DontCare
// generate XLEN/8 Muxs
for (i <- 0 until XLEN / 8) {
when(io.lsq.forwardMask(i)) {
when (io.sbuffer.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.sbuffer.forwardData(i)
}
when (io.lsq.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.lsq.forwardData(i)
}
......@@ -224,18 +221,6 @@ class LoadUnit_S2 extends XSModule {
}
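The merge above relies on Chisel's last-connect rule: both when blocks may drive the same byte, and the textually later lsq connection wins, which is how "lsq has higher priority than sbuffer" is encoded. A standalone illustration (the hit/data signals are hypothetical):
// Later connection wins when both conditions hold.
val byte = WireInit(0.U(8.W))
when (sbufHit) { byte := sbufByte } // written first: lower priority
when (lsqHit)  { byte := lsqByte }  // written last: takes precedence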
// class LoadUnit_S3 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
class LoadUnit extends XSModule {
val io = IO(new Bundle() {
val ldin = Flipped(Decoupled(new ExuInput))
......@@ -251,33 +236,27 @@ class LoadUnit extends XSModule {
val load_s0 = Module(new LoadUnit_S0)
val load_s1 = Module(new LoadUnit_S1)
val load_s2 = Module(new LoadUnit_S2)
// val load_s3 = Module(new LoadUnit_S3)
load_s0.io.in <> io.ldin
load_s0.io.redirect <> io.redirect
load_s0.io.dtlbReq <> io.dtlb.req
load_s0.io.dtlbResp <> io.dtlb.resp
load_s0.io.dcacheReq <> io.dcache.req
load_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, false.B)
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
io.dcache.s1_paddr := load_s1.io.out.bits.paddr
load_s1.io.redirect <> io.redirect
load_s1.io.dtlbResp <> io.dtlb.resp
load_s1.io.tlbFeedback <> io.tlbFeedback
io.dcache.s1_paddr <> load_s1.io.dcachePAddr
io.dcache.s1_kill := DontCare // FIXME
io.sbuffer <> load_s1.io.sbuffer
io.lsq.forward <> load_s1.io.lsq
load_s1.io.sbuffer <> io.sbuffer
load_s1.io.lsq <> io.lsq.forward
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, false.B)
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
load_s2.io.redirect <> io.redirect
load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq := DontCare
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
// PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B)
// load_s3.io.redirect <> io.redirect
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData
load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask
XSDebug(load_s0.io.out.valid,
p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
......
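Both memory pipelines now pass a per-stage flush into PipelineConnect instead of killing valids inside the stages. A sketch of what PipelineConnect(left, right, rightOutFire, isFlush) is assumed to do, inferred from its use here rather than from the utility's source:
// Single-slot pipeline register with flush.
val valid = RegInit(false.B)
when (rightOutFire)              { valid := false.B }
when (left.valid && right.ready) { valid := true.B  }
when (isFlush)                   { valid := false.B }
left.ready  := right.ready
right.bits  := RegEnable(left.bits, left.valid && right.ready)
right.valid := valid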
......@@ -12,10 +12,7 @@ class StoreUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
})
// send req to dtlb
......@@ -26,16 +23,15 @@ class StoreUnit_S0 extends XSModule {
io.dtlbReq.bits.cmd := TlbCmd.write
io.dtlbReq.bits.roqIdx := io.in.bits.uop.roqIdx
io.dtlbReq.bits.debug.pc := io.in.bits.uop.cf.pc
io.dtlbResp.ready := true.B // TODO: why does dtlbResp need a ready?
io.out.bits := DontCare
io.out.bits.vaddr := saddr
io.out.bits.paddr := io.dtlbResp.bits.paddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := io.dtlbResp.bits.miss
io.out.bits.miss := DontCare
io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.valid := io.in.valid && !io.dtlbResp.bits.miss && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid
io.in.ready := io.out.ready
// exception check
......@@ -46,18 +42,7 @@ class StoreUnit_S0 extends XSModule {
"b11".U -> (io.out.bits.vaddr(2,0) === 0.U) //d
))
io.out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
// Send TLB feedback to store issue queue
// TODO: should be moved to S1
io.tlbFeedback.valid := RegNext(io.in.valid && io.out.ready)
io.tlbFeedback.bits.hit := RegNext(!io.out.bits.miss)
io.tlbFeedback.bits.roqIdx := RegNext(io.out.bits.uop.roqIdx)
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
}
// Load Pipeline Stage 1
......@@ -67,30 +52,41 @@ class StoreUnit_S1 extends XSModule {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
// val fp_out = Decoupled(new LsPipelineBundle)
val stout = DecoupledIO(new ExuOutput) // writeback store
val redirect = Flipped(ValidIO(new Redirect))
val lsq = ValidIO(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
})
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
// writeback to LSQ
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
io.in.ready := true.B
io.out.bits := io.in.bits
io.out.bits.miss := false.B
io.out.bits.mmio := AddressSpace.isMMIO(io.in.bits.paddr)
io.out.valid := io.in.fire() // TODO: && ! FP
io.stout.bits.uop := io.in.bits.uop
// io.stout.bits.uop.cf.exceptionVec := // TODO: update according to TLB result
io.stout.bits.data := DontCare
io.stout.bits.redirectValid := false.B
io.stout.bits.redirect := DontCare
io.stout.bits.brUpdate := DontCare
io.stout.bits.debug.isMMIO := io.out.bits.mmio
io.stout.bits.fflags := DontCare
io.dtlbResp.ready := true.B // TODO: why does dtlbResp need a ready?
// Send TLB feedback to store issue queue
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.roqIdx := io.in.bits.uop.roqIdx
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
io.lsq.valid := io.in.valid // TODO: && ! FP
io.lsq.bits := io.in.bits
io.lsq.bits.paddr := s1_paddr
io.lsq.bits.miss := false.B
io.lsq.bits.mmio := AddressSpace.isMMIO(s1_paddr)
io.lsq.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
// an MMIO inst with an exception is written back immediately
val hasException = io.out.bits.uop.cf.exceptionVec.asUInt.orR
io.stout.valid := io.in.fire() && (!io.out.bits.mmio || hasException) // MMIO insts are written back immediately
io.out.valid := io.in.valid && (!io.out.bits.mmio || hasException) && !s1_tlb_miss
io.out.bits := io.lsq.bits
// if fp
// io.fp_out.valid := ...
......@@ -98,17 +94,24 @@ class StoreUnit_S1 extends XSModule {
}
// class StoreUnit_S2 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
class StoreUnit_S2 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val stout = DecoupledIO(new ExuOutput) // writeback store
})
io.in.ready := true.B
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
io.stout.valid := io.in.valid
io.stout.bits.uop := io.in.bits.uop
io.stout.bits.data := DontCare
io.stout.bits.redirectValid := false.B
io.stout.bits.redirect := DontCare
io.stout.bits.brUpdate := DontCare
io.stout.bits.debug.isMMIO := io.in.bits.mmio
io.stout.bits.fflags := DontCare
}
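// Reviewer summary of the refactored store pipeline: S0 only issues the DTLB
// request; S1 consumes the DTLB response, sends tlbFeedback to the
// reservation station and writes the (TLB-hit) store into the LSQ; the new
// S2 carries the ExuOutput writeback that previously lived in S1.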
class StoreUnit extends XSModule {
val io = IO(new Bundle() {
......@@ -122,25 +125,21 @@ class StoreUnit extends XSModule {
val store_s0 = Module(new StoreUnit_S0)
val store_s1 = Module(new StoreUnit_S1)
// val store_s2 = Module(new StoreUnit_S2)
val store_s2 = Module(new StoreUnit_S2)
store_s0.io.in <> io.stin
store_s0.io.redirect <> io.redirect
store_s0.io.dtlbReq <> io.dtlb.req
store_s0.io.dtlbResp <> io.dtlb.resp
store_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, false.B)
// PipelineConnect(store_s1.io.fp_out, store_s2.io.in, true.B, false.B)
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, store_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
store_s1.io.redirect <> io.redirect
store_s1.io.stout <> io.stout
// send result to sq
io.lsq.valid := store_s1.io.out.valid
io.lsq.bits := store_s1.io.out.bits
store_s1.io.lsq <> io.lsq // send result to sq
store_s1.io.dtlbResp <> io.dtlb.resp
store_s1.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
store_s2.io.stout <> io.stout
store_s1.io.out.ready := true.B
private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = {
XSDebug(cond,
p"$name" + p" pc ${Hexadecimal(pipeline.uop.cf.pc)} " +
......@@ -154,4 +153,4 @@ class StoreUnit extends XSModule {
printPipeLine(store_s0.io.out.bits, store_s0.io.out.valid, "S0")
printPipeLine(store_s1.io.out.bits, store_s1.io.out.valid, "S1")
}
\ No newline at end of file
}
......@@ -104,7 +104,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def isOneOf(key: UInt, seq: Seq[UInt]): Bool =
if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR()
def witdhMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
def maskData(mask: UInt, data: UInt): UInt = {
......@@ -160,7 +160,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req))
val mergeMask = witdhMap(i =>
val mergeMask = widthMap(i =>
req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag
)
val canMerge = Cat(mergeMask).orR()
......@@ -184,7 +184,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
state_new.zip(mem_new)
}
val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer.read(i.U)))
val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer(i)))
val initialSbuffer = stateVec.zip(bufferRead)
val updatedSbuffer = io.in.zipWithIndex.foldLeft[Seq[SbufferEntry]](initialSbuffer)(enqSbuffer)
val updatedState = updatedSbuffer.map(_._1)
......@@ -205,8 +205,8 @@ class NewSbuffer extends XSModule with HasSbufferCst {
XSDebug(req.fire(),
p"accept req [$i]: " +
p"addr:${Hexadecimal(req.bits.addr)} " +
p"mask:${Binary(req.bits.mask)} " +
p"data:${Hexadecimal(req.bits.data)}\n"
p"mask:${Binary(req.bits.mask)} " +
p"data:${Hexadecimal(req.bits.data)}\n"
)
XSDebug(req.valid && !req.ready,
p"req [$i] blocked by sbuffer\n"
......@@ -257,7 +257,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def noSameBlockInflight(idx: UInt): Bool = {
val tag = updatedSbufferLine(idx).tag
!Cat(witdhMap(i => {
!Cat(widthMap(i => {
// stateVec(idx) itself must not be s_inflight*
isOneOf(stateVec(i), Seq(s_inflight_req, s_inflight_resp)) &&
tag===updatedSbufferLine(i).tag
......@@ -316,62 +316,42 @@ class NewSbuffer extends XSModule with HasSbufferCst {
// ---------------------- Load Data Forward ---------------------
// (buff, do_forward)
// pass 'do_forward' here to avoid duplicated tag compare
type ForwardBuf = (SbufferLine, Bool)
def forwardQuery(forward: LoadForwardQueryIO, buff: ForwardBuf): LoadForwardQueryIO = {
val bufLine = buff._1
val do_forward = buff._2
val forwardWire = WireInit(forward)
val forwardMask = forwardWire.forwardMask
val forwardData = forwardWire.forwardData
val dataVec = VecInit((0 until CacheLineBytes).map(i =>
bufLine.data(i*8+7, i*8)
))
when(do_forward){
(0 until DataBytes).map(i => {
val lineOffset = Cat(getWordOffset(forward.paddr), i.U(3.W))
when(bufLine.mask(lineOffset) && forward.mask(i)){
forwardMask(i) := true.B
forwardData(i) := dataVec(lineOffset)
}
})
}
forwardWire
}
for((forward, i) <- io.forward.zipWithIndex){
val tag_matches = witdhMap(i => bufferRead(i).tag===getTag(forward.paddr))
val valid_tag_matches = witdhMap(i => tag_matches(i) && stateVec(i)===s_valid)
val inflight_tag_matches = witdhMap(i =>
for ((forward, i) <- io.forward.zipWithIndex) {
val tag_matches = widthMap(i => bufferRead(i).tag===getTag(forward.paddr))
val valid_tag_matches = widthMap(i => tag_matches(i) && stateVec(i)===s_valid)
val inflight_tag_matches = widthMap(i =>
tag_matches(i) && (stateVec(i)===s_inflight_req || stateVec(i)===s_inflight_resp)
)
val (valid_forward_idx, valid_tag_match) = PriorityEncoderWithFlag(valid_tag_matches)
val (inflight_forwad_idx, inflight_tag_match) = PriorityEncoderWithFlag(inflight_tag_matches)
val line_offset_mask = UIntToOH(getWordOffset(forward.paddr))
val valid_line = bufferRead(valid_forward_idx)
val inflight_line = bufferRead(inflight_forwad_idx)
val valid_tag_match_reg = valid_tag_matches.map(RegNext(_))
val inflight_tag_match_reg = inflight_tag_matches.map(RegNext(_))
val line_offset_reg = RegNext(line_offset_mask)
val initialForward = WireInit(forward)
initialForward.forwardMask := 0.U.asTypeOf(Vec(DataBytes, Bool()))
initialForward.forwardData := DontCare
val selectedValidLine = Mux1H(valid_tag_match_reg, bufferRead)
val selectedValidMask = Mux1H(line_offset_reg, selectedValidLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val selectedValidData = Mux1H(line_offset_reg, selectedValidLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
val forwardResult = Seq(
(inflight_line, inflight_tag_match),
(valid_line, valid_tag_match)
).foldLeft(initialForward)(forwardQuery)
val selectedInflightLine = Mux1H(inflight_tag_match_reg, bufferRead)
val selectedInflightMask = Mux1H(line_offset_reg, selectedInflightLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val selectedInflightData = Mux1H(line_offset_reg, selectedInflightLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
forward.forwardMask := forwardResult.forwardMask
forward.forwardData := forwardResult.forwardData
for (j <- 0 until DataBytes) {
forward.forwardMask(j) := false.B
forward.forwardData(j) := DontCare
XSDebug(inflight_tag_match,
p"inflight tag match: forward [$i] <> buf[$inflight_forwad_idx]\n"
)
XSDebug(valid_tag_match,
p"valid tag match: forward [$i] <> buf[$valid_forward_idx]\n"
)
XSDebug(inflight_tag_match || valid_tag_match,
// valid entries have higher priority than inflight entries
when (selectedInflightMask(j)) {
forward.forwardMask(j) := true.B
forward.forwardData(j) := selectedInflightData(j)
}
when (selectedValidMask(j)) {
forward.forwardMask(j) := true.B
forward.forwardData(j) := selectedValidData(j)
}
}
XSDebug(Cat(inflight_tag_matches).orR || Cat(valid_tag_matches).orR,
p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n"
)
}
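// Timing note on the rewritten forward path: tag compares and the word-offset
// one-hot are computed in the query cycle, registered with RegNext, and the
// per-byte select happens one cycle later, so the sbuffer forward response
// now arrives a cycle after the query; this matches the LoadUnit change where
// S1 issues the forward query and S2 merges forwardData.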
......