Unverified commit 856013d6 — authored by Steve Gou, committed via GitHub

Merge pull request #1428 from OpenXiangShan/ftq-timing

Add one cycle of latency to the IFU/backend redirect path (redirect is now registered in the BPU before use).
......@@ -336,11 +336,14 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
// predictors.io.out.ready := io.bpu_to_ftq.resp.ready
val redirect_req = io.ftq_to_bpu.redirect
val do_redirect = RegNext(redirect_req, init=0.U.asTypeOf(io.ftq_to_bpu.redirect))
// Pipeline logic
s2_redirect := false.B
s3_redirect := false.B
s3_flush := io.ftq_to_bpu.redirect.valid
s3_flush := redirect_req.valid // flush when redirect comes
s2_flush := s3_flush || s3_redirect
s1_flush := s2_flush || s2_redirect
......@@ -357,9 +360,10 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
s3_ready := s3_fire || !s3_valid
s2_fire := s2_valid && s3_components_ready && s3_ready
when(s0_fire) { s1_valid := true.B }
.elsewhen(s1_flush) { s1_valid := false.B }
.elsewhen(s1_fire) { s1_valid := false.B }
when (redirect_req.valid) { s1_valid := false.B }
.elsewhen(s0_fire) { s1_valid := true.B }
.elsewhen(s1_flush) { s1_valid := false.B }
.elsewhen(s1_fire) { s1_valid := false.B }
predictors.io.s1_fire := s1_fire
......@@ -570,11 +574,11 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
io.bpu_to_ftq.resp.bits.s3.hasRedirect := s3_redirect
io.bpu_to_ftq.resp.bits.s3.ftq_idx := s3_ftq_idx
val redirect = io.ftq_to_bpu.redirect.bits
val redirect = do_redirect.bits
predictors.io.update := io.ftq_to_bpu.update
predictors.io.update.bits.ghist := getHist(io.ftq_to_bpu.update.bits.histPtr)
predictors.io.redirect := io.ftq_to_bpu.redirect
predictors.io.redirect := do_redirect
// Redirect logic
val shift = redirect.cfiUpdate.shift
......@@ -592,7 +596,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
val updated_ptr = oldPtr - shift
val updated_fh = VecInit((0 to numBr).map(i => oldFh.update(ghv, oldPtr, i, taken && addIntoHist)))(shift)
val redirect_ghv_wens = (0 until HistoryLength).map(n =>
(0 until numBr).map(b => oldPtr.value === (n.U(log2Ceil(HistoryLength).W) + b.U) && shouldShiftVec(b) && io.ftq_to_bpu.redirect.valid))
(0 until numBr).map(b => oldPtr.value === (n.U(log2Ceil(HistoryLength).W) + b.U) && shouldShiftVec(b) && do_redirect.valid))
val redirect_ghv_wdatas = (0 until HistoryLength).map(n =>
Mux1H(
(0 until numBr).map(b => oldPtr.value === (n.U(log2Ceil(HistoryLength).W) + b.U) && shouldShiftVec(b)),
......@@ -607,7 +611,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
updated_ghist(i) := taken && addIntoHist && (i==0).B
}
}
when(io.ftq_to_bpu.redirect.valid) {
when(do_redirect.valid) {
s0_ghist := updated_ghist.asUInt
}
}
......@@ -615,9 +619,9 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
// val updatedGh = oldGh.update(shift, taken && addIntoHist)
npcGen.register(io.ftq_to_bpu.redirect.valid, redirect.cfiUpdate.target, Some("redirect_target"), 2)
foldedGhGen.register(io.ftq_to_bpu.redirect.valid, updated_fh, Some("redirect_FGHT"), 2)
ghistPtrGen.register(io.ftq_to_bpu.redirect.valid, updated_ptr, Some("redirect_GHPtr"), 2)
npcGen.register(do_redirect.valid, do_redirect.bits.cfiUpdate.target, Some("redirect_target"), 2)
foldedGhGen.register(do_redirect.valid, updated_fh, Some("redirect_FGHT"), 2)
ghistPtrGen.register(do_redirect.valid, updated_ptr, Some("redirect_GHPtr"), 2)
ghvBitWriteGens.zip(redirect_ghv_wens).zipWithIndex.map{case ((b, w), i) =>
b.register(w.reduce(_||_), redirect_ghv_wdatas(i), Some(s"redirect_new_bit_$i"), 2)
}
......@@ -671,6 +675,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with H
XSPerfAccumulate("s2_redirect", s2_redirect)
XSPerfAccumulate("s3_redirect", s3_redirect)
XSPerfAccumulate("s1_not_valid", !s1_valid)
val perfEvents = predictors.asInstanceOf[Composer].getPerfEvents
generatePerfEvent()
......
......@@ -102,7 +102,6 @@ class NewIFU(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper
with HasPerfEvents
{
println(s"icache ways: ${nWays} sets:${nSets}")
val io = IO(new NewIFUIO)
val (toFtq, fromFtq) = (io.ftqInter.toFtq, io.ftqInter.fromFtq)
val (toICache, fromICache) = (VecInit(io.icacheInter.map(_.req)), VecInit(io.icacheInter.map(_.resp)))
......@@ -158,9 +157,9 @@ class NewIFU(implicit p: Parameters) extends XSModule
fromFtq.req.ready := toICache(0).ready && toICache(1).ready && f2_ready && GTimer() > 500.U
toICache(0).valid := fromFtq.req.valid && !f0_flush
toICache(0).valid := fromFtq.req.valid //&& !f0_flush
toICache(0).bits.vaddr := fromFtq.req.bits.startAddr
toICache(1).valid := fromFtq.req.valid && f0_doubleLine && !f0_flush
toICache(1).valid := fromFtq.req.valid && f0_doubleLine //&& !f0_flush
toICache(1).bits.vaddr := fromFtq.req.bits.nextlineStart//fromFtq.req.bits.startAddr + (PredictWidth * 2).U //TODO: timing critical
/** <PERF> f0 fetch bubble */
......
......@@ -528,7 +528,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
}
bpuPtr := bpuPtr + enq_fire
ifuPtr := ifuPtr + io.toIfu.req.fire
ifuPtr := ifuPtr + (io.toIfu.req.fire && allowToIfu)
// only use ftb result to assign hit status
when (bpu_s2_resp.valid) {
......@@ -570,7 +570,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value
io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
io.toIfu.req.valid := entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
io.toIfu.req.bits.ftqIdx := ifuPtr
io.toIfu.req.bits.nextStartAddr := update_target(ifuPtr.value)
io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
......
......@@ -61,12 +61,13 @@ case class ICacheParameters(
trait HasICacheParameters extends HasL1CacheParameters with HasInstrMMIOConst with HasIFUConst{
val cacheParams = icacheParameters
val dataCodeUnit = 8
val dataUnitNum = blockBits/dataCodeUnit
val dataCodeUnit = 16
val dataCodeUnitNum = blockBits/dataCodeUnit
def highestIdxBit = log2Ceil(nSets) - 1
def dataCodeBits = cacheParams.dataCode.width(dataCodeUnit)
def dataEntryBits = dataCodeBits * dataUnitNum
def encDataUnitBits = cacheParams.dataCode.width(dataCodeUnit)
def dataCodeBits = encDataUnitBits - dataCodeUnit
def dataCodeEntryBits = dataCodeBits * dataCodeUnitNum
val ICacheSets = cacheParams.nSets
val ICacheWays = cacheParams.nWays
......@@ -186,6 +187,7 @@ class ICacheMetaArray()(implicit p: Parameters) extends ICacheArray
tagArray
}
//Parity Decode
val read_metas = Wire(Vec(2,Vec(nWays,new ICacheMetadata())))
for((tagArray,i) <- tagArrays.zipWithIndex){
......@@ -194,7 +196,7 @@ class ICacheMetaArray()(implicit p: Parameters) extends ICacheArray
val read_meta_wrong = read_meta_decoded.map{ way_bits_decoded => way_bits_decoded.error}
val read_meta_corrected = VecInit(read_meta_decoded.map{ way_bits_decoded => way_bits_decoded.corrected})
read_metas(i) := read_meta_corrected.asTypeOf(Vec(nWays,new ICacheMetadata()))
(0 until nWays).map{ w => io.readResp.errors(i)(w) := read_meta_wrong(w) && RegNext(io.read.fire)}
(0 until nWays).map{ w => io.readResp.errors(i)(w) := RegNext(read_meta_wrong(w)) && RegNext(RegNext(io.read.fire))}
}
//Parity Encode
......@@ -268,6 +270,20 @@ class ICacheMetaArray()(implicit p: Parameters) extends ICacheArray
class ICacheDataArray(implicit p: Parameters) extends ICacheArray
{
// Extract only the ECC check bits from one encoded data unit.
// The encoder output places the original dataCodeUnit data bits in the low
// positions; the slice below keeps just the code bits above them.
def getECCFromEncUnit(encUnit: UInt) = {
require(encUnit.getWidth == encDataUnitBits)
encUnit(encDataUnitBits - 1, dataCodeUnit)
}
// Compute per-unit ECC check bits for a cache block: slice the block into
// dataCodeUnitNum units of dataCodeUnit bits each, encode every unit, and
// keep only the check bits (via getECCFromEncUnit). Returns a Vec of
// dataCodeBits-wide code words, one per unit.
def getECCFromBlock(cacheblock: UInt) = {
// require(cacheblock.getWidth == blockBits)
VecInit((0 until dataCodeUnitNum).map { w =>
// Select unit w: bits [dataCodeUnit*(w+1)-1 : dataCodeUnit*w] of the block.
val unit = cacheblock(dataCodeUnit * (w + 1) - 1, dataCodeUnit * w)
getECCFromEncUnit(cacheParams.dataCode.encode(unit))
})
}
val io=IO{new Bundle{
val write = Flipped(DecoupledIO(new ICacheDataWriteBundle))
val read = Flipped(DecoupledIO(new ICacheReadBundle))
......@@ -292,11 +308,12 @@ class ICacheDataArray(implicit p: Parameters) extends ICacheArray
val write_bank_0 = WireInit(io.write.valid && !io.write.bits.bankIdx)
val write_bank_1 = WireInit(io.write.valid && io.write.bits.bankIdx)
val write_data_bits = Wire(UInt(dataEntryBits.W))
val write_data_bits = Wire(UInt(blockBits.W))
val write_data_code = Wire(UInt(dataCodeEntryBits.W))
val dataArrays = (0 until 2) map { i =>
val dataArray = Module(new SRAMTemplate(
UInt(dataEntryBits.W),
UInt(blockBits.W),
set=nSets/2,
way=nWays,
shouldReset = true,
......@@ -320,22 +337,46 @@ class ICacheDataArray(implicit p: Parameters) extends ICacheArray
dataArray
}
val codeArrays = (0 until 2) map { i =>
val codeArray = Module(new SRAMTemplate(
UInt(dataCodeEntryBits.W),
set=nSets/2,
way=nWays,
shouldReset = true,
holdRead = true,
singlePort = true
))
if(i == 0) {
codeArray.io.r.req.valid := port_0_read_0 || port_1_read_0
codeArray.io.r.req.bits.apply(setIdx=bank_0_idx(highestIdxBit,1))
codeArray.io.w.req.valid := write_bank_0
codeArray.io.w.req.bits.apply(data=write_data_code, setIdx=io.write.bits.virIdx(highestIdxBit,1), waymask=io.write.bits.waymask)
}
else {
codeArray.io.r.req.valid := port_0_read_1 || port_1_read_1
codeArray.io.r.req.bits.apply(setIdx=bank_1_idx(highestIdxBit,1))
codeArray.io.w.req.valid := write_bank_1
codeArray.io.w.req.bits.apply(data=write_data_code, setIdx=io.write.bits.virIdx(highestIdxBit,1), waymask=io.write.bits.waymask)
}
codeArray
}
//Parity Decode
val read_datas = Wire(Vec(2,Vec(nWays,UInt(blockBits.W) )))
for((dataArray,i) <- dataArrays.zipWithIndex){
val read_data_bits = dataArray.io.r.resp.asTypeOf(Vec(nWays,Vec(dataUnitNum, UInt(dataCodeBits.W))))
val read_data_decoded = read_data_bits.map{way_bits => way_bits.map(unit => cacheParams.dataCode.decode(unit))}
val read_data_wrong = VecInit(read_data_decoded.map{way_bits_decoded => VecInit(way_bits_decoded.map(unit_decoded => unit_decoded.error ))})
val read_data_corrected = VecInit(read_data_decoded.map{way_bits_decoded => VecInit(way_bits_decoded.map(unit_decoded => unit_decoded.corrected )).asUInt})
read_datas(i) := read_data_corrected.asTypeOf(Vec(nWays,UInt(blockBits.W)))
(0 until nWays).map{ w => io.readResp.errors(i)(w) := RegNext(io.read.fire()) && read_data_wrong(w).asUInt.orR }
val read_codes = Wire(Vec(2,Vec(nWays,UInt(dataCodeEntryBits.W) )))
for(((dataArray,codeArray),i) <- dataArrays.zip(codeArrays).zipWithIndex){
read_datas(i) := dataArray.io.r.resp.asTypeOf(Vec(nWays,UInt(blockBits.W)))
read_codes(i) := codeArray.io.r.resp.asTypeOf(Vec(nWays,UInt(dataCodeEntryBits.W)))
(0 until nWays).map{ w => io.readResp.errors(i)(w) := RegNext(io.read.fire()) && read_codes(i)(w).asUInt.orR }
}
//Parity Encode
val write = io.write.bits
val write_data = WireInit(write.data.asTypeOf(Vec(dataUnitNum, UInt(dataCodeUnit.W))))
val write_data_encoded = VecInit(write_data.map( unit_bits => cacheParams.dataCode.encode(unit_bits) ))
write_data_bits := write_data_encoded.asUInt
val write_data = WireInit(write.data)
write_data_code := getECCFromBlock(write_data).asUInt
write_data_bits := write_data
io.readResp.datas(0) := Mux( port_0_read_1_reg, read_datas(1) , read_datas(0))
io.readResp.datas(1) := Mux( port_1_read_0_reg, read_datas(0) , read_datas(1))
......
......@@ -112,7 +112,7 @@ class IPrefetchPipe(implicit p: Parameters) extends IPrefetchModule
fromITLB.ready := true.B
fromFtq.req.ready := (!enableBit || (enableBit && p0_fire)) && GTimer() > 500.U
fromFtq.req.ready := (!enableBit || (enableBit && p3_ready)) && GTimer() > 500.U
/** Prefetch Stage 1: cache probe filter */
val p1_valid = generatePipeControl(lastFire = p0_fire, thisFire = p1_fire || p1_discard, thisFlush = false.B, lastFlush = false.B)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册