未验证 提交 2a3050c2 编写于 作者: J Jay 提交者: GitHub

Optimize IFU and PreDecode timing (#1347)

* ICache: add ReplacePipe for Probe & Release

* remove ProbeUnit

* Probe & Release enter ReplacePipe

* fix bugs when running Linux on MinimalConfig

* TODO: set conflict for ReplacePipe

* ICache: fix ReplacePipe invalid write bug

* chores: code clean up

* IFU: optimize timing

* PreDecode: separate into 2 modules for timing optimization

* IBuffer: add enqEnable to replace valid for timing

* IFU/ITLB: optimize timing

* IFU: calculate cut_ptr in f1

* TLB: send req in f1 and wait resp in f2

* ICacheMainPipe: add tlb miss logic in s0

* Optimize IFU timing

* IFU: fix lastHalfRVI bug

* IFU: fix performance bug

* IFU: optimize MMIO commit timing

* IFU: optimize trigger timing and add frontendTrigger

* fix compile error

* IFU: fix mmio stuck bug
上级 dd95524e
......@@ -110,7 +110,8 @@ class MinimalConfig(n: Int = 1) extends Config(
name = "itlb",
fetchi = true,
useDmode = false,
sameCycle = true,
sameCycle = false,
missSameCycle = true,
normalReplacer = Some("plru"),
superReplacer = Some("plru"),
normalNWays = 4,
......
......@@ -167,7 +167,8 @@ case class XSCoreParameters
name = "itlb",
fetchi = true,
useDmode = false,
sameCycle = true,
sameCycle = false,
missSameCycle = true,
normalNWays = 32,
normalReplacer = Some("plru"),
superNWays = 4,
......
......@@ -43,7 +43,7 @@ trait HasBPUConst extends HasXSParameter {
val numBpStages = BP_STAGES.length
val debug = true
val resetVector = 0x10000000L//TODO: set reset vec
val resetVector = 0x10000000L
// TODO: Replace log2Up by log2Ceil
}
......
......@@ -76,10 +76,10 @@ class Exception(implicit p: Parameters) extends XSBundle {
class FetchToIBuffer(implicit p: Parameters) extends XSBundle {
val instrs = Vec(PredictWidth, UInt(32.W))
val valid = UInt(PredictWidth.W)
val enqEnable = UInt(PredictWidth.W)
val pd = Vec(PredictWidth, new PreDecodeInfo)
val pc = Vec(PredictWidth, UInt(VAddrBits.W))
val foldpc = Vec(PredictWidth, UInt(MemPredPCWidth.W))
//val exception = new Exception
val ftqPtr = new FtqPtr
val ftqOffset = Vec(PredictWidth, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W)))
val ipf = Vec(PredictWidth, Bool())
......
......@@ -108,11 +108,11 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
ibuf.io.waddr(i) := tail_vec(offset(i)).value
ibuf.io.wdata(i) := inWire
ibuf.io.wen(i) := io.in.bits.valid(i) && io.in.fire && !io.flush
ibuf.io.wen(i) := io.in.bits.enqEnable(i) && io.in.fire && !io.flush
}
when (io.in.fire && !io.flush) {
tail_vec := VecInit(tail_vec.map(_ + PopCount(io.in.bits.valid)))
tail_vec := VecInit(tail_vec.map(_ + PopCount(io.in.bits.enqEnable)))
}
// Dequeue
......
......@@ -48,10 +48,6 @@ trait HasPdConst extends HasXSParameter with HasICacheParameters with HasIFUCons
val max_width = rvi_offset.getWidth
SignExt(Mux(rvc, SignExt(rvc_offset, max_width), SignExt(rvi_offset, max_width)), XLEN)
}
def getBasicBlockIdx( pc: UInt, start: UInt ): UInt = {
val byteOffset = pc - start
(byteOffset - instBytes.U)(log2Ceil(PredictWidth),instOffsetBits)
}
def NOP = "h4501".U(16.W)
}
......@@ -83,20 +79,11 @@ class PreDecodeInfo extends Bundle { // 8 bit
}
class PreDecodeResp(implicit p: Parameters) extends XSBundle with HasPdConst {
val pc = Vec(PredictWidth, UInt(VAddrBits.W))
val instrs = Vec(PredictWidth, UInt(32.W))
val pd = Vec(PredictWidth, (new PreDecodeInfo))
val takens = Vec(PredictWidth, Bool())
val misOffset = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
val cfiOffset = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
val target = UInt(VAddrBits.W)
val jalTarget = UInt(VAddrBits.W)
val hasLastHalf = Bool()
val realEndPC = UInt(VAddrBits.W)
val instrRange = Vec(PredictWidth, Bool())
val pageFault = Vec(PredictWidth, Bool())
val accessFault = Vec(PredictWidth, Bool())
val crossPageIPF = Vec(PredictWidth, Bool())
val pd = Vec(PredictWidth, new PreDecodeInfo)
val hasHalfValid = Vec(PredictWidth, Bool())
val expInstr = Vec(PredictWidth, UInt(32.W))
val jumpOffset = Vec(PredictWidth, UInt(XLEN.W))
// val hasLastHalf = Bool()
val triggered = Vec(PredictWidth, new TriggerCf)
}
......@@ -106,37 +93,13 @@ class PreDecode(implicit p: Parameters) extends XSModule with HasPdConst{
val out = Output(new PreDecodeResp)
})
val instValid = io.in.instValid
val data = io.in.data
val pcStart = io.in.startAddr
val pcEnd = io.in.fallThruAddr
val pcEndError = io.in.fallThruError
val isDoubleLine = io.in.isDoubleLine
val bbOffset = io.in.ftqOffset.bits
val bbTaken = io.in.ftqOffset.valid
val bbTarget = io.in.target
val oversize = io.in.oversize
val pageFault = io.in.pageFault
val accessFault = io.in.accessFault
val validStart = Wire(Vec(PredictWidth, Bool()))
dontTouch(validStart)
val validEnd = Wire(Vec(PredictWidth, Bool()))
val targets = Wire(Vec(PredictWidth, UInt(VAddrBits.W)))
val misPred = Wire(Vec(PredictWidth, Bool()))
val takens = Wire(Vec(PredictWidth, Bool()))
val falseHit = Wire(Vec(PredictWidth, Bool()))
val instRange = Wire(Vec(PredictWidth, Bool()))
//"real" means signals that are genrated by repaired end pc of this basic block using predecode information
val realEndPC = Wire(UInt(VAddrBits.W))
val realHasLastHalf = Wire(Vec(PredictWidth, Bool()))
val realMissPred = Wire(Vec(PredictWidth, Bool()))
val realTakens = Wire(Vec(PredictWidth, Bool()))
// val lastHalfMatch = io.in.lastHalfMatch
val validStart, validEnd = Wire(Vec(PredictWidth, Bool()))
val h_validStart, h_validEnd = Wire(Vec(PredictWidth, Bool()))
val rawInsts = if (HasCExtension) VecInit((0 until PredictWidth).map(i => Cat(data(i+1), data(i))))
else VecInit((0 until PredictWidth).map(i => data(i)))
val nextLinePC = addrAlign(pcStart, 64, VAddrBits) + 64.U
else VecInit((0 until PredictWidth).map(i => data(i)))
// Frontend Triggers
val tdata = Reg(Vec(4, new MatchTriggerIO))
......@@ -150,41 +113,32 @@ class PreDecode(implicit p: Parameters) extends XSModule with HasPdConst{
val chainMapping = Map(0 -> 0, 2 -> 3, 3 -> 4)
for (i <- 0 until PredictWidth) {
//TODO: Terrible timing for pc comparing
val isNextLine = (io.out.pc(i) > nextLinePC)
val nullInstruction = isNextLine && !isDoubleLine
val hasPageFault = ((io.out.pc(i) < nextLinePC && pageFault(0)) || ((io.out.pc(i) > nextLinePC || io.out.pc(i) === nextLinePC) && pageFault(1)))
val hasAccessFault = ((io.out.pc(i) < nextLinePC && accessFault(0)) || ((io.out.pc(i) > nextLinePC || io.out.pc(i) === nextLinePC) && accessFault(1)))
val exception = hasPageFault || hasAccessFault
val inst = Mux(exception || nullInstruction , NOP, WireInit(rawInsts(i)))
val inst =WireInit(rawInsts(i))
val expander = Module(new RVCExpander)
val currentIsRVC = isRVC(inst)
val currentPC = io.in.pc(i)
expander.io.in := inst
val isFirstInBlock = i.U === 0.U
val isLastInBlock = (i == PredictWidth - 1).B
val currentPC = pcStart + (i << 1).U((log2Ceil(PredictWidth)+1).W)
val currentIsRVC = isRVC(inst) && HasCExtension.B
val lastIsValidEnd = if (i == 0) { !io.in.lastHalfMatch } else { validEnd(i-1) || isFirstInBlock || !HasCExtension.B }
val brType::isCall::isRet::Nil = brInfo(inst)
val jalOffset = jal_offset(inst, currentIsRVC)
val brOffset = br_offset(inst, currentIsRVC)
//val lastIsValidEnd = if (i == 0) { !lastHalfMatch } else { validEnd(i-1) || !HasCExtension.B }
val lastIsValidEnd = if (i == 0) { true.B } else { validEnd(i-1) || !HasCExtension.B }
validStart(i) := (lastIsValidEnd || !HasCExtension.B)
validEnd(i) := validStart(i) && currentIsRVC || !validStart(i) || !HasCExtension.B
val brType::isCall::isRet::Nil = brInfo(inst)
val jalOffset = jal_offset(inst, currentIsRVC)
val brOffset = br_offset(inst, currentIsRVC)
//prepared for last half match
//TODO if HasCExtension
val h_lastIsValidEnd = if (i == 0) { false.B } else { h_validEnd(i-1) || !HasCExtension.B }
h_validStart(i) := (h_lastIsValidEnd || !HasCExtension.B)
h_validEnd(i) := h_validStart(i) && currentIsRVC || !h_validStart(i) || !HasCExtension.B
io.out.pd(i).valid := (lastIsValidEnd || !HasCExtension.B)
io.out.pd(i).isRVC := currentIsRVC
io.out.pd(i).brType := brType
io.out.pd(i).isCall := isCall
io.out.pd(i).isRet := isRet
io.out.pc(i) := currentPC
io.out.crossPageIPF(i) := (io.out.pc(i) === addrAlign(realEndPC, 64, VAddrBits) - 2.U)&& !pageFault(0) && pageFault(1) && !currentIsRVC
// io.out.triggered(i) := TriggerCmp(Mux(currentIsRVC, inst(15,0), inst), tInstData, matchType, triggerEnable) && TriggerCmp(currentPC, tPcData, matchType, triggerEnable)
io.out.triggered(i).triggerTiming := VecInit(Seq.fill(10)(false.B))
io.out.triggered(i).triggerHitVec := VecInit(Seq.fill(10)(false.B))
io.out.triggered(i).triggerChainVec := VecInit(Seq.fill(5)(false.B))
io.out.hasHalfValid(i) := h_validStart(i)
io.out.triggered(i).triggerTiming := DontCare//VecInit(Seq.fill(10)(false.B))
io.out.triggered(i).triggerHitVec := DontCare//VecInit(Seq.fill(10)(false.B))
io.out.triggered(i).triggerChainVec := DontCare//VecInit(Seq.fill(5)(false.B))
for (j <- 0 until 4) {
val hit = Mux(tdata(j).select, TriggerCmp(Mux(currentIsRVC, inst(15, 0), inst), tdata(j).tdata2, tdata(j).matchType, triggerEnable(j)),
TriggerCmp(currentPC, tdata(j).tdata2, tdata(j).matchType, triggerEnable(j)))
......@@ -192,81 +146,28 @@ class PreDecode(implicit p: Parameters) extends XSModule with HasPdConst{
io.out.triggered(i).triggerTiming(triggerMapping(j)) := hit && tdata(j).timing
if(chainMapping.contains(j)) io.out.triggered(i).triggerChainVec(chainMapping(j)) := hit && tdata(j).chain
}
io.out.pageFault(i) := hasPageFault || io.out.crossPageIPF(i)
io.out.accessFault(i) := hasAccessFault
io.out.pd(i).valid := validStart(i)
io.out.pd(i).isRVC := currentIsRVC
io.out.pd(i).brType := brType
io.out.pd(i).isCall := isCall
io.out.pd(i).isRet := isRet
expander.io.in := inst
io.out.instrs(i) := expander.io.out.bits
takens(i) := (validStart(i) && (bbTaken && bbOffset === i.U && !io.out.pd(i).notCFI || io.out.pd(i).isJal || io.out.pd(i).isRet))
val jumpTarget = io.out.pc(i) + Mux(io.out.pd(i).isBr, brOffset, jalOffset)
targets(i) := Mux(takens(i), jumpTarget, pcEnd)
//Banch and jal have wrong targets
val targetFault = (validStart(i) && i.U === bbOffset && bbTaken && (io.out.pd(i).isBr || io.out.pd(i).isJal) && bbTarget =/= targets(i))
//An not-CFI instruction is predicted taken
val notCFIFault = (validStart(i) && i.U === bbOffset && io.out.pd(i).notCFI && bbTaken)
//A jal instruction is predicted not taken
val jalFault = (validStart(i) && !bbTaken && io.out.pd(i).isJal) || (validStart(i) && bbTaken && i.U < bbOffset && io.out.pd(i).isJal)
//A ret instruction is predicted not taken
val retFault = (validStart(i) && !bbTaken && io.out.pd(i).isRet) || (validStart(i) && bbTaken && i.U < bbOffset && io.out.pd(i).isRet)
//An invalid instruction is predicted taken
val invalidInsFault = (!validStart(i) && i.U === bbOffset && bbTaken)
misPred(i) := targetFault || notCFIFault || jalFault || retFault || invalidInsFault || pcEndError
falseHit(i) := invalidInsFault || notCFIFault
realMissPred(i) := misPred(i) && instRange(i)
realHasLastHalf(i) := instValid && currentPC === (realEndPC - 2.U) && validStart(i) && instRange(i) && !currentIsRVC
realTakens(i) := takens(i) && instRange(i)
io.out.expInstr(i) := expander.io.out.bits
io.out.jumpOffset(i) := Mux(io.out.pd(i).isBr, brOffset, jalOffset)
}
//TODO:
val beyondFetch = ((pcStart + 34.U === realEndPC) && oversize && validEnd.last && isRVC(data.last)) && HasCExtension.B && !io.out.cfiOffset.valid
val jumpOH = VecInit(io.out.pd.zipWithIndex.map{ case(inst, i) => inst.isJal && validStart(i) }) //TODO: need jalr?
val jumpOffset = PriorityEncoder(jumpOH)
val rvcOH = VecInit(io.out.pd.map(inst => inst.isRVC))
val jumpPC = io.out.pc(jumpOffset)
val jumpIsRVC = rvcOH(jumpOffset)
val jumpNextPC = jumpPC + Mux(jumpIsRVC, 2.U, 4.U)
val (hasFalseHit, hasJump) = (ParallelOR(falseHit), ParallelOR(jumpOH))
val endRange = ((Fill(PredictWidth, 1.U(1.W)) >> (~getBasicBlockIdx(realEndPC, pcStart))) | (Fill(PredictWidth, oversize)))
val takeRange = Fill(PredictWidth, !ParallelOR(takens)) | Fill(PredictWidth, 1.U(1.W)) >> (~PriorityEncoder(takens))
val fixCross = ((pcStart + (FetchWidth * 4).U) > nextLinePC || (pcStart + (FetchWidth * 4).U) === nextLinePC) && !isDoubleLine
val boundPC = Mux(fixCross, nextLinePC - 2.U ,pcStart + (FetchWidth * 4).U)
instRange := VecInit((0 until PredictWidth).map(i => endRange(i) && takeRange(i)))
realEndPC := Mux(hasFalseHit, Mux(hasJump && ((jumpNextPC < boundPC) || (jumpNextPC === boundPC) ), jumpNextPC, boundPC), pcEnd)
val validLastOffset = Mux(io.out.pd((PredictWidth - 1).U).valid, (PredictWidth - 1).U, (PredictWidth - 2).U)
io.out.misOffset.valid := ParallelOR(realMissPred) || beyondFetch
io.out.misOffset.bits := Mux(beyondFetch, PredictWidth.U, Mux(pcEndError,validLastOffset,PriorityEncoder(realMissPred)))
io.out.instrRange.zipWithIndex.map{case (bit,i) => bit := instRange(i).asBool()}
io.out.cfiOffset.valid := ParallelOR(realTakens)
io.out.cfiOffset.bits := PriorityEncoder(realTakens)
io.out.target := Mux(beyondFetch,io.out.pc.last + 2.U ,Mux(io.out.cfiOffset.valid, targets(io.out.cfiOffset.bits), realEndPC))
io.out.takens := realTakens
io.out.jalTarget := targets(jumpOffset)
io.out.hasLastHalf := realHasLastHalf.reduce(_||_)
io.out.realEndPC := realEndPC
// io.out.hasLastHalf := !io.out.pd(PredictWidth - 1).isRVC && io.out.pd(PredictWidth - 1).valid
for (i <- 0 until PredictWidth) {
XSDebug(true.B,
p"instr ${Hexadecimal(io.out.instrs(i))}, " +
p"validStart ${Binary(validStart(i))}, " +
p"validEnd ${Binary(validEnd(i))}, " +
p"pc ${Hexadecimal(io.out.pc(i))}, " +
p"isRVC ${Binary(io.out.pd(i).isRVC)}, " +
p"brType ${Binary(io.out.pd(i).brType)}, " +
p"isRet ${Binary(io.out.pd(i).isRet)}, " +
p"isCall ${Binary(io.out.pd(i).isCall)}\n"
p"instr ${Hexadecimal(io.out.expInstr(i))}, " +
p"validStart ${Binary(validStart(i))}, " +
p"validEnd ${Binary(validEnd(i))}, " +
p"isRVC ${Binary(io.out.pd(i).isRVC)}, " +
p"brType ${Binary(io.out.pd(i).brType)}, " +
p"isRet ${Binary(io.out.pd(i).isRet)}, " +
p"isCall ${Binary(io.out.pd(i).isCall)}\n"
)
}
}
......@@ -283,3 +184,132 @@ class RVCExpander(implicit p: Parameters) extends XSModule {
io.out := new RVCDecoder(io.in, XLEN).passthrough
}
}
/* ---------------------------------------------------------------------
* Predict result check
*
* ---------------------------------------------------------------------
*/
/**
 * Encodings for the kind of prediction fault reported for one instruction slot.
 * `apply()` yields the 3-bit hardware type that carries one of these codes.
 * NOTE(review): the codes are string literals without an explicit width; their
 * literal width is presumably inferred from the value rather than fixed at 3 bits -
 * confirm before concatenating or slicing them.
 */
object FaultType {
def noFault = "b000".U // no fault detected for this slot
def jalFault = "b001".U // a jal inside the fetch range that was not predicted taken
def retFault = "b010".U // a ret inside the fetch range that was not predicted taken
def targetFault = "b011".U // predicted-taken jal/branch whose predicted target differs from the computed one
def faulsePred = "b100".U // false prediction: a non-CFI or an invalid slot was predicted taken (spelling kept, the name is referenced by users)
def apply() = UInt(3.W)
}
/**
 * Fault code attached to one instruction slot by the prediction checker.
 * The bundle carries a single 3-bit field holding one of the `FaultType` encodings;
 * the helper predicates decode it.
 */
class CheckInfo extends Bundle {
  // 3-bit fault code; FaultType() is UInt(3.W)
  val value = FaultType()

  // Decoders: each is true when `value` holds the corresponding FaultType code.
  def isjalFault: Bool    = value === FaultType.jalFault
  def isRetFault: Bool    = value === FaultType.retFault
  def istargetFault: Bool = value === FaultType.targetFault
  def isfaulsePred: Bool  = value === FaultType.faulsePred
}
/**
 * Result of the prediction check, one entry per instruction slot of the fetch block
 * (PredictWidth slots). Field order is part of the bundle layout; do not reorder.
 */
class PredCheckerResp(implicit p: Parameters) extends XSBundle with HasPdConst {
// Consumed by the Ibuffer write port (timing critical)
// fixedRange: instruction range after cutting the block at the first missed jal/ret
val fixedRange = Vec(PredictWidth, Bool())
// fixedTaken: slots that are really taken (jal, ret, or the predicted-taken CFI) within fixedRange
val fixedTaken = Vec(PredictWidth, Bool())
// Consumed by the Ftq write-back port (not timing critical)
// fixedTarget: corrected next PC per slot (jump target or sequential PC)
val fixedTarget = Vec(PredictWidth, UInt(VAddrBits.W))
// fixedMissPred: slot carries any of the detected prediction faults
val fixedMissPred = Vec(PredictWidth, Bool())
// faultType: which fault was detected for the slot (FaultType encoding)
val faultType = Vec(PredictWidth, new CheckInfo)
}
/**
 * Checks the branch prediction of one fetch block against pre-decode information.
 *
 * Inputs (io.in): the predicted taken slot and target, the predicted instruction
 * range, per-slot instruction-valid bits, pre-decode info, PCs and jump offsets.
 * Outputs (io.out): the corrected range / taken vector, and per slot the corrected
 * target, a mispredict flag and the fault type.
 *
 * The check runs in two passes:
 *  1. "remask" faults: a jal or ret inside the range that was not predicted taken.
 *     The range is cut right after the first such instruction (fixedRange).
 *  2. on the cut range: a non-CFI or invalid slot predicted taken, and a taken
 *     jal/branch whose predicted target differs from the computed one.
 */
class PredChecker(implicit p: Parameters) extends XSModule with HasPdConst {
  val io = IO(new Bundle {
    val in  = Input(new IfuToPredChecker)
    val out = Output(new PredCheckerResp)
  })

  val (takenIdx, predTaken)    = (io.in.ftqOffset.bits, io.in.ftqOffset.valid)
  val predTarget               = io.in.target
  val (instrRange, instrValid) = (io.in.instrRange, io.in.instrValid)
  val (pds, pc, jumpOffset)    = (io.in.pds, io.in.pc, io.in.jumpOffset)

  val jalFaultVec, retFaultVec, targetFault, notCFITaken, invalidTaken = Wire(Vec(PredictWidth, Bool()))

  /** A remask fault may appear together with other faults, but the other faults are
    * mutually exclusive. The other checks therefore use the fixed (re-masked) range so
    * that only one fault is found and redirected to the Ftq: the remask fault is
    * detected first, then fixedRange is used for the second check.
    */

  /** First check: remask fault.
    * A valid jal/ret inside the range is a fault when the prediction is either not
    * taken at all or taken at a later slot.
    */
  jalFaultVec := VecInit(pds.zipWithIndex.map { case (pd, i) =>
    pd.isJal && instrRange(i) && instrValid(i) && (takenIdx > i.U && predTaken || !predTaken)
  })
  retFaultVec := VecInit(pds.zipWithIndex.map { case (pd, i) =>
    pd.isRet && instrRange(i) && instrValid(i) && (takenIdx > i.U && predTaken || !predTaken)
  })

  val remaskFault = VecInit((0 until PredictWidth).map(i => jalFaultVec(i) || retFaultVec(i)))
  val remaskIdx   = ParallelPriorityEncoder(remaskFault.asUInt)
  val needRemask  = ParallelOR(remaskFault)
  // Keep slots 0..remaskIdx when a remask fault exists, otherwise keep the whole range:
  // shifting the all-ones mask right by ~remaskIdx leaves (remaskIdx + 1) low bits set.
  val fixedRange  = instrRange.asUInt & (Fill(PredictWidth, !needRemask) | Fill(PredictWidth, 1.U(1.W)) >> ~remaskIdx)

  io.out.fixedRange := fixedRange.asTypeOf(Vec(PredictWidth, Bool()))

  io.out.fixedTaken := VecInit(pds.zipWithIndex.map { case (pd, i) =>
    instrValid(i) && fixedRange(i) && (pd.isRet || pd.isJal || takenIdx === i.U && predTaken && !pd.notCFI)
  })

  /** Second check: false-prediction fault and target fault. */
  notCFITaken := VecInit(pds.zipWithIndex.map { case (pd, i) =>
    fixedRange(i) && instrValid(i) && i.U === takenIdx && pd.notCFI && predTaken
  })
  invalidTaken := VecInit((0 until PredictWidth).map { i =>
    fixedRange(i) && !instrValid(i) && i.U === takenIdx && predTaken
  })

  /** Target calculation.
    * The sum is truncated to VAddrBits before use. Without the truncation the result
    * takes the width of the wider operand (jumpOffset is presumably a sign-extended
    * XLEN-wide value - IfuToPredChecker is not visible here, confirm), so a sum that
    * wraps around the virtual address space keeps bits above VAddrBits and can never
    * equal the VAddrBits-wide predicted target, raising a spurious target fault.
    */
  val jumpTargets = VecInit((0 until PredictWidth).map { i =>
    (pc(i) + jumpOffset(i)).asTypeOf(UInt(VAddrBits.W))
  })
  targetFault := VecInit(pds.zipWithIndex.map { case (pd, i) =>
    fixedRange(i) && instrValid(i) && (pd.isJal || pd.isBr) && takenIdx === i.U && predTaken && (predTarget =/= jumpTargets(i))
  })

  // Sequential next PC: +2 for an RVC or a slot pre-decode marks as not valid, +4 otherwise.
  val seqTargets = VecInit((0 until PredictWidth).map { i =>
    pc(i) + Mux(pds(i).isRVC || !pds(i).valid, 2.U, 4.U)
  })

  // Fault type priority: jal > ret > target > false prediction.
  io.out.faultType.zipWithIndex.foreach { case (faultType, i) =>
    faultType.value := Mux(jalFaultVec(i), FaultType.jalFault,
                       Mux(retFaultVec(i), FaultType.retFault,
                       Mux(targetFault(i), FaultType.targetFault,
                       Mux(notCFITaken(i) || invalidTaken(i), FaultType.faulsePred, FaultType.noFault))))
  }

  io.out.fixedMissPred.zipWithIndex.foreach { case (missPred, i) =>
    missPred := jalFaultVec(i) || retFaultVec(i) || notCFITaken(i) || invalidTaken(i) || targetFault(i)
  }

  // A missed jal or a wrong target redirects to the computed jump target;
  // every other case falls through to the sequential PC.
  io.out.fixedTarget.zipWithIndex.foreach { case (target, i) =>
    target := Mux(jalFaultVec(i) || targetFault(i), jumpTargets(i), seqTargets(i))
  }
}
/**
 * Frontend trigger matching for one fetch block.
 *
 * Holds four trigger data registers, written through `io.frontendTrigger.t`, and a
 * registered copy of the four CSR trigger-enable bits. For every instruction slot it
 * compares either the instruction bits or the PC (selected by `tdata.select`) against
 * `tdata2` and reports hit / timing / chain bits in `io.triggered`.
 */
class FrontendTrigger(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle() {
    val frontendTrigger  = Input(new FrontendTdataDistributeIO)
    val csrTriggerEnable = Input(Vec(4, Bool()))
    val triggered        = Output(Vec(PredictWidth, new TriggerCf))

    val pds  = Input(Vec(PredictWidth, new PreDecodeInfo))
    val pc   = Input(Vec(PredictWidth, UInt(VAddrBits.W)))
    // With the C extension the data arrives as 16-bit parcels (one extra parcel so the
    // last slot can still form a 32-bit instruction); otherwise as 32-bit words.
    val data = if (HasCExtension) Input(Vec(PredictWidth + 1, UInt(16.W)))
               else Input(Vec(PredictWidth, UInt(32.W)))
  })

  val data = io.data

  // Candidate 32-bit instruction starting at every slot.
  val rawInsts =
    if (HasCExtension) VecInit(Seq.tabulate(PredictWidth)(i => Cat(data(i + 1), data(i))))
    else VecInit(Seq.tabulate(PredictWidth)(i => data(i)))

  // Trigger data registers, updated one entry at a time.
  val tdata = Reg(Vec(4, new MatchTriggerIO))
  when(io.frontendTrigger.t.valid) {
    tdata(io.frontendTrigger.t.bits.addr) := io.frontendTrigger.t.bits.tdata
  }

  // Default: clear every field of every slot; specific fields are overridden below.
  io.triggered.foreach(_ := 0.U.asTypeOf(new TriggerCf))

  val triggerEnable = RegInit(VecInit(Seq.fill(4)(false.B))) // From CSR, controlled by priv mode, etc.
  triggerEnable := io.csrTriggerEnable

  // Position of frontend trigger j inside the hit/timing vectors and the chain vector.
  val triggerMapping = Map(0 -> 0, 1 -> 1, 2 -> 6, 3 -> 8)
  val chainMapping   = Map(0 -> 0, 2 -> 3, 3 -> 4)

  io.triggered.zipWithIndex.foreach { case (slot, i) =>
    val currentPC    = io.pc(i)
    val currentIsRVC = io.pds(i).isRVC
    val inst         = WireInit(rawInsts(i))
    // Only the low 16 bits take part in the comparison for a compressed instruction.
    val instOperand  = Mux(currentIsRVC, inst(15, 0), inst)

    slot.triggerTiming   := VecInit(Seq.fill(10)(false.B))
    slot.triggerHitVec   := VecInit(Seq.fill(10)(false.B))
    slot.triggerChainVec := VecInit(Seq.fill(5)(false.B))

    tdata.zipWithIndex.foreach { case (t, j) =>
      val instHit = TriggerCmp(instOperand, t.tdata2, t.matchType, triggerEnable(j))
      val pcHit   = TriggerCmp(currentPC, t.tdata2, t.matchType, triggerEnable(j))
      val hit     = Mux(t.select, instHit, pcHit)

      slot.triggerHitVec(triggerMapping(j)) := hit
      slot.triggerTiming(triggerMapping(j)) := hit && t.timing
      // Trigger 1 has no chain position.
      chainMapping.get(j).foreach { chainIdx =>
        slot.triggerChainVec(chainIdx) := hit && t.chain
      }
    }
  }
}
......@@ -101,6 +101,5 @@ class ICacheProbeReq(implicit p: Parameters) extends ICacheBundle {
/**
 * Information about a victim cache line: a valid flag plus an index of idxBits width.
 * NOTE(review): `vidx` is presumably the virtual set index of the victim - confirm
 * against the users of this bundle.
 */
class ICacheVictimInfor(implicit p: Parameters) extends ICacheBundle {
// Entry carries meaningful information
val valid = Bool()
//val ptag = UInt(tagBits.W)
// Index, idxBits wide
val vidx = UInt(idxBits.W)
}
\ No newline at end of file
......@@ -81,24 +81,15 @@ class ICachePerfInfo(implicit p: Parameters) extends ICacheBundle{
}
/**
 * IO of the ICache main pipeline: request/response ports towards the meta and data
 * arrays and the MSHRs inside the cache, and the fetch, PMP and ITLB ports towards
 * the outside. Vec ports have one entry per fetch port (PortNumber).
 */
class ICacheMainPipeInterface(implicit p: Parameters) extends ICacheBundle {
/*** internal interface ***/
// Meta array request/response (toIMeta / fromIMeta)
val metaArray = new ICacheMetaReqBundle
// Data array request/response (toIData / fromIData)
val dataArray = new ICacheDataReqBundle
// Miss handling, one bundle per port (toMSHR / fromMSHR)
val mshr = Vec(PortNumber, new ICacheMSHRBundle)
/*** outside interface ***/
// Fetch request/response, one bundle per port
val fetch = Vec(PortNumber, new ICacheMainPipeBundle)
// PMP check request/response, one bundle per port
val pmp = Vec(PortNumber, new ICachePMPBundle)
// ITLB request/response, one bundle per port
val itlb = Vec(PortNumber, new BlockTlbRequestIO)
// When asserted, stage 2 of the pipeline does not fire
val respStall = Input(Bool())
// val toReleaseUnit = Vec(2, Decoupled(new ReleaseReq))
// val victimInfor = new Bundle() {
// val s1 = Vec(2, Output(new ICacheVictimInfor()))
// val s2 = Vec(2, Output(new ICacheVictimInfor()))
// }
// val setInfor = new Bundle(){
// val s1 = Vec(2, Output(new ICacheSetInfor()))
// val s2 = Vec(2, Output(new ICacheSetInfor()))
// }
// Performance counters information
val perfInfo = Output(new ICachePerfInfo)
}
......@@ -107,7 +98,8 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
val io = IO(new ICacheMainPipeInterface)
val (fromIFU, toIFU) = (io.fetch.map(_.req), io.fetch.map(_.resp))
val (toMeta, toData, metaResp, dataResp) = (io.metaArray.toIMeta, io.dataArray.toIData, io.metaArray.fromIMeta, io.dataArray.fromIData)
val (toMeta, metaResp) = (io.metaArray.toIMeta, io.metaArray.fromIMeta)
val (toData, dataResp) = (io.dataArray.toIData, io.dataArray.fromIData)
val (toMSHR, fromMSHR) = (io.mshr.map(_.toMSHR), io.mshr.map(_.fromMSHR))
val (toITLB, fromITLB) = (io.itlb.map(_.req), io.itlb.map(_.resp))
val (toPMP, fromPMP) = (io.pmp.map(_.req), io.pmp.map(_.resp))
......@@ -115,15 +107,20 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
val s0_ready, s1_ready, s2_ready = WireInit(false.B)
val s0_fire, s1_fire , s2_fire = WireInit(false.B)
//Stage 1
/**
******************************************************************************
* Stage 0
* -
*
******************************************************************************
*/
// Stage-0 request decode: a request is present when any fetch port is valid;
// the virtual addresses and set indices of all ports are collected into Vecs.
val s0_valid = fromIFU.map(_.valid).reduce(_||_)
val s0_req_vaddr = VecInit(fromIFU.map(_.bits.vaddr))
val s0_req_vsetIdx = VecInit(fromIFU.map(_.bits.vsetIdx))
// Only port 0 requests. The second term previously tested port 0 again
// (valid && !valid), which made this signal constantly false.
// The identifier spelling is kept because it may be referenced elsewhere.
val s0_only_fisrt = fromIFU(0).valid && !fromIFU(1).valid
// Both ports request: the fetch spans two cache lines.
val s0_double_line = fromIFU(0).valid && fromIFU(1).valid
s0_fire := s0_valid && s1_ready
//fetch: send addr to Meta/TLB and Data simultaneously
val fetch_req = List(toMeta, toData)
for(i <- 0 until 2) {
......@@ -131,22 +128,64 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
fetch_req(i).bits.isDoubleLine := s0_double_line
fetch_req(i).bits.vSetIdx := s0_req_vsetIdx
}
//TODO: fix GTimer() condition
fromIFU.map(_.ready := fetch_req(0).ready && fetch_req(1).ready && s1_ready && GTimer() > 500.U)
toITLB(0).valid := s0_valid
toITLB(0).bits.size := 3.U // TODO: fix the size
toITLB(0).bits.vaddr := s0_req_vaddr(0)//addrAlign(s1_req_vaddr(0), blockBytes, VAddrBits)
toITLB(0).bits.debug.pc := s0_req_vaddr(0)//addrAlign(s1_req_vaddr(0), blockBytes, VAddrBits)
toITLB(1).valid := s0_valid && s0_double_line
toITLB(1).bits.size := 3.U // TODO: fix the size
toITLB(1).bits.vaddr := s0_req_vaddr(1)//addrAlign(s1_req_vaddr(1), blockBytes, VAddrBits)
toITLB(1).bits.debug.pc := s0_req_vaddr(1)//addrAlign(s1_req_vaddr(1), blockBytes, VAddrBits)
toITLB.map{port =>
port.bits.cmd := TlbCmd.exec
port.bits.robIdx := DontCare
port.bits.debug.isFirstIssue := DontCare
}
val t_idle :: t_miss :: t_fixed :: Nil = Enum(3)
val tlb_status = RegInit(VecInit(Seq.fill(PortNumber)(t_idle)))
dontTouch(tlb_status)
val tlb_miss_vec = VecInit((0 until PortNumber).map( i => toITLB(i).valid && fromITLB(i).bits.miss ))
val tlb_resp = Wire(Vec(2, Bool()))//VecInit((0 until PortNumber).map( i => !fromITLB(i).bits.miss )).reduce(_&&_)
tlb_resp(0) := !fromITLB(0).bits.miss
tlb_resp(1) := !fromITLB(1).bits.miss || !s0_double_line
val tlb_all_resp = tlb_resp.reduce(_&&_)
// val tlb_miss_slot = Seq.fill(PortNumber)(RegInit(0.U.asTypeOf(new TlbResp)))
(0 until PortNumber).map { i =>
when(tlb_miss_vec(i)){
tlb_status(i) := t_miss
}
when(tlb_status(i) === t_miss && !fromITLB(i).bits.miss){
tlb_status(i) := t_idle
}
}
s0_fire := s0_valid && s1_ready && tlb_all_resp && fetch_req(0).ready && fetch_req(1).ready
//TODO: fix GTimer() condition
fromIFU.map(_.ready := fetch_req(0).ready && fetch_req(1).ready &&
tlb_all_resp &&
s1_ready && GTimer() > 500.U )
// XSPerfAccumulate("ifu_bubble_ftq_not_valid", !f0_valid )
// XSPerfAccumulate("ifu_bubble_pipe_stall", f0_valid && fetch_req(0).ready && fetch_req(1).ready && !s1_ready )
// XSPerfAccumulate("ifu_bubble_sram_0_busy", f0_valid && !fetch_req(0).ready )
// XSPerfAccumulate("ifu_bubble_sram_1_busy", f0_valid && !fetch_req(1).ready )
//---------------------------------------------
// Fetch Stage 2 :
// * Send req to ITLB and TLB Response (Get Paddr)
// * ICache Response (Get Meta and Data)
// * Hit Check (Generate hit signal and hit vector)
// * Get victim way
//---------------------------------------------
/**
******************************************************************************
* Stage 1
* -
*
******************************************************************************
*/
//TODO: handle fetch exceptions
......@@ -162,30 +201,21 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
s1_ready := s2_ready && tlbRespAllValid || !s1_valid
s1_fire := s1_valid && tlbRespAllValid && s2_ready
toITLB(0).valid := s1_valid
toITLB(0).bits.size := 3.U // TODO: fix the size
toITLB(0).bits.vaddr := s1_req_vaddr(0)
toITLB(0).bits.debug.pc := s1_req_vaddr(0)
toITLB(1).valid := s1_valid && s1_double_line
toITLB(1).bits.size := 3.U // TODO: fix the size
toITLB(1).bits.vaddr := s1_req_vaddr(1)
toITLB(1).bits.debug.pc := s1_req_vaddr(1)
fromITLB.map(_.ready := true.B)
toITLB.map{port =>
port.bits.cmd := TlbCmd.exec
port.bits.robIdx := DontCare
port.bits.debug.isFirstIssue := DontCare
}
// val tlbRespValid = fromITLB.map(_.valid)
val s1_tlb_all_resp_wire = RegNext(s0_fire) //TODO: if there is another iTLB req source, here should have ID/address compare
val s1_tlb_all_resp_reg = RegInit(false.B)
fromITLB.map(_.ready := true.B)
when(s1_valid && s1_tlb_all_resp_wire && !s2_ready) {s1_tlb_all_resp_reg := true.B}
.elsewhen(s1_fire && s1_tlb_all_resp_reg) {s1_tlb_all_resp_reg := false.B}
val (tlbRespValid, tlbRespPAddr) = (fromITLB.map(_.valid), VecInit(fromITLB.map(_.bits.paddr)))
val (tlbRespMiss) = fromITLB.map(port => port.bits.miss && port.valid)
val (tlbExcpPF, tlbExcpAF) = (fromITLB.map(port => port.bits.excp.pf.instr && port.valid),
fromITLB.map(port => (port.bits.excp.af.instr) && port.valid))
tlbRespAllValid := s1_tlb_all_resp_wire || s1_tlb_all_resp_reg
tlbRespAllValid := tlbRespValid(0) && (tlbRespValid(1) || !s1_double_line)
//response
val tlbRespPAddr = ResultHoldBypass(valid = s1_tlb_all_resp_wire, data = VecInit(fromITLB.map(_.bits.paddr)))
val tlbExcpPF = ResultHoldBypass(valid = s1_tlb_all_resp_wire, data = VecInit(fromITLB.map(port => port.bits.excp.pf.instr && port.valid)))
val tlbExcpAF = ResultHoldBypass(valid = s1_tlb_all_resp_wire, data = VecInit(fromITLB.map(port => port.bits.excp.af.instr && port.valid)))
val s1_req_paddr = tlbRespPAddr
val s1_req_ptags = VecInit(s1_req_paddr.map(get_phy_tag(_)))
......@@ -207,20 +237,6 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
val s1_victim_oh = ResultHoldBypass(data = VecInit(replacers.zipWithIndex.map{case (replacer, i) => UIntToOH(replacer.way(s1_req_vsetIdx(i)))}), valid = RegNext(s0_fire))
val s1_victim_coh = VecInit(s1_victim_oh.zipWithIndex.map {case(oh, port) => Mux1H(oh, s1_meta_cohs(port))})
// val s1_victim_tag = VecInit(s1_victim_oh.zipWithIndex.map {case(oh, port) => Mux1H(oh, s1_meta_ptags(port))})
// val s1_victim_data = VecInit(s1_victim_oh.zipWithIndex.map {case(oh, port) => Mux1H(oh, s1_data_cacheline(port))})
// val s1_need_replace = VecInit(s1_victim_coh.zipWithIndex.map{case(coh, port) => coh.isValid() && s1_bank_miss(port)})
//
// (0 until PortNumber).map{ i =>
// io.victimInfor.s1(i).valid := s1_valid && s1_need_replace(i)
// io.victimInfor.s1(i).ptag := s1_victim_tag(i)
// io.victimInfor.s1(i).vidx := get_idx(s1_req_vaddr(i))
// }
// (0 until PortNumber).map{ i =>
// io.setInfor.s1(i).valid := s1_bank_miss(i)
// io.setInfor.s1(i).vidx := s1_req_vsetIdx(i)
// }
assert(PopCount(s1_tag_match_vec(0)) <= 1.U && PopCount(s1_tag_match_vec(1)) <= 1.U, "Multiple hit in main pipe")
......@@ -252,13 +268,13 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
XSPerfAccumulate("ifu_bubble_s1_tlb_miss", s1_valid && !tlbRespAllValid )
//---------------------------------------------
// Fetch Stage 2 :
// * get data from last stage (hit from s1_hit_data/miss from missQueue response)
// * if at least one needed cacheline miss, wait for miss queue response (a wait_state machine) THIS IS TOO UGLY!!!
// * cut cacheline(s) and send to PreDecode
// * check if prediction is right (branch target and type, jump direction and type , jal target )
//---------------------------------------------
/**
******************************************************************************
* Stage 2
* -
*
******************************************************************************
*/
val s2_fetch_finish = Wire(Bool())
val s2_valid = generatePipeControl(lastFire = s1_fire, thisFire = s2_fire, thisFlush = false.B, lastFlush = false.B)
......@@ -267,7 +283,6 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
s2_ready := (s2_valid && s2_fetch_finish && !io.respStall) || (!s2_valid && s2_miss_available)
s2_fire := s2_valid && s2_fetch_finish && !io.respStall
val pmpExcpAF = fromPMP.map(port => port.instr)
val mmio = fromPMP.map(port => port.mmio) // TODO: handle it
val (s2_req_paddr , s2_req_vaddr) = (RegEnable(next = s1_req_paddr, enable = s1_fire), RegEnable(next = s1_req_vaddr, enable = s1_fire))
......@@ -279,6 +294,7 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
val s2_port_hit = RegEnable(next = s1_port_hit, enable = s1_fire)
val s2_bank_miss = RegEnable(next = s1_bank_miss, enable = s1_fire)
val sec_meet_vec = Wire(Vec(2, Bool()))
val s2_fixed_hit_vec = VecInit((0 until 2).map(i => s2_port_hit(i) || sec_meet_vec(i)))
val s2_fixed_hit = (s2_valid && s2_fixed_hit_vec(0) && s2_fixed_hit_vec(1) && s2_double_line) || (s2_valid && s2_fixed_hit_vec(0) && !s2_double_line)
......@@ -286,22 +302,22 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
//replacement
val s2_waymask = RegEnable(next = s1_victim_oh, enable = s1_fire)
val s2_victim_coh = RegEnable(next = s1_victim_coh, enable = s1_fire)
// val s2_victim_tag = RegEnable(next = s1_victim_tag, enable = s1_fire)
// val s2_victim_data = RegEnable(next = s1_victim_data, enable = s1_fire)
// val s2_need_replace = RegEnable(next = s1_need_replace, enable = s1_fire)
// val s2_has_replace = s2_need_replace.asUInt.orR
/** exception and pmp logic **/
//PMP Result
// Per-port PMP access-fault flag for instruction fetch.
val pmpExcpAF = Wire(Vec(PortNumber, Bool()))
pmpExcpAF(0) := fromPMP(0).instr
// Port 1 can only fault when the request actually spans two lines.
pmpExcpAF(1) := fromPMP(1).instr && s2_double_line
//exception information
// NOTE(review): `s2_except_pf` and `s2_except_af` are each defined twice below (once wrapping the
// TLB vectors in VecInit, once without). This looks like old/new lines of a diff shown together;
// confirm which pair should remain.
val s2_except_pf = RegEnable(next = VecInit(tlbExcpPF), enable = s1_fire)
val s2_except_af = VecInit(RegEnable(next = VecInit(tlbExcpAF), enable = s1_fire).zip(pmpExcpAF).map(a => a._1 || DataHoldBypass(a._2, RegNext(s1_fire)).asBool))
// Page-fault flags from the TLB, registered when s1 fires.
val s2_except_pf = RegEnable(next =tlbExcpPF, enable = s1_fire)
// Access fault per port = registered TLB access fault OR the PMP access fault passed through
// DataHoldBypass (presumably held from the cycle after s1 fires; confirm against the helper).
val s2_except_af = VecInit(RegEnable(next = tlbExcpAF, enable = s1_fire).zip(pmpExcpAF).map(a => a._1 || DataHoldBypass(a._2, RegNext(s1_fire)).asBool))
// Per-port exception: page fault or access fault.
val s2_except = VecInit((0 until 2).map{i => s2_except_pf(i) || s2_except_af(i)})
// Any exception on any port while s2 holds a valid request.
val s2_has_except = s2_valid && (s2_except_af.reduce(_||_) || s2_except_pf.reduce(_||_))
//MMIO
// MMIO is taken from PMP port 0 only, and is suppressed when port 0 already has an exception.
val s2_mmio = DataHoldBypass(io.pmp(0).resp.mmio && !s2_except_af(0) && !s2_except_pf(0), RegNext(s1_fire)).asBool()
io.pmp.zipWithIndex.map { case (p, i) =>
p.req.valid := s2_fire
p.req.valid := s2_fire
p.req.bits.addr := s2_req_paddr(i)
p.req.bits.size := 3.U // TODO
p.req.bits.cmd := TlbCmd.exec
......@@ -313,7 +329,7 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
val port_miss_fix = VecInit(Seq(fromMSHR(0).fire() && !s2_port_hit(0), fromMSHR(1).fire() && s2_double_line && !s2_port_hit(1) ))
class MissSlot(implicit p: Parameters) extends XSBundle with HasICacheParameters {
class MissSlot(implicit p: Parameters) extends ICacheBundle {
val m_vSetIdx = UInt(idxBits.W)
val m_pTag = UInt(tagBits.W)
val m_data = UInt(blockBits.W)
......@@ -331,7 +347,7 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
// Port 1 meets a secondary miss when either of its two `fix_sec_miss` entries (indices 1 and 3) is set.
val sec_meet_1_miss = fix_sec_miss(1) || fix_sec_miss(3)
// Drive the per-port secondary-miss vector from the two per-port flags.
sec_meet_vec := VecInit(Seq(sec_meet_0_miss,sec_meet_1_miss ))
//only raise at the first cycle of s2_valid
/*** miss/hit pattern: <Control Signal> only raise at the first cycle of s2_valid ***/
// Each pattern is gated by RegNext(s1_fire), i.e. it is asserted only in the cycle after s1 fires.
// Single-line request that misses, with no exception, no secondary-miss match on port 0 and no MMIO.
val only_0_miss = RegNext(s1_fire) && !s2_hit && !s2_double_line && !s2_has_except && !sec_meet_0_miss && !s2_mmio
// Single-line request that hits (non-MMIO).
val only_0_hit = RegNext(s1_fire) && s2_hit && !s2_double_line && !s2_mmio
// Double-line request that hits (non-MMIO).
val hit_0_hit_1 = RegNext(s1_fire) && s2_hit && s2_double_line && !s2_mmio
......@@ -351,6 +367,7 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
bit || valid
}
/*** miss/hit pattern latch: <Control Signal> latch the miss/hit pattern if the pipeline stops ***/
// Latched versions of the one-cycle miss patterns. Each latch is released by s2_fire and is never flushed
// (flush is tied to false.B). Presumably the latch keeps the pattern asserted until release; confirm
// against holdReleaseLatch.
val miss_0_hit_1_latch = holdReleaseLatch(valid = miss_0_hit_1, release = s2_fire, flush = false.B)
val miss_0_miss_1_latch = holdReleaseLatch(valid = miss_0_miss_1, release = s2_fire, flush = false.B)
val only_0_miss_latch = holdReleaseLatch(valid = only_0_miss, release = s2_fire, flush = false.B)
......@@ -366,7 +383,7 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
/** True when the given miss-slot state equals `m_wait_sec_miss`. */
def waitSecondComeIn(missState: UInt): Bool = {
  missState === m_wait_sec_miss
}
// deal with secondary miss when s1 enter f2
/*** deal with secondary miss when s1 enter f2 ***/
/**
  * Secondary-miss match between a miss slot and an s2 request port.
  *
  * Asserted in the cycle after s1 fires when the slot's set index and physical tag equal those of
  * the s2 request on `missNum`, that port did not hit, the slot is in the wait-secondary-miss
  * state, and the request is not MMIO.
  *
  * @param slotNum index of the miss slot to compare against
  * @param missNum index of the s2 request port
  */
def getMissSituat(slotNum : Int, missNum : Int ) :Bool = {
  val slot           = missSlot(slotNum)
  val justEnteredS2  = RegNext(s1_fire)
  val sameSet        = slot.m_vSetIdx === s2_req_vsetIdx(missNum)
  val sameTag        = slot.m_pTag === s2_req_ptags(missNum)
  val portMissed     = !s2_port_hit(missNum)
  val slotWaitingSec = waitSecondComeIn(missStateQueue(slotNum))

  justEnteredS2 && sameSet && sameTag && portMissed && slotWaitingSec && !s2_mmio
}
......@@ -476,13 +493,13 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
missStateQueue(i) := m_wait_sec_miss
}
//only the first cycle to check whether meet the secondary miss
/*** Check for a secondary miss only in the first cycle ***/
when(missStateQueue(i) === m_wait_sec_miss){
//the seondary req has been fix by this slot and another also hit || the secondary req for other cacheline and hit
/*** The secondary req has been fixed by this slot and another also hit || the secondary req is for another cacheline and hit ***/
when((slot_slove(i) && s2_fire) || (!slot_slove(i) && s2_fire) ) {
missStateQueue(i) := m_invalid
}
//the seondary req has been fix by this slot but another miss/f3 not ready || the seondary req for other cacheline and miss
/*** The secondary req has been fixed by this slot but another miss/f3 not ready || the secondary req is for another cacheline and miss ***/
.elsewhen((slot_slove(i) && !s2_fire && s2_valid) || (s2_valid && !slot_slove(i) && !s2_fire) ){
missStateQueue(i) := m_check_final
}
......@@ -497,52 +514,8 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
}
}
// val release_idle :: release_wait_fire ::Nil = Enum(2)
// val release_state = RegInit(VecInit(Seq.fill(2)(release_idle)) )
// val s2_need_release = VecInit((0 until PortNumber).map(i =>s2_valid && s2_need_replace(i) && !s2_mmio && !s2_except_af(i) && !s2_except_pf(i)))
//
// val toRealseUnit = io.toReleaseUnit
// (0 until 2).map{ i =>
// switch(release_state(i)){
// is(release_idle){
// when(s2_need_release(i)){
// release_state(i) := Mux(toRealseUnit(i).fire() , release_wait_fire ,release_idle )
// }
// }
//
// is(release_wait_fire){
// when(s2_fire){ release_state(i) := release_idle}
// }
// }
// toRealseUnit(i).valid := s2_valid && s2_need_release(i) && (release_state(i) === release_idle)
// toRealseUnit(i).bits.addr := get_block_addr(Cat(s2_victim_tag(i), get_untag(s2_req_vaddr(i))) )
// toRealseUnit(i).bits.param := s2_victim_coh(i).onCacheControl(M_FLUSH)._2
// toRealseUnit(i).bits.voluntary := true.B
// toRealseUnit(i).bits.hasData := s2_victim_coh(i) === ClientStates.Dirty
// toRealseUnit(i).bits.dirty := s2_victim_coh(i) === ClientStates.Dirty
// toRealseUnit(i).bits.data := s2_victim_data(i)
// toRealseUnit(i).bits.waymask := s2_waymask(i)
// toRealseUnit(i).bits.vidx := s2_req_vsetIdx(i)
// }
// (0 until PortNumber).map{ i =>
// io.victimInfor.s2(i).valid := s2_valid && s2_need_release(i)
// io.victimInfor.s2(i).ptag := s2_victim_tag(i)
// io.victimInfor.s2(i).vidx := get_idx(s2_req_vaddr(i))
// }
//
// (0 until PortNumber).map{ i =>
// io.setInfor.s2(i).valid := s2_bank_miss(i) && s2_valid
// io.setInfor.s2(i).vidx := s1_req_vsetIdx(i)
// }
// All outstanding misses are resolved once the wait state machine reaches `wait_finish`.
val miss_all_fix = wait_state === wait_finish
// val release_all_fix = VecInit((0 until PortNumber).map(i => !s2_need_release(i) || release_state(i) === release_wait_fire))
// s2 fetch is finished when the request is a (fixed) hit, all misses are resolved, an exception
// pattern has been latched, or the request is MMIO.
// NOTE(review): `s2_fetch_finish` is connected twice with the same expression (the first carries a
// trailing commented-out release term). This looks like old/new lines of a diff shown together;
// confirm only one connection should remain.
s2_fetch_finish := ((s2_valid && s2_fixed_hit) || miss_all_fix || hit_0_except_1_latch || except_0_latch || s2_mmio) //&& release_all_fix.reduce(_&&_)
s2_fetch_finish := ((s2_valid && s2_fixed_hit) || miss_all_fix || hit_0_except_1_latch || except_0_latch || s2_mmio)
// Performance counter: cycles in which s2 holds a valid request that has not finished fetching.
XSPerfAccumulate("ifu_bubble_s2_miss", s2_valid && !s2_fetch_finish )
......
......@@ -115,9 +115,15 @@ class ReplacePipe(implicit p: Parameters) extends ICacheModule{
io.status.r1_set.valid := r1_valid
io.status.r1_set.bits := r1_req.vidx
//---------------------------------------------
//---------------------------------------------
/**
******************************************************************************
*
*
*
******************************************************************************
*/
// r2 stage valid flag, produced by generatePipeControl from the r1/r2 fire signals; this stage is never flushed
// (both flush inputs are tied to false.B).
val r2_valid = generatePipeControl(lastFire = r1_fire, thisFire = r2_fire, thisFlush = false.B, lastFlush = false.B)
// r2 is ready when it is empty, or when it is occupied and the release request channel is ready.
r2_ready := r2_valid && io.release_req.ready || !r2_valid
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册