Unverified commit 14a67055, authored by sfencevma, committed by GitHub

ldu, stu: Refactoring the code for ldu/stu (#2171)

* add new ldu and stu

* add fast replay kill at s1

* fix pointer chasing cancel

* pick flushpipe_rvc

* merge flushpipe_rvc

* fix s3_cache_rep and s3_feedbacked

* fix fast replay condition

---------
Co-authored-by: Lyn <lyn@Lyns-MacBook-Pro.local>
Parent dcf3a679
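
Note: most of this diff is a mechanical rename of the load/store-unit interface to short lower_snake_case forms, plus new fast-replay and nuke-query plumbing. A minimal sketch of the naming convention follows, with the old names in comments; the payload types are simplified stand-ins, not the real bundles.

import chisel3._
import chisel3.util._

// Hypothetical, simplified load-unit IO after the rename (real payload bundles omitted).
class LoadUnitIOSketch extends Bundle {
  val ldin          = Flipped(DecoupledIO(UInt(64.W))) // was loadIn
  val ldout         = DecoupledIO(UInt(64.W))          // was loadOut
  val fast_rep_in   = Flipped(DecoupledIO(UInt(64.W))) // was fastReplayIn
  val fast_rep_out  = DecoupledIO(UInt(64.W))          // was fastReplayOut
  val l2l_fwd_in    = Input(UInt(64.W))                // was fastpathIn
  val l2l_fwd_out   = Output(UInt(64.W))               // was fastpathOut
  val feedback_slow = ValidIO(UInt(8.W))               // was feedbackSlow
  val l2_hint       = Flipped(ValidIO(UInt(8.W)))      // was l2Hint
  val lq_rep_full   = Input(Bool())                    // was lqReplayFull
}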
......@@ -291,6 +291,7 @@ class MicroOpRbExt(implicit p: Parameters) extends XSBundleWithMicroOp {
}
class Redirect(implicit p: Parameters) extends XSBundle {
val isRVC = Bool()
val robIdx = new RobPtr
val ftqIdx = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
......@@ -376,6 +377,7 @@ class RobCommitInfo(implicit p: Parameters) extends XSBundle {
val ftqIdx = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
val isMove = Bool()
val isRVC = Bool()
// these should be optimized for synthesis verilog
val pc = UInt(VAddrBits.W)
......
......@@ -173,7 +173,7 @@ case class XSCoreParameters
EnsbufferWidth: Int = 2,
UncacheBufferSize: Int = 4,
EnableLoadToLoadForward: Boolean = true,
EnableFastForward: Boolean = false,
EnableFastForward: Boolean = true,
EnableLdVioCheckAfterReset: Boolean = true,
EnableSoftPrefetchAfterReset: Boolean = true,
EnableCacheErrorAfterReset: Boolean = true,
......
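
Note the parameter flip above: EnableFastForward goes from false to true. With it enabled, a load that misses the dcache but will be resent from the replay queue (dcacheRequireReplay) must not mark its data valid in the load queue; the guarded predicate appears in the VirtualLoadQueue hunk near the end of this diff. Restated as plain Scala over Booleans (a readability sketch, not the RTL):

def dataValid(hasExceptions: Boolean, mmio: Boolean, miss: Boolean,
              dcacheRequireReplay: Boolean, enableFastForward: Boolean): Boolean =
  if (enableFastForward) hasExceptions || mmio || (!miss && !dcacheRequireReplay)
  else                   hasExceptions || mmio || !miss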
......@@ -248,7 +248,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
val l2_pf_enable = Output(Bool())
val perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks, new PerfEvent))
val beu_errors = Output(new XSL1BusErrors())
val l2Hint = Input(Valid(new L2ToL1Hint()))
val l2_hint = Input(Valid(new L2ToL1Hint()))
})
println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}")
......@@ -424,8 +424,8 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.robio.exception.bits.uop.ctrl.commitType)
memBlock.io.debug_ls <> ctrlBlock.io.robio.debug_ls
memBlock.io.lsTopdownInfo <> ctrlBlock.io.robio.lsTopdownInfo
memBlock.io.l2Hint.valid := io.l2Hint.valid
memBlock.io.l2Hint.bits.sourceId := io.l2Hint.bits.sourceId
memBlock.io.l2_hint.valid := io.l2_hint.valid
memBlock.io.l2_hint.bits.sourceId := io.l2_hint.bits.sourceId
val itlbRepeater1 = PTWFilter(itlbParams.fenceDelay,frontend.io.ptw, fenceio.sfence, csrioIn.tlb, l2tlbParams.ifilterSize)
val itlbRepeater2 = PTWRepeaterNB(passReady = false, itlbParams.fenceDelay, itlbRepeater1.io.ptw, ptw.io.tlb(0), fenceio.sfence, csrioIn.tlb)
......
......@@ -174,12 +174,12 @@ class XSTile()(implicit p: Parameters) extends LazyModule
// misc.module.beu_errors.l2.ecc_error.valid := l2cache.get.module.io.ecc_error.valid
// misc.module.beu_errors.l2.ecc_error.bits := l2cache.get.module.io.ecc_error.bits
misc.module.beu_errors.l2 <> 0.U.asTypeOf(misc.module.beu_errors.l2)
core.module.io.l2Hint.bits.sourceId := l2cache.get.module.io.l2_hint.bits
core.module.io.l2Hint.valid := l2cache.get.module.io.l2_hint.valid
core.module.io.l2_hint.bits.sourceId := l2cache.get.module.io.l2_hint.bits
core.module.io.l2_hint.valid := l2cache.get.module.io.l2_hint.valid
} else {
misc.module.beu_errors.l2 <> 0.U.asTypeOf(misc.module.beu_errors.l2)
core.module.io.l2Hint.bits.sourceId := DontCare
core.module.io.l2Hint.valid := false.B
core.module.io.l2_hint.bits.sourceId := DontCare
core.module.io.l2_hint.valid := false.B
}
// Modules are reset one by one
......
......@@ -358,7 +358,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
val pc_from_csr = io.robio.toCSR.isXRet || DelayN(rob.io.exception.valid, 4)
val rob_flush_pc = RegEnable(Mux(flushRedirect.bits.flushItself(),
flushPC, // replay inst
flushPC + 4.U // flush pipe
flushPC + Mux(flushRedirect.bits.isRVC, 2.U, 4.U) // flush pipe
), flushRedirect.valid)
val flushTarget = Mux(pc_from_csr, io.robio.toCSR.trapTarget, rob_flush_pc)
when (frontendFlushValid) {
......
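
The two hunks above and the Rob hunks below work together: the ROB now records each instruction's isRVC bit at dispatch (from pre-decode, req.cf.pd.isRVC) and attaches it to flushOut, so a flush-pipe redirect can step over a 2-byte compressed instruction instead of always adding 4. A minimal sketch of the target selection (widths are placeholders, not the actual CtrlBlock code):

import chisel3._

class FlushTargetSketch extends Module {
  val io = IO(new Bundle {
    val flushItself = Input(Bool())    // replay the faulting instruction itself
    val isRVC       = Input(Bool())    // faulting instruction is 16-bit compressed
    val flushPC     = Input(UInt(39.W))
    val target      = Output(UInt(39.W))
  })
  // Replay: re-fetch the same PC. Flush pipe: step over the instruction,
  // which is 2 bytes for RVC and 4 bytes otherwise.
  io.target := Mux(io.flushItself, io.flushPC,
                   io.flushPC + Mux(io.isRVC, 2.U, 4.U))
}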
......@@ -118,7 +118,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
val debug_ls = new DebugLSIO
val lsTopdownInfo = Vec(exuParameters.LduCnt, Output(new LsTopdownInfo))
val l2Hint = Input(Valid(new L2ToL1Hint()))
val l2_hint = Input(Valid(new L2ToL1Hint()))
})
override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.writeback))
......@@ -181,20 +181,20 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// However, atom exception will be writebacked to rob
// using store writeback port
val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out.bits, loadUnits.head.io.loadOut.bits)
val loadOut0 = Wire(Decoupled(new ExuOutput))
loadOut0.valid := atomicsUnit.io.out.valid || loadUnits.head.io.loadOut.valid
loadOut0.bits := loadWritebackOverride
atomicsUnit.io.out.ready := loadOut0.ready
loadUnits.head.io.loadOut.ready := loadOut0.ready
val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out.bits, loadUnits.head.io.ldout.bits)
val ldout0 = Wire(Decoupled(new ExuOutput))
ldout0.valid := atomicsUnit.io.out.valid || loadUnits.head.io.ldout.valid
ldout0.bits := loadWritebackOverride
atomicsUnit.io.out.ready := ldout0.ready
loadUnits.head.io.ldout.ready := ldout0.ready
when(atomicsUnit.io.out.valid){
loadOut0.bits.uop.cf.exceptionVec := 0.U(16.W).asBools // exception will be writebacked via store wb port
ldout0.bits.uop.cf.exceptionVec := 0.U(16.W).asBools // exception will be writebacked via store wb port
}
val ldExeWbReqs = loadOut0 +: loadUnits.tail.map(_.io.loadOut)
val ldExeWbReqs = ldout0 +: loadUnits.tail.map(_.io.ldout)
io.writeback <> ldExeWbReqs ++ VecInit(storeUnits.map(_.io.stout)) ++ VecInit(stdExeUnits.map(_.io.out))
io.otherFastWakeup := DontCare
io.otherFastWakeup.take(2).zip(loadUnits.map(_.io.fastUop)).foreach{case(a,b)=> a := b}
io.otherFastWakeup.take(2).zip(loadUnits.map(_.io.fast_uop)).foreach{case(a,b)=> a := b}
val stOut = io.writeback.drop(exuParameters.LduCnt).dropRight(exuParameters.StuCnt)
// prefetch to l1 req
......@@ -206,7 +206,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(0).io.prefetch_req.bits.confidence := 0.U
l1_pf_req.ready := (l1_pf_req.bits.confidence > 0.U) ||
loadUnits.map(!_.io.loadIn.valid).reduce(_ || _)
loadUnits.map(!_.io.ldin.valid).reduce(_ || _)
// l1 pf fuzzer interface
val DebugEnableL1PFFuzzer = false
......@@ -370,10 +370,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val fastReplaySel = loadUnits.zipWithIndex.map { case (ldu, i) => {
val wrapper = Wire(Valid(new BalanceEntry))
wrapper.valid := ldu.io.fastReplayOut.valid
wrapper.bits.req := ldu.io.fastReplayOut.bits
wrapper.bits.balance := ldu.io.fastReplayOut.bits.replayInfo.cause(LoadReplayCauses.bankConflict)
wrapper.bits.port := i.U
wrapper.valid := ldu.io.fast_rep_out.valid
wrapper.bits.req := ldu.io.fast_rep_out.bits
wrapper.bits.balance := ldu.io.fast_rep_out.bits.rep_info.bank_conflict
wrapper.bits.port := i.U
wrapper
}}
val balanceFastReplaySel = balanceReOrder(fastReplaySel)
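
Here each load unit's fast_rep_out is wrapped into a BalanceEntry whose balance flag marks bank-conflict replays, and balanceReOrder may redistribute those across ports. Its implementation is not shown in this hunk; the sketch below is one plausible two-port policy (swap a flagged entry to the opposite pipe so repeated conflicts do not keep hammering the same load pipe) and is purely illustrative.

import chisel3._
import chisel3.util._

class BalanceEntrySketch extends Bundle {
  val balance = Bool()     // bank-conflict replay, candidate for rebalancing
  val port    = UInt(1.W)  // originating load port
  val req     = UInt(64.W) // stand-in for the replay payload
}

object BalanceReOrderSketch {
  // Hypothetical two-port rebalance: not the actual XiangShan balanceReOrder.
  def apply(in: Seq[Valid[BalanceEntrySketch]]): Seq[Valid[BalanceEntrySketch]] = {
    require(in.length == 2)
    val swap = in(0).valid && in(0).bits.balance // conflict replay on port 0
    val out  = Seq.fill(2)(Wire(Valid(new BalanceEntrySketch)))
    out(0) := Mux(swap, in(1), in(0))
    out(1) := Mux(swap, in(0), in(1))
    out
  }
}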
......@@ -383,34 +383,34 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.isFirstIssue := true.B
// get input form dispatch
loadUnits(i).io.loadIn <> io.issue(i)
loadUnits(i).io.feedbackSlow <> io.rsfeedback(i).feedbackSlow
loadUnits(i).io.feedbackFast <> io.rsfeedback(i).feedbackFast
loadUnits(i).io.ldin <> io.issue(i)
loadUnits(i).io.feedback_slow <> io.rsfeedback(i).feedbackSlow
loadUnits(i).io.feedback_fast <> io.rsfeedback(i).feedbackFast
loadUnits(i).io.rsIdx := io.rsfeedback(i).rsIdx
// fast replay
loadUnits(i).io.fastReplayIn.valid := balanceFastReplaySel(i).valid
loadUnits(i).io.fastReplayIn.bits := balanceFastReplaySel(i).bits.req
loadUnits(i).io.fast_rep_in.valid := balanceFastReplaySel(i).valid
loadUnits(i).io.fast_rep_in.bits := balanceFastReplaySel(i).bits.req
loadUnits(i).io.fastReplayOut.ready := false.B
loadUnits(i).io.fast_rep_out.ready := false.B
for (j <- 0 until exuParameters.LduCnt) {
when (balanceFastReplaySel(j).valid && balanceFastReplaySel(j).bits.port === i.U) {
loadUnits(i).io.fastReplayOut.ready := loadUnits(j).io.fastReplayIn.ready
loadUnits(i).io.fast_rep_out.ready := loadUnits(j).io.fast_rep_in.ready
}
}
// get input form dispatch
loadUnits(i).io.loadIn <> io.issue(i)
loadUnits(i).io.ldin <> io.issue(i)
// dcache access
loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
// forward
loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
loadUnits(i).io.tlDchannel := dcache.io.lsu.forward_D(i)
loadUnits(i).io.tl_d_channel := dcache.io.lsu.forward_D(i)
loadUnits(i).io.forward_mshr <> dcache.io.lsu.forward_mshr(i)
// ld-ld violation check
loadUnits(i).io.lsq.loadLoadViolationQuery <> lsq.io.ldu.loadLoadViolationQuery(i)
loadUnits(i).io.lsq.storeLoadViolationQuery <> lsq.io.ldu.storeLoadViolationQuery(i)
loadUnits(i).io.lsq.ldld_nuke_query <> lsq.io.ldu.ldld_nuke_query(i)
loadUnits(i).io.lsq.stld_nuke_query <> lsq.io.ldu.stld_nuke_query(i)
loadUnits(i).io.csrCtrl <> csrCtrl
// dcache refill req
loadUnits(i).io.refill <> delayedDcacheRefill
......@@ -420,9 +420,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.pmp <> pmp_check(i).resp
// st-ld violation query
for (s <- 0 until StorePipelineWidth) {
loadUnits(i).io.reExecuteQuery(s) := storeUnits(s).io.reExecuteQuery
loadUnits(i).io.stld_nuke_query(s) := storeUnits(s).io.stld_nuke_query
}
loadUnits(i).io.lqReplayFull <> lsq.io.lqReplayFull
loadUnits(i).io.lq_rep_full <> lsq.io.lq_rep_full
// prefetch
prefetcherOpt.foreach(pf => {
pf.io.ld_in(i).valid := Mux(pf_train_on_hit,
......@@ -432,34 +432,36 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
)
)
pf.io.ld_in(i).bits := loadUnits(i).io.prefetch_train.bits
pf.io.ld_in(i).bits.uop.cf.pc := Mux(loadUnits(i).io.s2IsPointerChasing, io.loadPc(i), RegNext(io.loadPc(i)))
pf.io.ld_in(i).bits.uop.cf.pc := Mux(loadUnits(i).io.s2_ptr_chasing, io.loadPc(i), RegNext(io.loadPc(i)))
})
// load to load fast forward: load(i) prefers data(i)
val fastPriority = (i until exuParameters.LduCnt) ++ (0 until i)
val fastValidVec = fastPriority.map(j => loadUnits(j).io.fastpathOut.valid)
val fastDataVec = fastPriority.map(j => loadUnits(j).io.fastpathOut.data)
val fastValidVec = fastPriority.map(j => loadUnits(j).io.l2l_fwd_out.valid)
val fastDataVec = fastPriority.map(j => loadUnits(j).io.l2l_fwd_out.data)
val fastErrorVec = fastPriority.map(j => loadUnits(j).io.l2l_fwd_out.dly_ld_err)
val fastMatchVec = fastPriority.map(j => io.loadFastMatch(i)(j))
loadUnits(i).io.fastpathIn.valid := VecInit(fastValidVec).asUInt.orR
loadUnits(i).io.fastpathIn.data := ParallelPriorityMux(fastValidVec, fastDataVec)
loadUnits(i).io.l2l_fwd_in.valid := VecInit(fastValidVec).asUInt.orR
loadUnits(i).io.l2l_fwd_in.data := ParallelPriorityMux(fastValidVec, fastDataVec)
loadUnits(i).io.l2l_fwd_in.dly_ld_err := ParallelPriorityMux(fastValidVec, fastErrorVec)
val fastMatch = ParallelPriorityMux(fastValidVec, fastMatchVec)
loadUnits(i).io.loadFastMatch := fastMatch
loadUnits(i).io.loadFastImm := io.loadFastImm(i)
loadUnits(i).io.ld_fast_match := fastMatch
loadUnits(i).io.ld_fast_imm := io.loadFastImm(i)
loadUnits(i).io.replay <> lsq.io.replay(i)
loadUnits(i).io.l2Hint <> io.l2Hint
loadUnits(i).io.l2_hint <> io.l2_hint
// passdown to lsq (load s2)
lsq.io.ldu.loadIn(i) <> loadUnits(i).io.lsq.loadIn
lsq.io.loadOut(i) <> loadUnits(i).io.lsq.loadOut
lsq.io.ldRawDataOut(i) <> loadUnits(i).io.lsq.ldRawData
lsq.io.ldu.ldin(i) <> loadUnits(i).io.lsq.ldin
lsq.io.ldout(i) <> loadUnits(i).io.lsq.uncache
lsq.io.ld_raw_data(i) <> loadUnits(i).io.lsq.ld_raw_data
lsq.io.trigger(i) <> loadUnits(i).io.lsq.trigger
lsq.io.l2Hint.valid := io.l2Hint.valid
lsq.io.l2Hint.bits.sourceId := io.l2Hint.bits.sourceId
lsq.io.l2_hint.valid := io.l2_hint.valid
lsq.io.l2_hint.bits.sourceId := io.l2_hint.bits.sourceId
// alter writeback exception info
io.s3_delayed_load_error(i) := loadUnits(i).io.s3_delayedLoadError
io.s3_delayed_load_error(i) := loadUnits(i).io.s3_dly_ld_err
// update mem dependency predictor
// io.memPredUpdate(i) := DontCare
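
The fastPriority rotation above, (i until LduCnt) ++ (0 until i), gives every load port a distinct preference order over the load-to-load forwarding sources, so port i consumes its own l2l_fwd_out first and only then its neighbours'. A runnable plain-Scala illustration of the rotation:

object FastPriorityDemo extends App {
  val LduCnt = 2
  for (i <- 0 until LduCnt) {
    val fastPriority = (i until LduCnt) ++ (0 until i)
    println(s"load unit $i prefers ports: ${fastPriority.mkString(", ")}")
  }
  // load unit 0 prefers ports: 0, 1
  // load unit 1 prefers ports: 1, 0
}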
......@@ -509,9 +511,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
stdExeUnits(i).io.fromFp := DontCare
stdExeUnits(i).io.out := DontCare
stu.io.redirect <> redirect
stu.io.feedbackSlow <> io.rsfeedback(exuParameters.LduCnt + i).feedbackSlow
stu.io.rsIdx <> io.rsfeedback(exuParameters.LduCnt + i).rsIdx
stu.io.redirect <> redirect
stu.io.feedback_slow <> io.rsfeedback(exuParameters.LduCnt + i).feedbackSlow
stu.io.rsIdx <> io.rsfeedback(exuParameters.LduCnt + i).rsIdx
// NOTE: just for dtlb's perf cnt
stu.io.isFirstIssue <> io.rsfeedback(exuParameters.LduCnt + i).isFirstIssue
stu.io.stin <> io.issue(exuParameters.LduCnt + i)
......@@ -525,7 +527,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.rsfeedback(exuParameters.LduCnt + i).feedbackFast := DontCare
// Lsq to sta unit
lsq.io.sta.storeMaskIn(i) <> stu.io.storeMaskOut
lsq.io.sta.storeMaskIn(i) <> stu.io.st_mask_out
// Lsq to std unit's rs
lsq.io.std.storeDataIn(i) := stData(i)
......@@ -690,7 +692,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
when (state =/= s_normal) {
// use store wb port instead of load
loadUnits(0).io.loadOut.ready := false.B
loadUnits(0).io.ldout.ready := false.B
// use load_0's TLB
atomicsUnit.io.dtlb <> amoTlb
......@@ -698,13 +700,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits.map(i => i.io.prefetch_req.valid := false.B)
// make sure there's no in-flight uops in load unit
assert(!loadUnits(0).io.loadOut.valid)
assert(!loadUnits(0).io.ldout.valid)
}
for (i <- 0 until exuParameters.StuCnt) when (state === s_atomics(i)) {
atomicsUnit.io.feedbackSlow <> io.rsfeedback(atomic_rs(i)).feedbackSlow
assert(!storeUnits(i).io.feedbackSlow.valid)
assert(!storeUnits(i).io.feedback_slow.valid)
}
lsq.io.exceptionAddr.isStore := io.lsqio.exceptionAddr.isStore
......
......@@ -672,6 +672,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
io.flushOut.valid := (state === s_idle) && valid(deqPtr.value) && (intrEnable || exceptionEnable || isFlushPipe) && !lastCycleFlush
io.flushOut.bits := DontCare
io.flushOut.bits.isRVC := deqDispatchData.isRVC
io.flushOut.bits.robIdx := deqPtr
io.flushOut.bits.ftqIdx := deqDispatchData.ftqIdx
io.flushOut.bits.ftqOffset := deqDispatchData.ftqOffset
......@@ -985,6 +986,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
wdata.ftqIdx := req.cf.ftqPtr
wdata.ftqOffset := req.cf.ftqOffset
wdata.isMove := req.eliminatedMove
wdata.isRVC := req.cf.pd.isRVC
wdata.pc := req.cf.pc
}
dispatchData.io.raddr := commitReadAddr_next
......
......@@ -531,6 +531,8 @@ class DCacheLoadIO(implicit p: Parameters) extends DCacheWordIO
val s2_hit = Input(Bool()) // hit signal for lsu,
val s2_first_hit = Input(Bool())
val s2_bank_conflict = Input(Bool())
val s2_wpu_pred_fail = Input(Bool())
val s2_mq_nack = Input(Bool())
// debug
val debug_s1_hit_way = Input(UInt(nWays.W))
......
......@@ -336,10 +336,10 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
real_miss := !s2_hit_dup_lsu
}
// io.debug_s2_cache_miss := real_miss
resp.bits.miss := real_miss || io.bank_conflict_slow || s2_wpu_pred_fail
resp.bits.miss := real_miss
io.lsu.s2_first_hit := s2_req.isFirstIssue && s2_hit
// load pipe need replay when there is a bank conflict or wpu predict fail
resp.bits.replay := (resp.bits.miss && (!io.miss_req.fire() || s2_nack)) || io.bank_conflict_slow || s2_wpu_pred_fail
resp.bits.replay := DontCare
resp.bits.replayCarry.valid := resp.bits.miss
resp.bits.replayCarry.real_way_en := s2_real_way_en
resp.bits.meta_prefetch := s2_hit_prefetch
......@@ -365,6 +365,8 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
io.lsu.debug_s1_hit_way := s1_tag_match_way_dup_dc
io.lsu.s1_disable_fast_wakeup := io.disable_ld_fast_wakeup
io.lsu.s2_bank_conflict := io.bank_conflict_slow
io.lsu.s2_wpu_pred_fail := s2_wpu_pred_fail
io.lsu.s2_mq_nack := (resp.bits.miss && (!io.miss_req.fire() || s2_nack))
assert(RegNext(s1_ready && s2_ready), "load pipeline should never be blocked")
// --------------------------------------------------------------------------------
......
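
With this change the dcache load pipe no longer folds bank conflicts and way-predictor misses into resp.bits.miss/replay; it exports the raw causes (s2_bank_conflict, s2_wpu_pred_fail, s2_mq_nack) and leaves the replay decision to the load unit. A minimal sketch of one way the LSU side could combine them into a fast-replay request; the exact condition in LoadUnit is not shown in this diff, so treat the OR below as an assumption:

import chisel3._

class DcacheRepCauses extends Bundle {
  val s2_bank_conflict = Bool() // bank conflict detected in dcache s2
  val s2_wpu_pred_fail = Bool() // way predictor mispredicted
  val s2_mq_nack       = Bool() // miss queue refused the miss request
}

class FastRepSketch extends Module {
  val io = IO(new Bundle {
    val causes   = Input(new DcacheRepCauses)
    val fast_rep = Output(Bool())
  })
  // Any of the three short-lived causes can be retried quickly in-pipe.
  io.fast_rep := io.causes.s2_bank_conflict ||
                 io.causes.s2_wpu_pred_fail ||
                 io.causes.s2_mq_nack
}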
......@@ -79,6 +79,8 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundleWithMicroOp with
// For load replay
val isLoadReplay = Bool()
val isFastPath = Bool()
val isFastReplay = Bool()
val replayCarry = new ReplayCarry
// For dcache miss load
......@@ -88,9 +90,12 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundleWithMicroOp with
val forward_tlDchannel = Bool()
val dcacheRequireReplay = Bool()
val delayedLoadError = Bool()
val lateKill = Bool()
val feedbacked = Bool()
// loadQueueReplay index.
val sleepIndex = UInt(log2Up(LoadQueueReplaySize).W)
val schedIndex = UInt(log2Up(LoadQueueReplaySize).W)
}
class LdPrefetchTrainBundle(implicit p: Parameters) extends LsPipelineBundle {
......@@ -116,7 +121,7 @@ class LdPrefetchTrainBundle(implicit p: Parameters) extends LsPipelineBundle {
isFirstIssue := input.isFirstIssue
hasROBEntry := input.hasROBEntry
dcacheRequireReplay := input.dcacheRequireReplay
sleepIndex := input.sleepIndex
schedIndex := input.schedIndex
meta_prefetch := DontCare
meta_access := DontCare
......@@ -125,17 +130,22 @@ class LdPrefetchTrainBundle(implicit p: Parameters) extends LsPipelineBundle {
replayCarry := DontCare
atomic := DontCare
isLoadReplay := DontCare
isFastPath := DontCare
isFastReplay := DontCare
handledByMSHR := DontCare
replacementUpdated := DontCare
delayedLoadError := DontCare
lateKill := DontCare
feedbacked := DontCare
}
}
class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
// load inst replay informations
val replayInfo = new LoadToLsqReplayIO
val rep_info = new LoadToLsqReplayIO
// queue entry data, except flag bits, will be updated if writeQueue is true,
// valid bit in LqWriteBundle will be ignored
val lqDataWenDup = Vec(6, Bool()) // dirty reg dup
val data_wen_dup = Vec(6, Bool()) // dirty reg dup
def fromLsPipelineBundle(input: LsPipelineBundle) = {
......@@ -158,16 +168,21 @@ class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
isFirstIssue := input.isFirstIssue
hasROBEntry := input.hasROBEntry
isLoadReplay := input.isLoadReplay
isFastPath := input.isFastPath
isFastReplay := input.isFastReplay
mshrid := input.mshrid
forward_tlDchannel := input.forward_tlDchannel
replayCarry := input.replayCarry
dcacheRequireReplay := input.dcacheRequireReplay
sleepIndex := input.sleepIndex
schedIndex := input.schedIndex
handledByMSHR := input.handledByMSHR
replacementUpdated := input.replacementUpdated
delayedLoadError := input.delayedLoadError
lateKill := input.lateKill
feedbacked := input.feedbacked
replayInfo := DontCare
lqDataWenDup := DontCare
rep_info := DontCare
data_wen_dup := DontCare
}
}
......@@ -225,39 +240,35 @@ class PipeLoadForwardQueryIO(implicit p: Parameters) extends LoadForwardQueryIO
//
// Note that query req may be !ready, as dcache is releasing a block
// If it happens, a replay from rs is needed.
class LoadViolationQueryReq(implicit p: Parameters) extends XSBundleWithMicroOp { // provide lqIdx
class LoadNukeQueryReq(implicit p: Parameters) extends XSBundleWithMicroOp { // provide lqIdx
// mask: load's data mask.
val mask = UInt(8.W)
val mask = UInt(8.W)
// paddr: load's paddr.
val paddr = UInt(PAddrBits.W)
val paddr = UInt(PAddrBits.W)
// dataInvalid: load data is invalid.
val datavalid = Bool()
val data_valid = Bool()
}
class LoadViolationQueryResp(implicit p: Parameters) extends XSBundle {
// replayFromFetch: ld-ld violation check success, replay from fetch.
val replayFromFetch = Bool()
class LoadNukeQueryResp(implicit p: Parameters) extends XSBundle {
// rep_frm_fetch: ld-ld violation check success, replay from fetch.
val rep_frm_fetch = Bool()
}
class LoadViolationQueryIO(implicit p: Parameters) extends XSBundle {
val req = Decoupled(new LoadViolationQueryReq)
val resp = Flipped(Valid(new LoadViolationQueryResp))
val preReq = Output(Bool())
val release = Output(Bool())
class LoadNukeQueryIO(implicit p: Parameters) extends XSBundle {
val req = Decoupled(new LoadNukeQueryReq)
val resp = Flipped(Valid(new LoadNukeQueryResp))
val revoke = Output(Bool())
}
class LoadReExecuteQueryIO(implicit p: Parameters) extends XSBundle {
class StoreNukeQueryIO(implicit p: Parameters) extends XSBundle {
// robIdx: Requestor's (a store instruction) rob index for match logic.
val robIdx = new RobPtr
// paddr: requestor's (a store instruction) physical address for match logic.
val paddr = UInt(PAddrBits.W)
val paddr = UInt(PAddrBits.W)
// mask: requestor's (a store instruction) data width mask for match logic.
val mask = UInt(8.W)
val mask = UInt(8.W)
}
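
The query handshake also shrinks: preReq/release are replaced by a single revoke. A load allocates its nuke-query entry when req fires in s2 and, if s3 decides the access must be replayed, asserts revoke one cycle later to free the entry it just took. A simplified sketch of the consumer side, mirroring the revoke loops in LoadQueueRAR/RAW further down (widths and ports are placeholders):

import chisel3._
import chisel3.util._

class RevokeSketch(numPorts: Int, numEntries: Int) extends Module {
  val io = IO(new Bundle {
    val reqFire = Input(Vec(numPorts, Bool()))  // query req fired (load s2)
    val enqIdx  = Input(Vec(numPorts, UInt(log2Ceil(numEntries).W)))
    val revoke  = Input(Vec(numPorts, Bool()))  // from load s3, one cycle later
  })
  val allocated = RegInit(VecInit(Seq.fill(numEntries)(false.B)))
  val lastFire  = RegNext(io.reqFire)           // remember last cycle's allocations
  val lastIdx   = RegNext(io.enqIdx)
  for (w <- 0 until numPorts) {
    when (io.reqFire(w)) { allocated(io.enqIdx(w)) := true.B }              // allocate at query
    when (lastFire(w) && io.revoke(w)) { allocated(lastIdx(w)) := false.B } // revoke frees it
  }
}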
// Store byte valid mask write bundle
......@@ -281,14 +292,14 @@ class LoadDataFromDcacheBundle(implicit p: Parameters) extends DCacheBundle {
val addrOffset = UInt(3.W) // for data selection
// forward tilelink D channel
val forward_D = Input(Bool())
val forwardData_D = Input(Vec(8, UInt(8.W)))
val forward_D = Bool()
val forwardData_D = Vec(8, UInt(8.W))
// forward mshr data
val forward_mshr = Input(Bool())
val forwardData_mshr = Input(Vec(8, UInt(8.W)))
val forward_mshr = Bool()
val forwardData_mshr = Vec(8, UInt(8.W))
val forward_result_valid = Input(Bool())
val forward_result_valid = Bool()
def dcacheData(): UInt = {
// old dcache
......
......@@ -48,8 +48,8 @@ class InflightBlockInfo(implicit p: Parameters) extends XSBundle {
class LsqEnqIO(implicit p: Parameters) extends XSBundle {
val canAccept = Output(Bool())
val needAlloc = Vec(exuParameters.LsExuCnt, Input(UInt(2.W)))
val req = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new MicroOp)))
val resp = Vec(exuParameters.LsExuCnt, Output(new LSIdx))
val req = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new MicroOp)))
val resp = Vec(exuParameters.LsExuCnt, Output(new LSIdx))
}
// Load / Store Queue Wrapper for XiangShan Out of Order LSU
......@@ -59,9 +59,9 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
val brqRedirect = Flipped(ValidIO(new Redirect))
val enq = new LsqEnqIO
val ldu = new Bundle() {
val storeLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
val loadLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
val loadIn = Vec(StorePipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
val ldin = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
}
val sta = new Bundle() {
val storeMaskIn = Vec(StorePipelineWidth, Flipped(Valid(new StoreMaskBundle))) // from store_s0, store mask, send to sq from rs
......@@ -71,8 +71,8 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
val std = new Bundle() {
val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // from store_s0, store data, send to sq from rs
}
val loadOut = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
val ldRawDataOut = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
val sbuffer = Vec(EnsbufferWidth, Decoupled(new DCacheWordReqWithVaddr))
val forward = Vec(LoadPipelineWidth, Flipped(new PipeLoadForwardQueryIO))
......@@ -84,7 +84,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
val uncache = new UncacheWordIO
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val sqEmpty = Output(Bool())
val lqReplayFull = Output(Bool())
val lq_rep_full = Output(Bool())
val sqFull = Output(Bool())
val lqFull = Output(Bool())
val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize+1).W))
......@@ -96,7 +96,7 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
val exceptionAddr = new ExceptionAddrIO
val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
val issuePtrExt = Output(new SqPtr)
val l2Hint = Input(Valid(new L2ToL1Hint()))
val l2_hint = Input(Valid(new L2ToL1Hint()))
})
val loadQueue = Module(new LoadQueue)
......@@ -140,44 +140,43 @@ class LsqWrapper(implicit p: Parameters) extends XSModule with HasDCacheParamete
storeQueue.io.storeAddrInRe <> io.sta.storeAddrInRe // from store_s2
storeQueue.io.storeDataIn <> io.std.storeDataIn // from store_s0
storeQueue.io.storeMaskIn <> io.sta.storeMaskIn // from store_s0
storeQueue.io.sbuffer <> io.sbuffer
storeQueue.io.mmioStout <> io.mmioStout
storeQueue.io.rob <> io.rob
storeQueue.io.sbuffer <> io.sbuffer
storeQueue.io.mmioStout <> io.mmioStout
storeQueue.io.rob <> io.rob
storeQueue.io.exceptionAddr.isStore := DontCare
storeQueue.io.sqCancelCnt <> io.sqCancelCnt
storeQueue.io.sqDeq <> io.sqDeq
storeQueue.io.sqEmpty <> io.sqEmpty
storeQueue.io.sqFull <> io.sqFull
storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE
storeQueue.io.sqDeq <> io.sqDeq
storeQueue.io.sqEmpty <> io.sqEmpty
storeQueue.io.sqFull <> io.sqFull
storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE
/* <------- DANGEROUS: Don't change sequence here ! -------> */
// load queue wiring
loadQueue.io.redirect <> io.brqRedirect
loadQueue.io.ldu <> io.ldu
loadQueue.io.loadOut <> io.loadOut
loadQueue.io.ldRawDataOut <> io.ldRawDataOut
loadQueue.io.rob <> io.rob
loadQueue.io.rollback <> io.rollback
loadQueue.io.replay <> io.replay
loadQueue.io.refill <> io.refill
loadQueue.io.release <> io.release
loadQueue.io.trigger <> io.trigger
loadQueue.io.redirect <> io.brqRedirect
loadQueue.io.ldu <> io.ldu
loadQueue.io.ldout <> io.ldout
loadQueue.io.ld_raw_data <> io.ld_raw_data
loadQueue.io.rob <> io.rob
loadQueue.io.rollback <> io.rollback
loadQueue.io.replay <> io.replay
loadQueue.io.refill <> io.refill
loadQueue.io.release <> io.release
loadQueue.io.trigger <> io.trigger
loadQueue.io.exceptionAddr.isStore := DontCare
loadQueue.io.lqCancelCnt <> io.lqCancelCnt
loadQueue.io.lqCancelCnt <> io.lqCancelCnt
loadQueue.io.sq.stAddrReadySqPtr <> storeQueue.io.stAddrReadySqPtr
loadQueue.io.sq.stAddrReadyVec <> storeQueue.io.stAddrReadyVec
loadQueue.io.sq.stAddrReadyVec <> storeQueue.io.stAddrReadyVec
loadQueue.io.sq.stDataReadySqPtr <> storeQueue.io.stDataReadySqPtr
loadQueue.io.sq.stDataReadyVec <> storeQueue.io.stDataReadyVec
loadQueue.io.sq.stIssuePtr <> storeQueue.io.stIssuePtr
loadQueue.io.sq.sqEmpty <> storeQueue.io.sqEmpty
loadQueue.io.sta.storeAddrIn <> io.sta.storeAddrIn // store_s1
loadQueue.io.std.storeDataIn <> io.std.storeDataIn // store_s0
loadQueue.io.lqFull <> io.lqFull
loadQueue.io.lqReplayFull <> io.lqReplayFull
loadQueue.io.lqDeq <> io.lqDeq
loadQueue.io.l2Hint.valid := io.l2Hint.valid
loadQueue.io.l2Hint.bits.sourceId := io.l2Hint.bits.sourceId
loadQueue.io.sq.stDataReadyVec <> storeQueue.io.stDataReadyVec
loadQueue.io.sq.stIssuePtr <> storeQueue.io.stIssuePtr
loadQueue.io.sq.sqEmpty <> storeQueue.io.sqEmpty
loadQueue.io.sta.storeAddrIn <> io.sta.storeAddrIn // store_s1
loadQueue.io.std.storeDataIn <> io.std.storeDataIn // store_s0
loadQueue.io.lqFull <> io.lqFull
loadQueue.io.lq_rep_full <> io.lq_rep_full
loadQueue.io.lqDeq <> io.lqDeq
loadQueue.io.l2_hint <> io.l2_hint
// rob commits for lsq is delayed for two cycles, which causes the delayed update for deqPtr in lq/sq
// s0: commit
......
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.dcache.ReplayCarry
import xiangshan.backend.rob.RobPtr
class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val redirect = Flipped(Valid(new Redirect))
val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
val exceptionAddr = new ExceptionAddrIO
})
val req_valid = RegInit(false.B)
val req = Reg(new LqWriteBundle)
// enqueue
// s1:
val s1_req = VecInit(io.req.map(_.bits))
val s1_valid = VecInit(io.req.map(x => x.valid))
// s2: delay 1 cycle
val s2_req = RegNext(s1_req)
val s2_valid = (0 until LoadPipelineWidth).map(i =>
RegNext(s1_valid(i)) &&
!s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
!s2_req(i).uop.robIdx.needFlush(io.redirect)
)
val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.cf.exceptionVec, lduCfg).asUInt.orR)
val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
for (w <- 0 until LoadPipelineWidth) {
s2_enqueue(w) := s2_valid(w) && s2_has_exception(w)
}
when (req.uop.robIdx.needFlush(io.redirect)) {
req_valid := false.B
} .elsewhen (s2_enqueue.asUInt.orR) {
req_valid := req_valid || true.B
}
def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
assert(valid.length == bits.length)
if (valid.length == 0 || valid.length == 1) {
(valid, bits)
} else if (valid.length == 2) {
val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
for (i <- res.indices) {
res(i).valid := valid(i)
res(i).bits := bits(i)
}
val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
(Seq(oldest.valid), Seq(oldest.bits))
} else {
val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
selectOldest(left._1 ++ right._1, left._2 ++ right._2)
}
}
val reqSel = selectOldest(s2_enqueue, s2_req)
when (req_valid) {
req := Mux(reqSel._1(0) && isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx), reqSel._2(0), req)
} .elsewhen (s2_enqueue.asUInt.orR) {
req := reqSel._2(0)
}
io.exceptionAddr.vaddr := req.vaddr
XSPerfAccumulate("exception", !RegNext(req_valid) && req_valid)
// end
}
\ No newline at end of file
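
selectOldest above is a divide-and-conquer tournament: split the request vector in half, recursively pick the oldest (by robIdx) on each side, then run the two winners through the two-element base case. The same shape in plain Scala over integers, with a smaller number standing in for an older robIdx, just to make the recursion easy to follow:

def selectOldestDemo(valid: Seq[Boolean], bits: Seq[Int]): (Seq[Boolean], Seq[Int]) = {
  require(valid.length == bits.length)
  if (valid.length <= 1) {
    (valid, bits)
  } else if (valid.length == 2) {
    // Oldest = smaller value when both valid; otherwise prefer whichever is valid
    // (falling back to index 1 when neither is, as the hardware Mux does).
    val pick =
      if (valid(0) && valid(1)) { if (bits(0) > bits(1)) 1 else 0 }
      else if (valid(0)) 0 else 1
    (Seq(valid(pick)), Seq(bits(pick)))
  } else {
    val half = valid.length / 2
    val l = selectOldestDemo(valid.take(half), bits.take(half))
    val r = selectOldestDemo(valid.drop(half), bits.drop(half))
    selectOldestDemo(l._1 ++ r._1, l._2 ++ r._2)
  }
}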
......@@ -78,73 +78,7 @@ class LqTriggerIO(implicit p: Parameters) extends XSBundle {
val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}
class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val redirect = Flipped(Valid(new Redirect))
val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
val exceptionAddr = new ExceptionAddrIO
})
val req_valid = RegInit(false.B)
val req = Reg(new LqWriteBundle)
// enqueue
// s1:
val s1_req = VecInit(io.req.map(_.bits))
val s1_valid = VecInit(io.req.map(x => x.valid))
// s2: delay 1 cycle
val s2_req = RegNext(s1_req)
val s2_valid = (0 until LoadPipelineWidth).map(i =>
RegNext(s1_valid(i)) &&
!s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
!s2_req(i).uop.robIdx.needFlush(io.redirect)
)
val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.cf.exceptionVec, lduCfg).asUInt.orR)
val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
for (w <- 0 until LoadPipelineWidth) {
s2_enqueue(w) := s2_valid(w) && s2_has_exception(w)
}
when (req.uop.robIdx.needFlush(io.redirect)) {
req_valid := false.B
} .elsewhen (s2_enqueue.asUInt.orR) {
req_valid := req_valid || true.B
}
def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
assert(valid.length == bits.length)
if (valid.length == 0 || valid.length == 1) {
(valid, bits)
} else if (valid.length == 2) {
val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
for (i <- res.indices) {
res(i).valid := valid(i)
res(i).bits := bits(i)
}
val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
(Seq(oldest.valid), Seq(oldest.bits))
} else {
val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
selectOldest(left._1 ++ right._1, left._2 ++ right._2)
}
}
val reqSel = selectOldest(s2_enqueue, s2_req)
when (req_valid) {
req := Mux(reqSel._1(0) && isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx), reqSel._2(0), req)
} .elsewhen (s2_enqueue.asUInt.orR) {
req := reqSel._2(0)
}
io.exceptionAddr.vaddr := req.vaddr
XSPerfAccumulate("exception", !RegNext(req_valid) && req_valid)
// end
}
class LoadQueue(implicit p: Parameters) extends XSModule
with HasDCacheParameters
......@@ -156,9 +90,9 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val redirect = Flipped(Valid(new Redirect))
val enq = new LqEnqIO
val ldu = new Bundle() {
val storeLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
val loadLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
val loadIn = Vec(StorePipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
val ldin = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
}
val sta = new Bundle() {
val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
......@@ -168,14 +102,14 @@ class LoadQueue(implicit p: Parameters) extends XSModule
}
val sq = new Bundle() {
val stAddrReadySqPtr = Input(new SqPtr)
val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
val stDataReadySqPtr = Input(new SqPtr)
val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
val stIssuePtr = Input(new SqPtr)
val sqEmpty = Input(Bool())
val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
val stIssuePtr = Input(new SqPtr)
val sqEmpty = Input(Bool())
}
val loadOut = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
val ldRawDataOut = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
val refill = Flipped(ValidIO(new Refill))
val release = Flipped(Valid(new Release))
......@@ -187,9 +121,9 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val lqFull = Output(Bool())
val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
val lqReplayFull = Output(Bool())
val lq_rep_full = Output(Bool())
val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
val l2Hint = Input(Valid(new L2ToL1Hint()))
val l2_hint = Input(Valid(new L2ToL1Hint()))
})
val loadQueueRAR = Module(new LoadQueueRAR) // read-after-read violation
......@@ -203,37 +137,35 @@ class LoadQueue(implicit p: Parameters) extends XSModule
* LoadQueueRAR
*/
loadQueueRAR.io.redirect <> io.redirect
loadQueueRAR.io.release <> io.release
loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
loadQueueRAR.io.release <> io.release
loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
for (w <- 0 until LoadPipelineWidth) {
loadQueueRAR.io.query(w).req <> io.ldu.loadLoadViolationQuery(w).req // from load_s1
loadQueueRAR.io.query(w).resp <> io.ldu.loadLoadViolationQuery(w).resp // to load_s2
loadQueueRAR.io.query(w).preReq := io.ldu.loadLoadViolationQuery(w).preReq // from load_s1
loadQueueRAR.io.query(w).release := io.ldu.loadLoadViolationQuery(w).release // from load_s3
loadQueueRAR.io.query(w).req <> io.ldu.ldld_nuke_query(w).req // from load_s1
loadQueueRAR.io.query(w).resp <> io.ldu.ldld_nuke_query(w).resp // to load_s2
loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
}
/**
* LoadQueueRAW
*/
loadQueueRAW.io.redirect <> io.redirect
loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
loadQueueRAW.io.redirect <> io.redirect
loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
for (w <- 0 until LoadPipelineWidth) {
loadQueueRAW.io.query(w).req <> io.ldu.storeLoadViolationQuery(w).req // from load_s1
loadQueueRAW.io.query(w).resp <> io.ldu.storeLoadViolationQuery(w).resp // to load_s2
loadQueueRAW.io.query(w).preReq := io.ldu.storeLoadViolationQuery(w).preReq // from load_s1
loadQueueRAW.io.query(w).release := io.ldu.storeLoadViolationQuery(w).release // from load_s3
loadQueueRAW.io.query(w).req <> io.ldu.stld_nuke_query(w).req // from load_s1
loadQueueRAW.io.query(w).resp <> io.ldu.stld_nuke_query(w).resp // to load_s2
loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
}
/**
* VirtualLoadQueue
*/
virtualLoadQueue.io.redirect <> io.redirect
virtualLoadQueue.io.enq <> io.enq
virtualLoadQueue.io.loadIn <> io.ldu.loadIn // from load_s3
virtualLoadQueue.io.lqFull <> io.lqFull
virtualLoadQueue.io.lqDeq <> io.lqDeq
virtualLoadQueue.io.redirect <> io.redirect
virtualLoadQueue.io.enq <> io.enq
virtualLoadQueue.io.ldin <> io.ldu.ldin // from load_s3
virtualLoadQueue.io.lqFull <> io.lqFull
virtualLoadQueue.io.lqDeq <> io.lqDeq
virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
/**
......@@ -241,23 +173,23 @@ class LoadQueue(implicit p: Parameters) extends XSModule
*/
exceptionBuffer.io.redirect <> io.redirect
for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
buff.valid := io.ldu.loadIn(w).valid // from load_s3
buff.bits := io.ldu.loadIn(w).bits
buff.valid := io.ldu.ldin(w).valid // from load_s3
buff.bits := io.ldu.ldin(w).bits
}
io.exceptionAddr <> exceptionBuffer.io.exceptionAddr
/**
* Load uncache buffer
*/
uncacheBuffer.io.redirect <> io.redirect
uncacheBuffer.io.loadOut <> io.loadOut
uncacheBuffer.io.loadRawDataOut <> io.ldRawDataOut
uncacheBuffer.io.rob <> io.rob
uncacheBuffer.io.uncache <> io.uncache
uncacheBuffer.io.trigger <> io.trigger
uncacheBuffer.io.redirect <> io.redirect
uncacheBuffer.io.ldout <> io.ldout
uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
uncacheBuffer.io.rob <> io.rob
uncacheBuffer.io.uncache <> io.uncache
uncacheBuffer.io.trigger <> io.trigger
for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
buff.valid := io.ldu.loadIn(w).valid // from load_s3
buff.bits := io.ldu.loadIn(w).bits // from load_s3
buff.valid := io.ldu.ldin(w).valid // from load_s3
buff.bits := io.ldu.ldin(w).bits // from load_s3
}
// rollback
......@@ -292,23 +224,23 @@ class LoadQueue(implicit p: Parameters) extends XSModule
/**
* LoadQueueReplay
*/
loadQueueReplay.io.redirect <> io.redirect
loadQueueReplay.io.enq <> io.ldu.loadIn // from load_s3
loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
loadQueueReplay.io.replay <> io.replay
loadQueueReplay.io.refill <> io.refill
loadQueueReplay.io.redirect <> io.redirect
loadQueueReplay.io.enq <> io.ldu.ldin // from load_s3
loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
loadQueueReplay.io.replay <> io.replay
loadQueueReplay.io.refill <> io.refill
loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
loadQueueReplay.io.lqFull <> io.lqReplayFull
loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
loadQueueReplay.io.lqFull <> io.lq_rep_full
loadQueueReplay.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
loadQueueReplay.io.rarFull <> loadQueueRAR.io.lqFull
loadQueueReplay.io.rawFull <> loadQueueRAW.io.lqFull
loadQueueReplay.io.l2_hint <> io.l2_hint
loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl
loadQueueReplay.io.ldWbPtr := virtualLoadQueue.io.ldWbPtr
loadQueueReplay.io.rarFull := loadQueueRAR.io.lqFull
loadQueueReplay.io.rawFull := loadQueueRAW.io.lqFull
loadQueueReplay.io.l2Hint <> io.l2Hint
val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
XSPerfAccumulate("full_mask_000", full_mask === 0.U)
......
......@@ -31,10 +31,19 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
with HasPerfEvents
{
val io = IO(new Bundle() {
// control
val redirect = Flipped(Valid(new Redirect))
val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
// violation query
val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))
// release cacheline
val release = Flipped(Valid(new Release))
// from VirtualLoadQueue
val ldWbPtr = Input(new LqPtr)
// global
val lqFull = Output(Bool())
})
......@@ -124,7 +133,7 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
// Fill info
uop(enqIndex) := enq.bits.uop
released(enqIndex) :=
enq.bits.datavalid &&
enq.bits.data_valid &&
(release2Cycle.valid &&
enq.bits.paddr(PAddrBits-1, DCacheLineOffset) === release2Cycle.bits.paddr(PAddrBits-1, DCacheLineOffset) ||
release1Cycle.valid &&
......@@ -150,17 +159,17 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
}
}
// if need replay release entry
// if need replay revoke entry
val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
val lastAllocIndex = RegNext(enqIndexVec)
for ((release, w) <- io.query.map(_.release).zipWithIndex) {
val releaseValid = release && lastCanAccept(w)
val releaseIndex = lastAllocIndex(w)
for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
val revokeValid = revoke && lastCanAccept(w)
val revokeIndex = lastAllocIndex(w)
when (allocated(releaseIndex) && releaseValid) {
allocated(releaseIndex) := false.B
freeMaskVec(releaseIndex) := true.B
when (allocated(revokeIndex) && revokeValid) {
allocated(revokeIndex) := false.B
freeMaskVec(revokeIndex) := true.B
}
}
......@@ -186,7 +195,7 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
// Load-to-Load violation check result
val ldLdViolationMask = WireInit(matchMask & RegNext(released.asUInt))
ldLdViolationMask.suggestName("ldLdViolationMask_" + w)
query.resp.bits.replayFromFetch := ldLdViolationMask.orR
query.resp.bits.rep_frm_fetch := ldLdViolationMask.orR
}
......@@ -211,7 +220,7 @@ class LoadQueueRAR(implicit p: Parameters) extends XSModule
val canEnqCount = PopCount(io.query.map(_.req.fire))
val validCount = freeList.io.validCount
val allowEnqueue = validCount <= (LoadQueueRARSize - LoadPipelineWidth).U
val ldLdViolationCount = PopCount(io.query.map(_.resp).map(resp => resp.valid && resp.bits.replayFromFetch))
val ldLdViolationCount = PopCount(io.query.map(_.resp).map(resp => resp.valid && resp.bits.rep_frm_fetch))
QueuePerf(LoadQueueRARSize, validCount, !allowEnqueue)
XSPerfAccumulate("enq", canEnqCount)
......
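
The RAR response logic condenses to one line: a query triggers rep_frm_fetch when some matching same-address entry has had its cacheline released since it executed, which is exactly the window in which another hart could have written the line and broken load-load ordering. Sketch of that response term (signal names from the diff, widths simplified):

import chisel3._

class RarRespSketch(entries: Int) extends Module {
  val io = IO(new Bundle {
    val matchMask     = Input(UInt(entries.W)) // same-address entries hit by the query
    val released      = Input(UInt(entries.W)) // entries whose cacheline was released
    val rep_frm_fetch = Output(Bool())
  })
  // released is taken one cycle delayed, matching
  // ldLdViolationMask = matchMask & RegNext(released.asUInt) above.
  io.rep_frm_fetch := (io.matchMask & RegNext(io.released)).orR
}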
......@@ -34,13 +34,22 @@ class LoadQueueRAW(implicit p: Parameters) extends XSModule
with HasPerfEvents
{
val io = IO(new Bundle() {
// control
val redirect = Flipped(ValidIO(new Redirect))
val query = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
// violation query
val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))
// from store unit s1
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
// global rollback flush
val rollback = Output(Valid(new Redirect))
// to LoadQueueReplay
val stAddrReadySqPtr = Input(new SqPtr)
val stIssuePtr = Input(new SqPtr)
val lqFull = Output(Bool())
val stIssuePtr = Input(new SqPtr)
val lqFull = Output(Bool())
})
println("LoadQueueRAW: size " + LoadQueueRAWSize)
......@@ -144,13 +153,13 @@ class LoadQueueRAW(implicit p: Parameters) extends XSModule
// Fill info
uop(enqIndex) := enq.bits.uop
datavalid(enqIndex) := enq.bits.datavalid
datavalid(enqIndex) := enq.bits.data_valid
}
}
for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
query.valid := RegNext(io.query(w).req.valid)
query.bits.replayFromFetch := RegNext(false.B)
query.bits.rep_frm_fetch := RegNext(false.B)
}
// LoadQueueRAW deallocate
......@@ -175,13 +184,13 @@ class LoadQueueRAW(implicit p: Parameters) extends XSModule
val lastCanAccept = RegNext(VecInit(needEnqueue.zip(enqValidVec).map(x => x._1 && x._2)))
val lastAllocIndex = RegNext(enqIndexVec)
for ((release, w) <- io.query.map(_.release).zipWithIndex) {
val releaseValid = release && lastCanAccept(w)
val releaseIndex = lastAllocIndex(w)
for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
val revokeValid = revoke && lastCanAccept(w)
val revokeIndex = lastAllocIndex(w)
when (allocated(releaseIndex) && releaseValid) {
allocated(releaseIndex) := false.B
freeMaskVec(releaseIndex) := true.B
when (allocated(revokeIndex) && revokeValid) {
allocated(revokeIndex) := false.B
freeMaskVec(revokeIndex) := true.B
}
}
freeList.io.free := freeMaskVec.asUInt
......@@ -334,8 +343,8 @@ class LoadQueueRAW(implicit p: Parameters) extends XSModule
val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
for (w <- 0 until StorePipelineWidth) {
val detectedRollback = detectRollback(w)
rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
rollbackLqWb(w).bits.uop := detectedRollback._2
rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
rollbackLqWb(w).bits.uop := detectedRollback._2
rollbackLqWb(w).bits.flag := w.U
stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqPtr, TotalSelectCycles)
stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqOffset, TotalSelectCycles)
......@@ -356,15 +365,15 @@ class LoadQueueRAW(implicit p: Parameters) extends XSModule
val rollbackStFtqOffset = stFtqOffset(rollbackUopExt.flag)
// check if rollback request is still valid in parallel
io.rollback.bits := DontCare
io.rollback.bits.robIdx := rollbackUop.robIdx
io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
io.rollback.bits.stFtqIdx := rollbackStFtqIdx
io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
io.rollback.bits := DontCare
io.rollback.bits.robIdx := rollbackUop.robIdx
io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
io.rollback.bits.stFtqIdx := rollbackStFtqIdx
io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
io.rollback.bits.stFtqOffset := rollbackStFtqOffset
io.rollback.bits.level := RedirectLevel.flush
io.rollback.bits.interrupt := DontCare
io.rollback.bits.cfiUpdate := DontCare
io.rollback.bits.level := RedirectLevel.flush
io.rollback.bits.interrupt := DontCare
io.rollback.bits.cfiUpdate := DontCare
io.rollback.bits.cfiUpdate.target := rollbackUop.cf.pc
io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
// io.rollback.bits.pc := DontCare
......
......@@ -512,14 +512,14 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val addrInvalidSqIdx2 = OHToUInt(Reverse(PriorityEncoderOH(Reverse(addrInvalidMask2Reg))))
val addrInvalidSqIdx = Mux(addrInvalidMask2Reg.orR, addrInvalidSqIdx2, addrInvalidSqIdx1)
when (addrInvalidFlag) {
when (addrInvalidFlag && !RegNext(io.forward(i).uop.cf.loadWaitStrict)) {
io.forward(i).addrInvalidSqIdx.flag := Mux(!s2_differentFlag || addrInvalidSqIdx >= s2_deqPtrExt.value, s2_deqPtrExt.flag, s2_enqPtrExt.flag)
io.forward(i).addrInvalidSqIdx.value := addrInvalidSqIdx
} .otherwise {
// may be store inst has been written to sbuffer already.
io.forward(i).addrInvalidSqIdx := RegNext(io.forward(i).uop.sqIdx)
}
io.forward(i).addrInvalid := addrInvalidFlag
io.forward(i).addrInvalid := Mux(RegNext(io.forward(i).uop.cf.loadWaitStrict), RegNext(hasInvalidAddr), addrInvalidFlag)
// data invalid sq index
// make chisel happy
......
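
The StoreQueue change above special-cases loadWaitStrict loads: instead of the per-entry match (addrInvalidFlag), such a load is reported address-invalid whenever any older store address is still unknown (RegNext(hasInvalidAddr)), and its blocking sqIdx falls back to its own sqIdx, so it waits for all older store addresses to resolve. A stripped-down sketch of just that mux; everything except the two flags is a stand-in:

import chisel3._

class AddrInvalidSketch extends Module {
  val io = IO(new Bundle {
    val loadWaitStrict  = Input(Bool()) // registered one cycle earlier in the real code
    val hasInvalidAddr  = Input(Bool()) // some older store address still unknown
    val addrInvalidFlag = Input(Bool()) // per-entry same-address match
    val addrInvalid     = Output(Bool())
  })
  io.addrInvalid := Mux(io.loadWaitStrict, io.hasInvalidAddr, io.addrInvalidFlag)
}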
......@@ -38,8 +38,8 @@ class UncacheBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModu
val req = Flipped(Valid(new LqWriteBundle))
// writeback mmio data
val loadOut = DecoupledIO(new ExuOutput)
val loadRawDataOut = Output(new LoadDataFromLQBundle)
val ldout = DecoupledIO(new ExuOutput)
val ld_raw_data = Output(new LoadDataFromLQBundle)
// rob: uncache commit
val rob = Flipped(new RobLsqIO)
......@@ -77,18 +77,18 @@ class UncacheBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModu
XSError(req_valid, p"UncacheBuffer: You can not write an valid entry: $entryIndex")
req_valid := true.B
req := io.req.bits
} .elsewhen (io.loadOut.fire) {
} .elsewhen (io.ldout.fire) {
req_valid := false.B
}
when (io.req.valid) {
when (io.req.bits.lqDataWenDup(5)) {
when (io.req.bits.data_wen_dup(5)) {
triggerResult := io.trigger.hitLoadAddrTriggerHitVec
}
}
io.trigger.lqLoadAddrTriggerHitVec := Mux(
io.loadOut.valid,
io.ldout.valid,
RegNext(triggerResult),
VecInit(Seq.fill(3)(false.B))
)
......@@ -136,13 +136,13 @@ class UncacheBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModu
io.select := uncacheState =/= s_idle
io.uncache.req.valid := uncacheState === s_req
io.uncache.req.bits := DontCare
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.valid := uncacheState === s_req
io.uncache.req.bits := DontCare
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.data := DontCare
io.uncache.req.bits.addr := req.paddr
io.uncache.req.bits.mask := req.mask
io.uncache.req.bits.id := io.id
io.uncache.req.bits.id := io.id
io.uncache.req.bits.instrtype := DontCare
io.uncache.req.bits.replayCarry := DontCare
io.uncache.req.bits.atomic := true.B
......@@ -180,34 +180,34 @@ class UncacheBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModu
))
val rdataPartialLoad = rdataHelper(selUop, rdataSel)
io.loadOut.valid := (uncacheState === s_wait) && !uncacheCommitFired
io.loadOut.bits := DontCare
io.loadOut.bits.uop := selUop
io.loadOut.bits.uop.lqIdx := req.uop.lqIdx
io.loadOut.bits.data := rdataPartialLoad
io.loadOut.bits.redirectValid := false.B
io.loadOut.bits.redirect := DontCare
io.loadOut.bits.debug.isMMIO := true.B
io.loadOut.bits.debug.paddr := req.paddr
io.loadOut.bits.debug.vaddr := req.vaddr
io.loadOut.bits.fflags := DontCare
io.loadRawDataOut.lqData := uncacheData
io.loadRawDataOut.uop := req.uop
io.loadRawDataOut.addrOffset := req.paddr
io.ldout.valid := (uncacheState === s_wait) && !uncacheCommitFired
io.ldout.bits := DontCare
io.ldout.bits.uop := selUop
io.ldout.bits.uop.lqIdx := req.uop.lqIdx
io.ldout.bits.data := rdataPartialLoad
io.ldout.bits.redirectValid := false.B
io.ldout.bits.redirect := DontCare
io.ldout.bits.debug.isMMIO := true.B
io.ldout.bits.debug.paddr := req.paddr
io.ldout.bits.debug.vaddr := req.vaddr
io.ldout.bits.fflags := DontCare
io.ld_raw_data.lqData := uncacheData
io.ld_raw_data.uop := req.uop
io.ld_raw_data.addrOffset := req.paddr
val dummyCtrl = RegNext(io.loadOut.valid)
val dummyCtrl = RegNext(io.ldout.valid)
uncacheCommitFire := false.B
when (io.loadOut.fire && dummyCtrl) {
when (io.ldout.fire && dummyCtrl) {
req_valid := false.B
uncacheCommitFire := true.B
uncacheCommitFired := true.B
XSInfo("int load miss write to cbd robidx %d lqidx %d pc 0x%x mmio %x\n",
io.loadOut.bits.uop.robIdx.asUInt,
io.loadOut.bits.uop.lqIdx.asUInt,
io.loadOut.bits.uop.cf.pc,
io.ldout.bits.uop.robIdx.asUInt,
io.ldout.bits.uop.lqIdx.asUInt,
io.ldout.bits.uop.cf.pc,
true.B
)
}
......@@ -218,14 +218,15 @@ class UncacheBufferEntry(entryIndex: Int)(implicit p: Parameters) extends XSModu
class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
// control
val redirect = Flipped(Valid(new Redirect))
//
val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
// writeback mmio data
val loadOut = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
val loadRawDataOut = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
// rob: uncache commit
val rob = Flipped(new RobLsqIO)
......@@ -275,9 +276,9 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
// set writeback default
for (w <- 0 until LoadPipelineWidth) {
io.loadOut(w).valid := false.B
io.loadOut(w).bits := DontCare
io.loadRawDataOut(w) := DontCare
io.ldout(w).valid := false.B
io.ldout(w).bits := DontCare
io.ld_raw_data(w) := DontCare
}
// set trigger default
......@@ -304,7 +305,7 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
!s2_req(i).uop.robIdx.needFlush(io.redirect)
})
val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.cf.exceptionVec, lduCfg).asUInt.orR)
val s2_need_replay = s2_req.map(_.replayInfo.needReplay())
val s2_need_replay = s2_req.map(_.rep_info.need_rep)
val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
for (w <- 0 until LoadPipelineWidth) {
......@@ -331,19 +332,19 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
//
val uncacheReq = Wire(Valid(io.uncache.req.bits.cloneType))
val loadOut = Wire(Valid(io.loadOut(0).bits.cloneType))
val loadRawDataOut = Wire(io.loadRawDataOut(0).cloneType)
val ldout = Wire(Valid(io.ldout(0).bits.cloneType))
val ld_raw_data = Wire(io.ld_raw_data(0).cloneType)
val lqLoadAddrTriggerHitVec = Wire(io.trigger(0).lqLoadAddrTriggerHitVec.cloneType)
val commitFire = Wire(Bool())
// init
uncacheReq.valid := false.B
uncacheReq.bits := DontCare
loadOut.valid := false.B
loadOut.bits := DontCare
loadRawDataOut := DontCare
uncacheReq.bits := DontCare
ldout.valid := false.B
ldout.bits := DontCare
ld_raw_data := DontCare
lqLoadAddrTriggerHitVec := DontCare
commitFire := false.B
commitFire := false.B
entries.zipWithIndex.foreach {
case (e, i) =>
......@@ -362,16 +363,15 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
// uncache logic
e.io.rob <> io.rob
e.io.uncache.req.ready <> io.uncache.req.ready
e.io.loadOut.ready <> io.loadOut(0).ready
e.io.ldout.ready <> io.ldout(0).ready
when (e.io.select) {
uncacheReq.valid := e.io.uncache.req.valid
uncacheReq.bits := e.io.uncache.req.bits
loadOut.valid := e.io.loadOut.valid
loadOut.bits := e.io.loadOut.bits
loadRawDataOut := e.io.loadRawDataOut
commitFire := e.io.commitFire
ldout := e.io.ldout
ld_raw_data := e.io.ld_raw_data
commitFire := e.io.commitFire
// Read vaddr for mem exception
// no inst will be commited 1 cycle before tval update
// read vaddr for mmio, and only port 0 is used
......@@ -384,10 +384,10 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
}
io.uncache.req.valid := RegNext(uncacheReq.valid)
io.uncache.req.bits := RegNext(uncacheReq.bits)
io.loadOut(0).valid := RegNext(loadOut.valid) && !RegNext(commitFire)
io.loadOut(0).bits := RegNext(loadOut.bits)
io.loadRawDataOut(0) := RegNext(loadRawDataOut)
io.uncache.req.bits := RegNext(uncacheReq.bits)
io.ldout(0).valid := RegNext(ldout.valid) && !RegNext(commitFire)
io.ldout(0).bits := RegNext(ldout.bits)
io.ld_raw_data(0) := RegNext(ld_raw_data)
io.trigger(0).lqLoadAddrTriggerHitVec := RegNext(lqLoadAddrTriggerHitVec)
for (i <- 0 until LoadPipelineWidth) {
......@@ -404,7 +404,7 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
// dealloc logic
entries.zipWithIndex.foreach {
case (e, i) =>
when ((e.io.select && io.loadOut(0).fire) || e.io.flush) {
when ((e.io.select && io.ldout(0).fire) || e.io.flush) {
freeMaskVec(i) := true.B
}
}
......@@ -461,11 +461,11 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
}
val (rollbackValid, rollbackUop) = detectRollback()
io.rollback.bits := DontCare
io.rollback.bits.robIdx := rollbackUop.robIdx
io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
io.rollback.bits := DontCare
io.rollback.bits.robIdx := rollbackUop.robIdx
io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
io.rollback.bits.level := RedirectLevel.flush
io.rollback.bits.level := RedirectLevel.flush
io.rollback.bits.cfiUpdate.target := rollbackUop.cf.pc
io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
......@@ -483,15 +483,15 @@ class UncacheBuffer(implicit p: Parameters) extends XSModule with HasCircularQue
XSPerfAccumulate("mmioCycle", VecInit(entries.map(_.io.select)).asUInt.orR)
XSPerfAccumulate("mmioCnt", io.uncache.req.fire)
XSPerfAccumulate("mmio_writeback_success", io.loadOut(0).fire)
XSPerfAccumulate("mmio_writeback_blocked", io.loadOut(0).valid && !io.loadOut(0).ready)
XSPerfAccumulate("mmio_writeback_success", io.ldout(0).fire)
XSPerfAccumulate("mmio_writeback_blocked", io.ldout(0).valid && !io.ldout(0).ready)
XSPerfAccumulate("uncache_full_rollback", io.rollback.valid)
val perfEvents: Seq[(String, UInt)] = Seq(
("mmioCycle", VecInit(entries.map(_.io.select)).asUInt.orR),
("mmioCnt", io.uncache.req.fire),
("mmio_writeback_success", io.loadOut(0).fire),
("mmio_writeback_blocked", io.loadOut(0).valid && !io.loadOut(0).ready),
("mmio_writeback_success", io.ldout(0).fire),
("mmio_writeback_blocked", io.ldout(0).valid && !io.ldout(0).ready),
("uncache_full_rollback", io.rollback.valid)
)
// end
......
......@@ -32,12 +32,18 @@ class VirtualLoadQueue(implicit p: Parameters) extends XSModule
with HasPerfEvents
{
val io = IO(new Bundle() {
val redirect = Flipped(Valid(new Redirect))
val enq = new LqEnqIO
val loadIn = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
val ldWbPtr = Output(new LqPtr)
val lqFull = Output(Bool())
val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
// control
val redirect = Flipped(Valid(new Redirect))
// from dispatch
val enq = new LqEnqIO
// from ldu s3
val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
// to LoadQueueReplay and LoadQueueRAR
val ldWbPtr = Output(new LqPtr)
// global
val lqFull = Output(Bool())
// to dispatch
val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
})
......@@ -184,56 +190,56 @@ class VirtualLoadQueue(implicit p: Parameters) extends XSModule
for(i <- 0 until LoadPipelineWidth) {
// most lq status need to be updated immediately after load writeback to lq
// flag bits in lq needs to be updated accurately
io.loadIn(i).ready := true.B
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
io.ldin(i).ready := true.B
val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value
when (io.loadIn(i).valid) {
val hasExceptions = ExceptionNO.selectByFu(io.loadIn(i).bits.uop.cf.exceptionVec, lduCfg).asUInt.orR
val needReplay = io.loadIn(i).bits.replayInfo.needReplay()
when (io.ldin(i).valid) {
val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.cf.exceptionVec, lduCfg).asUInt.orR
val need_rep = io.ldin(i).bits.rep_info.need_rep
when (!needReplay) {
when (!need_rep) {
// update control flag
addrvalid(loadWbIndex) := hasExceptions || !io.loadIn(i).bits.tlbMiss
addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
datavalid(loadWbIndex) :=
(if (EnableFastForward) {
hasExceptions ||
io.loadIn(i).bits.mmio ||
!io.loadIn(i).bits.miss && // dcache miss
!io.loadIn(i).bits.dcacheRequireReplay // do not writeback if that inst will be resend from rs
io.ldin(i).bits.mmio ||
!io.ldin(i).bits.miss && // dcache miss
!io.ldin(i).bits.dcacheRequireReplay // do not writeback if that inst will be resend from rs
} else {
hasExceptions ||
io.loadIn(i).bits.mmio ||
!io.loadIn(i).bits.miss
io.ldin(i).bits.mmio ||
!io.ldin(i).bits.miss
})
//
when (io.loadIn(i).bits.lqDataWenDup(1)) {
uop(loadWbIndex).pdest := io.loadIn(i).bits.uop.pdest
when (io.ldin(i).bits.data_wen_dup(1)) {
uop(loadWbIndex).pdest := io.ldin(i).bits.uop.pdest
}
when (io.loadIn(i).bits.lqDataWenDup(2)) {
uop(loadWbIndex).cf := io.loadIn(i).bits.uop.cf
when (io.ldin(i).bits.data_wen_dup(2)) {
uop(loadWbIndex).cf := io.ldin(i).bits.uop.cf
}
when (io.loadIn(i).bits.lqDataWenDup(3)) {
uop(loadWbIndex).ctrl := io.loadIn(i).bits.uop.ctrl
when (io.ldin(i).bits.data_wen_dup(3)) {
uop(loadWbIndex).ctrl := io.ldin(i).bits.uop.ctrl
}
when (io.loadIn(i).bits.lqDataWenDup(4)) {
uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
when (io.ldin(i).bits.data_wen_dup(4)) {
uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
}
uop(loadWbIndex).debugInfo := io.loadIn(i).bits.replayInfo.debug
uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug
// Debug info
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
debug_paddr(loadWbIndex) := io.loadIn(i).bits.paddr
debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr
XSInfo(io.loadIn(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
io.loadIn(i).bits.uop.lqIdx.asUInt,
io.loadIn(i).bits.uop.cf.pc,
io.loadIn(i).bits.vaddr,
io.loadIn(i).bits.paddr,
io.loadIn(i).bits.mask,
io.loadIn(i).bits.forwardData.asUInt,
io.loadIn(i).bits.forwardMask.asUInt,
io.loadIn(i).bits.mmio
XSInfo(io.ldin(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
io.ldin(i).bits.uop.lqIdx.asUInt,
io.ldin(i).bits.uop.cf.pc,
io.ldin(i).bits.vaddr,
io.ldin(i).bits.paddr,
io.ldin(i).bits.mask,
io.ldin(i).bits.forwardData.asUInt,
io.ldin(i).bits.forwardMask.asUInt,
io.ldin(i).bits.mmio
)
}
}
......