Unverified commit 4e289ebb, authored by William Wang, committed by GitHub

lq: update data field iff load_s2 valid (#1680)

Now we update the data fields (fwd data, uop) in the load queue when load_s2
is valid. This helps to mitigate the lq wen fanout problem.

State flags are treated differently: they are still updated
accurately according to loadIn.valid.
Parent 817720cc
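The idea of the change can be condensed into a minimal, self-contained Chisel sketch. The module, its name `LqWriteSketch`, and its ports are illustrative only (they are not XiangShan code): the wide data write is gated by a single load_s2-valid bit (`writeQueueData`), while the per-entry flag bits keep using the accurate `loadIn.valid` condition, which is what reduces the fanout of the lq write enable.

```scala
import chisel3._
import chisel3.util._

// Illustrative sketch only: split the load-queue write into two enables.
class LqWriteSketch(numEntries: Int, dataWidth: Int) extends Module {
  val io = IO(new Bundle {
    val loadInValid    = Input(Bool())                      // accurate valid, gates flag bits
    val writeQueueData = Input(Bool())                      // load_s2 valid, gates data/uop write
    val index          = Input(UInt(log2Ceil(numEntries).W))
    val wdata          = Input(UInt(dataWidth.W))
    val miss           = Input(Bool())
  })

  val datavalid = RegInit(VecInit(Seq.fill(numEntries)(false.B))) // example flag bit
  val data      = Reg(Vec(numEntries, UInt(dataWidth.W)))         // wide data field

  // Flag bits are updated only when the load really writes back to the lq.
  when(io.loadInValid) {
    datavalid(io.index) := !io.miss
  }

  // The data field is written whenever load_s2 is valid, even if the load is
  // later replayed; the flag bits above decide whether it is ever consumed.
  when(io.writeQueueData) {
    data(io.index) := io.wdata
  }
}
```

In the diff below, the same split appears as the new writeQueueData field in LqWriteBundle, which LoadUnit drives with load_s2.io.in.valid, and as the separate when(io.loadIn(i).bits.writeQueueData) block in LoadQueue.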
......@@ -274,7 +274,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
val rfWriteback = outer.wbArbiter.module.io.out
// memblock error exception writeback, 1 cycle after normal writeback
wb2Ctrl.io.delayedLoadError <> memBlock.io.delayedLoadError
wb2Ctrl.io.s3_delayed_load_error <> memBlock.io.s3_delayed_load_error
wb2Ctrl.io.redirect <> ctrlBlock.io.redirect
outer.wb2Ctrl.generateWritebackIO()
......
......@@ -70,7 +70,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val stIssuePtr = Output(new SqPtr())
// out
val writeback = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuOutput))
val delayedLoadError = Vec(exuParameters.LduCnt, Output(Bool()))
val s3_delayed_load_error = Vec(exuParameters.LduCnt, Output(Bool()))
val otherFastWakeup = Vec(exuParameters.LduCnt + 2 * exuParameters.StuCnt, ValidIO(new MicroOp))
// misc
val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
......@@ -279,15 +279,15 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// passdown to lsq (load s2)
lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn
lsq.io.ldout(i) <> loadUnits(i).io.lsq.ldout
lsq.io.loadDataForwarded(i) <> loadUnits(i).io.lsq.loadDataForwarded
lsq.io.s2_load_data_forwarded(i) <> loadUnits(i).io.lsq.s2_load_data_forwarded
lsq.io.trigger(i) <> loadUnits(i).io.lsq.trigger
// passdown to lsq (load s3)
lsq.io.dcacheRequireReplay(i) <> loadUnits(i).io.lsq.dcacheRequireReplay
lsq.io.delayedLoadError(i) <> loadUnits(i).io.delayedLoadError
lsq.io.s3_dcache_require_replay(i) <> loadUnits(i).io.lsq.s3_dcache_require_replay
lsq.io.s3_delayed_load_error(i) <> loadUnits(i).io.s3_delayed_load_error
// alter writeback exception info
io.delayedLoadError(i) := loadUnits(i).io.lsq.delayedLoadError
io.s3_delayed_load_error(i) := loadUnits(i).io.lsq.s3_delayed_load_error
// update mem dependency predictor
// io.memPredUpdate(i) := DontCare
......
......@@ -357,7 +357,7 @@ class Wb2Ctrl(configs: Seq[ExuConfig])(implicit p: Parameters) extends LazyModul
val redirect = Flipped(ValidIO(new Redirect))
val in = Vec(configs.length, Input(Decoupled(new ExuOutput)))
val out = Vec(configs.length, ValidIO(new ExuOutput))
val delayedLoadError = Vec(LoadPipelineWidth, Input(Bool())) // Dirty fix of data ecc error timing
val s3_delayed_load_error = Vec(LoadPipelineWidth, Input(Bool())) // Dirty fix of data ecc error timing
})
val redirect = RegNextWithEnable(io.redirect)
......@@ -373,7 +373,7 @@ class Wb2Ctrl(configs: Seq[ExuConfig])(implicit p: Parameters) extends LazyModul
if(EnableAccurateLoadError){
for ((((out, in), config), delayed_error) <- io.out.zip(io.in).zip(configs)
.filter(_._2.hasLoadError)
.zip(io.delayedLoadError)
.zip(io.s3_delayed_load_error)
){
// overwrite load exception writeback
out.bits.uop.cf.exceptionVec(loadAccessFault) := delayed_error ||
......
......@@ -73,6 +73,32 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle {
val isFirstIssue = Bool()
}
class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
// queue entry data, except the flag bits, will be updated if writeQueueData is true;
// the valid bit in LqWriteBundle is ignored
val writeQueueData = Bool()
def fromLsPipelineBundle(input: LsPipelineBundle) = {
vaddr := input.vaddr
paddr := input.paddr
mask := input.mask
data := input.data
uop := input.uop
wlineflag := input.wlineflag
miss := input.miss
tlbMiss := input.tlbMiss
ptwBack := input.ptwBack
mmio := input.mmio
rsIdx := input.rsIdx
forwardMask := input.forwardMask
forwardData := input.forwardData
isSoftPrefetch := input.isSoftPrefetch
isFirstIssue := input.isFirstIssue
writeQueueData := false.B
}
}
class LoadForwardQueryIO(implicit p: Parameters) extends XSBundle {
val vaddr = Output(UInt(VAddrBits.W))
val paddr = Output(UInt(PAddrBits.W))
......
......@@ -57,13 +57,13 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
val hartId = Input(UInt(8.W))
val enq = new LsqEnqIO
val brqRedirect = Flipped(ValidIO(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeInRe = Vec(StorePipelineWidth, Input(new LsPipelineBundle()))
val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // store data, send to sq from rs
val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
val delayedLoadError = Vec(LoadPipelineWidth, Input(Bool()))
val dcacheRequireReplay = Vec(LoadPipelineWidth, Input(Bool()))
val s2_load_data_forwarded = Vec(LoadPipelineWidth, Input(Bool()))
val s3_delayed_load_error = Vec(LoadPipelineWidth, Input(Bool()))
val s3_dcache_require_replay = Vec(LoadPipelineWidth, Input(Bool()))
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReqWithVaddr))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
......@@ -116,9 +116,9 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
loadQueue.io.brqRedirect <> io.brqRedirect
loadQueue.io.loadIn <> io.loadIn
loadQueue.io.storeIn <> io.storeIn
loadQueue.io.loadDataForwarded <> io.loadDataForwarded
loadQueue.io.delayedLoadError <> io.delayedLoadError
loadQueue.io.dcacheRequireReplay <> io.dcacheRequireReplay
loadQueue.io.s2_load_data_forwarded <> io.s2_load_data_forwarded
loadQueue.io.s3_delayed_load_error <> io.s3_delayed_load_error
loadQueue.io.s3_dcache_require_replay <> io.s3_dcache_require_replay
loadQueue.io.ldout <> io.ldout
loadQueue.io.rob <> io.rob
loadQueue.io.rollback <> io.rollback
......
......@@ -84,11 +84,11 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val io = IO(new Bundle() {
val enq = new LqEnqIO
val brqRedirect = Flipped(ValidIO(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
val delayedLoadError = Vec(LoadPipelineWidth, Input(Bool()))
val dcacheRequireReplay = Vec(LoadPipelineWidth, Input(Bool()))
val s2_load_data_forwarded = Vec(LoadPipelineWidth, Input(Bool()))
val s3_delayed_load_error = Vec(LoadPipelineWidth, Input(Bool()))
val s3_dcache_require_replay = Vec(LoadPipelineWidth, Input(Bool()))
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
val load_s1 = Vec(LoadPipelineWidth, Flipped(new PipeLoadForwardQueryIO)) // TODO: to be renamed
val loadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO))
......@@ -192,6 +192,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
// most lq status bits need to be updated immediately after load writeback to lq
// flag bits in lq need to be updated accurately
when(io.loadIn(i).fire()) {
when(io.loadIn(i).bits.miss) {
XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
......@@ -218,46 +219,51 @@ class LoadQueue(implicit p: Parameters) extends XSModule
io.loadIn(i).bits.mmio
)}
if(EnableFastForward){
datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.loadDataForwarded(i)) &&
datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.s2_load_data_forwarded(i)) &&
!io.loadIn(i).bits.mmio && // mmio data is not valid until we finished uncache access
!io.dcacheRequireReplay(i) // do not writeback if that inst will be resend from rs
!io.s3_dcache_require_replay(i) // do not writeback if that inst will be resend from rs
} else {
datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.loadDataForwarded(i)) &&
datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.s2_load_data_forwarded(i)) &&
!io.loadIn(i).bits.mmio // mmio data is not valid until we finished uncache access
}
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
vaddrTriggerResultModule.io.waddr(i) := loadWbIndex
vaddrTriggerResultModule.io.wdata(i) := io.trigger(i).hitLoadAddrTriggerHitVec
vaddrTriggerResultModule.io.wen(i) := true.B
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
debug_paddr(loadWbIndex) := io.loadIn(i).bits.paddr
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
if(EnableFastForward){
miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i) && !io.dcacheRequireReplay(i)
miss(loadWbIndex) := dcacheMissed && !io.s2_load_data_forwarded(i) && !io.s3_dcache_require_replay(i)
} else {
miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i)
miss(loadWbIndex) := dcacheMissed && !io.s2_load_data_forwarded(i)
}
pending(loadWbIndex) := io.loadIn(i).bits.mmio
released(loadWbIndex) := release2cycle.valid &&
io.loadIn(i).bits.paddr(PAddrBits-1, DCacheLineOffset) === release2cycle.bits.paddr(PAddrBits-1, DCacheLineOffset) ||
release1cycle.valid &&
io.loadIn(i).bits.paddr(PAddrBits-1, DCacheLineOffset) === release1cycle.bits.paddr(PAddrBits-1, DCacheLineOffset)
}
// data bits in lq can be updated when load_s2 is valid
when(io.loadIn(i).bits.writeQueueData){
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
// dirty code for load instr
uop(loadWbIndex).pdest := io.loadIn(i).bits.uop.pdest
uop(loadWbIndex).cf := io.loadIn(i).bits.uop.cf
uop(loadWbIndex).ctrl := io.loadIn(i).bits.uop.ctrl
uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
vaddrTriggerResultModule.io.waddr(i) := loadWbIndex
vaddrTriggerResultModule.io.wdata(i) := io.trigger(i).hitLoadAddrTriggerHitVec
vaddrTriggerResultModule.io.wen(i) := true.B
}
// vaddrModule write is delayed, as vaddrModule will not be read right after write
......@@ -276,17 +282,17 @@ class LoadQueue(implicit p: Parameters) extends XSModule
dataModule.io.refill.paddr := io.dcache.bits.addr
dataModule.io.refill.data := io.dcache.bits.data
val dcacheRequireReplay = WireInit(VecInit((0 until LoadPipelineWidth).map(i =>{
RegNext(io.loadIn(i).fire()) && RegNext(io.dcacheRequireReplay(i))
val s3_dcache_require_replay = WireInit(VecInit((0 until LoadPipelineWidth).map(i =>{
RegNext(io.loadIn(i).fire()) && RegNext(io.s3_dcache_require_replay(i))
})))
dontTouch(dcacheRequireReplay)
dontTouch(s3_dcache_require_replay)
(0 until LoadQueueSize).map(i => {
dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
datavalid(i) := true.B
miss(i) := false.B
when(!dcacheRequireReplay.asUInt.orR){
when(!s3_dcache_require_replay.asUInt.orR){
refilling(i) := true.B
}
when(io.dcache.bits.error) {
......@@ -300,10 +306,10 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val lastCycleLoadWbIndex = RegNext(loadWbIndex)
// update miss state in load s3
if(!EnableFastForward){
// dcacheRequireReplay will be used to update lq flag 1 cycle after for better timing
// s3_dcache_require_replay will be used to update lq flag 1 cycle after for better timing
//
// io.dcacheRequireReplay comes from dcache miss req reject, which is quite slow to generate
when(dcacheRequireReplay(i)) {
// io.s3_dcache_require_replay comes from dcache miss req reject, which is quite slow to generate
when(s3_dcache_require_replay(i)) {
// do not writeback if that inst will be resend from rs
// rob writeback will not be triggered by a refill before inst replay
miss(lastCycleLoadWbIndex) := false.B // disable refill listening
......@@ -312,7 +318,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
}
}
// update load error state in load s3
when(RegNext(io.loadIn(i).fire()) && io.delayedLoadError(i)){
when(RegNext(io.loadIn(i).fire()) && io.s3_delayed_load_error(i)){
uop(lastCycleLoadWbIndex).cf.exceptionVec(loadAccessFault) := true.B
}
}
......
......@@ -27,11 +27,11 @@ import xiangshan.cache._
import xiangshan.cache.mmu.{TlbCmd, TlbReq, TlbRequestIO, TlbResp}
class LoadToLsqIO(implicit p: Parameters) extends XSBundle {
val loadIn = ValidIO(new LsPipelineBundle)
val loadIn = ValidIO(new LqWriteBundle)
val ldout = Flipped(DecoupledIO(new ExuOutput))
val loadDataForwarded = Output(Bool())
val delayedLoadError = Output(Bool())
val dcacheRequireReplay = Output(Bool())
val s2_load_data_forwarded = Output(Bool())
val s3_delayed_load_error = Output(Bool())
val s3_dcache_require_replay = Output(Bool())
val forward = new PipeLoadForwardQueryIO
val loadViolationQuery = new LoadViolationQueryIO
val trigger = Flipped(new LqTriggerIO)
......@@ -279,11 +279,11 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
val dataInvalidSqIdx = Input(UInt())
val sbuffer = new LoadForwardQueryIO
val dataForwarded = Output(Bool())
val dcacheRequireReplay = Output(Bool())
val s3_dcache_require_replay = Output(Bool())
val fullForward = Output(Bool())
val fastpath = Output(new LoadToLoadIO)
val dcache_kill = Output(Bool())
val delayedLoadError = Output(Bool())
val s3_delayed_load_error = Output(Bool())
val loadViolationQueryResp = Flipped(Valid(new LoadViolationQueryResp))
val csrCtrl = Flipped(new CustomCSRCtrlIO)
val sentFastUop = Input(Bool())
......@@ -321,11 +321,11 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
// at the same time, error info (including error paddr) will be write to
// an customized CSR "CACHE_ERROR"
if (EnableAccurateLoadError) {
io.delayedLoadError := io.dcacheResp.bits.error_delayed &&
io.s3_delayed_load_error := io.dcacheResp.bits.error_delayed &&
io.csrCtrl.cache_error_enable &&
RegNext(io.out.valid)
} else {
io.delayedLoadError := false.B
io.s3_delayed_load_error := false.B
}
val actually_mmio = pmp.mmio
......@@ -465,9 +465,9 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
// s2_cache_replay is quite slow to generate, send it separately to LQ
if (EnableFastForward) {
io.dcacheRequireReplay := s2_cache_replay && !fullForward
io.s3_dcache_require_replay := s2_cache_replay && !fullForward
} else {
io.dcacheRequireReplay := s2_cache_replay &&
io.s3_dcache_require_replay := s2_cache_replay &&
!io.rsFeedback.bits.hit &&
!io.dataForwarded &&
!s2_is_prefetch &&
......@@ -521,8 +521,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper with
val fastpathIn = Input(Vec(LoadPipelineWidth, new LoadToLoadIO))
val loadFastMatch = Input(UInt(exuParameters.LduCnt.W))
val delayedLoadError = Output(Bool()) // load ecc error
// Note that io.delayedLoadError and io.lsq.delayedLoadError is different
val s3_delayed_load_error = Output(Bool()) // load ecc error
// Note that io.s3_delayed_load_error and io.lsq.s3_delayed_load_error is different
val csrCtrl = Flipped(new CustomCSRCtrlIO)
})
......@@ -567,7 +567,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper with
load_s2.io.sbuffer.forwardMaskFast <> io.sbuffer.forwardMaskFast // should not be used in load_s2
load_s2.io.sbuffer.dataInvalid <> io.sbuffer.dataInvalid // always false
load_s2.io.sbuffer.matchInvalid <> io.sbuffer.matchInvalid
load_s2.io.dataForwarded <> io.lsq.loadDataForwarded
load_s2.io.dataForwarded <> io.lsq.s2_load_data_forwarded
load_s2.io.fastpath <> io.fastpathOut
load_s2.io.dataInvalidSqIdx := io.lsq.forward.dataInvalidSqIdx // provide dataInvalidSqIdx to make wakeup faster
load_s2.io.loadViolationQueryResp <> io.lsq.loadViolationQuery.resp
......@@ -619,7 +619,9 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper with
// Current dcache use MSHR
// Load queue will be updated at s2 for both hit/miss int/fp load
io.lsq.loadIn.valid := load_s2.io.out.valid
io.lsq.loadIn.bits := load_s2.io.out.bits
// generate LqWriteBundle from LsPipelineBundle
io.lsq.loadIn.bits.fromLsPipelineBundle(load_s2.io.out.bits)
io.lsq.loadIn.bits.writeQueueData := load_s2.io.in.valid
// write to rob and writeback bus
val s2_wb_valid = load_s2.io.out.valid && !load_s2.io.out.bits.miss && !load_s2.io.out.bits.mmio
......@@ -640,8 +642,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper with
load_s2.io.out.ready := true.B
// load s3
io.lsq.dcacheRequireReplay := load_s2.io.dcacheRequireReplay
io.lsq.delayedLoadError := load_s2.io.delayedLoadError
io.lsq.s3_dcache_require_replay := load_s2.io.s3_dcache_require_replay
io.lsq.s3_delayed_load_error := load_s2.io.s3_delayed_load_error
val load_wb_reg = RegNext(Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsq.ldout.bits))
io.ldout.bits := load_wb_reg
......@@ -649,17 +651,17 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper with
RegNext(io.lsq.ldout.valid) && !RegNext(io.lsq.ldout.bits.uop.robIdx.needFlush(io.redirect)) && !RegNext(hitLoadOut.valid)
io.ldout.bits.uop.cf.exceptionVec(loadAccessFault) := load_wb_reg.uop.cf.exceptionVec(loadAccessFault) ||
RegNext(hitLoadOut.valid) && load_s2.io.delayedLoadError
RegNext(hitLoadOut.valid) && load_s2.io.s3_delayed_load_error
// delayedLoadError path is not used for now, as we writeback load result in load_s3
// s3_delayed_load_error path is not used for now, as we writeback load result in load_s3
// but we keep this path for future use
io.delayedLoadError := false.B
io.s3_delayed_load_error := false.B
io.lsq.ldout.ready := !hitLoadOut.valid
when(io.feedbackSlow.valid && !io.feedbackSlow.bits.hit){
assert(RegNext(!hitLoadOut.valid))
assert(RegNext(!io.lsq.loadIn.valid) || RegNext(load_s2.io.dcacheRequireReplay))
assert(RegNext(!io.lsq.loadIn.valid) || RegNext(load_s2.io.s3_dcache_require_replay))
}
val lastValidData = RegEnable(io.ldout.bits.data, io.ldout.fire())
......