提交 c1af2986 编写于 作者: W William Wang

lq: opt lq data wen (load_s2_valid) fanout (#1687)

上级 4a2390a4
......@@ -75,7 +75,7 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundleWithMicroOp {
class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
// queue entry data, except flag bits, will be updated if writeQueue is true,
// valid bit in LqWriteBundle will be ignored
val writeQueueData = Bool()
// Six duplicated copies of the load queue data write enable (load_s2 valid).
// LoadQueue consumes one copy per write-enable group instead of a single
// shared signal, reducing the fanout of load_s2's valid register.
val lq_data_wen_dup = Vec(6, Bool()) // dirty reg dup
def fromLsPipelineBundle(input: LsPipelineBundle) = {
vaddr := input.vaddr
......@@ -94,7 +94,7 @@ class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
isSoftPrefetch := input.isSoftPrefetch
isFirstIssue := input.isFirstIssue
writeQueueData := false.B
lq_data_wen_dup := DontCare
}
}
......
......@@ -246,7 +246,29 @@ class LoadQueue(implicit p: Parameters) extends XSModule
}
// data bit in lq can be updated when load_s2 valid
when(io.loadIn(i).bits.writeQueueData){
// when(io.loadIn(i).bits.lq_data_wen){
// val loadWbData = Wire(new LQDataEntry)
// loadWbData.paddr := io.loadIn(i).bits.paddr
// loadWbData.mask := io.loadIn(i).bits.mask
// loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data
// loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
// dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
// dataModule.io.wb.wen(i) := true.B
// // dirty code for load instr
// uop(loadWbIndex).pdest := io.loadIn(i).bits.uop.pdest
// uop(loadWbIndex).cf := io.loadIn(i).bits.uop.cf
// uop(loadWbIndex).ctrl := io.loadIn(i).bits.uop.ctrl
// uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
// vaddrTriggerResultModule.io.waddr(i) := loadWbIndex
// vaddrTriggerResultModule.io.wdata(i) := io.trigger(i).hitLoadAddrTriggerHitVec
// vaddrTriggerResultModule.io.wen(i) := true.B
// }
// dirty code to reduce load_s2.valid fanout
when(io.loadIn(i).bits.lq_data_wen_dup(0)){
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.mask := io.loadIn(i).bits.mask
......@@ -254,16 +276,23 @@ class LoadQueue(implicit p: Parameters) extends XSModule
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
// dirty code for load instr
}
// dirty code for load instr
// Each lq_data_wen_dup(k) below is a duplicated register copy of the same
// load_s2 valid signal (driven in LoadUnit); using a separate copy per
// write group splits the write-enable fanout across independent registers.
// NOTE(review): all six dup bits are written with identical values upstream
// (see the assert in LoadUnit), so these when-conditions fire together.
when(io.loadIn(i).bits.lq_data_wen_dup(1)){
uop(loadWbIndex).pdest := io.loadIn(i).bits.uop.pdest
}
when(io.loadIn(i).bits.lq_data_wen_dup(2)){
uop(loadWbIndex).cf := io.loadIn(i).bits.uop.cf
}
when(io.loadIn(i).bits.lq_data_wen_dup(3)){
uop(loadWbIndex).ctrl := io.loadIn(i).bits.uop.ctrl
}
when(io.loadIn(i).bits.lq_data_wen_dup(4)){
uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
}
// dup bit 5 gates the vaddr trigger result write for this load channel
when(io.loadIn(i).bits.lq_data_wen_dup(5)){
vaddrTriggerResultModule.io.waddr(i) := loadWbIndex
vaddrTriggerResultModule.io.wdata(i) := io.trigger(i).hitLoadAddrTriggerHitVec
vaddrTriggerResultModule.io.wen(i) := true.B
}
......
......@@ -677,7 +677,14 @@ class LoadUnit(implicit p: Parameters) extends XSModule
io.lsq.loadIn.valid := load_s2.io.out.valid
// generate LqWriteBundle from LsPipelineBundle
io.lsq.loadIn.bits.fromLsPipelineBundle(load_s2.io.out.bits)
io.lsq.loadIn.bits.writeQueueData := load_s2.io.in.valid
// generate duplicated load queue data wen
// load_s2_valid_vec holds 6 register copies of load_s2's valid bit so that
// LoadQueue can use one copy per write-enable group, reducing the fanout of
// a single load_s2 valid signal.
val load_s2_valid_vec = RegInit(0.U(6.W))
// s1 -> s2 handshake fires this cycle: s2 will hold a valid load next cycle
val load_s2_leftFire = load_s1.io.out.valid && load_s2.io.in.ready
// default assignment: clear all dup bits (overridden by the whens below;
// Chisel last-connect semantics make the later when() take priority)
load_s2_valid_vec := 0x0.U(6.W)
when (load_s2_leftFire) { load_s2_valid_vec := 0x3f.U(6.W)}
// a redirect flushing the instruction currently in s1 must also clear the
// dup bits, mirroring how load_s2.io.in.valid itself is killed
when (load_s1.io.out.bits.uop.robIdx.needFlush(io.redirect)) { load_s2_valid_vec := 0x0.U(6.W) }
// invariant: every dup bit tracks the real s2 valid exactly
assert(RegNext(load_s2.io.in.valid === load_s2_valid_vec(0)))
io.lsq.loadIn.bits.lq_data_wen_dup := load_s2_valid_vec.asBools()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册