提交 0a47e4a1 编写于 作者: W William Wang

lq: update paddr in lq in load_s1 and load_s2 (#1707)

The paddr in the load queue (lq) is now updated over 2 cycles (load_s1 and load_s2).
This way, the paddr in lq is still valid in load_s3.
上级 72e3aa13
...@@ -287,6 +287,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -287,6 +287,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// Lsq to load unit's rs // Lsq to load unit's rs
// passdown to lsq (load s1)
lsq.io.loadPaddrIn(i) <> loadUnits(i).io.lsq.loadPaddrIn
// passdown to lsq (load s2) // passdown to lsq (load s2)
lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn
lsq.io.ldout(i) <> loadUnits(i).io.lsq.ldout lsq.io.ldout(i) <> loadUnits(i).io.lsq.ldout
......
...@@ -57,6 +57,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet ...@@ -57,6 +57,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
val hartId = Input(UInt(8.W)) val hartId = Input(UInt(8.W))
val enq = new LsqEnqIO val enq = new LsqEnqIO
val brqRedirect = Flipped(ValidIO(new Redirect)) val brqRedirect = Flipped(ValidIO(new Redirect))
val loadPaddrIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqPaddrWriteBundle)))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle))) val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeInRe = Vec(StorePipelineWidth, Input(new LsPipelineBundle())) val storeInRe = Vec(StorePipelineWidth, Input(new LsPipelineBundle()))
...@@ -116,6 +117,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet ...@@ -116,6 +117,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
// load queue wiring // load queue wiring
loadQueue.io.brqRedirect <> io.brqRedirect loadQueue.io.brqRedirect <> io.brqRedirect
loadQueue.io.loadPaddrIn <> io.loadPaddrIn
loadQueue.io.loadIn <> io.loadIn loadQueue.io.loadIn <> io.loadIn
loadQueue.io.storeIn <> io.storeIn loadQueue.io.storeIn <> io.storeIn
loadQueue.io.s2_load_data_forwarded <> io.s2_load_data_forwarded loadQueue.io.s2_load_data_forwarded <> io.s2_load_data_forwarded
......
...@@ -69,6 +69,11 @@ class LqEnqIO(implicit p: Parameters) extends XSBundle { ...@@ -69,6 +69,11 @@ class LqEnqIO(implicit p: Parameters) extends XSBundle {
val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr)) val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr))
} }
/** Payload of the dedicated physical-address write port into the load queue.
  *
  * The load unit drives this from its load_s1 outputs; the load queue uses
  * `lqIdx.value` as the write address and `paddr` as the write data of its
  * paddr storage.
  */
class LqPaddrWriteBundle(implicit p: Parameters) extends XSBundle {
// Physical address to record for the load, PAddrBits wide.
val paddr = Output(UInt(PAddrBits.W))
// Load-queue pointer selecting the entry to be written.
val lqIdx = Output(new LqPtr)
}
class LqTriggerIO(implicit p: Parameters) extends XSBundle { class LqTriggerIO(implicit p: Parameters) extends XSBundle {
val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool())) val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool())) val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
...@@ -84,6 +89,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule ...@@ -84,6 +89,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val io = IO(new Bundle() { val io = IO(new Bundle() {
val enq = new LqEnqIO val enq = new LqEnqIO
val brqRedirect = Flipped(ValidIO(new Redirect)) val brqRedirect = Flipped(ValidIO(new Redirect))
val loadPaddrIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqPaddrWriteBundle)))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle))) val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val s2_load_data_forwarded = Vec(LoadPipelineWidth, Input(Bool())) val s2_load_data_forwarded = Vec(LoadPipelineWidth, Input(Bool()))
...@@ -189,6 +195,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule ...@@ -189,6 +195,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
*/ */
for (i <- 0 until LoadPipelineWidth) { for (i <- 0 until LoadPipelineWidth) {
dataModule.io.wb.wen(i) := false.B dataModule.io.wb.wen(i) := false.B
dataModule.io.paddr.wen(i) := false.B
vaddrTriggerResultModule.io.wen(i) := false.B vaddrTriggerResultModule.io.wen(i) := false.B
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
...@@ -296,6 +303,12 @@ class LoadQueue(implicit p: Parameters) extends XSModule ...@@ -296,6 +303,12 @@ class LoadQueue(implicit p: Parameters) extends XSModule
vaddrTriggerResultModule.io.wen(i) := true.B vaddrTriggerResultModule.io.wen(i) := true.B
} }
when(io.loadPaddrIn(i).valid) {
dataModule.io.paddr.wen(i) := true.B
dataModule.io.paddr.waddr(i) := io.loadPaddrIn(i).bits.lqIdx.value
dataModule.io.paddr.wdata(i) := io.loadPaddrIn(i).bits.paddr
}
// vaddrModule write is delayed, as vaddrModule will not be read right after write // vaddrModule write is delayed, as vaddrModule will not be read right after write
vaddrModule.io.waddr(i) := RegNext(loadWbIndex) vaddrModule.io.waddr(i) := RegNext(loadWbIndex)
vaddrModule.io.wdata(i) := RegNext(io.loadIn(i).bits.vaddr) vaddrModule.io.wdata(i) := RegNext(io.loadIn(i).bits.vaddr)
......
...@@ -26,13 +26,16 @@ import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants} ...@@ -26,13 +26,16 @@ import xiangshan.cache.{DCacheWordIO, DCacheLineIO, MemoryOpConstants}
import xiangshan.mem._ import xiangshan.mem._
import xiangshan.backend.rob.RobPtr import xiangshan.backend.rob.RobPtr
class LQDataEntry(implicit p: Parameters) extends XSBundle { class LQDataEntryWoPaddr(implicit p: Parameters) extends XSBundle {
val paddr = UInt(PAddrBits.W)
val mask = UInt(8.W) val mask = UInt(8.W)
val data = UInt(XLEN.W) val data = UInt(XLEN.W)
val fwdMask = Vec(8, Bool()) val fwdMask = Vec(8, Bool())
} }
/** Load-queue data entry that also carries the physical address.
  *
  * Adds `paddr` on top of the fields inherited from `LQDataEntryWoPaddr`.
  */
class LQDataEntry(implicit p: Parameters) extends LQDataEntryWoPaddr {
// Physical address of the load, PAddrBits wide.
val paddr = UInt(PAddrBits.W)
}
// Data module define // Data module define
// These data modules are like SyncDataModuleTemplate, but support cam-like ops // These data modules are like SyncDataModuleTemplate, but support cam-like ops
...@@ -42,7 +45,7 @@ class LQDataEntry(implicit p: Parameters) extends XSBundle { ...@@ -42,7 +45,7 @@ class LQDataEntry(implicit p: Parameters) extends XSBundle {
// * st-ld violation addr cam // * st-ld violation addr cam
// * data release addr cam // * data release addr cam
// * data refill addr cam // * data refill addr cam
class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters { class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int, numWBanks: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters {
val io = IO(new Bundle { val io = IO(new Bundle {
// normal read/write ports // normal read/write ports
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W))) val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
...@@ -61,6 +64,11 @@ class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Pa ...@@ -61,6 +64,11 @@ class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Pa
val refillMmask = Output(Vec(numEntries, Bool())) val refillMmask = Output(Vec(numEntries, Bool()))
}) })
require(isPow2(numWBanks))
require(numWBanks >= 2)
val numEntryPerBank = numEntries / numWBanks
val data = Reg(Vec(numEntries, UInt((PAddrBits).W))) val data = Reg(Vec(numEntries, UInt((PAddrBits).W)))
// read ports // read ports
...@@ -68,10 +76,50 @@ class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Pa ...@@ -68,10 +76,50 @@ class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Pa
io.rdata(i) := data(RegNext(io.raddr(i))) io.rdata(i) := data(RegNext(io.raddr(i)))
} }
// below is the write ports (with priorities) // write ports
for (i <- 0 until numWrite) { val waddr_dec = io.waddr.map(a => UIntToOH(a))
when (io.wen(i)) { def selectBankMask(in: UInt, bank: Int): UInt = {
data(io.waddr(i)) := io.wdata(i) in((bank + 1) * numEntryPerBank - 1, bank * numEntryPerBank)
}
for (bank <- 0 until numWBanks) {
// write ports
// s0: write to bank level buffer
val s0_bank_waddr_dec = waddr_dec.map(a => selectBankMask(a, bank))
val s0_bank_write_en = io.wen.zip(s0_bank_waddr_dec).map(w => w._1 && w._2.orR)
s0_bank_waddr_dec.zipWithIndex.map(a =>
a._1.suggestName("s0_bank_waddr_dec" + bank + "_" + a._2)
)
s0_bank_write_en.zipWithIndex.map(a =>
a._1.suggestName("s0_bank_write_en" + bank + "_" + a._2)
)
// s1: write data to entries
val s1_bank_waddr_dec = s0_bank_waddr_dec.zip(s0_bank_write_en).map(w => RegEnable(w._1, w._2))
val s1_bank_wen = RegNext(VecInit(s0_bank_write_en))
val s1_wdata = io.wdata.zip(s0_bank_write_en).map(w => RegEnable(w._1, w._2))
s1_bank_waddr_dec.zipWithIndex.map(a =>
a._1.suggestName("s1_bank_waddr_dec" + bank + "_" + a._2)
)
s1_bank_wen.zipWithIndex.map(a =>
a._1.suggestName("s1_bank_wen" + bank + "_" + a._2)
)
s1_wdata.zipWithIndex.map(a =>
a._1.suggestName("s1_wdata" + bank + "_" + a._2)
)
// entry write
for (entry <- 0 until numEntryPerBank) {
// write ports
val s1_entry_write_en_vec = s1_bank_wen.zip(s1_bank_waddr_dec).map(w => w._1 && w._2(entry))
val s1_entry_write_en = VecInit(s1_entry_write_en_vec).asUInt.orR
val s1_entry_write_data = Mux1H(s1_entry_write_en_vec, s1_wdata)
when (s1_entry_write_en) {
data(bank * numEntryPerBank + entry) := s1_entry_write_data
}
s1_entry_write_en_vec.zipWithIndex.map(a =>
a._1.suggestName("s1_entry_write_en_vec" + bank + "_" + entry + "_" + a._2)
)
s1_entry_write_en.suggestName("s1_entry_write_en" + bank + "_" + entry)
s1_entry_write_data.suggestName("s1_entry_write_data" + bank + "_" + entry)
} }
} }
...@@ -119,10 +167,12 @@ class LQMaskModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Par ...@@ -119,10 +167,12 @@ class LQMaskModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Par
io.rdata(i) := data(RegNext(io.raddr(i))) io.rdata(i) := data(RegNext(io.raddr(i)))
} }
// below is the write ports (with priorities) // write ports
for (i <- 0 until numWrite) { val waddr_dec = io.waddr.map(a => UIntToOH(a))
when (io.wen(i)) { for (j <- 0 until numEntries) {
data(io.waddr(i)) := io.wdata(i) val write_wen = io.wen.zip(waddr_dec).map(w => w._1 && w._2(j))
when (VecInit(write_wen).asUInt.orR) {
data(j) := Mux1H(write_wen, io.wdata)
} }
} }
...@@ -190,6 +240,7 @@ class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Par ...@@ -190,6 +240,7 @@ class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Par
} }
// write ctrl info // write ctrl info
// TODO: optimize that
when (io.wen(i)) { when (io.wen(i)) {
fwdMask(io.waddr(i)) := io.fwdMaskWdata(i) fwdMask(io.waddr(i)) := io.fwdMaskWdata(i)
} }
...@@ -235,6 +286,11 @@ class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Par ...@@ -235,6 +286,11 @@ class LQDataModule(numEntries: Int, numRead: Int, numWrite: Int)(implicit p: Par
// and their interconnect // and their interconnect
class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper { class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() { val io = IO(new Bundle() {
val paddr = new Bundle() {
val wen = Vec(wbNumWrite, Input(Bool()))
val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
val wdata = Input(Vec(wbNumWrite, UInt(PAddrBits.W)))
}
val wb = new Bundle() { val wb = new Bundle() {
val wen = Vec(wbNumWrite, Input(Bool())) val wen = Vec(wbNumWrite, Input(Bool()))
val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W))) val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
...@@ -285,7 +341,7 @@ class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit ...@@ -285,7 +341,7 @@ class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit
}) })
// data module // data module
val paddrModule = Module(new LQPaddrModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth)) val paddrModule = Module(new LQPaddrModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth, numWBanks = LoadQueueNWriteBanks))
val maskModule = Module(new LQMaskModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth)) val maskModule = Module(new LQMaskModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth))
val dataModule = Module(new LQDataModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth+1)) val dataModule = Module(new LQDataModule(size, numRead = LoadPipelineWidth+1, numWrite = LoadPipelineWidth+1))
...@@ -319,21 +375,22 @@ class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit ...@@ -319,21 +375,22 @@ class LoadQueueDataWrapper(size: Int, wbNumRead: Int, wbNumWrite: Int)(implicit
maskModule.io.wen(i) := false.B maskModule.io.wen(i) := false.B
dataModule.io.wen(i) := false.B dataModule.io.wen(i) := false.B
paddrModule.io.waddr(i) := io.wb.waddr(i)
maskModule.io.waddr(i) := io.wb.waddr(i) maskModule.io.waddr(i) := io.wb.waddr(i)
dataModule.io.waddr(i) := io.wb.waddr(i) dataModule.io.waddr(i) := io.wb.waddr(i)
paddrModule.io.wdata(i) := io.wb.wdata(i).paddr
maskModule.io.wdata(i) := io.wb.wdata(i).mask maskModule.io.wdata(i) := io.wb.wdata(i).mask
dataModule.io.wdata(i) := io.wb.wdata(i).data dataModule.io.wdata(i) := io.wb.wdata(i).data
dataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt dataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt
dataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr dataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr
when(io.wb.wen(i)){ when(io.wb.wen(i)){
paddrModule.io.wen(i) := true.B
maskModule.io.wen(i) := true.B maskModule.io.wen(i) := true.B
dataModule.io.wen(i) := true.B dataModule.io.wen(i) := true.B
} }
paddrModule.io.wen(i) := io.paddr.wen(i)
paddrModule.io.waddr(i) := io.paddr.waddr(i)
paddrModule.io.wdata(i) := io.paddr.wdata(i)
}) })
// write port wbNumWrite // write port wbNumWrite
......
...@@ -28,6 +28,7 @@ import xiangshan.cache.mmu.{TlbCmd, TlbReq, TlbRequestIO, TlbResp} ...@@ -28,6 +28,7 @@ import xiangshan.cache.mmu.{TlbCmd, TlbReq, TlbRequestIO, TlbResp}
class LoadToLsqIO(implicit p: Parameters) extends XSBundle { class LoadToLsqIO(implicit p: Parameters) extends XSBundle {
val loadIn = ValidIO(new LqWriteBundle) val loadIn = ValidIO(new LqWriteBundle)
val loadPaddrIn = ValidIO(new LqPaddrWriteBundle)
val ldout = Flipped(DecoupledIO(new ExuOutput)) val ldout = Flipped(DecoupledIO(new ExuOutput))
val s2_load_data_forwarded = Output(Bool()) val s2_load_data_forwarded = Output(Bool())
val s3_delayed_load_error = Output(Bool()) val s3_delayed_load_error = Output(Bool())
...@@ -611,6 +612,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule ...@@ -611,6 +612,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, PipelineConnect(load_s1.io.out, load_s2.io.in, true.B,
load_s1.io.out.bits.uop.robIdx.needFlush(io.redirect) || cancelPointerChasing) load_s1.io.out.bits.uop.robIdx.needFlush(io.redirect) || cancelPointerChasing)
// provide paddr for lq
io.lsq.loadPaddrIn.valid := load_s1.io.out.valid
io.lsq.loadPaddrIn.bits.lqIdx := load_s1.io.out.bits.uop.lqIdx
io.lsq.loadPaddrIn.bits.paddr := load_s1.io.lsuPAddr
// load s2 // load s2
io.dcache.s2_kill := load_s2.io.dcache_kill // to kill mmio resp which are redirected io.dcache.s2_kill := load_s2.io.dcache_kill // to kill mmio resp which are redirected
load_s2.io.dcacheResp <> io.dcache.resp load_s2.io.dcacheResp <> io.dcache.resp
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册