Unverified commit 10551d4e, authored by Yinan Xu, committed by GitHub

lsq: add LsqEnqCtrl to optimize enqueue timing (#1380)

This commit adds an LsqEnqCtrl module that inserts one extra clock cycle
between dispatch and the load/store queues.

LsqEnqCtrl maintains the lqEnqPtr/sqEnqPtr and lqCounter/sqCounter.
They are used to determine whether the load/store queue can accept new
instructions. After that, the instructions are sent to the load/store queue.
This module decouples queue allocation from the real enqueue.

Besides, uop storage in the load/store queue is optimized. At dispatch,
only robIdx is required. Other information is naturally carried along the
pipeline and can be stored in the load/store queue later if needed. For
example, the exception vector, trigger, ftqIdx, pdest, etc. are
unnecessary before the instruction leaves the load/store pipeline.
Parent: 67c26c34
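The decoupling idea can be illustrated with a minimal, self-contained Chisel sketch. This is a simplified illustration, not the actual LsqEnqCtrl shown in the diff below; the names SimpleEnqCtrl, EnqEntry, deqCnt and the 8-bit payload are assumptions made for the example, and redirect/flush handling is omitted. The control module keeps its own free-entry counter and allocation pointer, answers canAccept purely from registers (for a short timing path at dispatch), and forwards accepted requests to the real queue one cycle later.

import chisel3._
import chisel3.util._

// What gets forwarded to the real queue one cycle after allocation.
class EnqEntry(entries: Int) extends Bundle {
  val payload = UInt(8.W)               // stands in for the minimal uop info (e.g. robIdx)
  val idx     = UInt(log2Up(entries).W) // pre-allocated queue index
}

// Illustrative decoupled enqueue controller: accept/allocate here, enqueue
// into the real queue one cycle later (assumes a power-of-two depth so the
// pointer wraps naturally).
class SimpleEnqCtrl(entries: Int, enqWidth: Int) extends Module {
  val io = IO(new Bundle {
    val req       = Vec(enqWidth, Flipped(Valid(UInt(8.W))))
    val canAccept = Output(Bool())
    val deqCnt    = Input(UInt(log2Up(enqWidth + 1).W)) // entries freed this cycle
    val toQueue   = Vec(enqWidth, Valid(new EnqEntry(entries)))
  })
  val enqPtr  = RegInit(0.U(log2Up(entries).W))           // next allocation index
  val counter = RegInit(entries.U(log2Up(entries + 1).W)) // number of free entries

  // canAccept depends only on registers, so dispatch sees a short timing path
  io.canAccept := counter >= enqWidth.U
  val enqNum = PopCount(io.req.map(_.valid))
  val doEnq  = io.canAccept

  when (doEnq) { enqPtr := enqPtr + enqNum }
  counter := counter + io.deqCnt - Mux(doEnq, enqNum, 0.U)

  // forward accepted requests to the real queue one cycle later,
  // together with the index that was allocated for them
  for (i <- 0 until enqWidth) {
    val allocIdx = enqPtr + PopCount(io.req.take(i).map(_.valid))
    val fire     = io.req(i).valid && doEnq
    io.toQueue(i).valid        := RegNext(fire, false.B)
    io.toQueue(i).bits.payload := RegEnable(io.req(i).bits, fire)
    io.toQueue(i).bits.idx     := RegEnable(allocIdx, fire)
  }
}

In the real LsqEnqCtrl below, the counters are additionally adjusted by the commit counts from the ROB (lcommit/scommit) and by the cancel counts coming back from the LSQ after a redirect (lqCancelCnt/sqCancelCnt), which is exactly what the new wiring in XSCore, MemBlock and Scheduler carries through.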
@@ -287,6 +287,12 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
ctrlBlock.io.stIn <> memBlock.io.stIn
ctrlBlock.io.memoryViolation <> memBlock.io.memoryViolation
exuBlocks.head.io.scheExtra.enqLsq.get <> memBlock.io.enqLsq
exuBlocks.foreach(b => {
b.io.scheExtra.lcommit := ctrlBlock.io.robio.lsq.lcommit
b.io.scheExtra.scommit := memBlock.io.sqDeq
b.io.scheExtra.lqCancelCnt := memBlock.io.lqCancelCnt
b.io.scheExtra.sqCancelCnt := memBlock.io.sqCancelCnt
})
val sourceModules = outer.writebackSources.map(_.map(_.module.asInstanceOf[HasWritebackSourceImp]))
outer.ctrlBlock.generateWritebackIO()
@@ -93,6 +93,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val dcacheMSHRFull = Output(Bool())
}
val perfEventsPTW = Input(Vec(19, new PerfEvent))
val lqCancelCnt = Output(UInt(log2Up(LoadQueueSize + 1).W))
val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
val sqDeq = Output(UInt(2.W))
})
override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.writeback))
@@ -415,6 +418,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// lsq.io.dcache <> dcache.io.lsu.lsq
lsq.io.dcache := RegNext(dcache.io.lsu.lsq)
lsq.io.release := dcache.io.lsu.release
lsq.io.lqCancelCnt <> io.lqCancelCnt
lsq.io.sqCancelCnt <> io.sqCancelCnt
lsq.io.sqDeq <> io.sqDeq
// LSQ to store buffer
lsq.io.sbuffer <> sbuffer.io.in
@@ -29,7 +29,7 @@ import xiangshan.backend.fu.fpu.FMAMidResultIO
import xiangshan.backend.issue.ReservationStationWrapper
import xiangshan.backend.regfile.{Regfile, RfReadPort}
import xiangshan.backend.rename.{BusyTable, BusyTableReadIO}
import xiangshan.mem.{LsqEnqIO, MemWaitUpdateReq, SqPtr}
import xiangshan.mem.{LsqEnqCtrl, LsqEnqIO, MemWaitUpdateReq, SqPtr}
class DispatchArbiter(func: Seq[MicroOp => Bool])(implicit p: Parameters) extends XSModule {
val numTarget = func.length
@@ -251,6 +251,11 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
val stIssuePtr = Input(new SqPtr())
// special ports for load / store rs
val enqLsq = if (outer.numReplayPorts > 0) Some(Flipped(new LsqEnqIO)) else None
val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
// from lsq
val lqCancelCnt = Input(UInt(log2Up(LoadQueueSize + 1).W))
val sqCancelCnt = Input(UInt(log2Up(StoreQueueSize + 1).W))
val memWaitUpdateReq = Flipped(new MemWaitUpdateReq)
// debug
val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
@@ -283,7 +288,16 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
val dispatch2 = outer.dispatch2.map(_.module)
// dirty code for ls dp
dispatch2.foreach(dp => if (dp.io.enqLsq.isDefined) dp.io.enqLsq.get <> io.extra.enqLsq.get)
dispatch2.foreach(dp => if (dp.io.enqLsq.isDefined) {
val lsqCtrl = Module(new LsqEnqCtrl)
lsqCtrl.io.redirect <> io.redirect
lsqCtrl.io.enq <> dp.io.enqLsq.get
lsqCtrl.io.lcommit := io.extra.lcommit
lsqCtrl.io.scommit := io.extra.scommit
lsqCtrl.io.lqCancelCnt := io.extra.lqCancelCnt
lsqCtrl.io.sqCancelCnt := io.extra.sqCancelCnt
io.extra.enqLsq.get <> lsqCtrl.io.enqLsq
})
io.in <> dispatch2.flatMap(_.io.in)
val readIntState = dispatch2.flatMap(_.io.readIntState.getOrElse(Seq()))
@@ -78,6 +78,9 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
val issuePtrExt = Output(new SqPtr)
val sqFull = Output(Bool())
val lqFull = Output(Bool())
val lqCancelCnt = Output(UInt(log2Up(LoadQueueSize + 1).W))
val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
val sqDeq = Output(UInt(2.W))
val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
})
@@ -121,6 +124,7 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
loadQueue.io.release <> io.release
loadQueue.io.trigger <> io.trigger
loadQueue.io.exceptionAddr.isStore := DontCare
loadQueue.io.lqCancelCnt <> io.lqCancelCnt
// store queue wiring
// storeQueue.io <> DontCare
@@ -133,6 +137,8 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
storeQueue.io.rob <> io.rob
storeQueue.io.exceptionAddr.isStore := DontCare
storeQueue.io.issuePtrExt <> io.issuePtrExt
storeQueue.io.sqCancelCnt <> io.sqCancelCnt
storeQueue.io.sqDeq <> io.sqDeq
loadQueue.io.load_s1 <> io.forward
storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE
@@ -196,3 +202,80 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
val perfEvents = Seq(loadQueue, storeQueue).flatMap(_.getPerfEvents)
generatePerfEvent()
}
class LsqEnqCtrl(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle {
val redirect = Flipped(ValidIO(new Redirect))
// to dispatch
val enq = new LsqEnqIO
// from rob
val lcommit = Input(UInt(log2Up(CommitWidth + 1).W))
val scommit = Input(UInt(log2Up(CommitWidth + 1).W))
// from/to lsq
val lqCancelCnt = Input(UInt(log2Up(LoadQueueSize + 1).W))
val sqCancelCnt = Input(UInt(log2Up(StoreQueueSize + 1).W))
val enqLsq = Flipped(new LsqEnqIO)
})
val lqPtr = RegInit(0.U.asTypeOf(new LqPtr))
val sqPtr = RegInit(0.U.asTypeOf(new SqPtr))
val lqCounter = RegInit(LoadQueueSize.U(log2Up(LoadQueueSize + 1).W))
val sqCounter = RegInit(StoreQueueSize.U(log2Up(StoreQueueSize + 1).W))
val canAccept = RegInit(false.B)
val loadEnqNumber = PopCount(io.enq.req.zip(io.enq.needAlloc).map(x => x._1.valid && x._2(0)))
val storeEnqNumber = PopCount(io.enq.req.zip(io.enq.needAlloc).map(x => x._1.valid && x._2(1)))
// How to update ptr and counter:
// (1) by default, updated according to enq/commit
// (2) when redirect and dispatch queue is empty, update according to lsq
val t1_redirect = RegNext(io.redirect.valid)
val t2_redirect = RegNext(t1_redirect)
val t2_update = t2_redirect && !VecInit(io.enq.needAlloc.map(_.orR)).asUInt.orR
val t3_update = RegNext(t2_update)
val t3_lqCancelCnt = RegNext(io.lqCancelCnt)
val t3_sqCancelCnt = RegNext(io.sqCancelCnt)
when (t3_update) {
lqPtr := lqPtr - t3_lqCancelCnt
lqCounter := lqCounter + io.lcommit + t3_lqCancelCnt
sqPtr := sqPtr - t3_sqCancelCnt
sqCounter := sqCounter + io.scommit + t3_sqCancelCnt
}.elsewhen (!io.redirect.valid && io.enq.canAccept) {
lqPtr := lqPtr + loadEnqNumber
lqCounter := lqCounter + io.lcommit - loadEnqNumber
sqPtr := sqPtr + storeEnqNumber
sqCounter := sqCounter + io.scommit - storeEnqNumber
}.otherwise {
lqCounter := lqCounter + io.lcommit
sqCounter := sqCounter + io.scommit
}
val maxAllocate = Seq(exuParameters.LduCnt, exuParameters.StuCnt).max
val ldCanAccept = lqCounter >= loadEnqNumber +& maxAllocate.U
val sqCanAccept = sqCounter >= storeEnqNumber +& maxAllocate.U
// It is possible that t3_update and enq are true at the same clock cycle.
// For example, if redirect.valid lasts more than one clock cycle,
// after the last redirect, new instructions may enter, but the previous redirect
// has not been resolved yet (i.e., not updated according to the cancel count from LSQ).
// To solve the issue easily, we block enqueue when t3_update, which is RegNext(t2_update).
io.enq.canAccept := RegNext(ldCanAccept && sqCanAccept && !t2_update)
val lqOffset = Wire(Vec(io.enq.resp.length, UInt(log2Up(maxAllocate + 1).W)))
val sqOffset = Wire(Vec(io.enq.resp.length, UInt(log2Up(maxAllocate + 1).W)))
for ((resp, i) <- io.enq.resp.zipWithIndex) {
lqOffset(i) := PopCount(io.enq.needAlloc.take(i).map(a => a(0)))
resp.lqIdx := lqPtr + lqOffset(i)
sqOffset(i) := PopCount(io.enq.needAlloc.take(i).map(a => a(1)))
resp.sqIdx := sqPtr + sqOffset(i)
}
io.enqLsq.needAlloc := RegNext(io.enq.needAlloc)
io.enqLsq.req.zip(io.enq.req).zip(io.enq.resp).foreach{ case ((toLsq, enq), resp) =>
val do_enq = enq.valid && !io.redirect.valid && io.enq.canAccept
toLsq.valid := RegNext(do_enq)
toLsq.bits := RegEnable(enq.bits, do_enq)
toLsq.bits.lqIdx := RegEnable(resp.lqIdx, do_enq)
toLsq.bits.sqIdx := RegEnable(resp.sqIdx, do_enq)
}
}
@@ -99,6 +99,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val uncache = new DCacheWordIO
val exceptionAddr = new ExceptionAddrIO
val lqFull = Output(Bool())
val lqCancelCnt = Output(UInt(log2Up(LoadQueueSize + 1).W))
val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
})
@@ -128,11 +129,13 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
val deqPtrExtNext = Wire(new LqPtr)
val allowEnqueue = RegInit(true.B)
val enqPtr = enqPtrExt(0).value
val deqPtr = deqPtrExt.value
val validCount = distanceBetween(enqPtrExt(0), deqPtrExt)
val allowEnqueue = validCount <= (LoadQueueSize - 2).U
val deqMask = UIntToMask(deqPtr, LoadQueueSize)
val enqMask = UIntToMask(enqPtr, LoadQueueSize)
@@ -145,12 +148,14 @@ class LoadQueue(implicit p: Parameters) extends XSModule
*/
io.enq.canAccept := allowEnqueue
val canEnqueue = io.enq.req.map(_.valid)
val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.brqRedirect))
for (i <- 0 until io.enq.req.length) {
val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
val lqIdx = enqPtrExt(offset)
val index = lqIdx.value
when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
uop(index) := io.enq.req(i).bits
val index = io.enq.req(i).bits.lqIdx.value
when (canEnqueue(i) && !enqCancel(i)) {
uop(index).robIdx := io.enq.req(i).bits.robIdx
allocated(index) := true.B
datavalid(index) := false.B
writebacked(index) := false.B
@@ -158,6 +163,8 @@ class LoadQueue(implicit p: Parameters) extends XSModule
miss(index) := false.B
pending(index) := false.B
error(index) := false.B
XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
XSError(index =/= lqIdx.value, s"must be the same entry $i\n")
}
io.enq.resp(i) := lqIdx
}
@@ -227,10 +234,11 @@ class LoadQueue(implicit p: Parameters) extends XSModule
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i) && !io.needReplayFromRS(i)
pending(loadWbIndex) := io.loadIn(i).bits.mmio
// dirty code for load instr
uop(loadWbIndex).pdest := io.loadIn(i).bits.uop.pdest
uop(loadWbIndex).cf := io.loadIn(i).bits.uop.cf
uop(loadWbIndex).ctrl := io.loadIn(i).bits.uop.ctrl
uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
// update replayInst (replay from fetch) bit,
// for replayInst may be set to true in load pipeline
uop(loadWbIndex).ctrl.replayInst := io.loadIn(i).bits.uop.ctrl.replayInst
}
// vaddrModule write is delayed, as vaddrModule will not be read right after write
vaddrModule.io.waddr(i) := RegNext(loadWbIndex)
@@ -378,6 +386,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
(0 until CommitWidth).map(i => {
when(commitCount > i.U){
allocated((deqPtrExt+i.U).value) := false.B
XSError(!allocated((deqPtrExt+i.U).value), s"why commit invalid entry $i?\n")
}
})
@@ -755,19 +764,19 @@ class LoadQueue(implicit p: Parameters) extends XSModule
for (i <- 0 until LoadQueueSize) {
needCancel(i) := uop(i).robIdx.needFlush(io.brqRedirect) && allocated(i)
when (needCancel(i)) {
allocated(i) := false.B
allocated(i) := false.B
}
}
/**
* update pointers
*/
val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
val lastCycleCancelCount = PopCount(RegNext(needCancel))
// when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept, PopCount(io.enq.req.map(_.valid)), 0.U)
when (lastCycleRedirect.valid) {
// we recover the pointers in the next cycle after redirect
enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
enqPtrExt := VecInit(enqPtrExt.map(_ - (lastCycleCancelCount + lastEnqCancel)))
}.otherwise {
enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
}
@@ -775,9 +784,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
deqPtrExtNext := deqPtrExt + commitCount
deqPtrExt := deqPtrExtNext
val validCount = distanceBetween(enqPtrExt(0), deqPtrExt)
allowEnqueue := validCount + enqNumber <= (LoadQueueSize - io.enq.req.length).U
io.lqCancelCnt := RegNext(lastCycleCancelCount + lastEnqCancel)
/**
* misc
@@ -79,6 +79,8 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val sqempty = Output(Bool())
val issuePtrExt = Output(new SqPtr) // used to wake up delayed load/store
val sqFull = Output(Bool())
val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
val sqDeq = Output(UInt(2.W))
})
println("StoreQueue: size:" + StoreQueueSize)
@@ -130,12 +132,14 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val cmtPtrExt = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new SqPtr))))
val issuePtrExt = RegInit(0.U.asTypeOf(new SqPtr))
val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
val allowEnqueue = RegInit(true.B)
val enqPtr = enqPtrExt(0).value
val deqPtr = deqPtrExt(0).value
val cmtPtr = cmtPtrExt(0).value
val validCount = distanceBetween(enqPtrExt(0), deqPtrExt(0))
val allowEnqueue = validCount <= (StoreQueueSize - 2).U
val deqMask = UIntToMask(deqPtr, StoreQueueSize)
val enqMask = UIntToMask(enqPtr, StoreQueueSize)
@@ -151,12 +155,15 @@ class StoreQueue(implicit p: Parameters) extends XSModule
)
))
// deqPtrExtNext traces which inst is about to leave store queue
val deqPtrExtNext = WireInit(Mux(io.sbuffer(1).fire(),
val deqPtrExtNext = Mux(io.sbuffer(1).fire(),
VecInit(deqPtrExt.map(_ + 2.U)),
Mux(io.sbuffer(0).fire() || io.mmioStout.fire(),
VecInit(deqPtrExt.map(_ + 1.U)),
deqPtrExt
)
)
io.sqDeq := RegNext(Mux(io.sbuffer(1).fire(), 2.U,
Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U)
))
for (i <- 0 until StorePipelineWidth) {
dataModule.io.raddr(i) := rdataPtrExtNext(i).value
@@ -173,17 +180,21 @@ class StoreQueue(implicit p: Parameters) extends XSModule
* Currently, StoreQueue only allows enqueue when #emptyEntries > EnqWidth
*/
io.enq.canAccept := allowEnqueue
val canEnqueue = io.enq.req.map(_.valid)
val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.brqRedirect))
for (i <- 0 until io.enq.req.length) {
val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
val sqIdx = enqPtrExt(offset)
val index = sqIdx.value
when (io.enq.req(i).valid && io.enq.canAccept && io.enq.lqCanAccept && !io.brqRedirect.valid) {
uop(index) := io.enq.req(i).bits
val index = io.enq.req(i).bits.sqIdx.value
when (canEnqueue(i) && !enqCancel(i)) {
uop(index).robIdx := io.enq.req(i).bits.robIdx
allocated(index) := true.B
datavalid(index) := false.B
addrvalid(index) := false.B
committed(index) := false.B
pending(index) := false.B
XSError(!io.enq.canAccept || !io.enq.lqCanAccept, s"must accept $i\n")
XSError(index =/= sqIdx.value, s"must be the same entry $i\n")
}
io.enq.resp(i) := sqIdx
}
@@ -255,6 +266,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
// mmio(stWbIndex) := io.storeIn(i).bits.mmio
uop(stWbIndex).ctrl := io.storeIn(i).bits.uop.ctrl
uop(stWbIndex).debugInfo := io.storeIn(i).bits.uop.debugInfo
XSInfo("store addr write to sq idx %d pc 0x%x miss:%d vaddr %x paddr %x mmio %x\n",
io.storeIn(i).bits.uop.sqIdx.value,
@@ -585,20 +597,20 @@ class StoreQueue(implicit p: Parameters) extends XSModule
for (i <- 0 until StoreQueueSize) {
needCancel(i) := uop(i).robIdx.needFlush(io.brqRedirect) && allocated(i) && !committed(i)
when (needCancel(i)) {
allocated(i) := false.B
allocated(i) := false.B
}
}
/**
* update pointers
*/
val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
val lastCycleRedirect = RegNext(io.brqRedirect.valid)
val lastCycleCancelCount = PopCount(RegNext(needCancel))
// when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
val enqNumber = Mux(io.enq.canAccept && io.enq.lqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
val enqNumber = Mux(io.enq.canAccept && io.enq.lqCanAccept, PopCount(io.enq.req.map(_.valid)), 0.U)
when (lastCycleRedirect) {
// we recover the pointers in the next cycle after redirect
enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
enqPtrExt := VecInit(enqPtrExt.map(_ - (lastCycleCancelCount + lastEnqCancel)))
}.otherwise {
enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
}
@@ -607,9 +619,9 @@ class StoreQueue(implicit p: Parameters) extends XSModule
rdataPtrExt := rdataPtrExtNext
val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U))
val validCount = distanceBetween(enqPtrExt(0), deqPtrExt(0))
allowEnqueue := validCount + enqNumber <= (StoreQueueSize - io.enq.req.length).U
// If the redirect happens at T0, sqCancelCnt is available at T2
io.sqCancelCnt := RegNext(lastCycleCancelCount + lastEnqCancel)
// io.sqempty will be used by sbuffer
// We delay it for 1 cycle for better timing