提交 43913318 编写于 作者: Y Yinan Xu

dispatch: remove replay logic

上级 55e377db
......@@ -111,4 +111,8 @@ object GenMask {
// Builds a UInt in which only bit `pos` is set: the literal 1 shifted left by
// the elaboration-time constant `pos`.
def apply(pos: Int) = {
  val shifted = 1.U << pos
  shifted.asUInt()
}
}
\ No newline at end of file
}
// Turns a pointer value into a bit mask: the one-hot encoding of `ptr` minus one,
// i.e. every bit position strictly below `ptr` is set and `ptr` itself is clear.
object UIntToMask {
  def apply(ptr: UInt) = {
    val oneHot = UIntToOH(ptr)
    oneHot - 1.U
  }
}
......@@ -68,10 +68,7 @@ case class XSCoreParameters
LsDqSize = 96,
IntDqDeqWidth = 4,
FpDqDeqWidth = 4,
LsDqDeqWidth = 4,
IntDqReplayWidth = 4,
FpDqReplayWidth = 4,
LsDqReplayWidth = 4
LsDqDeqWidth = 4
),
exuParameters: ExuParameters = ExuParameters(
JmpCnt = 1,
......@@ -148,7 +145,6 @@ trait HasXSParameter {
val LoadQueueSize = core.LoadQueueSize
val StoreQueueSize = core.StoreQueueSize
val dpParams = core.dpParams
val ReplayWidth = dpParams.IntDqReplayWidth + dpParams.FpDqReplayWidth + dpParams.LsDqReplayWidth
val exuParameters = core.exuParameters
val NRIntReadPorts = core.NRIntReadPorts
val NRIntWritePorts = core.NRIntWritePorts
......@@ -388,7 +384,6 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
memBlock.io.lsqio.commits <> ctrlBlock.io.roqio.commits
memBlock.io.lsqio.roqDeqPtr <> ctrlBlock.io.roqio.roqDeqPtr
memBlock.io.lsqio.oldestStore <> ctrlBlock.io.oldestStore
memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.roqio.exception.bits.lqIdx
memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.roqio.exception.bits.sqIdx
memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.ctrl.commitType)
......
......@@ -56,7 +56,6 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
val commits = Vec(CommitWidth, ValidIO(new RoqCommit))
val roqDeqPtr = Output(new RoqPtr)
}
val oldestStore = Input(Valid(new RoqPtr))
})
val decode = Module(new DecodeStage)
......@@ -112,11 +111,6 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
dispatch.io.redirect.bits <> redirect
dispatch.io.enqRoq <> roq.io.enq
dispatch.io.enqLsq <> io.toLsBlock.enqLsq
dispatch.io.dequeueRoqIndex.valid := roq.io.commitRoqIndex.valid || io.oldestStore.valid
dispatch.io.dequeueRoqIndex.bits := Mux(io.oldestStore.valid,
io.oldestStore.bits,
roq.io.commitRoqIndex.bits
)
dispatch.io.readIntRf <> io.toIntBlock.readRf
dispatch.io.readFpRf <> io.toFpBlock.readRf
dispatch.io.allocPregs.zipWithIndex.foreach { case (preg, i) =>
......@@ -145,12 +139,6 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
intBusyTable.io.pregRdy <> dispatch.io.intPregRdy
fpBusyTable.io.rfReadAddr <> dispatch.io.readFpRf.map(_.addr)
fpBusyTable.io.pregRdy <> dispatch.io.fpPregRdy
for(i <- 0 until ReplayWidth){
intBusyTable.io.replayPregs(i).valid := dispatch.io.replayPregReq(i).isInt
fpBusyTable.io.replayPregs(i).valid := dispatch.io.replayPregReq(i).isFp
intBusyTable.io.replayPregs(i).bits := dispatch.io.replayPregReq(i).preg
fpBusyTable.io.replayPregs(i).bits := dispatch.io.replayPregReq(i).preg
}
roq.io.memRedirect := DontCare
roq.io.memRedirect.valid := false.B
......
......@@ -55,7 +55,6 @@ class MemBlock
val exceptionAddr = new ExceptionAddrIO // to csr
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit))) // to lsq
val roqDeqPtr = Input(new RoqPtr) // to lsq
val oldestStore = Output(Valid(new RoqPtr)) // to dispatch
}
})
......@@ -209,7 +208,6 @@ class MemBlock
// Lsq
lsq.io.commits <> io.lsqio.commits
lsq.io.enq <> io.fromCtrlBlock.enqLsq
lsq.io.oldestStore <> io.lsqio.oldestStore
lsq.io.brqRedirect := io.fromCtrlBlock.redirect
lsq.io.roqDeqPtr := io.lsqio.roqDeqPtr
io.toCtrlBlock.replay <> lsq.io.rollback
......
......@@ -17,10 +17,7 @@ case class DispatchParameters
LsDqSize: Int,
IntDqDeqWidth: Int,
FpDqDeqWidth: Int,
LsDqDeqWidth: Int,
IntDqReplayWidth: Int,
FpDqReplayWidth: Int,
LsDqReplayWidth: Int
LsDqDeqWidth: Int
)
class Dispatch extends XSModule {
......@@ -30,6 +27,8 @@ class Dispatch extends XSModule {
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val renameBypass = Input(new RenameBypassInfo)
// to busytable: set pdest to busy (not ready) when they are dispatched
val allocPregs = Vec(RenameWidth, Output(new ReplayPregReq))
// enq Roq
val enqRoq = new Bundle {
val canAccept = Input(Bool())
......@@ -44,16 +43,12 @@ class Dispatch extends XSModule {
val req = Vec(RenameWidth, ValidIO(new MicroOp))
val resp = Vec(RenameWidth, Input(new LSIdx))
}
val dequeueRoqIndex = Input(Valid(new RoqPtr))
// read regfile
val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
val readFpRf = Vec(NRFpReadPorts, Flipped(new RfReadPort))
// read reg status (busy/ready)
val intPregRdy = Vec(NRIntReadPorts, Input(Bool()))
val fpPregRdy = Vec(NRFpReadPorts, Input(Bool()))
// replay: set preg status to not ready
val replayPregReq = Output(Vec(ReplayWidth, new ReplayPregReq))
val allocPregs = Vec(RenameWidth, Output(new ReplayPregReq))
// to reservation stations
val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp))
......@@ -61,9 +56,9 @@ class Dispatch extends XSModule {
})
val dispatch1 = Module(new Dispatch1)
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, dpParams.DqEnqWidth, dpParams.IntDqDeqWidth, dpParams.IntDqReplayWidth))
val fpDq = Module(new DispatchQueue(dpParams.FpDqSize, dpParams.DqEnqWidth, dpParams.FpDqDeqWidth, dpParams.FpDqReplayWidth))
val lsDq = Module(new DispatchQueue(dpParams.LsDqSize, dpParams.DqEnqWidth, dpParams.LsDqDeqWidth, dpParams.LsDqReplayWidth))
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, dpParams.DqEnqWidth, dpParams.IntDqDeqWidth))
val fpDq = Module(new DispatchQueue(dpParams.FpDqSize, dpParams.DqEnqWidth, dpParams.FpDqDeqWidth))
val lsDq = Module(new DispatchQueue(dpParams.LsDqSize, dpParams.DqEnqWidth, dpParams.LsDqDeqWidth))
// pipeline between rename and dispatch
// accepts all at once
......@@ -88,30 +83,8 @@ class Dispatch extends XSModule {
// dispatch queue: queue uops and dispatch them to different reservation stations or issue queues
// it may cancel the uops
intDq.io.redirect <> io.redirect
intDq.io.dequeueRoqIndex <> io.dequeueRoqIndex
intDq.io.replayPregReq.zipWithIndex.map { case(replay, i) =>
io.replayPregReq(i) <> replay
}
intDq.io.otherWalkDone := !fpDq.io.inReplayWalk && !lsDq.io.inReplayWalk
fpDq.io.redirect <> io.redirect
fpDq.io.dequeueRoqIndex <> io.dequeueRoqIndex
fpDq.io.replayPregReq.zipWithIndex.map { case(replay, i) =>
io.replayPregReq(i + dpParams.IntDqReplayWidth) <> replay
}
fpDq.io.otherWalkDone := !intDq.io.inReplayWalk && !lsDq.io.inReplayWalk
lsDq.io.redirect <> io.redirect
lsDq.io.dequeueRoqIndex <> io.dequeueRoqIndex
lsDq.io.replayPregReq.zipWithIndex.map { case(replay, i) =>
io.replayPregReq(i + dpParams.IntDqReplayWidth + dpParams.FpDqReplayWidth) <> replay
}
lsDq.io.otherWalkDone := !intDq.io.inReplayWalk && !fpDq.io.inReplayWalk
if (!env.FPGAPlatform) {
val inWalk = intDq.io.inReplayWalk || fpDq.io.inReplayWalk || lsDq.io.inReplayWalk
ExcitingUtils.addSource(inWalk, "perfCntCondDpqReplay", Perf)
}
// Int dispatch queue to Int reservation stations
val intDispatch = Module(new Dispatch2Int)
......
......@@ -7,61 +7,42 @@ import xiangshan.backend.decode.SrcType
import xiangshan._
import xiangshan.backend.roq.RoqPtr
class DispatchQueueIO(enqnum: Int, deqnum: Int, replayWidth: Int) extends XSBundle {
class DispatchQueueIO(enqnum: Int, deqnum: Int) extends XSBundle {
val enq = Vec(enqnum, Flipped(ValidIO(new MicroOp)))
val enqReady = Output(Bool())
val deq = Vec(deqnum, DecoupledIO(new MicroOp))
val dequeueRoqIndex = Input(Valid(new RoqPtr))
val redirect = Flipped(ValidIO(new Redirect))
val replayPregReq = Output(Vec(replayWidth, new ReplayPregReq))
val inReplayWalk = Output(Bool())
val otherWalkDone = Input(Bool())
override def cloneType: DispatchQueueIO.this.type =
new DispatchQueueIO(enqnum, deqnum, replayWidth).asInstanceOf[this.type]
new DispatchQueueIO(enqnum, deqnum).asInstanceOf[this.type]
}
// dispatch queue: accepts at most enqnum uops from dispatch1 and dispatches deqnum uops at every clock cycle
class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new DispatchQueueIO(enqnum, deqnum, replayWidth))
class DispatchQueue(size: Int, enqnum: Int, deqnum: Int) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new DispatchQueueIO(enqnum, deqnum))
val indexWidth = log2Ceil(size)
val s_invalid :: s_valid :: s_dispatched :: Nil = Enum(3)
val s_invalid :: s_valid:: Nil = Enum(2)
// queue data array
val uopEntries = Mem(size, new MicroOp)
val stateEntries = RegInit(VecInit(Seq.fill(size)(s_invalid)))
// head: first valid entry (dispatched entry)
val headPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(size)))
// dispatch: first entry that has not been dispatched
val dispatchPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(size)))
val headPtrMask = UIntToMask(headPtr.value)
// tail: first invalid entry (free entry)
val tailPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(size)))
val tailPtrMask = UIntToMask(tailPtr.value)
// TODO: make ptr a vector to reduce latency?
// commit: starting from head ptr
val commitIndex = (0 until CommitWidth).map(i => headPtr + i.U).map(_.value)
// deq: starting from dispatch ptr
val deqIndex = (0 until deqnum).map(i => dispatchPtr + i.U).map(_.value)
// deq: starting from head ptr
val deqIndex = (0 until deqnum).map(i => headPtr + i.U).map(_.value)
// enq: starting from tail ptr
val enqIndex = (0 until enqnum).map(i => tailPtr + i.U).map(_.value)
val validEntries = distanceBetween(tailPtr, headPtr)
val dispatchEntries = distanceBetween(tailPtr, dispatchPtr)
val commitEntries = validEntries - dispatchEntries
val emptyEntries = size.U - validEntries
// Computes a `size`-bit mask from two circular queue pointers.
// For each pointer a "ones below index" mask is formed; the two are XORed.
// When both pointers carry the same wrap flag the XOR is returned as-is,
// otherwise its bitwise complement is returned.
def rangeMask(start: CircularQueuePtr, end: CircularQueuePtr): UInt = {
  // ones in every position below ptr.value, built in (size + 1) bits and truncated to `size` bits
  def onesBelow(ptr: CircularQueuePtr): UInt = {
    val wide = (1.U((size + 1).W) << ptr.value).asUInt - 1.U
    wide(size - 1, 0)
  }
  val between = onesBelow(start) ^ onesBelow(end)
  val sameWrap = start.flag === end.flag
  Mux(sameWrap, between, ~between)
}
val dispatchedMask = rangeMask(headPtr, dispatchPtr)
val allWalkDone = !io.inReplayWalk && io.otherWalkDone
val canEnqueue = validEntries <= (size - enqnum).U && allWalkDone
val isTrueEmpty = ~Cat((0 until size).map(i => stateEntries(i) === s_valid)).orR
val canEnqueue = validEntries <= (size - enqnum).U
val canActualEnqueue = canEnqueue && !(io.redirect.valid /*&& !io.redirect.bits.isReplay*/)
/**
......@@ -87,27 +68,13 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
// dequeue: from s_valid to s_invalid
for (i <- 0 until deqnum) {
when (io.deq(i).fire()) {
stateEntries(deqIndex(i)) := s_dispatched
when (io.deq(i).fire() && !io.redirect.valid) {
stateEntries(deqIndex(i)) := s_invalid
XSError(stateEntries(deqIndex(i)) =/= s_valid, "state of the dispatch entry is not s_valid\n")
}
}
// commit: from s_dispatched to s_invalid
val needDequeue = Wire(Vec(size, Bool()))
val deqRoqIdx = io.dequeueRoqIndex.bits
for (i <- 0 until size) {
needDequeue(i) := stateEntries(i) === s_dispatched && io.dequeueRoqIndex.valid && !isAfter(uopEntries(i).roqIdx, deqRoqIdx) && dispatchedMask(i)
when (needDequeue(i)) {
stateEntries(i) := s_invalid
}
XSInfo(needDequeue(i), p"dispatched entry($i)(pc = ${Hexadecimal(uopEntries(i).cf.pc)}) " +
p"roqIndex 0x${Hexadecimal(uopEntries(i).roqIdx.asUInt)} " +
p"left dispatch queue with deqRoqIndex 0x${Hexadecimal(io.dequeueRoqIndex.bits.asUInt)}\n")
}
// redirect: cancel uops currently in the queue
val mispredictionValid = io.redirect.valid //&& io.redirect.bits.isMisPred
val exceptionValid = io.redirect.valid && io.redirect.bits.isException
......@@ -116,7 +83,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
val needCancel = Wire(Vec(size, Bool()))
for (i <- 0 until size) {
roqNeedFlush(i) := uopEntries(i.U).roqIdx.needFlush(io.redirect)
needCancel(i) := stateEntries(i) =/= s_invalid && ((roqNeedFlush(i) && mispredictionValid) || exceptionValid || flushPipeValid) && !needDequeue(i)
needCancel(i) := stateEntries(i) =/= s_invalid && ((roqNeedFlush(i) && mispredictionValid) || exceptionValid || flushPipeValid)
when (needCancel(i)) {
stateEntries(i) := s_invalid
......@@ -127,182 +94,76 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
p"cancelled with redirect roqIndex 0x${Hexadecimal(io.redirect.bits.roqIdx.asUInt)}\n")
}
// replay: from s_dispatched to s_valid
val replayValid = false.B//io.redirect.valid && io.redirect.bits.isReplay
val needReplay = Wire(Vec(size, Bool()))
for (i <- 0 until size) {
needReplay(i) := roqNeedFlush(i) && stateEntries(i) === s_dispatched && replayValid
when (needReplay(i)) {
stateEntries(i) := s_valid
}
XSInfo(needReplay(i), p"dispatched entry($i)(pc = ${Hexadecimal(uopEntries(i.U).cf.pc)}) " +
p"replayed with roqIndex ${io.redirect.bits.roqIdx}\n")
}
/**
* Part 2: walk
*
* Instead of keeping the walking distances, we keep the walking target position for simplicity.
*
* (1) replay: move dispatchPtr to the first needReplay entry
* (2) redirect (branch misprediction): move dispatchPtr, tailPtr to the first cancelled entry
* Part 2: update indices
*
* tail: (1) enqueue; (2) redirect
* head: dequeue
*/
// getFirstMaskPosition: locate the head of a run of consecutive ones in `mask`.
// The position is counted from one of the two ends, depending on the top bit:
//   - top bit (index size - 1) set:   priority-encode the reversed, inverted mask
//   - top bit clear:                  priority-encode the mask itself
// Examples carried over from the original comment (8-entry mask):
// 00000001 => 0
// 00111000 => 3
// 11000111 => 2
// 10000000 => 1
// 00000000 => 7
// 11111111 => 7
def getFirstMaskPosition(mask: Seq[Bool]) = {
  val topBitSet = mask(size - 1)
  val firstZeroFromTop = PriorityEncoder(mask.reverse.map(!_))
  val firstOneFromBottom = PriorityEncoder(mask)
  Mux(topBitSet, firstZeroFromTop, firstOneFromBottom)
}
val maskedNeedReplay = Cat(needReplay.reverse) & dispatchedMask
val allCancel = Cat(needCancel).andR
val someReplay = Cat(maskedNeedReplay).orR
val allReplay = Cat(maskedNeedReplay).andR
XSDebug(replayValid, p"needReplay: ${Binary(Cat(needReplay))}\n")
XSDebug(replayValid, p"dispatchedMask: ${Binary(dispatchedMask)}\n")
XSDebug(replayValid, p"maskedNeedReplay: ${Binary(maskedNeedReplay)}\n")
// when nothing or everything is cancelled or replayed, the pointers remain unchanged
// if any uop is cancelled or replayed, the pointer should go to the first zero before all ones
// position: target index
// (1) if leftmost bits are ones, count continuous ones from leftmost (target position is the last one)
// (2) if leftmost bit is zero, count rightmost zero bits (target position is the first one)
// if all bits are one, we need to keep the index unchanged
// 00000000, 11111111: unchanged
// otherwise: firstMaskPosition
val cancelPosition = Mux(!Cat(needCancel).orR || allCancel, tailPtr.value, getFirstMaskPosition(needCancel))
val replayPosition = Mux(!someReplay || allReplay, dispatchPtr.value, getFirstMaskPosition(maskedNeedReplay.asBools))
XSDebug(replayValid, p"getFirstMaskPosition: ${getFirstMaskPosition(maskedNeedReplay.asBools)}\n")
assert(cancelPosition.getWidth == indexWidth)
assert(replayPosition.getWidth == indexWidth)
// If the highest bit is one, the direction flips.
// Otherwise, the direction keeps the same.
val tailCancelPtr = Wire(new CircularQueuePtr(size))
tailCancelPtr.flag := Mux(needCancel(size - 1), ~tailPtr.flag, tailPtr.flag)
tailCancelPtr.value := Mux(needCancel(size - 1) && !allCancel, size.U - cancelPosition, cancelPosition)
// In case of branch mis-prediction:
// If mis-prediction happens after dispatchPtr, the pointer keeps the same as before.
// If dispatchPtr needs to be cancelled, reset dispatchPtr to tailPtr.
val dispatchCancelPtr = Mux(needCancel(dispatchPtr.value) || dispatchEntries === 0.U, tailCancelPtr, dispatchPtr)
// In case of replay, we need to walk back and recover preg states in the busy table.
// We keep track of the number of entries needed to be walked instead of target position to reduce overhead
// for 11111111, replayPosition is not useful. We naively set Cnt to size.U
val dispatchReplayCnt = Mux(
allReplay, size.U,
Mux(maskedNeedReplay(size - 1),
// replay makes flag flipped
dispatchPtr.value + replayPosition,
// the new replay does not change the flag
Mux(dispatchPtr.value <= replayPosition,
// but we are currently in a replay that changes the flag
dispatchPtr.value + (size.U - replayPosition),
dispatchPtr.value - replayPosition)))
val dispatchReplayCntReg = RegInit(0.U)
// actually, if deqIndex points to head uops and they are replayed, there's no need for extraWalk
// however, to simplify logic, we simply let it do extra walk now
val needExtraReplayWalk = Cat((0 until deqnum).map(i => needReplay(deqIndex(i)))).orR
val needExtraReplayWalkReg = RegNext(needExtraReplayWalk && replayValid, false.B)
val inReplayWalk = dispatchReplayCntReg =/= 0.U || needExtraReplayWalkReg
val dispatchReplayStep = Mux(needExtraReplayWalkReg, 0.U, Mux(dispatchReplayCntReg > replayWidth.U, replayWidth.U, dispatchReplayCntReg))
when (exceptionValid) {
dispatchReplayCntReg := 0.U
}.elsewhen (inReplayWalk && mispredictionValid && needCancel((dispatchPtr - 1.U).value)) {
val distance = distanceBetween(dispatchPtr, tailCancelPtr)
dispatchReplayCntReg := Mux(dispatchReplayCntReg > distance, dispatchReplayCntReg - distance, 0.U)
}.elsewhen (replayValid && someReplay) {
dispatchReplayCntReg := dispatchReplayCnt - dispatchReplayStep
}.elsewhen (!needExtraReplayWalkReg) {
dispatchReplayCntReg := dispatchReplayCntReg - dispatchReplayStep
}
io.inReplayWalk := inReplayWalk
val replayIndex = (0 until replayWidth).map(i => (dispatchPtr - (i + 1).U).value)
for (i <- 0 until replayWidth) {
val index = Mux(needExtraReplayWalkReg, (if (i < deqnum) deqIndex(i) else 0.U), replayIndex(i))
val shouldResetDest = inReplayWalk && stateEntries(index) === s_valid
io.replayPregReq(i).isInt := shouldResetDest && uopEntries(index).ctrl.rfWen && uopEntries(index).ctrl.ldest =/= 0.U
io.replayPregReq(i).isFp := shouldResetDest && uopEntries(index).ctrl.fpWen
io.replayPregReq(i).preg := uopEntries(index).pdest
XSDebug(shouldResetDest, p"replay $i: " +
p"type (${uopEntries(index).ctrl.rfWen}, ${uopEntries(index).ctrl.fpWen}) " +
p"pdest ${uopEntries(index).pdest} ldest ${uopEntries(index).ctrl.ldest}\n")
}
/**
* Part 3: update indices
*
* tail: (1) enqueue; (2) walk in case of redirect
* dispatch: (1) dequeue; (2) walk in case of replay; (3) walk in case of redirect
* head: commit
*/
// enqueue
val numEnq = Mux(canActualEnqueue, PriorityEncoder(io.enq.map(!_.valid) :+ true.B), 0.U)
XSError(numEnq =/= 0.U && (mispredictionValid || exceptionValid), "should not enqueue when redirect\n")
tailPtr := Mux(exceptionValid,
0.U.asTypeOf(new CircularQueuePtr(size)),
Mux(mispredictionValid,
tailCancelPtr,
tailPtr + numEnq)
)
// dequeue
val numDeqTry = Mux(dispatchEntries > deqnum.U, deqnum.U, dispatchEntries)
val numDeqTry = Mux(validEntries > deqnum.U, deqnum.U, validEntries)
val numDeqFire = PriorityEncoder(io.deq.zipWithIndex.map{case (deq, i) =>
// For dequeue, the first entry should never be s_invalid
// Otherwise, there should be a redirect and tail walks back
// in this case, we set numDeq to 0
!deq.fire() && (if (i == 0) true.B else stateEntries(deqIndex(i)) =/= s_dispatched)
!deq.fire() && (if (i == 0) true.B else stateEntries(deqIndex(i)) =/= s_invalid)
} :+ true.B)
val numDeq = Mux(numDeqTry > numDeqFire, numDeqFire, numDeqTry)
dispatchPtr := Mux(exceptionValid,
// agreement with reservation station: don't dequeue when redirect.valid
val headPtrNext = Mux(mispredictionValid, headPtr, headPtr + numDeq)
headPtr := Mux(exceptionValid, 0.U.asTypeOf(new CircularQueuePtr(size)), headPtrNext)
// For branch mis-prediction or memory violation replay,
// we delay updating the indices for one clock cycle.
// For now, we simply use PopCount to count #instr cancelled.
val lastCycleMisprediction = RegNext(io.redirect.valid && !(io.redirect.bits.isException || io.redirect.bits.isFlushPipe))
// find the last one's position, starting from headPtr and searching backwards
val validBitVec = VecInit((0 until size).map(i => stateEntries(i) === s_valid))
val loValidBitVec = Cat((0 until size).map(i => validBitVec(i) && headPtrMask(i)))
val hiValidBitVec = Cat((0 until size).map(i => validBitVec(i) && ~headPtrMask(i)))
val flippedFlag = loValidBitVec.orR
val lastOneIndex = size.U - PriorityEncoder(Mux(loValidBitVec.orR, loValidBitVec, hiValidBitVec))
val walkedTailPtr = Wire(new CircularQueuePtr(size))
walkedTailPtr.flag := flippedFlag ^ headPtr.flag
walkedTailPtr.value := lastOneIndex
// enqueue
val numEnq = Mux(canActualEnqueue, PriorityEncoder(io.enq.map(!_.valid) :+ true.B), 0.U)
XSError(numEnq =/= 0.U && (mispredictionValid || exceptionValid), "should not enqueue when redirect\n")
tailPtr := Mux(exceptionValid,
0.U.asTypeOf(new CircularQueuePtr(size)),
Mux(mispredictionValid && (!inReplayWalk || needCancel((dispatchPtr - 1.U).value)),
dispatchCancelPtr,
Mux(inReplayWalk, dispatchPtr - dispatchReplayStep, dispatchPtr + numDeq))
Mux(lastCycleMisprediction,
Mux(isTrueEmpty, headPtr, walkedTailPtr),
tailPtr + numEnq)
)
headPtr := Mux(exceptionValid, 0.U.asTypeOf(new CircularQueuePtr(size)), headPtr + PopCount(needDequeue))
/**
* Part 4: set output and input
* Part 3: set output and input
*/
// TODO: remove this when replay moves to roq
for (i <- 0 until deqnum) {
io.deq(i).bits := uopEntries(deqIndex(i))
// do not dequeue when io.redirect valid because it may cause dispatchPtr work improperly
io.deq(i).valid := stateEntries(deqIndex(i)) === s_valid && !io.redirect.valid && allWalkDone
io.deq(i).valid := stateEntries(deqIndex(i)) === s_valid && !lastCycleMisprediction// && !io.redirect.valid
}
// debug: dump dispatch queue states
XSDebug(p"head: $headPtr, tail: $tailPtr, dispatch: $dispatchPtr, " +
p"replayCnt: $dispatchReplayCntReg, needExtraReplayWalkReg: $needExtraReplayWalkReg\n")
XSDebug(p"head: $headPtr, tail: $tailPtr\n")
XSDebug(p"state: ")
stateEntries.reverse.foreach { s =>
XSDebug(false, s === s_invalid, "-")
XSDebug(false, s === s_valid, "v")
XSDebug(false, s === s_dispatched, "d")
}
XSDebug(false, true.B, "\n")
XSDebug(p"ptr: ")
(0 until size).reverse.foreach { i =>
val isPtr = i.U === headPtr.value || i.U === tailPtr.value || i.U === dispatchPtr.value
val isPtr = i.U === headPtr.value || i.U === tailPtr.value
XSDebug(false, isPtr, "^")
XSDebug(false, !isPtr, " ")
}
XSDebug(false, true.B, "\n")
XSError(isAfter(headPtr, tailPtr), p"assert greaterOrEqualThan(tailPtr: $tailPtr, headPtr: $headPtr) failed\n")
XSError(isAfter(dispatchPtr, tailPtr) && !inReplayWalk, p"assert greaterOrEqualThan(tailPtr: $tailPtr, dispatchPtr: $dispatchPtr) failed\n")
XSError(isAfter(headPtr, dispatchPtr), p"assert greaterOrEqualThan(dispatchPtr: $dispatchPtr, headPtr: $headPtr) failed\n")
XSError(validEntries < dispatchEntries && !inReplayWalk, "validEntries should be less than dispatchEntries\n")
}
......@@ -823,7 +823,6 @@ class CSR extends FunctionUnit with HasCSRConst
"MbpIWrong" -> (0xb0b, "perfCntCondMbpIWrong" ),
"MbpRRight" -> (0xb0c, "perfCntCondMbpRRight" ),
"MbpRWrong" -> (0xb0d, "perfCntCondMbpRWrong" ),
"DpqReplay" -> (0xb0e, "perfCntCondDpqReplay" ),
"RoqWalk" -> (0xb0f, "perfCntCondRoqWalk" ),
"RoqWaitInt" -> (0xb10, "perfCntCondRoqWaitInt" ),
"RoqWaitFp" -> (0xb11, "perfCntCondRoqWaitFp" ),
......
......@@ -5,8 +5,6 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.exu.{Exu, ExuConfig}
import java.rmi.registry.Registry
import java.{util => ju}
class BypassQueue(number: Int) extends XSModule {
val io = IO(new Bundle {
......@@ -206,9 +204,11 @@ class ReservationStationCtrl
// enq
val tailAfterRealDeq = tailPtr - (issFire && !needFeedback|| bubReg)
val isFull = tailAfterRealDeq.flag // tailPtr===qsize.U
tailPtr := tailAfterRealDeq + io.enqCtrl.fire()
// agreement with dispatch: don't fire when io.redirect.valid
val enqFire = io.enqCtrl.fire() && !io.redirect.valid
tailPtr := tailAfterRealDeq + enqFire
io.enqCtrl.ready := !isFull && !io.redirect.valid // TODO: check this redirect && need more optimization
io.enqCtrl.ready := !isFull
val enqUop = io.enqCtrl.bits
val srcSeq = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3)
val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
......@@ -222,7 +222,7 @@ class ReservationStationCtrl
(srcType === SrcType.reg && src === 0.U)
}
when (io.enqCtrl.fire()) {
when (enqFire) {
stateQueue(enqIdx_ctrl) := s_valid
srcQueue(enqIdx_ctrl).zipWithIndex.map{ case (s, i) =>
s := Mux(enqBpVec(i) || stateCheck(srcSeq(i), srcTypeSeq(i)), true.B,
......@@ -249,7 +249,7 @@ class ReservationStationCtrl
io.data.enqPtr := idxQueue(Mux(tailPtr.flag, deqIdx, tailPtr.value))
io.data.deqPtr.valid := selValid
io.data.deqPtr.bits := idxQueue(selectedIdxWire)
io.data.enqCtrl.valid := io.enqCtrl.fire
io.data.enqCtrl.valid := enqFire
io.data.enqCtrl.bits := io.enqCtrl.bits
// other io
......@@ -335,8 +335,8 @@ class ReservationStationData
// enq
val enqPtr = enq(log2Up(IssQueSize)-1,0)
val enqPtrReg = RegEnable(enqPtr, enqCtrl.fire())
val enqEn = enqCtrl.fire()
val enqPtrReg = RegEnable(enqPtr, enqCtrl.valid)
val enqEn = enqCtrl.valid
val enqEnReg = RegNext(enqEn)
when (enqEn) {
uop(enqPtr) := enqUop
......@@ -407,7 +407,7 @@ class ReservationStationData
val srcSeq = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3)
val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
io.ctrl.srcUpdate(IssQueSize).zipWithIndex.map{ case (h, i) =>
val (bpHit, bpHitReg, bpData)= bypass(srcSeq(i), srcTypeSeq(i), enqCtrl.fire())
val (bpHit, bpHitReg, bpData)= bypass(srcSeq(i), srcTypeSeq(i), enqCtrl.valid)
when (bpHitReg) { data(enqPtrReg)(i) := bpData }
h := bpHit
// NOTE: enq bp is done here
......
......@@ -12,8 +12,6 @@ class BusyTable(numReadPorts: Int, numWritePorts: Int) extends XSModule {
val allocPregs = Vec(RenameWidth, Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
// set preg state to ready (write back regfile + roq walk)
val wbPregs = Vec(numWritePorts, Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
// set preg state to busy when replay
val replayPregs = Vec(ReplayWidth, Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
// read preg state
val rfReadAddr = Vec(numReadPorts, Input(UInt(PhyRegIdxWidth.W)))
val pregRdy = Vec(numReadPorts, Output(Bool()))
......@@ -27,17 +25,15 @@ class BusyTable(numReadPorts: Int, numWritePorts: Int) extends XSModule {
val wbMask = reqVecToMask(io.wbPregs)
val allocMask = reqVecToMask(io.allocPregs)
val replayMask = reqVecToMask(io.replayPregs)
val tableAfterWb = table & (~wbMask).asUInt
val tableAfterAlloc = tableAfterWb | allocMask
val tableAfterReplay = tableAfterAlloc | replayMask
for((raddr, rdy) <- io.rfReadAddr.zip(io.pregRdy)){
rdy := !tableAfterWb(raddr)
}
table := tableAfterReplay
table := tableAfterAlloc
// for((alloc, i) <- io.allocPregs.zipWithIndex){
// when(alloc.valid){
......
......@@ -55,7 +55,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val exeWbResults = Vec(numWbPorts, Flipped(ValidIO(new ExuOutput)))
val commits = Vec(CommitWidth, Valid(new RoqCommit))
val bcommit = Output(UInt(BrTagWidth.W))
val commitRoqIndex = Output(Valid(new RoqPtr))
val roqDeqPtr = Output(new RoqPtr)
val csr = new RoqCSRIO
})
......@@ -336,9 +335,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
}
val retireCounter = Mux(state === s_idle, commitCnt, 0.U)
XSInfo(retireCounter > 0.U, "retired %d insts\n", retireCounter)
val commitOffset = PriorityEncoder((validCommit :+ false.B).map(!_))
io.commitRoqIndex.valid := state === s_idle
io.commitRoqIndex.bits := deqPtrExt + commitOffset
// commit branch to brq
io.bcommit := PopCount(cfiCommitVec)
......
......@@ -249,7 +249,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val dcache = new DCacheLineIO
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val oldestStore = Output(Valid(new RoqPtr))
val exceptionAddr = new ExceptionAddrIO
})
......@@ -292,7 +291,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
storeQueue.io.mmioStout <> io.mmioStout
storeQueue.io.commits <> io.commits
storeQueue.io.roqDeqPtr <> io.roqDeqPtr
storeQueue.io.oldestStore <> io.oldestStore
storeQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx
storeQueue.io.exceptionAddr.isStore := DontCare
......
......@@ -38,7 +38,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
// val refill = Flipped(Valid(new DCacheLineReq ))
val oldestStore = Output(Valid(new RoqPtr))
val exceptionAddr = new ExceptionAddrIO
})
......@@ -178,13 +177,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
(selValid, selVec)
}
// select the last writebacked instruction
val validStoreVec = VecInit((0 until StoreQueueSize).map(i => !(allocated(i) && datavalid(i))))
val storeNotValid = SqPtr(false.B, getFirstOne(validStoreVec, tailMask))
val storeValidIndex = (storeNotValid - 1.U).value
io.oldestStore.valid := allocated(deqPtrExt.value) && datavalid(deqPtrExt.value) && !commited(storeValidIndex)
io.oldestStore.bits := uop(storeValidIndex).roqIdx
// writeback finished mmio store
io.mmioStout.bits.uop := uop(deqPtr)
io.mmioStout.bits.uop.sqIdx := deqPtrExt
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册