未验证 提交 cc563521 编写于 作者: W William Wang 提交者: GitHub

Merge pull request #281 from RISCVERS/opt-dispatch1

optimize dispatch1 timing
......@@ -71,6 +71,7 @@ EMU_LDFLAGS += -lpthread -lSDL2 -ldl -lz
VEXTRA_FLAGS = -I$(abspath $(BUILD_DIR)) --x-assign unique -O3 -CFLAGS "$(EMU_CXXFLAGS)" -LDFLAGS "$(EMU_LDFLAGS)"
# Verilator trace support
EMU_TRACE ?=
ifeq ($(EMU_TRACE),1)
VEXTRA_FLAGS += --trace
endif
......@@ -82,6 +83,7 @@ VEXTRA_FLAGS += --threads $(EMU_THREADS) --threads-dpi none
endif
# Verilator savable
EMU_SNAPSHOT ?=
ifeq ($(EMU_SNAPSHOT),1)
VEXTRA_FLAGS += --savable
EMU_CXXFLAGS += -DVM_SAVABLE
......
......@@ -29,7 +29,11 @@ class CtrlToFpBlockIO extends XSBundle {
class CtrlToLsBlockIO extends XSBundle {
val enqIqCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
val enqIqData = Vec(exuParameters.LsExuCnt, Output(new ExuInput))
val lsqIdxReq = Vec(RenameWidth, DecoupledIO(new MicroOp))
val enqLsq = new Bundle() {
val canAccept = Input(Bool())
val req = Vec(RenameWidth, ValidIO(new MicroOp))
val resp = Vec(RenameWidth, Input(new LSIdx))
}
val redirect = ValidIO(new Redirect)
}
......@@ -101,10 +105,8 @@ class CtrlBlock extends XSModule {
rename.io.out <> dispatch.io.fromRename
dispatch.io.redirect <> redirect
dispatch.io.toRoq <> roq.io.dp1Req
dispatch.io.roqIdxs <> roq.io.roqIdxs
dispatch.io.toLsq <> io.toLsBlock.lsqIdxReq
dispatch.io.lsIdxs <> io.fromLsBlock.lsqIdxResp
dispatch.io.enqRoq <> roq.io.enq
dispatch.io.enqLsq <> io.toLsBlock.enqLsq
dispatch.io.dequeueRoqIndex.valid := roq.io.commitRoqIndex.valid || io.oldestStore.valid
dispatch.io.dequeueRoqIndex.bits := Mux(io.oldestStore.valid,
io.oldestStore.bits,
......@@ -147,9 +149,6 @@ class CtrlBlock extends XSModule {
roq.io.memRedirect <> io.fromLsBlock.replay
roq.io.brqRedirect <> brq.io.redirect
roq.io.dp1Req <> dispatch.io.toRoq
roq.io.exeWbResults.take(roqWbSize-1).zip(
io.fromIntBlock.wbRegs ++ io.fromFpBlock.wbRegs ++ io.fromLsBlock.stOut
).foreach{
......
......@@ -15,7 +15,6 @@ import xiangshan.backend.fu.FunctionUnit.{lduCfg, mouCfg, stuCfg}
class LsBlockToCtrlIO extends XSBundle {
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput)) // write to roq
val numExist = Vec(exuParameters.LsExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
val lsqIdxResp = Vec(RenameWidth, Output(new LSIdx))
val replay = ValidIO(new Redirect)
}
......@@ -196,14 +195,13 @@ class MemBlock
storeUnits(i).io.lsq <> lsq.io.storeIn(i)
io.toCtrlBlock.stOut(i).valid := lsq.io.stout(i).valid
io.toCtrlBlock.stOut(i).bits := lsq.io.stout(i).bits
lsq.io.stout(i).ready := true.B
lsq.io.stout(i).ready := true.B
}
// Lsq
lsq.io.commits <> io.lsqio.commits
lsq.io.dp1Req <> io.fromCtrlBlock.lsqIdxReq
lsq.io.enq <> io.fromCtrlBlock.enqLsq
lsq.io.oldestStore <> io.lsqio.oldestStore
lsq.io.lsIdxs <> io.toCtrlBlock.lsqIdxResp
lsq.io.brqRedirect := io.fromCtrlBlock.redirect
lsq.io.roqDeqPtr := io.lsqio.roqDeqPtr
io.toCtrlBlock.replay <> lsq.io.rollback
......
......@@ -224,10 +224,10 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
// misprediction or replay
stateQueue.zipWithIndex.foreach({case(s, i) =>
val ptr = BrqPtr(brQueue(i).ptrFlag, i.U)
when(
(io.redirect.valid && ptr.needBrFlush(io.redirect.bits.brTag)) ||
(s.isWb && brQueue(i).exuOut.uop.roqIdx.needFlush(io.memRedirect))
){
when(s.isWb && brQueue(i).exuOut.uop.roqIdx.needFlush(io.memRedirect)){
s := s_idle
}
when(io.redirect.valid && ptr.needBrFlush(io.redirect.bits.brTag)){
s := s_invalid
}
})
......
......@@ -43,10 +43,11 @@ class DecodeBuffer extends XSModule {
if(i > 0 ){
io.out(i).valid := validVec(i) &&
!flush &&
Mux(r.ctrl.noSpecExec,
!ParallelOR(validVec.take(i)),
!ParallelOR(io.out.zip(validVec).take(i).map(x => x._2 && x._1.bits.ctrl.noSpecExec))
) && !io.isWalking
// Mux(r.ctrl.noSpecExec,
!ParallelOR(validVec.take(i))//,
// !ParallelOR(io.out.zip(validVec).take(i).map(x => x._2 && x._1.bits.ctrl.noSpecExec))
//) &&
!io.isWalking
} else {
require( i == 0)
io.out(i).valid := validVec(i) && !flush && !io.isWalking
......
......@@ -29,13 +29,19 @@ class Dispatch extends XSModule {
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
// enq Roq
val toRoq = Vec(RenameWidth, DecoupledIO(new MicroOp))
// get RoqIdx
val roqIdxs = Input(Vec(RenameWidth, new RoqPtr))
val enqRoq = new Bundle {
val canAccept = Input(Bool())
val isEmpty = Input(Bool())
val extraWalk = Vec(RenameWidth, Output(Bool()))
val req = Vec(RenameWidth, ValidIO(new MicroOp))
val resp = Vec(RenameWidth, Input(new RoqPtr))
}
// enq Lsq
val toLsq = Vec(RenameWidth, DecoupledIO(new MicroOp))
// get LsIdx
val lsIdxs = Input(Vec(RenameWidth, new LSIdx))
val enqLsq = new Bundle() {
val canAccept = Input(Bool())
val req = Vec(RenameWidth, ValidIO(new MicroOp))
val resp = Vec(RenameWidth, Input(new LSIdx))
}
val dequeueRoqIndex = Input(Valid(new RoqPtr))
// read regfile
val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort))
......@@ -59,18 +65,20 @@ class Dispatch extends XSModule {
// pipeline between rename and dispatch
// accepts all at once
val redirectValid = io.redirect.valid && !io.redirect.bits.isReplay
for (i <- 0 until RenameWidth) {
PipelineConnect(io.fromRename(i), dispatch1.io.fromRename(i), dispatch1.io.recv(i), false.B)
PipelineConnect(io.fromRename(i), dispatch1.io.fromRename(i), dispatch1.io.recv(i), redirectValid)
}
// dispatch 1: accept uops from rename and dispatch them to the three dispatch queues
dispatch1.io.redirect <> io.redirect
dispatch1.io.toRoq <> io.toRoq
dispatch1.io.roqIdxs <> io.roqIdxs
dispatch1.io.toLsq <> io.toLsq
dispatch1.io.lsIdx <> io.lsIdxs
dispatch1.io.enqRoq <> io.enqRoq
dispatch1.io.enqLsq <> io.enqLsq
dispatch1.io.toIntDqReady <> intDq.io.enqReady
dispatch1.io.toIntDq <> intDq.io.enq
dispatch1.io.toFpDqReady <> fpDq.io.enqReady
dispatch1.io.toFpDq <> fpDq.io.enq
dispatch1.io.toLsDqReady <> lsDq.io.enqReady
dispatch1.io.toLsDq <> lsDq.io.enq
dispatch1.io.allocPregs <> io.allocPregs
......
......@@ -15,27 +15,39 @@ class Dispatch1 extends XSModule {
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val recv = Output(Vec(RenameWidth, Bool()))
// enq Roq
val toRoq = Vec(RenameWidth, DecoupledIO(new MicroOp))
// get RoqIdx
val roqIdxs = Input(Vec(RenameWidth, new RoqPtr))
val enqRoq = new Bundle {
val canAccept = Input(Bool())
val isEmpty = Input(Bool())
// if set, Roq needs extra walk
val extraWalk = Vec(RenameWidth, Output(Bool()))
val req = Vec(RenameWidth, ValidIO(new MicroOp))
val resp = Vec(RenameWidth, Input(new RoqPtr))
}
// enq Lsq
val toLsq = Vec(RenameWidth, DecoupledIO(new MicroOp))
// get LsIdx
val lsIdx = Input(Vec(RenameWidth, new LSIdx))
val enqLsq = new Bundle() {
val canAccept = Input(Bool())
val req = Vec(RenameWidth, ValidIO(new MicroOp))
val resp = Vec(RenameWidth, Input(new LSIdx))
}
val allocPregs = Vec(RenameWidth, Output(new ReplayPregReq))
// to dispatch queue
val toIntDq = Vec(dpParams.DqEnqWidth, DecoupledIO(new MicroOp))
val toFpDq = Vec(dpParams.DqEnqWidth, DecoupledIO(new MicroOp))
val toLsDq = Vec(dpParams.DqEnqWidth, DecoupledIO(new MicroOp))
val toIntDqReady = Input(Bool())
val toIntDq = Vec(dpParams.DqEnqWidth, ValidIO(new MicroOp))
val toFpDqReady = Input(Bool())
val toFpDq = Vec(dpParams.DqEnqWidth, ValidIO(new MicroOp))
val toLsDqReady = Input(Bool())
val toLsDq = Vec(dpParams.DqEnqWidth, ValidIO(new MicroOp))
})
/**
* Part 1: choose the target dispatch queue and the corresponding write ports
*/
// valid bits for different dispatch queues
val isInt = WireInit(VecInit(io.fromRename.map(uop => FuType.isIntExu(uop.bits.ctrl.fuType))))
val isFp = WireInit(VecInit(io.fromRename.map(uop => FuType.isFpExu (uop.bits.ctrl.fuType))))
val isLs = WireInit(VecInit(io.fromRename.map(uop => FuType.isMemExu(uop.bits.ctrl.fuType))))
val isStore = WireInit(VecInit(io.fromRename.map(uop => FuType.isStoreExu(uop.bits.ctrl.fuType))))
val isInt = VecInit(io.fromRename.map(req => FuType.isIntExu(req.bits.ctrl.fuType)))
val isFp = VecInit(io.fromRename.map(req => FuType.isFpExu (req.bits.ctrl.fuType)))
val isLs = VecInit(io.fromRename.map(req => FuType.isMemExu(req.bits.ctrl.fuType)))
val isStore = VecInit(io.fromRename.map(req => FuType.isStoreExu(req.bits.ctrl.fuType)))
val isBlockBackward = VecInit(io.fromRename.map(_.bits.ctrl.blockBackward))
val isNoSpecExec = VecInit(io.fromRename.map(_.bits.ctrl.noSpecExec))
// generate index mapping
val intIndex = Module(new IndexMapping(RenameWidth, dpParams.DqEnqWidth, false))
......@@ -56,120 +68,117 @@ class Dispatch1 extends XSModule {
}
/**
* Part 2: acquire ROQ (all) and LSQ (load/store only) indexes
* Part 2:
* acquire ROQ (all), LSQ (load/store only) and dispatch queue slots
* only set valid when all of them provides enough entries
*/
val cancelled = WireInit(VecInit(Seq.fill(RenameWidth)(io.redirect.valid && !io.redirect.bits.isReplay)))
val redirectValid = io.redirect.valid && !io.redirect.bits.isReplay
val allResourceReady = io.enqLsq.canAccept && io.enqRoq.canAccept && io.toIntDqReady && io.toFpDqReady && io.toLsDqReady
// Instructions should enter dispatch queues in order.
// When RenameWidth > DqEnqWidth, it's possible that some instructions cannot enter dispatch queue
// because previous instructions cannot enter dispatch queue.
// The reason is that although ROB and LSQ have enough empty slots, dispatch queue has limited enqueue ports.
// Thus, for i >= dpParams.DqEnqWidth, we have to check whether it's previous instructions (and the instruction itself) can enqueue.
// However, since, for instructions with indices less than dpParams.DqEnqWidth,
// they can always enter dispatch queue when ROB and LSQ are ready, we don't need to check whether they can enqueue.
// thisIsBlocked: this instruction is blocked by itself
// thisCanOut: this instruction can enqueue
// nextCanOut: next instructions can out
// notBlockedByPrevious: previous instructions can enqueue
val thisIsBlocked = VecInit((0 until RenameWidth).map(i =>
isNoSpecExec(i) && !io.enqRoq.isEmpty
))
val thisCanOut = VecInit((0 until RenameWidth).map(i => {
// For i in [0, DqEnqWidth), they can always enqueue when ROB and LSQ are ready
if (i < dpParams.DqEnqWidth) !thisIsBlocked(i)
else Cat(Seq(intIndex, fpIndex, lsIndex).map(_.io.reverseMapping(i).valid)).orR && !thisIsBlocked(i)
}))
val nextCanOut = VecInit((0 until RenameWidth).map(i =>
(thisCanOut(i) && !isBlockBackward(i)) || !io.fromRename(i).valid
))
val notBlockedByPrevious = VecInit((0 until RenameWidth).map(i =>
if (i == 0) true.B
else Cat((0 until i).map(j => nextCanOut(j))).andR
))
// this instruction can actually dequeue: 3 conditions
// (1) resources are ready
// (2) previous instructions are ready
val thisCanActualOut = (0 until RenameWidth).map(i => allResourceReady && thisCanOut(i) && notBlockedByPrevious(i))
val uopWithIndex = Wire(Vec(RenameWidth, new MicroOp))
val roqIndexReg = Reg(Vec(RenameWidth, new RoqPtr))
val roqIndexRegValid = RegInit(VecInit(Seq.fill(RenameWidth)(false.B)))
val roqIndexAcquired = WireInit(VecInit(Seq.tabulate(RenameWidth)(i => io.toRoq(i).ready || roqIndexRegValid(i))))
val lsIndexReg = Reg(Vec(RenameWidth, new LSIdx))
val lsIndexRegValid = RegInit(VecInit(Seq.fill(RenameWidth)(false.B)))
val lsqIndexAcquired = WireInit(VecInit(Seq.tabulate(RenameWidth)(i => io.toLsq(i).ready || lsIndexRegValid(i))))
for (i <- 0 until RenameWidth) {
// input for ROQ and LSQ
val commitType = Cat(isLs(i), isStore(i) | isFp(i))
io.toRoq(i).valid := io.fromRename(i).valid && !roqIndexRegValid(i)
io.toRoq(i).bits := io.fromRename(i).bits
io.toRoq(i).bits.ctrl.commitType := commitType
io.toLsq(i).valid := io.fromRename(i).valid && !lsIndexRegValid(i) && isLs(i) && io.fromRename(i).bits.ctrl.fuType =/= FuType.mou && roqIndexAcquired(i) && !cancelled(i)
io.toLsq(i).bits := io.fromRename(i).bits
io.toLsq(i).bits.ctrl.commitType := commitType
io.toLsq(i).bits.roqIdx := Mux(roqIndexRegValid(i), roqIndexReg(i), io.roqIdxs(i))
// receive indexes from ROQ and LSQ
when(io.toRoq(i).fire() && !io.recv(i)) {
roqIndexReg(i) := io.roqIdxs(i)
roqIndexRegValid(i) := true.B
}.elsewhen(io.recv(i)) {
roqIndexRegValid(i) := false.B
}
when(io.toLsq(i).fire() && !io.recv(i)) {
lsIndexReg(i) := io.lsIdx(i)
lsIndexRegValid(i) := true.B
}.elsewhen(io.recv(i)) {
lsIndexRegValid(i) := false.B
}
io.enqRoq.extraWalk(i) := io.fromRename(i).valid && !thisCanActualOut(i)
io.enqRoq.req(i).valid := io.fromRename(i).valid && thisCanActualOut(i)
io.enqRoq.req(i).bits := io.fromRename(i).bits
io.enqRoq.req(i).bits.ctrl.commitType := commitType
val shouldEnqLsq = isLs(i) && io.fromRename(i).bits.ctrl.fuType =/= FuType.mou
io.enqLsq.req(i).valid := io.fromRename(i).valid && shouldEnqLsq && !redirectValid && thisCanActualOut(i)
io.enqLsq.req(i).bits := io.fromRename(i).bits
io.enqLsq.req(i).bits.ctrl.commitType := commitType
io.enqLsq.req(i).bits.roqIdx := io.enqRoq.resp(i)
// append ROQ and LSQ indexed to uop
uopWithIndex(i) := io.fromRename(i).bits
uopWithIndex(i).roqIdx := Mux(roqIndexRegValid(i), roqIndexReg(i), io.roqIdxs(i))
uopWithIndex(i).lqIdx := Mux(lsIndexRegValid(i), lsIndexReg(i), io.lsIdx(i)).lqIdx
uopWithIndex(i).sqIdx := Mux(lsIndexRegValid(i), lsIndexReg(i), io.lsIdx(i)).sqIdx
XSDebug(io.toLsq(i).fire(), p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives lq ${io.lsIdx(i).lqIdx} sq ${io.lsIdx(i).sqIdx}\n")
XSDebug(io.toRoq(i).fire(), p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives nroq ${io.roqIdxs(i)}\n")
if (i > 0) {
XSError(io.toRoq(i).fire() && !io.toRoq(i - 1).ready && io.toRoq(i - 1).valid, p"roq handshake not continuous $i")
}
}
uopWithIndex(i).roqIdx := io.enqRoq.resp(i)
uopWithIndex(i).lqIdx := io.enqLsq.resp(i).lqIdx
uopWithIndex(i).sqIdx := io.enqLsq.resp(i).sqIdx
/**
* Part 3: send uop (should not be cancelled) with correct indexes to dispatch queues
*/
val orderedEnqueue = Wire(Vec(RenameWidth, Bool()))
val canEnqueue = Wire(Vec(RenameWidth, Bool()))
var prevCanEnqueue = true.B
for (i <- 0 until RenameWidth) {
orderedEnqueue(i) := prevCanEnqueue
canEnqueue(i) := !cancelled(i) && roqIndexAcquired(i) && (!isLs(i) || io.fromRename(i).bits.ctrl.fuType === FuType.mou || lsqIndexAcquired(i))
val enqReady = (io.toIntDq(intIndex.io.reverseMapping(i).bits).ready && intIndex.io.reverseMapping(i).valid) ||
(io.toFpDq(fpIndex.io.reverseMapping(i).bits).ready && fpIndex.io.reverseMapping(i).valid) ||
(io.toLsDq(lsIndex.io.reverseMapping(i).bits).ready && lsIndex.io.reverseMapping(i).valid)
prevCanEnqueue = prevCanEnqueue && (!io.fromRename(i).valid || (canEnqueue(i) && enqReady))
XSDebug(io.enqLsq.req(i).valid,
p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives lq ${io.enqLsq.resp(i).lqIdx} sq ${io.enqLsq.resp(i).sqIdx}\n")
XSDebug(io.enqRoq.req(i).valid, p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives nroq ${io.enqRoq.resp(i)}\n")
}
// send uops with correct indexes to dispatch queues
// Note that if one of their previous instructions cannot enqueue, they should not enter dispatch queue.
// We use notBlockedByPrevious here since mapping(i).valid implies there's a valid instruction that can enqueue,
// thus we don't need to check thisCanOut.
for (i <- 0 until dpParams.DqEnqWidth) {
io.toIntDq(i).bits := uopWithIndex(intIndex.io.mapping(i).bits)
io.toIntDq(i).valid := intIndex.io.mapping(i).valid &&
canEnqueue(intIndex.io.mapping(i).bits) &&
orderedEnqueue(intIndex.io.mapping(i).bits)
io.toFpDq(i).bits := uopWithIndex(fpIndex.io.mapping(i).bits)
io.toFpDq(i).valid := fpIndex.io.mapping(i).valid &&
canEnqueue(fpIndex.io.mapping(i).bits) &&
orderedEnqueue(fpIndex.io.mapping(i).bits)
io.toLsDq(i).bits := uopWithIndex(lsIndex.io.mapping(i).bits)
io.toLsDq(i).valid := lsIndex.io.mapping(i).valid &&
canEnqueue(lsIndex.io.mapping(i).bits) &&
orderedEnqueue(lsIndex.io.mapping(i).bits)
// XSDebug(io.toIntDq(i).valid, p"pc 0x${Hexadecimal(io.toIntDq(i).bits.cf.pc)} int index $i\n")
// XSDebug(io.toFpDq(i).valid , p"pc 0x${Hexadecimal(io.toFpDq(i).bits.cf.pc )} fp index $i\n")
// XSDebug(io.toLsDq(i).valid , p"pc 0x${Hexadecimal(io.toLsDq(i).bits.cf.pc )} ls index $i\n")
io.toIntDq(i).bits := uopWithIndex(intIndex.io.mapping(i).bits)
io.toIntDq(i).valid := intIndex.io.mapping(i).valid && allResourceReady &&
!thisIsBlocked(intIndex.io.mapping(i).bits) && notBlockedByPrevious(intIndex.io.mapping(i).bits)
// NOTE: floating point instructions are not noSpecExec currently
// remove commit /**/ when fp instructions are possible to be noSpecExec
io.toFpDq(i).bits := uopWithIndex(fpIndex.io.mapping(i).bits)
io.toFpDq(i).valid := fpIndex.io.mapping(i).valid && allResourceReady &&
/*!thisIsBlocked(fpIndex.io.mapping(i).bits) && */notBlockedByPrevious(fpIndex.io.mapping(i).bits)
io.toLsDq(i).bits := uopWithIndex(lsIndex.io.mapping(i).bits)
io.toLsDq(i).valid := lsIndex.io.mapping(i).valid && allResourceReady &&
!thisIsBlocked(lsIndex.io.mapping(i).bits) && notBlockedByPrevious(lsIndex.io.mapping(i).bits)
XSDebug(io.toIntDq(i).valid, p"pc 0x${Hexadecimal(io.toIntDq(i).bits.cf.pc)} int index $i\n")
XSDebug(io.toFpDq(i).valid , p"pc 0x${Hexadecimal(io.toFpDq(i).bits.cf.pc )} fp index $i\n")
XSDebug(io.toLsDq(i).valid , p"pc 0x${Hexadecimal(io.toLsDq(i).bits.cf.pc )} ls index $i\n")
}
/**
* Part 4: send response to rename when dispatch queue accepts the uop
* Part 3: send response to rename when dispatch queue accepts the uop
*/
val readyVector = (0 until RenameWidth).map(i => !io.fromRename(i).valid || io.recv(i))
for (i <- 0 until RenameWidth) {
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U)
io.allocPregs(i).isFp := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.fpWen
io.allocPregs(i).preg := io.fromRename(i).bits.pdest
val enqFire = (io.toIntDq(intIndex.io.reverseMapping(i).bits).fire() && intIndex.io.reverseMapping(i).valid) ||
(io.toFpDq(fpIndex.io.reverseMapping(i).bits).fire() && fpIndex.io.reverseMapping(i).valid) ||
(io.toLsDq(lsIndex.io.reverseMapping(i).bits).fire() && lsIndex.io.reverseMapping(i).valid)
io.recv(i) := enqFire || cancelled(i)
io.recv(i) := thisCanActualOut(i)
io.fromRename(i).ready := Cat(readyVector).andR()
// TODO: add print method for lsIdx
XSInfo(io.recv(i) && !cancelled(i),
p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} type(${isInt(i)}, ${isFp(i)}, ${isLs(i)}) " +
p"roq ${uopWithIndex(i).roqIdx} lq ${uopWithIndex(i).lqIdx} sq ${uopWithIndex(i).sqIdx}" +
p"(${intIndex.io.reverseMapping(i).bits}, ${fpIndex.io.reverseMapping(i).bits}, ${lsIndex.io.reverseMapping(i).bits})\n")
XSInfo(io.recv(i),
p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)}, type(${isInt(i)}, ${isFp(i)}, ${isLs(i)}), " +
p"roq ${uopWithIndex(i).roqIdx}, lq ${uopWithIndex(i).lqIdx}, sq ${uopWithIndex(i).sqIdx}, " +
p"(${intIndex.io.reverseMapping(i).bits}, ${fpIndex.io.reverseMapping(i).bits}, ${lsIndex.io.reverseMapping(i).bits})\n"
)
XSInfo(io.recv(i) && cancelled(i),
p"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} with brTag ${io.fromRename(i).bits.brTag.value} cancelled\n")
XSDebug(io.fromRename(i).valid, "v:%d r:%d pc 0x%x of type %b is in %d-th slot\n",
io.fromRename(i).valid, io.fromRename(i).ready, io.fromRename(i).bits.cf.pc, io.fromRename(i).bits.ctrl.fuType, i.U)
io.allocPregs(i).isInt := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.rfWen && (io.fromRename(i).bits.ctrl.ldest =/= 0.U)
io.allocPregs(i).isFp := io.fromRename(i).valid && io.fromRename(i).bits.ctrl.fpWen
io.allocPregs(i).preg := io.fromRename(i).bits.pdest
}
val renameFireCnt = PopCount(io.recv)
val enqFireCnt = PopCount(io.toIntDq.map(_.fire)) + PopCount(io.toFpDq.map(_.fire)) + PopCount(io.toLsDq.map(_.fire))
val enqFireCnt = PopCount(io.toIntDq.map(_.valid && io.toIntDqReady)) + PopCount(io.toFpDq.map(_.valid && io.toFpDqReady)) + PopCount(io.toLsDq.map(_.valid && io.toLsDqReady))
XSError(enqFireCnt > renameFireCnt, "enqFireCnt should not be greater than renameFireCnt\n")
}
......@@ -8,7 +8,8 @@ import xiangshan._
import xiangshan.backend.roq.RoqPtr
class DispatchQueueIO(enqnum: Int, deqnum: Int, replayWidth: Int) extends XSBundle {
val enq = Vec(enqnum, Flipped(DecoupledIO(new MicroOp)))
val enq = Vec(enqnum, Flipped(ValidIO(new MicroOp)))
val enqReady = Output(Bool())
val deq = Vec(deqnum, DecoupledIO(new MicroOp))
val dequeueRoqIndex = Input(Valid(new RoqPtr))
val redirect = Flipped(ValidIO(new Redirect))
......@@ -59,6 +60,10 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
}
val dispatchedMask = rangeMask(headPtr, dispatchPtr)
val allWalkDone = !io.inReplayWalk && io.otherWalkDone
val canEnqueue = validEntries <= (size - enqnum).U && allWalkDone
val canActualEnqueue = canEnqueue && !(io.redirect.valid && !io.redirect.bits.isReplay)
/**
* Part 1: update states and uops when enqueue, dequeue, commit, redirect/replay
*
......@@ -72,8 +77,9 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
* (5) redirect (replay): from s_dispatched to s_valid (re-dispatch)
*/
// enqueue: from s_invalid to s_valid
io.enqReady := canEnqueue
for (i <- 0 until enqnum) {
when (io.enq(i).fire()) {
when (io.enq(i).valid && canActualEnqueue) {
uopEntries(enqIndex(i)) := io.enq(i).bits
stateEntries(enqIndex(i)) := s_valid
}
......@@ -240,8 +246,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
* head: commit
*/
// enqueue
val numEnqTry = Mux(emptyEntries > enqnum.U, enqnum.U, emptyEntries)
val numEnq = PriorityEncoder(io.enq.map(!_.fire()) :+ true.B)
val numEnq = Mux(canActualEnqueue, PriorityEncoder(io.enq.map(!_.valid) :+ true.B), 0.U)
XSError(numEnq =/= 0.U && (mispredictionValid || exceptionValid), "should not enqueue when redirect\n")
tailPtr := Mux(exceptionValid,
0.U.asTypeOf(new CircularQueuePtr(size)),
......@@ -271,12 +276,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, replayWidth: Int) exten
/**
* Part 4: set output and input
*/
val allWalkDone = !inReplayWalk && io.otherWalkDone
val enqReadyBits = (1.U << numEnqTry).asUInt() - 1.U
for (i <- 0 until enqnum) {
io.enq(i).ready := enqReadyBits(i).asBool() && allWalkDone
}
// TODO: remove this when replay moves to roq
for (i <- 0 until deqnum) {
io.deq(i).bits := uopEntries(deqIndex(i))
// do not dequeue when io.redirect valid because it may cause dispatchPtr work improperly
......
......@@ -70,7 +70,7 @@ class FreeList extends XSModule with HasFreeListConsts with HasCircularQueuePtrH
tailPtr := tailPtrNext
// allocate new pregs to rename instructions
val freeRegs = distanceBetween(headPtr, tailPtr)
val freeRegs = distanceBetween(tailPtr, headPtr)
val hasEnoughRegs = RegNext(freeRegs >= RenameWidth.U, true.B)
XSDebug(p"free regs: $freeRegs\n")
......@@ -108,5 +108,13 @@ class FreeList extends XSModule with HasFreeListConsts with HasCircularQueuePtrH
XSDebug(io.redirect.valid, p"redirect: brqIdx=${io.redirect.bits.brTag.value}\n")
if(env.EnableDebug){
for( i <- 0 until FL_SIZE){
for(j <- i+1 until FL_SIZE){
assert(freeList(i) != freeList(j), s"Found same entry in freelist! (i=$i j=$j)")
}
}
}
}
......@@ -42,8 +42,13 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val brqRedirect = Input(Valid(new Redirect))
val memRedirect = Input(Valid(new Redirect))
val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val roqIdxs = Output(Vec(RenameWidth, new RoqPtr))
val enq = new Bundle {
val canAccept = Output(Bool())
val isEmpty = Output(Bool())
val extraWalk = Vec(RenameWidth, Input(Bool()))
val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
val resp = Vec(RenameWidth, Output(new RoqPtr))
}
val redirect = Output(Valid(new Redirect))
val exception = Output(new MicroOp)
// exu + brq
......@@ -81,35 +86,44 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.roqDeqPtr := deqPtrExt
// Dispatch
val noSpecEnq = io.dp1Req.map(i => i.bits.ctrl.blockBackward)
val hasNoSpec = RegInit(false.B)
when(isEmpty){ hasNoSpec:= false.B }
val validDispatch = io.dp1Req.map(_.valid)
val hasBlockBackward = RegInit(false.B)
val hasNoSpecExec = RegInit(false.B)
val blockBackwardCommit = Cat(io.commits.map(c => c.valid && !c.bits.isWalk && c.bits.uop.ctrl.blockBackward)).orR
val noSpecExecCommit = Cat(io.commits.map(c => c.valid && !c.bits.isWalk && c.bits.uop.ctrl.noSpecExec)).orR
when(blockBackwardCommit){ hasBlockBackward:= false.B }
when(noSpecExecCommit){ hasNoSpecExec:= false.B }
val validDispatch = io.enq.req.map(_.valid)
XSDebug("(ready, valid): ")
for (i <- 0 until RenameWidth) {
val offset = PopCount(validDispatch.take(i))
val roqIdxExt = enqPtrExt + offset
val roqIdx = roqIdxExt.value
when(io.dp1Req(i).fire()){
microOp(roqIdx) := io.dp1Req(i).bits
when(io.enq.req(i).valid) {
microOp(roqIdx) := io.enq.req(i).bits
valid(roqIdx) := true.B
flag(roqIdx) := roqIdxExt.flag
writebacked(roqIdx) := false.B
when(noSpecEnq(i)){ hasNoSpec := true.B }
when(io.enq.req(i).bits.ctrl.blockBackward) {
hasBlockBackward := true.B
}
when(io.enq.req(i).bits.ctrl.noSpecExec) {
hasNoSpecExec := true.B
}
}
io.dp1Req(i).ready := (notFull && !valid(roqIdx) && state === s_idle) &&
(!noSpecEnq(i) || isEmpty) &&
!hasNoSpec
io.roqIdxs(i) := roqIdxExt
XSDebug(false, true.B, "(%d, %d) ", io.dp1Req(i).ready, io.dp1Req(i).valid)
io.enq.resp(i) := roqIdxExt
}
XSDebug(false, true.B, "\n")
val firedDispatch = Cat(io.dp1Req.map(_.fire()))
val validEntries = distanceBetween(enqPtrExt, deqPtrExt)
val firedDispatch = Cat(io.enq.req.map(_.valid))
io.enq.canAccept := (validEntries <= (RoqSize - RenameWidth).U) && !hasBlockBackward
io.enq.isEmpty := isEmpty
XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(firedDispatch)}\n")
val dispatchCnt = PopCount(firedDispatch)
when(firedDispatch.orR){
enqPtrExt := enqPtrExt + dispatchCnt
enqPtrExt := enqPtrExt + PopCount(firedDispatch)
when (firedDispatch.orR) {
XSInfo("dispatched %d insts\n", dispatchCnt)
}
......@@ -141,7 +155,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val deqUop = microOp(deqPtr)
val deqPtrWritebacked = writebacked(deqPtr) && valid(deqPtr)
val intrEnable = io.csr.intrBitSet && !isEmpty && !hasNoSpec &&
val intrEnable = io.csr.intrBitSet && !isEmpty && !hasNoSpecExec &&
deqUop.ctrl.commitType =/= CommitType.STORE && deqUop.ctrl.commitType =/= CommitType.LOAD// TODO: wanna check why has hasCsr(hasNoSpec)
val exceptionEnable = deqPtrWritebacked && Cat(deqUop.cf.exceptionVec).orR()
val isFlushPipe = deqPtrWritebacked && deqUop.ctrl.flushPipe
......@@ -171,7 +185,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// extra space is used weh roq has no enough space, but mispredict recovery needs such info to walk regmap
val needExtraSpaceForMPR = WireInit(VecInit(
List.tabulate(RenameWidth)(i => io.brqRedirect.valid && io.dp1Req(i).valid && !io.dp1Req(i).ready)
List.tabulate(RenameWidth)(i => io.brqRedirect.valid && io.enq.extraWalk(i))
))
val extraSpaceForMPR = Reg(Vec(RenameWidth, new MicroOp))
val usedSpaceForMPR = Reg(Vec(RenameWidth, Bool()))
......@@ -301,7 +315,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// no enough space for walk, allocate extra space
when(needExtraSpaceForMPR.asUInt.orR && io.brqRedirect.valid){
usedSpaceForMPR := needExtraSpaceForMPR
(0 until RenameWidth).foreach(i => extraSpaceForMPR(i) := io.dp1Req(i).bits)
(0 until RenameWidth).foreach(i => extraSpaceForMPR(i) := io.enq.req(i).bits)
state := s_extrawalk
XSDebug("roq full, switched to s_extrawalk. needExtraSpaceForMPR: %b\n", needExtraSpaceForMPR.asUInt)
}
......
......@@ -172,8 +172,11 @@ class InflightBlockInfo extends XSBundle {
// Load / Store Queue Wrapper for XiangShan Out of Order LSU
class LsqWrappper extends XSModule with HasDCacheParameters {
val io = IO(new Bundle() {
val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val lsIdxs = Output(Vec(RenameWidth, new LSIdx))
val enq = new Bundle() {
val canAccept = Output(Bool())
val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
val resp = Vec(RenameWidth, Output(new LSIdx))
}
val brqRedirect = Input(Valid(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
......@@ -193,8 +196,23 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val loadQueue = Module(new LoadQueue)
val storeQueue = Module(new StoreQueue)
// io.enq logic
// LSQ: send out canAccept when both load queue and store queue are ready
// Dispatch: send instructions to LSQ only when they are ready
io.enq.canAccept := loadQueue.io.enq.canAccept && storeQueue.io.enq.canAccept
for (i <- 0 until RenameWidth) {
val isStore = CommitType.lsInstIsStore(io.enq.req(i).bits.ctrl.commitType)
loadQueue.io.enq.req(i).valid := !isStore && io.enq.req(i).valid
storeQueue.io.enq.req(i).valid := isStore && io.enq.req(i).valid
loadQueue.io.enq.req(i).bits := io.enq.req(i).bits
storeQueue.io.enq.req(i).bits := io.enq.req(i).bits
io.enq.resp(i).lqIdx := loadQueue.io.enq.resp(i)
io.enq.resp(i).sqIdx := storeQueue.io.enq.resp(i)
XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not enqueue LSQ when not")
}
// load queue wiring
loadQueue.io.dp1Req <> io.dp1Req
loadQueue.io.brqRedirect <> io.brqRedirect
loadQueue.io.loadIn <> io.loadIn
loadQueue.io.storeIn <> io.storeIn
......@@ -208,7 +226,6 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
// store queue wiring
// storeQueue.io <> DontCare
storeQueue.io.dp1Req <> io.dp1Req
storeQueue.io.brqRedirect <> io.brqRedirect
storeQueue.io.storeIn <> io.storeIn
storeQueue.io.sbuffer <> io.sbuffer
......@@ -265,15 +282,4 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid))
assert(!((loadQueue.io.uncache.resp.valid || storeQueue.io.uncache.resp.valid) && uncacheState === s_idle))
// fix valid, allocate lq / sq index
(0 until RenameWidth).map(i => {
val isStore = CommitType.lsInstIsStore(io.dp1Req(i).bits.ctrl.commitType)
loadQueue.io.dp1Req(i).valid := !isStore && io.dp1Req(i).valid
storeQueue.io.dp1Req(i).valid := isStore && io.dp1Req(i).valid
loadQueue.io.lqIdxs(i) <> io.lsIdxs(i).lqIdx
storeQueue.io.sqIdxs(i) <> io.lsIdxs(i).sqIdx
loadQueue.io.lqReady <> storeQueue.io.lqReady
loadQueue.io.sqReady <> storeQueue.io.sqReady
io.dp1Req(i).ready := storeQueue.io.dp1Req(i).ready && loadQueue.io.dp1Req(i).ready
})
}
......@@ -27,10 +27,11 @@ object LqPtr extends HasXSParameter {
// Load Queue
class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val lqReady = Output(Vec(RenameWidth, Bool()))
val sqReady = Input(Vec(RenameWidth, Bool()))
val lqIdxs = Output(Vec(RenameWidth, new LqPtr)) // LSIdx will be assembled in LSQWrapper
val enq = new Bundle() {
val canAccept = Output(Bool())
val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
val resp = Vec(RenameWidth, Output(new LqPtr))
}
val brqRedirect = Input(Valid(new Redirect))
val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
......@@ -75,14 +76,16 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)
// Enqueue at dispatch
val emptyEntries = LoadQueueSize.U - distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
XSDebug("(ready, valid): ")
val validEntries = distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
val firedDispatch = io.enq.req.map(_.valid)
io.enq.canAccept := validEntries <= (LoadQueueSize - RenameWidth).U
XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
for (i <- 0 until RenameWidth) {
val offset = if (i == 0) 0.U else PopCount((0 until i).map(io.dp1Req(_).valid))
val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
val lqIdx = ringBufferHeadExtended + offset
val index = lqIdx.value
when(io.dp1Req(i).fire()) {
uop(index) := io.dp1Req(i).bits
when(io.enq.req(i).valid) {
uop(index) := io.enq.req(i).bits
allocated(index) := true.B
valid(index) := false.B
writebacked(index) := false.B
......@@ -91,16 +94,12 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
listening(index) := false.B
pending(index) := false.B
}
val numTryEnqueue = offset +& io.dp1Req(i).valid
io.lqReady(i) := numTryEnqueue <= emptyEntries
io.dp1Req(i).ready := io.lqReady(i) && io.sqReady(i)
io.lqIdxs(i) := lqIdx
XSDebug(false, true.B, "(%d, %d) ", io.dp1Req(i).ready, io.dp1Req(i).valid)
io.enq.resp(i) := lqIdx
XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not valid when not ready\n")
}
XSDebug(false, true.B, "\n")
val firedDispatch = VecInit((0 until CommitWidth).map(io.dp1Req(_).fire())).asUInt
when(firedDispatch.orR) {
when(Cat(firedDispatch).orR) {
ringBufferHeadExtended := ringBufferHeadExtended + PopCount(firedDispatch)
XSInfo("dispatched %d insts to lq\n", PopCount(firedDispatch))
}
......
......@@ -24,10 +24,11 @@ object SqPtr extends HasXSParameter {
// Store Queue
class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
val lqReady = Input(Vec(RenameWidth, Bool()))
val sqReady = Output(Vec(RenameWidth, Bool()))
val sqIdxs = Output(Vec(RenameWidth, new SqPtr))
val enq = new Bundle() {
val canAccept = Output(Bool())
val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
val resp = Vec(RenameWidth, Output(new SqPtr))
}
val brqRedirect = Input(Valid(new Redirect))
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
......@@ -69,30 +70,28 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)
// Enqueue at dispatch
val emptyEntries = StoreQueueSize.U - distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
XSDebug("(ready, valid): ")
val validEntries = distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
val firedDispatch = io.enq.req.map(_.valid)
// Accept a full dispatch group only while the STORE queue has at least
// RenameWidth free slots. Must be gated on StoreQueueSize, not LoadQueueSize
// (copy-paste from LoadQueue): if the two sizes differ, using LoadQueueSize
// either over-admits (SQ overflow when StoreQueueSize < LoadQueueSize) or
// needlessly stalls dispatch (when StoreQueueSize > LoadQueueSize).
io.enq.canAccept := validEntries <= (StoreQueueSize - RenameWidth).U
XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
for (i <- 0 until RenameWidth) {
val offset = if (i == 0) 0.U else PopCount((0 until i).map(io.dp1Req(_).valid))
val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
val sqIdx = ringBufferHeadExtended + offset
val index = sqIdx.value
when(io.dp1Req(i).fire()) {
uop(index) := io.dp1Req(i).bits
when(io.enq.req(i).valid) {
uop(index) := io.enq.req(i).bits
allocated(index) := true.B
datavalid(index) := false.B
writebacked(index) := false.B
commited(index) := false.B
pending(index) := false.B
}
val numTryEnqueue = offset +& io.dp1Req(i).valid
io.sqReady(i) := numTryEnqueue <= emptyEntries
io.dp1Req(i).ready := io.lqReady(i) && io.sqReady(i)
io.sqIdxs(i) := sqIdx
XSDebug(false, true.B, "(%d, %d) ", io.dp1Req(i).ready, io.dp1Req(i).valid)
io.enq.resp(i) := sqIdx
XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not valid when not ready\n")
}
XSDebug(false, true.B, "\n")
val firedDispatch = VecInit((0 until CommitWidth).map(io.dp1Req(_).fire())).asUInt
when(firedDispatch.orR) {
when(Cat(firedDispatch).orR) {
ringBufferHeadExtended := ringBufferHeadExtended + PopCount(firedDispatch)
XSInfo("dispatched %d insts to sq\n", PopCount(firedDispatch))
}
......
......@@ -215,7 +215,7 @@ void ram_finish() {
extern "C" uint64_t ram_read_helper(uint8_t en, uint64_t rIdx) {
if (en && rIdx >= RAMSIZE / sizeof(uint64_t)) {
printf("ERROR: ram idx = 0x%lx out of bound!\n", rIdx);
printf("ERROR: ram rIdx = 0x%lx out of bound!\n", rIdx);
assert(rIdx < RAMSIZE / sizeof(uint64_t));
}
return (en) ? ram[rIdx] : 0;
......@@ -223,6 +223,7 @@ extern "C" uint64_t ram_read_helper(uint8_t en, uint64_t rIdx) {
extern "C" void ram_write_helper(uint64_t wIdx, uint64_t wdata, uint64_t wmask, uint8_t wen) {
if (wen) {
  // Only report/abort when the index is actually out of range; the
  // unguarded version printed "out of bound" on EVERY enabled write,
  // flooding stdout. Mirrors the bounds check in ram_read_helper.
  if (wIdx >= RAMSIZE / sizeof(uint64_t)) {
    printf("ERROR: ram wIdx = 0x%lx out of bound!\n", wIdx);
    assert(wIdx < RAMSIZE / sizeof(uint64_t));
  }
  // Read-modify-write: keep bits where wmask is 0, take wdata where wmask is 1.
  ram[wIdx] = (ram[wIdx] & ~wmask) | (wdata & wmask);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册