提交 449e62db 编写于 作者: W William Wang

Merge remote-tracking branch 'origin/master' into nanhu-lsu-timing-220706

......@@ -99,6 +99,8 @@ object rocketchip extends `rocket-chip`.common.CommonRocketChip {
def chisel3IvyDeps = if(chisel3Module.isEmpty) Agg(
common.getVersion("chisel3")
) else Agg.empty[Dep]
def chisel3PluginIvyDeps = Agg(common.getVersion("chisel3-plugin", cross=true))
}
def hardfloatModule = hardfloatRocket
......
......@@ -61,6 +61,8 @@ class CircularQueuePtr[T <: CircularQueuePtr[T]](val entries: Int) extends Bundl
// Pointer equality: true when the UInt encodings of both pointers are identical.
// Uses the paren-less `asUInt` accessor; the empty-argument-list call form is deprecated.
final def === (that_ptr: T): Bool = this.asUInt === that_ptr.asUInt
// Pointer inequality: true when the UInt encodings of the two pointers differ.
// Uses the paren-less `asUInt` accessor; the empty-argument-list call form is deprecated.
final def =/= (that_ptr: T): Bool = this.asUInt =/= that_ptr.asUInt
// One-hot form of this pointer's `value` field, produced by UIntToOH with a width of `entries`.
def toOH: UInt = UIntToOH(value, entries)
}
trait HasCircularQueuePtrHelper {
......
......@@ -203,14 +203,14 @@ class HPerfCounter(val numPCnt: Int)(implicit p: Parameters) extends XSModule wi
val events_sets = Input(Vec(numPCnt, new PerfEvent))
})
val events_incr_0 = io.events_sets(io.hpm_event( 9, 0))
val events_incr_1 = io.events_sets(io.hpm_event(19, 10))
val events_incr_2 = io.events_sets(io.hpm_event(29, 20))
val events_incr_3 = io.events_sets(io.hpm_event(39, 30))
// Each 10-bit field of io.hpm_event (bits 9:0, 19:10, 29:20, 39:30) indexes io.events_sets to
// pick one PerfEvent; the selected event is registered with RegNext before further use.
val events_incr_0 = RegNext(io.events_sets(io.hpm_event( 9, 0)))
val events_incr_1 = RegNext(io.events_sets(io.hpm_event(19, 10)))
val events_incr_2 = RegNext(io.events_sets(io.hpm_event(29, 20)))
val events_incr_3 = RegNext(io.events_sets(io.hpm_event(39, 30)))
val event_op_0 = io.hpm_event(44, 40)
val event_op_1 = io.hpm_event(49, 45)
val event_op_2 = io.hpm_event(54, 50)
// Three 5-bit operation fields taken from io.hpm_event (bits 44:40, 49:45, 54:50), each
// registered with RegNext. Bit 0 of event_op_0 is used just below as a Mux select.
val event_op_0 = RegNext(io.hpm_event(44, 40))
val event_op_1 = RegNext(io.hpm_event(49, 45))
val event_op_2 = RegNext(io.hpm_event(54, 50))
val event_step_0 = Mux(event_op_0(0), events_incr_3.value & events_incr_2.value,
......
......@@ -163,7 +163,6 @@ class CtrlSignals(implicit p: Parameters) extends XSBundle {
val noSpecExec = Bool() // wait forward
val blockBackward = Bool() // block backward
val flushPipe = Bool() // This inst will flush all the pipe when commit, like exception but can commit
val isRVF = Bool()
val selImm = SelImm()
val imm = UInt(ImmUnion.maxLen.W)
val commitType = CommitType()
......@@ -175,7 +174,7 @@ class CtrlSignals(implicit p: Parameters) extends XSBundle {
val replayInst = Bool()
private def allSignals = srcType ++ Seq(fuType, fuOpType, rfWen, fpWen,
isXSTrap, noSpecExec, blockBackward, flushPipe, isRVF, selImm)
isXSTrap, noSpecExec, blockBackward, flushPipe, selImm)
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]): CtrlSignals = {
val decoder = freechips.rocketchip.rocket.DecodeLogic(inst, XDecode.decodeDefault, table)
......@@ -349,6 +348,8 @@ class RobCommitInfo(implicit p: Parameters) extends XSBundle {
class RobCommitIO(implicit p: Parameters) extends XSBundle {
val isWalk = Output(Bool())
val valid = Vec(CommitWidth, Output(Bool()))
// valid bits optimized for walk
val walkValid = Vec(CommitWidth, Output(Bool()))
val info = Vec(CommitWidth, Output(new RobCommitInfo))
// True when `isWalk` is asserted and at least one bit of `valid` is set.
def hasWalkInstr: Bool = isWalk && valid.asUInt.orR
......
......@@ -101,11 +101,13 @@ class DecodeStage(implicit p: Parameters) extends XSModule with HasPerfEvents {
XSPerfAccumulate("waitInstr", PopCount((0 until DecodeWidth).map(i => io.in(i).valid && !io.in(i).ready)))
XSPerfAccumulate("stall_cycle", hasValid && !io.out(0).ready)
// Per-port fire bits of the fusion decoder outputs, delayed by one cycle (RegNext) before
// they are counted in perfEvents.
val fusionValid = RegNext(VecInit(fusionDecoder.io.out.map(_.fire)))
// NOTE(review): despite the name, this is the registered "valid but not ready" (waiting)
// condition of each input port, not a fire (valid && ready) signal.
val inFire = io.in.map(in => RegNext(in.valid && !in.ready))
val perfEvents = Seq(
("decoder_fused_instr ", PopCount(fusionDecoder.io.out.map(_.fire)) ),
("decoder_waitInstr ", PopCount((0 until DecodeWidth).map(i => io.in(i).valid && !io.in(i).ready))),
("decoder_stall_cycle ", hasValid && !io.out(0).ready ),
("decoder_utilization ", PopCount(io.in.map(_.valid)) ),
("decoder_fused_instr", PopCount(fusionValid) ),
("decoder_waitInstr", PopCount(inFire) ),
("decoder_stall_cycle", hasValid && !io.out(0).ready),
("decoder_utilization", PopCount(io.in.map(_.valid))),
)
generatePerfEvent()
}
......@@ -45,9 +45,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
val s_invalid :: s_valid :: Nil = Enum(2)
// queue data array
val dataModule = Module(new SyncDataModuleTemplate(new MicroOp, size, deqnum, enqnum))
val robIdxEntries = Reg(Vec(size, new RobPtr))
val debug_uopEntries = Mem(size, new MicroOp)
val data = Reg(Vec(size, new MicroOp))
val stateEntries = RegInit(VecInit(Seq.fill(size)(s_invalid)))
class DispatchQueuePtr extends CircularQueuePtr[DispatchQueuePtr](size)
......@@ -55,14 +53,20 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
// head: first valid entry (dispatched entry)
val headPtr = RegInit(VecInit((0 until deqnum).map(_.U.asTypeOf(new DispatchQueuePtr))))
val headPtrMask = UIntToMask(headPtr(0).value, size)
val headPtrOH = RegInit(1.U(size.W))
val headPtrOHShift = CircularShift(headPtrOH)
val headPtrOHVec = VecInit.tabulate(deqnum + 1)(headPtrOHShift.left)
// tail: first invalid entry (free entry)
val tailPtr = RegInit(VecInit((0 until enqnum).map(_.U.asTypeOf(new DispatchQueuePtr))))
val tailPtrMask = UIntToMask(tailPtr(0).value, size)
val tailPtrOH = RegInit(1.U(size.W))
val tailPtrOHShift = CircularShift(tailPtrOH)
val tailPtrOHVec = VecInit.tabulate(enqnum + 1)(tailPtrOHShift.left)
// valid entries counter
val validCounter = RegInit(0.U(log2Ceil(size + 1).W))
val allowEnqueue = RegInit(true.B)
val isTrueEmpty = ~Cat((0 until size).map(i => stateEntries(i) === s_valid)).orR
val isTrueEmpty = !VecInit(stateEntries.map(_ === s_valid)).asUInt.orR
val canEnqueue = allowEnqueue
val canActualEnqueue = canEnqueue && !io.redirect.valid
......@@ -80,41 +84,34 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
*/
// enqueue: from s_invalid to s_valid
io.enq.canAccept := canEnqueue
dataModule.io.wen := VecInit((0 until enqnum).map(_ => false.B))
dataModule.io.waddr := DontCare
dataModule.io.wdata := VecInit(io.enq.req.map(_.bits))
for (i <- 0 until enqnum) {
when(io.enq.req(i).valid && canActualEnqueue) {
dataModule.io.wen(i) := true.B
val sel = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
dataModule.io.waddr(i) := tailPtr(sel).value
robIdxEntries(tailPtr(sel).value) := io.enq.req(i).bits.robIdx
debug_uopEntries(tailPtr(sel).value) := io.enq.req(i).bits
stateEntries(tailPtr(sel).value) := s_valid
XSError(sel =/= PopCount(io.enq.req.take(i).map(_.valid)), "why not continuous??\n")
// One-hot write position for each enqueue port: port i indexes tailPtrOHVec with
// PopCount(io.enq.needAlloc.take(i)), i.e. the count of needAlloc bits set on lower ports.
val enqIndexOH = (0 until enqnum).map(i => tailPtrOHVec(PopCount(io.enq.needAlloc.take(i))))
for (i <- 0 until size) {
// validVec(k) is set when enqueue port k is valid and its one-hot position selects entry i.
val validVec = io.enq.req.map(_.valid).zip(enqIndexOH).map{ case (v, oh) => v && oh(i) }
// Entry i is written only if some port selects it and canActualEnqueue holds;
// Mux1H forwards the payload of the selecting port.
when (VecInit(validVec).asUInt.orR && canActualEnqueue) {
data(i) := Mux1H(validVec, io.enq.req.map(_.bits))
stateEntries(i) := s_valid
}
}
// dequeue: from s_valid to s_invalid
for (i <- 0 until deqnum) {
when(io.deq(i).fire() && !io.redirect.valid) {
stateEntries(headPtr(i).value) := s_invalid
// XSError(stateEntries(headPtr(i).value) =/= s_valid, "state of the dispatch entry is not s_valid\n")
// Per-entry dequeue: entry i becomes s_invalid when a firing dequeue port's one-hot head
// position (headPtrOHVec) selects it and no redirect is valid in this cycle.
for (i <- 0 until size) {
// validVec(k) is set when dequeue port k fires and its one-hot position selects entry i.
val validVec = io.deq.map(_.fire).zip(headPtrOHVec).map{ case (v, oh) => v && oh(i) }
when (VecInit(validVec).asUInt.orR && !io.redirect.valid) {
stateEntries(i) := s_invalid
}
}
// redirect: cancel uops currently in the queue
val needCancel = Wire(Vec(size, Bool()))
for (i <- 0 until size) {
needCancel(i) := stateEntries(i) =/= s_invalid && robIdxEntries(i).needFlush(io.redirect)
needCancel(i) := stateEntries(i) =/= s_invalid && data(i).robIdx.needFlush(io.redirect)
when(needCancel(i)) {
stateEntries(i) := s_invalid
}
XSInfo(needCancel(i), p"valid entry($i)(pc = ${Hexadecimal(debug_uopEntries(i).cf.pc)}) " +
p"robIndex ${robIdxEntries(i)} " +
XSInfo(needCancel(i), p"valid entry($i)(pc = ${Hexadecimal(data(i).cf.pc)}) " +
p"robIndex ${data(i).robIdx} " +
p"cancelled with redirect robIndex 0x${Hexadecimal(io.redirect.bits.robIdx.asUInt)}\n")
}
......@@ -132,7 +129,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
// For dequeue, the first entry should never be s_invalid
// Otherwise, there should be a redirect and tail walks back
// in this case, we set numDeq to 0
!deq.fire() && (if (i == 0) true.B else stateEntries(headPtr(i).value) =/= s_invalid)
!deq.fire && (if (i == 0) true.B else stateEntries(headPtr(i).value) =/= s_invalid)
} :+ true.B)
val numDeq = Mux(numDeqTry > numDeqFire, numDeqFire, numDeqTry)
// agreement with reservation station: don't dequeue when redirect.valid
......@@ -141,6 +138,8 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
nextHeadPtr(i) := Mux(io.redirect.valid, headPtr(i), headPtr(i) + numDeq)
headPtr(i) := nextHeadPtr(i)
}
// One-hot head pointer update, mirroring the headPtr update above: hold the current value
// while a redirect is valid, otherwise take the headPtrOHVec entry indexed by numDeq.
headPtrOH := Mux(io.redirect.valid, headPtrOH, headPtrOHVec(numDeq))
// Consistency check: the one-hot register must equal the one-hot encoding of headPtr.head.
XSError(headPtrOH =/= headPtr.head.toOH, p"head: $headPtrOH != UIntToOH(${headPtr.head})")
// For branch mis-prediction or memory violation replay,
// we delay updating the indices for one clock cycle.
......@@ -149,7 +148,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
// find the last one's position, starting from headPtr and searching backwards
val validBitVec = VecInit((0 until size).map(i => stateEntries(i) === s_valid))
val loValidBitVec = Cat((0 until size).map(i => validBitVec(i) && headPtrMask(i)))
val hiValidBitVec = Cat((0 until size).map(i => validBitVec(i) && ~headPtrMask(i)))
val hiValidBitVec = Cat((0 until size).map(i => validBitVec(i) && !headPtrMask(i)))
val flippedFlag = loValidBitVec.orR || validBitVec(size - 1)
val leadingZeros = PriorityEncoder(Mux(loValidBitVec.orR, loValidBitVec, hiValidBitVec))
val lastOneIndex = Mux(leadingZeros === 0.U, 0.U, size.U - leadingZeros)
......@@ -174,6 +173,9 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
tailPtr(i) + numEnq)
)
}
// One-hot tail pointer update: reload from the one-hot encoding of tailPtr.head when
// lastLastCycleMisprediction is set, otherwise take the tailPtrOHVec entry indexed by numEnq.
tailPtrOH := Mux(lastLastCycleMisprediction, tailPtr.head.toOH, tailPtrOHVec(numEnq))
// The one-hot register is only compared against tailPtr when neither misprediction flag is set.
val tailPtrOHAccurate = !lastCycleMisprediction && !lastLastCycleMisprediction
XSError(tailPtrOHAccurate && tailPtrOH =/= tailPtr.head.toOH, p"tail: $tailPtrOH != UIntToOH(${tailPtr.head})")
// update valid counter and allowEnqueue reg
validCounter := Mux(io.redirect.valid,
......@@ -187,14 +189,10 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
/**
* Part 3: set output and input
*/
// TODO: remove this when replay moves to rob
dataModule.io.raddr := VecInit(nextHeadPtr.map(_.value))
for (i <- 0 until deqnum) {
io.deq(i).bits := dataModule.io.rdata(i)
io.deq(i).bits.robIdx := robIdxEntries(headPtr(i).value)
// io.deq(i).bits := debug_uopEntries(headPtr(i).value)
io.deq(i).bits := Mux1H(headPtrOHVec(i), data)
// do not dequeue when io.redirect is valid, because that may cause dispatchPtr to work improperly
io.deq(i).valid := stateEntries(headPtr(i).value) === s_valid && !lastCycleMisprediction
io.deq(i).valid := Mux1H(headPtrOHVec(i), stateEntries) === s_valid && !lastCycleMisprediction
}
// debug: dump dispatch queue states
......@@ -217,20 +215,21 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
QueuePerf(size, PopCount(stateEntries.map(_ =/= s_invalid)), !canEnqueue)
io.dqFull := !canEnqueue
XSPerfAccumulate("in", numEnq)
XSPerfAccumulate("out", PopCount(io.deq.map(_.fire())))
XSPerfAccumulate("out", PopCount(io.deq.map(_.fire)))
XSPerfAccumulate("out_try", PopCount(io.deq.map(_.valid)))
val fake_block = currentValidCounter <= (size - enqnum).U && !canEnqueue
XSPerfAccumulate("fake_block", fake_block)
val validEntries = RegNext(PopCount(stateEntries.map(_ =/= s_invalid)))
val perfEvents = Seq(
("dispatchq_in ", numEnq),
("dispatchq_out ", PopCount(io.deq.map(_.fire()))),
("dispatchq_out_try ", PopCount(io.deq.map(_.valid))),
("dispatchq_fake_block", fake_block),
("dispatchq_1_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) < (size.U / 4.U))),
("dispatchq_2_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U / 4.U)) & (PopCount(stateEntries.map(_ =/= s_invalid)) <= (size.U / 2.U))),
("dispatchq_3_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U / 2.U)) & (PopCount(stateEntries.map(_ =/= s_invalid)) <= (size.U * 3.U / 4.U))),
("dispatchq_4_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U * 3.U / 4.U))),
("dispatchq_in", numEnq ),
("dispatchq_out", PopCount(io.deq.map(_.fire)) ),
("dispatchq_out_try", PopCount(io.deq.map(_.valid)) ),
("dispatchq_fake_block", fake_block ),
// Occupancy buckets are disjoint, so each validEntries value raises exactly one event:
//   1/4: [0, size/4)   2/4: [size/4, size/2]   3/4: (size/2, size*3/4]   4/4: (size*3/4, size]
// (Inclusive lower bounds on the 3/4 and 4/4 buckets would double-count the boundary values.)
("dispatchq_1_4_valid ", validEntries < (size / 4).U ),
("dispatchq_2_4_valid ", validEntries >= (size / 4).U && validEntries <= (size / 2).U ),
("dispatchq_3_4_valid ", validEntries > (size / 2).U && validEntries <= (size * 3 / 4).U),
("dispatchq_4_4_valid ", validEntries > (size * 3 / 4).U )
)
generatePerfEvent()
}
......@@ -65,7 +65,7 @@ class Rename(implicit p: Parameters) extends XSModule with HasPerfEvents {
fl.io.walk := io.robCommits.isWalk
// when isWalk, use stepBack to restore head pointer of free list
// (if ME enabled, stepBack of intFreeList should be useless thus optimized out)
fl.io.stepBack := PopCount(io.robCommits.valid.zip(io.robCommits.info).map{case (v, i) => v && needDestRegCommit(isFp, i)})
fl.io.stepBack := PopCount(io.robCommits.walkValid.zip(io.robCommits.info).map{case (v, i) => v && needDestRegCommit(isFp, i)})
}
// walk has higher priority than allocation and thus we don't use isWalk here
// only when both fp and int free list and dispatch1 has enough space can we do allocation
......
......@@ -39,9 +39,7 @@ abstract class BaseFreeList(size: Int)(implicit p: Parameters) extends XSModule
val stepBack = Input(UInt(log2Up(CommitWidth + 1).W))
})
class FreeListPtr extends CircularQueuePtr[FreeListPtr](size) {
def toOH: UInt = UIntToOH(value, size)
}
class FreeListPtr extends CircularQueuePtr[FreeListPtr](size)
object FreeListPtr {
def apply(f: Boolean, v: Int): FreeListPtr = {
......
......@@ -303,7 +303,6 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
val writeback = MixedVec(numWbPorts.map(num => Vec(num, Flipped(ValidIO(new ExuOutput)))))
val commits = new RobCommitIO
val lsq = new RobLsqIO
val bcommit = Output(UInt(log2Up(CommitWidth + 1).W))
val robDeqPtr = Output(new RobPtr)
val csr = new RobCSRIO
val robFull = Output(Bool())
......@@ -562,17 +561,17 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
(v & info.wflags, v & info.fpWen)
}).unzip
val fflags = Wire(Valid(UInt(5.W)))
fflags.valid := Mux(io.commits.isWalk, false.B, Cat(wflags).orR())
fflags.valid := Mux(io.commits.isWalk, false.B, Cat(wflags).orR)
fflags.bits := wflags.zip(fflagsDataRead).map({
case (w, f) => Mux(w, f, 0.U)
}).reduce(_|_)
val dirty_fs = Mux(io.commits.isWalk, false.B, Cat(fpWen).orR())
val dirty_fs = Mux(io.commits.isWalk, false.B, Cat(fpWen).orR)
// when mispredict branches writeback, stop commit in the next 2 cycles
// TODO: don't check all exu write back
val misPredWb = Cat(VecInit(exuWriteback.map(wb =>
wb.bits.redirect.cfiUpdate.isMisPred && wb.bits.redirectValid
))).orR()
))).orR
val misPredBlockCounter = Reg(UInt(3.W))
misPredBlockCounter := Mux(misPredWb,
"b111".U,
......@@ -593,12 +592,15 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
// when intrBitSetReg, allow only one instruction to commit at each clock cycle
val isBlocked = if (i != 0) Cat(commit_block.take(i)).orR || allowOnlyOneCommit else intrEnable || deqHasException || deqHasReplayInst
io.commits.valid(i) := commit_v(i) && commit_w(i) && !isBlocked && !misPredBlock && !isReplaying && !lastCycleFlush && !hasWFI
io.commits.walkValid(i) := DontCare
io.commits.info(i) := dispatchDataRead(i)
// Walk states override the commit outputs assigned above for slot i.
// In s_walk, valid and walkValid are both driven by commit_v(i) && shouldWalkVec(i).
when (state === s_walk) {
io.commits.valid(i) := commit_v(i) && shouldWalkVec(i)
io.commits.walkValid(i) := commit_v(i) && shouldWalkVec(i)
}.elsewhen(state === s_extrawalk) {
// In s_extrawalk, slots below RenameWidth take valid/walkValid from usedSpaceForMPR and
// info from extraSpaceForMPR, both read in reverse order (index RenameWidth-i-1);
// the remaining slots are driven invalid with don't-care info.
io.commits.valid(i) := (if (i < RenameWidth) usedSpaceForMPR(RenameWidth-i-1) else false.B)
io.commits.walkValid(i) := (if (i < RenameWidth) usedSpaceForMPR(RenameWidth-i-1) else false.B)
io.commits.info(i) := (if (i < RenameWidth) extraSpaceForMPR(RenameWidth-i-1) else DontCare)
}
......@@ -631,10 +633,6 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
io.csr.fflags := RegNext(fflags)
io.csr.dirty_fs := RegNext(dirty_fs)
// commit branch to brq
val cfiCommitVec = VecInit(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{case(v, t) => v && CommitType.isBranch(t)})
io.bcommit := Mux(io.commits.isWalk, 0.U, PopCount(cfiCommitVec))
// commit load/store to lsq
val ldCommitVec = VecInit((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.info(i).commitType === CommitType.LOAD))
val stCommitVec = VecInit((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.info(i).commitType === CommitType.STORE))
......@@ -941,7 +939,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
val commitIsStore = io.commits.info.map(_.commitType).map(_ === CommitType.STORE)
XSPerfAccumulate("commitInstrStore", ifCommit(PopCount(io.commits.valid.zip(commitIsStore).map{ case (v, t) => v && t })))
XSPerfAccumulate("writeback", PopCount((0 until RobSize).map(i => valid(i) && writebacked(i))))
// XSPerfAccumulate("enqInstr", PopCount(io.dp1Req.map(_.fire())))
// XSPerfAccumulate("enqInstr", PopCount(io.dp1Req.map(_.fire)))
// XSPerfAccumulate("d2rVnR", PopCount(io.dp1Req.map(p => p.valid && !p.ready)))
XSPerfAccumulate("walkInstr", Mux(io.commits.isWalk, PopCount(io.commits.valid), 0.U))
XSPerfAccumulate("walkCycle", state === s_walk || state === s_extrawalk)
......@@ -973,7 +971,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
XSPerfAccumulate(s"${fuName}_latency_execute", ifCommit(latencySum(commitIsFuType, executeLatency)))
XSPerfAccumulate(s"${fuName}_latency_enq_rs_execute", ifCommit(latencySum(commitIsFuType, rsFuLatency)))
XSPerfAccumulate(s"${fuName}_latency_commit", ifCommit(latencySum(commitIsFuType, commitLatency)))
if (fuType == FuType.fmac.litValue()) {
if (fuType == FuType.fmac.litValue) {
val commitIsFma = commitIsFuType.zip(commitDebugUop).map(x => x._1 && x._2.ctrl.fpu.ren3 )
XSPerfAccumulate(s"${fuName}_instr_cnt_fma", ifCommit(PopCount(commitIsFma)))
XSPerfAccumulate(s"${fuName}_latency_enq_rs_execute_fma", ifCommit(latencySum(commitIsFma, rsFuLatency)))
......
......@@ -31,7 +31,8 @@ package object xiangshan {
def imm = "b01".U
def fp = "b10".U
def DC = imm // Don't Care
def DC = imm // Don't Care
def X = BitPat("b??")
def isReg(srcType: UInt) = srcType===reg
def isPc(srcType: UInt) = srcType===pc
......@@ -69,6 +70,8 @@ package object xiangshan {
def stu = "b1101".U
def mou = "b1111".U // for amo, lr, sc, fence
def X = BitPat("b????")
def num = 14
def apply() = UInt(log2Up(num).W)
......@@ -500,6 +503,8 @@ package object xiangshan {
def INVALID_INSTR = "b0110".U
def IMM_B6 = "b1000".U
def X = BitPat("b????")
def apply() = UInt(4.W)
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册