未验证 提交 a338f247 编写于 作者: Y Yinan Xu 提交者: GitHub

Add dispatch and issue performance counters (#770)

In this commit, we add performance counters for dispatch and issue stages
to track the number of instructions dispatched and issued. Active regfile
read ports are counted as ready instruction source registers.
上级 156656b6
...@@ -303,6 +303,16 @@ class MicroOp(implicit p: Parameters) extends CfCtrl { ...@@ -303,6 +303,16 @@ class MicroOp(implicit p: Parameters) extends CfCtrl {
val sqIdx = new SqPtr val sqIdx = new SqPtr
val diffTestDebugLrScValid = Bool() val diffTestDebugLrScValid = Bool()
val debugInfo = new PerfDebugInfo val debugInfo = new PerfDebugInfo
/**
  * Tells whether source operand `index` of this micro-op counts as an active
  * read port of the register file selected by `rfType`.
  *
  * An operand counts when its source type matches the selected register file
  * and its own source state is `SrcState.rdy`. For the integer register file
  * the operand's logical register index must additionally be non-zero
  * (presumably because register 0 is the hard-wired zero register - confirm).
  *
  * Fix: every case previously tested `src1State`, so the readiness of the
  * second and third operands was never consulted. Each case now checks the
  * state of the operand it describes. This assumes `src2State` and
  * `src3State` are declared alongside `src1State` in this bundle.
  *
  * @param index  source operand position: 0, 1 or 2 (2 is only handled for rfType 1)
  * @param rfType 0 selects operands of type `SrcType.reg`, 1 selects `SrcType.fp`
  * @return a Bool that is true when the operand needs a read port; `false.B`
  *         for any (index, rfType) combination not listed below
  */
def needRfRPort(index: Int, rfType: Int) : Bool = {
  (index, rfType) match {
    // Integer register file: operands 0 and 1 only.
    case (0, 0) => ctrl.src1Type === SrcType.reg && ctrl.lsrc1 =/= 0.U && src1State === SrcState.rdy
    case (1, 0) => ctrl.src2Type === SrcType.reg && ctrl.lsrc2 =/= 0.U && src2State === SrcState.rdy
    // Floating-point register file: operands 0, 1 and 2.
    case (0, 1) => ctrl.src1Type === SrcType.fp && src1State === SrcState.rdy
    case (1, 1) => ctrl.src2Type === SrcType.fp && src2State === SrcState.rdy
    case (2, 1) => ctrl.src3Type === SrcType.fp && src3State === SrcState.rdy
    // Any other combination never uses a read port.
    case _ => false.B
  }
}
} }
class Redirect(implicit p: Parameters) extends XSBundle { class Redirect(implicit p: Parameters) extends XSBundle {
......
...@@ -94,7 +94,7 @@ class FloatBlock ...@@ -94,7 +94,7 @@ class FloatBlock
// val readPortIndex = RegNext(io.fromCtrlBlock.readPortIndex) // val readPortIndex = RegNext(io.fromCtrlBlock.readPortIndex)
val readPortIndex = Seq(0, 1, 2, 3, 2, 3) val readPortIndex = Seq(0, 1, 2, 3, 2, 3)
val reservedStations = exeUnits.map(_.config).zipWithIndex.map({ case (cfg, i) => val reservationStations = exeUnits.map(_.config).zipWithIndex.map({ case (cfg, i) =>
var certainLatency = -1 var certainLatency = -1
if (cfg.hasCertainLatency) { if (cfg.hasCertainLatency) {
certainLatency = cfg.latency.latencyVal.get certainLatency = cfg.latency.latencyVal.get
...@@ -150,8 +150,8 @@ class FloatBlock ...@@ -150,8 +150,8 @@ class FloatBlock
rs rs
}) })
for(rs <- reservedStations){ for(rs <- reservationStations){
val inBlockUops = reservedStations.filter(x => val inBlockUops = reservationStations.filter(x =>
x.exuCfg.hasCertainLatency && x.exuCfg.writeFpRf x.exuCfg.hasCertainLatency && x.exuCfg.writeFpRf
).map(x => { ).map(x => {
val raw = WireInit(x.io.fastUopOut) val raw = WireInit(x.io.fastUopOut)
...@@ -227,4 +227,8 @@ class FloatBlock ...@@ -227,4 +227,8 @@ class FloatBlock
difftest.io.coreid := 0.U difftest.io.coreid := 0.U
difftest.io.fpr := VecInit(fpRf.io.debug_rports.map(p => ieee(p.data))) difftest.io.fpr := VecInit(fpRf.io.debug_rports.map(p => ieee(p.data)))
} }
// Number of reservation stations in this block whose dequeue port is valid
// in the current cycle.
val rsDeqCount = PopCount(reservationStations.map(_.io.deq.valid))
// Record the count under the "fp_rs_deq_count" performance counter name.
XSPerfAccumulate("fp_rs_deq_count", rsDeqCount)
// Histogram of the same count. The trailing arguments are presumably
// (enable, start, stop, step) - TODO confirm against the XSPerfHistogram definition.
XSPerfHistogram("fp_rs_deq_count", rsDeqCount, true.B, 0, 6, 1)
} }
...@@ -3,7 +3,7 @@ package xiangshan.backend ...@@ -3,7 +3,7 @@ package xiangshan.backend
import chipsalliance.rocketchip.config.Parameters import chipsalliance.rocketchip.config.Parameters
import chisel3._ import chisel3._
import chisel3.util._ import chisel3.util._
import utils.XSPerfAccumulate import utils._
import xiangshan._ import xiangshan._
import xiangshan.backend.exu._ import xiangshan.backend.exu._
import xiangshan.backend.issue.ReservationStation import xiangshan.backend.issue.ReservationStation
...@@ -275,4 +275,7 @@ class IntegerBlock ...@@ -275,4 +275,7 @@ class IntegerBlock
difftest.io.gpr := VecInit(intRf.io.debug_rports.map(_.data)) difftest.io.gpr := VecInit(intRf.io.debug_rports.map(_.data))
} }
// Number of reservation stations in this block whose dequeue port is valid
// in the current cycle.
val rsDeqCount = PopCount(reservationStations.map(_.io.deq.valid))
// Record the count under the "int_rs_deq_count" performance counter name.
XSPerfAccumulate("int_rs_deq_count", rsDeqCount)
// Histogram of the same count. The trailing arguments are presumably
// (enable, start, stop, step) - TODO confirm against the XSPerfHistogram definition.
XSPerfHistogram("int_rs_deq_count", rsDeqCount, true.B, 0, 7, 1)
} }
...@@ -14,6 +14,7 @@ import xiangshan.mem._ ...@@ -14,6 +14,7 @@ import xiangshan.mem._
import xiangshan.backend.fu.{FenceToSbuffer, HasExceptionNO} import xiangshan.backend.fu.{FenceToSbuffer, HasExceptionNO}
import xiangshan.backend.issue.ReservationStation import xiangshan.backend.issue.ReservationStation
import xiangshan.backend.regfile.RfReadPort import xiangshan.backend.regfile.RfReadPort
import utils._
class LsBlockToCtrlIO(implicit p: Parameters) extends XSBundle { class LsBlockToCtrlIO(implicit p: Parameters) extends XSBundle {
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput)) val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput))
...@@ -382,5 +383,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer) ...@@ -382,5 +383,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.memInfo.sqFull := RegNext(lsq.io.sqFull) io.memInfo.sqFull := RegNext(lsq.io.sqFull)
io.memInfo.lqFull := RegNext(lsq.io.lqFull) io.memInfo.lqFull := RegNext(lsq.io.lqFull)
io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull) io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)
}
// Dequeue-valid count over the first two reservation stations, reported as
// the load counters below.
val ldDeqCount = PopCount(reservationStations.take(2).map(_.io.deq.valid))
// Dequeue-valid count over all remaining reservation stations, reported as
// the store counters below.
val stDeqCount = PopCount(reservationStations.drop(2).map(_.io.deq.valid))
// Combined load + store dequeue count for this block.
val rsDeqCount = ldDeqCount + stDeqCount
XSPerfAccumulate("load_rs_deq_count", ldDeqCount)
// NOTE(review): the histogram range here is (1, 2, 1) while the integer and
// float blocks start their ranges at 0 - confirm that cycles with zero
// dequeues are binned as intended.
XSPerfHistogram("load_rs_deq_count", ldDeqCount, true.B, 1, 2, 1)
XSPerfAccumulate("store_rs_deq_count", stDeqCount)
// NOTE(review): same (1, 2, 1) range as the load histogram above - confirm.
XSPerfHistogram("store_rs_deq_count", stDeqCount, true.B, 1, 2, 1)
XSPerfAccumulate("ls_rs_deq_count", rsDeqCount)
}
...@@ -125,4 +125,26 @@ class Dispatch(implicit p: Parameters) extends XSModule { ...@@ -125,4 +125,26 @@ class Dispatch(implicit p: Parameters) extends XSModule {
io.ctrlInfo.intdqFull := intDq.io.dqFull io.ctrlInfo.intdqFull := intDq.io.dqFull
io.ctrlInfo.fpdqFull := fpDq.io.dqFull io.ctrlInfo.fpdqFull := fpDq.io.dqFull
io.ctrlInfo.lsdqFull := lsDq.io.dqFull io.ctrlInfo.lsdqFull := lsDq.io.dqFull
// Per enqueue slot: how many source operands report needRfRPort for the
// given register file type (second argument 0 or 1).
// Integer dispatch: operands 0 and 1 checked with rfType 0.
val intPortsNeeded = intDispatch.io.enqIQCtrl.map(enq => PopCount((0 until 2).map(i => enq.bits.needRfRPort(i, 0))))
// Floating-point dispatch: operands 0, 1 and 2 checked with rfType 1.
val fpPortsNeeded = fpDispatch.io.enqIQCtrl.map(enq => PopCount((0 until 3).map(i => enq.bits.needRfRPort(i, 1))))
// Load/store dispatch: operands 0 and 1 checked separately against rfType 0 and rfType 1.
val lsPortsNeededInt = lsDispatch.io.enqIQCtrl.map(enq => PopCount((0 until 2).map(i => enq.bits.needRfRPort(i, 0))))
val lsPortsNeededFp = lsDispatch.io.enqIQCtrl.map(enq => PopCount((0 until 2).map(i => enq.bits.needRfRPort(i, 1))))
// Sums the per-slot port counts in `ports`, counting a slot only when the
// matching entry of `enq` is asserted; slots that are not valid contribute 0.
// `enq` and `ports` are paired positionally.
def get_active_ports(enq: Seq[Bool], ports: Seq[UInt]) = {
  enq.zip(ports)
    .map { case (slotValid, slotPorts) =>
      // Gate the slot's demand with its valid bit.
      Mux(slotValid, slotPorts, 0.U)
    }
    // Width-expanding add, folded left to right as Seq.reduce does.
    .reduceLeft(_ +& _)
}
// Total read-port demand per dispatch unit: each slot's port count is
// included only when that slot's enqIQCtrl.valid is asserted.
val intActivePorts = get_active_ports(intDispatch.io.enqIQCtrl.map(_.valid), intPortsNeeded)
val fpActivePorts = get_active_ports(fpDispatch.io.enqIQCtrl.map(_.valid), fpPortsNeeded)
val lsActivePortsInt = get_active_ports(lsDispatch.io.enqIQCtrl.map(_.valid), lsPortsNeededInt)
val lsActivePortsFp = get_active_ports(lsDispatch.io.enqIQCtrl.map(_.valid), lsPortsNeededFp)
// Combined demand per register file: the dedicated dispatch unit plus the
// load/store dispatch unit.
// NOTE(review): these sums use plain `+` rather than `+&`; presumably the
// operands are already wide enough that no carry is lost - confirm.
val activePortsIntAll = intActivePorts + lsActivePortsInt
val activePortsFpAll = fpActivePorts + lsActivePortsFp
// Performance counters: one per contributor and one for each combined total.
XSPerfAccumulate("int_rf_active_ports_int", intActivePorts)
XSPerfAccumulate("int_rf_active_ports_ls", lsActivePortsInt)
XSPerfAccumulate("int_rf_active_ports_all", activePortsIntAll)
XSPerfAccumulate("fp_rf_active_ports_fp", fpActivePorts)
XSPerfAccumulate("fp_rf_active_ports_ls", lsActivePortsFp)
XSPerfAccumulate("fp_rf_active_ports_all", activePortsFpAll)
// Histograms of the combined totals. The trailing arguments are presumably
// (enable, start, stop, step) - TODO confirm against the XSPerfHistogram definition.
XSPerfHistogram("int_rf_active_ports_all", activePortsIntAll, true.B, 0, 14, 1)
XSPerfHistogram("fp_rf_active_ports_all", activePortsFpAll, true.B, 0, 14, 1)
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册