Unverified commit 1ca0e4f3, authored by Yinan Xu, committed by GitHub

core: refactor hardware performance counters (#1335)

This commit refactors the coding style of the hardware performance
counters and improves their timing.

By default, performance counter outputs are now registered twice, i.e. RegNext(RegNext(_)).
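
For illustration only (not part of this diff): a minimal sketch of how a module is expected to expose counters after this change. It assumes XiangShan's utils package is available (for the HasPerfEvents trait shown in the hunk below); the module and signal names are hypothetical. A module mixes in HasPerfEvents, defines perfEvents as (name, increment) pairs, and calls generatePerfEvent(), which drives io_perf through two register stages by default.

import chisel3._
import chisel3.util._
import utils.HasPerfEvents

// Hypothetical module exposing two counters through the new interface.
class MyQueuePerfExample extends Module with HasPerfEvents {
  val io = IO(new Bundle {
    val enqFire = Input(Bool())
    val deqFire = Input(Bool())
  })
  // Each entry is (event name, per-cycle increment); names must not contain '/'.
  val perfEvents = Seq(
    ("example_enq", PopCount(Seq(io.enqFire))),
    ("example_deq", PopCount(Seq(io.deqFire)))
  )
  // Drives io_perf with RegNext(RegNext(counter)) for every event (the default).
  generatePerfEvent()
}

A parent module can then collect these counters with getPerf / getPerfEvents and feed them into HPerfMonitor, as the CtrlBlock and MemBlock changes below do.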
Parent 1dc3a3a0
......@@ -158,61 +158,83 @@ object XSPerfPrint {
}
}
class PerfBundle(implicit p: Parameters) extends XSBundle {
val incr_step = UInt(6.W)
class PerfEvent extends Bundle {
val value = UInt(6.W)
}
class PerfEventsBundle (val numPCnt: Int) (implicit p: Parameters)extends XSBundle{
val perf_events = Vec(numPCnt, (new PerfBundle))
def length = numPCnt
trait HasPerfEvents { this: RawModule =>
val perfEvents: Seq[(String, UInt)]
lazy val io_perf: Vec[PerfEvent] = IO(Output(Vec(perfEvents.length, new PerfEvent)))
def generatePerfEvent(noRegNext: Option[Seq[Int]] = None): Unit = {
for (((out, (name, counter)), i) <- io_perf.zip(perfEvents).zipWithIndex) {
require(!name.contains("/"))
out.value := RegNext(RegNext(counter))
if (noRegNext.isDefined && noRegNext.get.contains(i)) {
out.value := counter
}
}
}
def getPerfEvents: Seq[(String, UInt)] = {
perfEvents.map(_._1).zip(io_perf).map(x => (x._1, x._2.value))
}
def getPerf: Vec[PerfEvent] = io_perf
}
class HPerfCounter (val numPCnt: Int) (implicit p: Parameters) extends XSModule{
class HPerfCounter(val numPCnt: Int)(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new Bundle {
val hpm_event = Input(UInt(XLEN.W))
val events_sets = Input(new PerfEventsBundle(numPCnt))
val event_selected = Output(new PerfBundle)
val hpm_event = Input(UInt(XLEN.W))
val events_sets = Input(Vec(numPCnt, new PerfEvent))
})
val events_incr_0 = io.events_sets.perf_events(io.hpm_event(9,0))
val events_incr_1 = io.events_sets.perf_events(io.hpm_event(19,10))
val events_incr_2 = io.events_sets.perf_events(io.hpm_event(29,20))
val events_incr_3 = io.events_sets.perf_events(io.hpm_event(39,30))
val event_op_0 = io.hpm_event(44,40)
val event_op_1 = io.hpm_event(49,45)
val event_op_2 = io.hpm_event(54,50)
val event_step_0 = Mux(event_op_0(0),(events_incr_3.incr_step & events_incr_2.incr_step),
Mux(event_op_0(1),(events_incr_3.incr_step ^ events_incr_2.incr_step),
Mux(event_op_0(2),(events_incr_3.incr_step + events_incr_2.incr_step),
(events_incr_3.incr_step | events_incr_2.incr_step))))
val event_step_1 = Mux(event_op_1(0),(events_incr_1.incr_step & events_incr_0.incr_step),
Mux(event_op_1(1),(events_incr_1.incr_step ^ events_incr_0.incr_step),
Mux(event_op_1(2),(events_incr_1.incr_step + events_incr_0.incr_step),
(events_incr_1.incr_step | events_incr_0.incr_step))))
io.event_selected.incr_step := Mux(event_op_1(0),(event_step_0 & event_step_1),
Mux(event_op_1(1),(event_step_0 ^ event_step_1),
Mux(event_op_1(2),(event_step_0 + event_step_1),
(event_step_0 | event_step_1))))
val events_incr_0 = io.events_sets(io.hpm_event( 9, 0))
val events_incr_1 = io.events_sets(io.hpm_event(19, 10))
val events_incr_2 = io.events_sets(io.hpm_event(29, 20))
val events_incr_3 = io.events_sets(io.hpm_event(39, 30))
val event_op_0 = io.hpm_event(44, 40)
val event_op_1 = io.hpm_event(49, 45)
val event_op_2 = io.hpm_event(54, 50)
val event_step_0 = Mux(event_op_0(0), events_incr_3.value & events_incr_2.value,
Mux(event_op_0(1), events_incr_3.value ^ events_incr_2.value,
Mux(event_op_0(2), events_incr_3.value + events_incr_2.value,
events_incr_3.value | events_incr_2.value)))
val event_step_1 = Mux(event_op_1(0), events_incr_1.value & events_incr_0.value,
Mux(event_op_1(1), events_incr_1.value ^ events_incr_0.value,
Mux(event_op_1(2), events_incr_1.value + events_incr_0.value,
events_incr_1.value | events_incr_0.value)))
val selected = Mux(event_op_1(0), event_step_0 & event_step_1,
Mux(event_op_1(1), event_step_0 ^ event_step_1,
Mux(event_op_1(2), event_step_0 + event_step_1,
event_step_0 | event_step_1)))
val perfEvents = Seq(("selected", selected))
generatePerfEvent()
}
class HPerfmonitor (val numPCnt: Int, val numCSRPCnt: Int) (implicit p: Parameters) extends XSModule{
class HPerfMonitor(numCSRPCnt: Int, numPCnt: Int)(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new Bundle {
val hpm_event = Input(Vec(numCSRPCnt, UInt(XLEN.W)))
val events_sets = Input(new PerfEventsBundle(numPCnt))
//val Events_selected = Output(Vec(numCSRPCnt,(new PerfBundle)))
val events_selected = Output(new PerfEventsBundle(numCSRPCnt))
val hpm_event = Input(Vec(numCSRPCnt, UInt(XLEN.W)))
val events_sets = Input(Vec(numPCnt, new PerfEvent))
})
for (i <- 0 until numCSRPCnt) {
val perfEvents = io.hpm_event.zipWithIndex.map{ case (hpm, i) =>
val hpc = Module(new HPerfCounter(numPCnt))
hpc.io.events_sets <> io.events_sets
hpc.io.hpm_event := io.hpm_event(i)
hpc.io.event_selected <> io.events_selected.perf_events(i)
hpc.io.events_sets <> io.events_sets
hpc.io.hpm_event := hpm
val selected = hpc.getPerfEvents.head
(s"${selected._1}_$i", selected._2)
}
generatePerfEvent()
}
object HPerfMonitor {
def apply(hpm_event: Seq[UInt], events_sets: Seq[PerfEvent])(implicit p: Parameters): HPerfMonitor = {
val hpm = Module(new HPerfMonitor(hpm_event.length, events_sets.length))
hpm.io.hpm_event := hpm_event
hpm.io.events_sets := events_sets
hpm
}
}
......@@ -434,5 +434,4 @@ trait HasXSParameter {
val numCSRPCntCtrl = 8
val numCSRPCntLsu = 8
val numCSRPCntHc = 5
val print_perfcounter = false
}
......@@ -240,7 +240,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
val io = IO(new Bundle {
val hartId = Input(UInt(64.W))
val l2_pf_enable = Output(Bool())
val perfEvents = Vec(numPCntHc * coreParams.L2NBanks,(Input(UInt(6.W))))
val perfEvents = Input(Vec(numPCntHc * coreParams.L2NBanks, new PerfEvent))
val beu_errors = Output(new XSL1BusErrors())
})
......@@ -331,18 +331,10 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)
// TODO: connect rsPerf
val rsPerf = VecInit(exuBlocks.flatMap(_.io.scheExtra.perf))
val rs_perf = Wire(new PerfEventsBundle(rsPerf.length))
val rs_cnt = rs_perf.length
for (i <- 0 until rs_cnt){
rs_perf.perf_events(i).incr_step := rsPerf(i).asUInt
}
dontTouch(rsPerf)
exuBlocks(0).perfinfo.perfEvents <> ctrlBlock.perfinfo.perfEventsEu0
exuBlocks(1).perfinfo.perfEvents <> ctrlBlock.perfinfo.perfEventsEu1
memBlock.perfinfo.perfEventsPTW <> ptw.perfinfo.perfEvents
ctrlBlock.perfinfo.perfEventsRs := rs_perf
ctrlBlock.perfinfo.perfEventsEu0 := exuBlocks(0).getPerf.dropRight(outer.exuBlocks(0).scheduler.numRs)
ctrlBlock.perfinfo.perfEventsEu1 := exuBlocks(1).getPerf.dropRight(outer.exuBlocks(1).scheduler.numRs)
memBlock.io.perfEventsPTW := ptw.getPerf
ctrlBlock.perfinfo.perfEventsRs := outer.exuBlocks.flatMap(b => b.module.getPerf.takeRight(b.scheduler.numRs))
csrioIn.hartId <> io.hartId
csrioIn.perf <> DontCare
......@@ -351,9 +343,9 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
csrioIn.perf.memInfo <> memBlock.io.memInfo
csrioIn.perf.frontendInfo <> frontend.io.frontendInfo
csrioIn.perf.perfEventsFrontend <> frontend.perfinfo.perfEvents
csrioIn.perf.perfEventsCtrl <> ctrlBlock.perfinfo.perfEvents
csrioIn.perf.perfEventsLsu <> memBlock.perfinfo.perfEvents
csrioIn.perf.perfEventsFrontend <> frontend.getPerf
csrioIn.perf.perfEventsCtrl <> ctrlBlock.getPerf
csrioIn.perf.perfEventsLsu <> memBlock.getPerf
csrioIn.perf.perfEventsHc <> io.perfEvents
csrioIn.fpu.fflags <> ctrlBlock.io.robio.toCSR.fflags
......
......@@ -130,7 +130,7 @@ class XSTile()(implicit p: Parameters) extends LazyModule
core.module.io.hartId := io.hartId
if(l2cache.isDefined){
core.module.io.perfEvents <> l2cache.get.module.io.perfEvents.flatten
core.module.io.perfEvents.zip(l2cache.get.module.io.perfEvents.flatten).foreach(x => x._1.value := x._2)
}
else {
core.module.io.perfEvents <> DontCare
......
......@@ -172,7 +172,8 @@ class RedirectGenerator(implicit p: Parameters) extends XSModule
}
}
class CtrlBlock(implicit p: Parameters) extends LazyModule with HasWritebackSink with HasWritebackSource {
class CtrlBlock(implicit p: Parameters) extends LazyModule
with HasWritebackSink with HasWritebackSource {
val rob = LazyModule(new Rob)
override def addWritebackSink(source: Seq[HasWritebackSource], index: Option[Seq[Int]]): HasWritebackSink = {
......@@ -196,7 +197,11 @@ class CtrlBlock(implicit p: Parameters) extends LazyModule with HasWritebackSink
}
class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleImp(outer)
with HasXSParameter with HasCircularQueuePtrHelper with HasWritebackSourceImp {
with HasXSParameter
with HasCircularQueuePtrHelper
with HasWritebackSourceImp
with HasPerfEvents
{
val writebackLengths = outer.writebackSinksParams.map(_.length)
val io = IO(new Bundle {
......@@ -252,9 +257,9 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
val waittable = Module(new WaitTable)
val rename = Module(new Rename)
val dispatch = Module(new Dispatch)
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, RenameWidth, dpParams.IntDqDeqWidth, "int"))
val fpDq = Module(new DispatchQueue(dpParams.FpDqSize, RenameWidth, dpParams.FpDqDeqWidth, "fp"))
val lsDq = Module(new DispatchQueue(dpParams.LsDqSize, RenameWidth, dpParams.LsDqDeqWidth, "ls"))
val intDq = Module(new DispatchQueue(dpParams.IntDqSize, RenameWidth, dpParams.IntDqDeqWidth))
val fpDq = Module(new DispatchQueue(dpParams.FpDqSize, RenameWidth, dpParams.FpDqDeqWidth))
val lsDq = Module(new DispatchQueue(dpParams.LsDqSize, RenameWidth, dpParams.LsDqDeqWidth))
val redirectGen = Module(new RedirectGenerator)
val rob = outer.rob.module
......@@ -414,41 +419,17 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
io.perfInfo.ctrlInfo.lsdqFull := RegNext(lsDq.io.dqFull)
val pfevent = Module(new PFEvent)
pfevent.io.distribute_csr := RegNext(io.csrCtrl.distribute_csr)
val csrevents = pfevent.io.hpmevent.slice(8,16)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(csrevents.length))
val perfEventsRs = Input(new PerfEventsBundle(NumRs))
val perfEventsEu0 = Input(new PerfEventsBundle(10))
val perfEventsEu1 = Input(new PerfEventsBundle(10))
val perfEventsRs = Input(Vec(NumRs, new PerfEvent))
val perfEventsEu0 = Input(Vec(6, new PerfEvent))
val perfEventsEu1 = Input(Vec(6, new PerfEvent))
})
if(print_perfcounter){
val decode_perf = decode.perfEvents.map(_._1).zip(decode.perfinfo.perfEvents.perf_events)
val rename_perf = rename.perfEvents.map(_._1).zip(rename.perfinfo.perfEvents.perf_events)
val dispat_perf = dispatch.perfEvents.map(_._1).zip(dispatch.perfinfo.perfEvents.perf_events)
val intdq_perf = intDq.perfEvents.map(_._1).zip(intDq.perfinfo.perfEvents.perf_events)
val fpdq_perf = fpDq.perfEvents.map(_._1).zip(fpDq.perfinfo.perfEvents.perf_events)
val lsdq_perf = lsDq.perfEvents.map(_._1).zip(lsDq.perfinfo.perfEvents.perf_events)
val rob_perf = rob.perfEvents.map(_._1).zip(rob.perfinfo.perfEvents.perf_events)
val perfEvents = decode_perf ++ rename_perf ++ dispat_perf ++ intdq_perf ++ fpdq_perf ++ lsdq_perf ++ rob_perf
for (((perf_name,perf),i) <- perfEvents.zipWithIndex) {
println(s"ctrl perf $i: $perf_name")
}
}
val hpmEvents = decode.perfinfo.perfEvents.perf_events ++ rename.perfinfo.perfEvents.perf_events ++
dispatch.perfinfo.perfEvents.perf_events ++
intDq.perfinfo.perfEvents.perf_events ++ fpDq.perfinfo.perfEvents.perf_events ++
lsDq.perfinfo.perfEvents.perf_events ++ rob.perfinfo.perfEvents.perf_events ++
perfinfo.perfEventsEu0.perf_events ++ perfinfo.perfEventsEu1.perf_events ++
perfinfo.perfEventsRs.perf_events
val perf_length = hpmEvents.length
val hpm_ctrl = Module(new HPerfmonitor(perf_length,csrevents.length))
hpm_ctrl.io.hpm_event := csrevents
hpm_ctrl.io.events_sets.perf_events := hpmEvents
perfinfo.perfEvents := RegNext(hpm_ctrl.io.events_selected)
pfevent.io.distribute_csr := RegNext(io.csrCtrl.distribute_csr)
val allPerfEvents = Seq(decode, rename, dispatch, intDq, fpDq, lsDq, rob).flatMap(_.getPerf)
val hpmEvents = allPerfEvents ++ perfinfo.perfEventsEu0 ++ perfinfo.perfEventsEu1 ++ perfinfo.perfEventsRs
val perfEvents = HPerfMonitor(csrevents, hpmEvents).getPerfEvents
generatePerfEvent()
}
......@@ -54,7 +54,7 @@ class ExuBlock(
}
class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp(outer)
with HasWritebackSourceImp {
with HasWritebackSourceImp with HasPerfEvents {
val scheduler = outer.scheduler.module
val fuConfigs = outer.fuConfigs
......@@ -91,10 +91,8 @@ class ExuBlockImp(outer: ExuBlock)(implicit p: Parameters) extends LazyModuleImp
scheduler.io.fastUopIn <> io.fastUopIn
scheduler.io.extra <> io.scheExtra
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(scheduler.perfinfo.perfEvents.length))
})
scheduler.perfinfo <> perfinfo
val perfEvents = scheduler.getPerfEvents
generatePerfEvent()
// the scheduler issues instructions to function units
scheduler.io.issue <> fuBlock.io.issue ++ io.issue.getOrElse(Seq())
......
......@@ -57,6 +57,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
with HasXSParameter
with HasFPUParameters
with HasWritebackSourceImp
with HasPerfEvents
{
val io = IO(new Bundle {
......@@ -91,6 +92,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val lqFull = Output(Bool())
val dcacheMSHRFull = Output(Bool())
}
val perfEventsPTW = Input(Vec(19, new PerfEvent))
})
override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.writeback))
......@@ -474,39 +476,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val pfevent = Module(new PFEvent)
pfevent.io.distribute_csr := io.csrCtrl.distribute_csr
val csrevents = pfevent.io.hpmevent.slice(16,24)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(csrevents.length))
val perfEventsPTW = Input(new PerfEventsBundle(19))
})
val perfEvents_list = Wire(new PerfEventsBundle(2))
val perfEvents = Seq(
("ldDeqCount ", ldDeqCount ),
("stDeqCount ", stDeqCount ),
)
for (((perf_out,(perf_name,perf)),i) <- perfEvents_list.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
if(print_perfcounter){
val ldu0_perf = loadUnits(0).perfEvents.map(_._1).zip(loadUnits(0).perfinfo.perfEvents.perf_events)
val ldu1_perf = loadUnits(1).perfEvents.map(_._1).zip(loadUnits(1).perfinfo.perfEvents.perf_events)
val sbuf_perf = sbuffer.perfEvents.map(_._1).zip(sbuffer.perfinfo.perfEvents.perf_events)
val lsq_perf = lsq.perfEvents.map(_._1).zip(lsq.perfinfo.perfEvents.perf_events)
val dc_perf = dcache.perfEvents.map(_._1).zip(dcache.perfinfo.perfEvents.perf_events)
val mem_perf = perfEvents ++ ldu0_perf ++ ldu1_perf ++ sbuf_perf ++ lsq_perf ++ dc_perf
for (((perf_name,perf),i) <- mem_perf.zipWithIndex) {
println(s"lsu perf $i: $perf_name")
}
}
val hpmEvents = perfEvents_list.perf_events ++ loadUnits(0).perfinfo.perfEvents.perf_events ++
loadUnits(1).perfinfo.perfEvents.perf_events ++ sbuffer.perfinfo.perfEvents.perf_events ++
lsq.perfinfo.perfEvents.perf_events ++ dcache.perfinfo.perfEvents.perf_events ++
perfinfo.perfEventsPTW.perf_events
val perf_length = hpmEvents.length
val hpm_lsu = Module(new HPerfmonitor(perf_length,csrevents.length))
hpm_lsu.io.hpm_event := csrevents
hpm_lsu.io.events_sets.perf_events := hpmEvents
perfinfo.perfEvents := RegNext(hpm_lsu.io.events_selected)
val memBlockPerfEvents = Seq(
("ldDeqCount", ldDeqCount),
("stDeqCount", stDeqCount),
)
val allPerfEvents = memBlockPerfEvents ++ (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
val hpmEvents = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) ++ io.perfEventsPTW
val perfEvents = HPerfMonitor(csrevents, hpmEvents).getPerfEvents
generatePerfEvent()
}
......@@ -16,23 +16,21 @@
package xiangshan.backend
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config.Parameters
import difftest.{DifftestArchFpRegState, DifftestArchIntRegState}
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import xiangshan._
import utils._
import xiangshan._
import xiangshan.backend.dispatch.Dispatch2Rs
import xiangshan.backend.exu.ExuConfig
import xiangshan.backend.fu.fpu.FMAMidResultIO
import xiangshan.backend.issue.{ReservationStation, ReservationStationWrapper, RsPerfCounter}
import xiangshan.backend.regfile.{Regfile, RfReadPort, RfWritePort}
import xiangshan.backend.issue.ReservationStationWrapper
import xiangshan.backend.regfile.{Regfile, RfReadPort}
import xiangshan.backend.rename.{BusyTable, BusyTableReadIO}
import xiangshan.mem.{LsqEnqIO, MemWaitUpdateReq, SqPtr}
import scala.collection.mutable.ArrayBuffer
class DispatchArbiter(func: Seq[MicroOp => Bool])(implicit p: Parameters) extends XSModule {
val numTarget = func.length
......@@ -193,12 +191,11 @@ class Scheduler(
lazy val module = new SchedulerImp(this)
def canAccept(fuType: UInt): Bool = {
VecInit(configs.map(_._1.canAccept(fuType))).asUInt.orR
}
def canAccept(fuType: UInt): Bool = VecInit(configs.map(_._1.canAccept(fuType))).asUInt.orR
def numRs: Int = reservationStations.map(_.numRS).sum
}
class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSParameter {
class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSParameter with HasPerfEvents {
val memRsEntries = outer.getMemRsEntries
val updatedP = p.alter((site, here, up) => {
case XSCoreParamsKey => up(XSCoreParamsKey).copy(
......@@ -211,7 +208,6 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
val fpRfConfig = (outer.numFpRfReadPorts > 0 && outer.hasFpRf, outer.numFpRfReadPorts, fpRfWritePorts)
val rs_all = outer.reservationStations
val numPerfPorts = outer.reservationStations.map(_.module.perf.length).sum
// print rs info
println("Scheduler: ")
......@@ -221,7 +217,6 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
println(s" number of std ports: ${outer.numSTDPorts}")
val numLoadPorts = outer.reservationStations.map(_.module.io.load).filter(_.isDefined).map(_.get.fastMatch.length).sum
println(s" number of load ports: ${numLoadPorts}")
println(s" number of perf ports: ${numPerfPorts}")
if (intRfConfig._1) {
println(s"INT Regfile: ${intRfConfig._2}R${intRfConfig._3}W")
}
......@@ -260,7 +255,6 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
// debug
val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
val perf = Vec(numPerfPorts, Output(new RsPerfCounter))
override def cloneType: SchedulerExtraIO.this.type =
new SchedulerExtraIO().asInstanceOf[this.type]
......@@ -293,11 +287,7 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
io.in <> dispatch2.flatMap(_.io.in)
val readIntState = dispatch2.flatMap(_.io.readIntState.getOrElse(Seq()))
val intbtperfEvents = Wire(new PerfEventsBundle(4))
val fpbtperfEvents = Wire(new PerfEventsBundle(4))
intbtperfEvents := DontCare
fpbtperfEvents := DontCare
if (readIntState.nonEmpty) {
val intBusyTable = if (readIntState.nonEmpty) {
val busyTable = Module(new BusyTable(readIntState.length, intRfWritePorts))
busyTable.io.allocPregs.zip(io.allocPregs).foreach{ case (pregAlloc, allocReq) =>
pregAlloc.valid := allocReq.isInt
......@@ -307,16 +297,16 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
pregWb.valid := exuWb.valid && exuWb.bits.uop.ctrl.rfWen
pregWb.bits := exuWb.bits.uop.pdest
}
intbtperfEvents <> busyTable.perfinfo.perfEvents
busyTable.io.read <> readIntState
}
Some(busyTable)
} else None
val readFpState = io.extra.fpStateReadOut.getOrElse(Seq()) ++ dispatch2.flatMap(_.io.readFpState.getOrElse(Seq()))
if (readFpState.nonEmpty) {
val fpBusyTable = if (readFpState.nonEmpty) {
// Some fp states are read from outside
val numInFpStateRead = io.extra.fpStateReadIn.getOrElse(Seq()).length
// The left read requests are serviced by internal busytable
val numBusyTableRead = readFpState.length - numInFpStateRead
if (numBusyTableRead > 0) {
val busyTable = if (numBusyTableRead > 0) {
val busyTable = Module(new BusyTable(numBusyTableRead, fpRfWritePorts))
busyTable.io.allocPregs.zip(io.allocPregs).foreach { case (pregAlloc, allocReq) =>
pregAlloc.valid := allocReq.isFp
......@@ -327,13 +317,14 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
pregWb.bits := exuWb.bits.uop.pdest
}
busyTable.io.read <> readFpState.take(numBusyTableRead)
fpbtperfEvents <> busyTable.perfinfo.perfEvents
busyTable.io.read <> readFpState
}
Some(busyTable)
} else None
if (io.extra.fpStateReadIn.isDefined) {
io.extra.fpStateReadIn.get <> readFpState.takeRight(numInFpStateRead)
}
}
busyTable
} else None
val allocate = dispatch2.flatMap(_.io.out)
if (io.fmaMid.isDefined) {
......@@ -453,7 +444,6 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
val allLoadRS = outer.reservationStations.map(_.module.io.load).filter(_.isDefined)
io.extra.loadFastMatch.get := allLoadRS.map(_.get.fastMatch).fold(Seq())(_ ++ _)
}
io.extra.perf <> rs_all.flatMap(_.module.perf)
var intReadPort = 0
var fpReadPort = 0
......@@ -520,18 +510,13 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
XSPerfAccumulate("allocate_fire", PopCount(allocate.map(_.fire())))
XSPerfAccumulate("issue_valid", PopCount(io.issue.map(_.valid)))
XSPerfAccumulate("issue_fire", PopCount(io.issue.map(_.fire)))
val perfEvents_list = Wire(new PerfEventsBundle(2))
val perfEvents = Seq(
("sche_allocate_fire ", PopCount(allocate.map(_.fire())) ),
("sche_issue_fire ", PopCount(io.issue.map(_.fire)) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfEvents_list.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
val perf_list = perfEvents_list.perf_events ++ intbtperfEvents.perf_events ++ fpbtperfEvents.perf_events
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(perf_list.length))
})
perfinfo.perfEvents.perf_events := perf_list
val schedulerPerf = Seq(
("sche_allocate_fire ", PopCount(allocate.map(_.fire()))),
("sche_issue_fire ", PopCount(io.issue.map(_.fire)) )
)
val intBtPerf = if (intBusyTable.isDefined) intBusyTable.get.getPerfEvents else Seq()
val fpBtPerf = if (fpBusyTable.isDefined) fpBusyTable.get.getPerfEvents else Seq()
val perfEvents = schedulerPerf ++ intBtPerf ++ fpBtPerf ++ rs_all.flatMap(_.module.getPerfEvents)
generatePerfEvent()
}
......@@ -22,7 +22,7 @@ import chisel3.util._
import xiangshan._
import utils._
class DecodeStage(implicit p: Parameters) extends XSModule {
class DecodeStage(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new Bundle() {
// from Ibuffer
val in = Vec(DecodeWidth, Flipped(DecoupledIO(new CtrlFlow)))
......@@ -81,18 +81,12 @@ class DecodeStage(implicit p: Parameters) extends XSModule {
XSPerfAccumulate("utilization", PopCount(io.in.map(_.valid)))
XSPerfAccumulate("waitInstr", PopCount((0 until DecodeWidth).map(i => io.in(i).valid && !io.in(i).ready)))
XSPerfAccumulate("stall_cycle", hasValid && !io.out(0).ready)
val perfEvents = Seq(
("decoder_fused_instr ", PopCount(fusionDecoder.io.out.map(_.fire)) ),
("decoder_waitInstr ", PopCount((0 until DecodeWidth).map(i => io.in(i).valid && !io.in(i).ready))),
("decoder_stall_cycle ", hasValid && !io.out(0).ready ),
("decoder_utilization ", PopCount(io.in.map(_.valid)) ),
)
val numPerfEvents = perfEvents.size
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(numPerfEvents))
})
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -37,7 +37,7 @@ case class DispatchParameters
)
// read rob and enqueue
class Dispatch(implicit p: Parameters) extends XSModule {
class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new Bundle() {
val hartId = Input(UInt(8.W))
// from rename
......@@ -294,9 +294,6 @@ class Dispatch(implicit p: Parameters) extends XSModule {
XSPerfAccumulate("stall_cycle_fp_dq", hasValidInstr && io.enqRob.canAccept && io.toIntDq.canAccept && !io.toFpDq.canAccept && io.toLsDq.canAccept)
XSPerfAccumulate("stall_cycle_ls_dq", hasValidInstr && io.enqRob.canAccept && io.toIntDq.canAccept && io.toFpDq.canAccept && !io.toLsDq.canAccept)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(9))
})
val perfEvents = Seq(
("dispatch_in ", PopCount(io.fromRename.map(_.valid & io.fromRename(0).ready)) ),
("dispatch_empty ", !hasValidInstr ),
......@@ -308,8 +305,5 @@ class Dispatch(implicit p: Parameters) extends XSModule {
("dispatch_stall_cycle_fp_dq ", hasValidInstr && io.enqRob.canAccept && io.toIntDq.canAccept && !io.toFpDq.canAccept && io.toLsDq.canAccept ),
("dispatch_stall_cycle_ls_dq ", hasValidInstr && io.enqRob.canAccept && io.toIntDq.canAccept && io.toFpDq.canAccept && !io.toLsDq.canAccept ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -40,10 +40,11 @@ class DispatchQueueIO(enqnum: Int, deqnum: Int)(implicit p: Parameters) extends
}
// dispatch queue: accepts at most enqnum uops from dispatch1 and dispatches deqnum uops at every clock cycle
class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
class DispatchQueue(size: Int, enqnum: Int, deqnum: Int)(implicit p: Parameters)
extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
val io = IO(new DispatchQueueIO(enqnum, deqnum))
val s_invalid :: s_valid:: Nil = Enum(2)
val s_invalid :: s_valid :: Nil = Enum(2)
// queue data array
val dataModule = Module(new SyncDataModuleTemplate(new MicroOp, size, deqnum, enqnum))
......@@ -68,24 +69,24 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
val canActualEnqueue = canEnqueue && !io.redirect.valid
/**
* Part 1: update states and uops when enqueue, dequeue, commit, redirect/replay
*
* uop only changes when a new instruction enqueues.
*
* state changes when
* (1) enqueue: from s_invalid to s_valid
* (2) dequeue: from s_valid to s_dispatched
* (3) commit: from s_dispatched to s_invalid
* (4) redirect (branch misprediction or exception): from any state to s_invalid (flushed)
* (5) redirect (replay): from s_dispatched to s_valid (re-dispatch)
*/
* Part 1: update states and uops when enqueue, dequeue, commit, redirect/replay
*
* uop only changes when a new instruction enqueues.
*
* state changes when
* (1) enqueue: from s_invalid to s_valid
* (2) dequeue: from s_valid to s_dispatched
* (3) commit: from s_dispatched to s_invalid
* (4) redirect (branch misprediction or exception): from any state to s_invalid (flushed)
* (5) redirect (replay): from s_dispatched to s_valid (re-dispatch)
*/
// enqueue: from s_invalid to s_valid
io.enq.canAccept := canEnqueue
dataModule.io.wen := VecInit((0 until enqnum).map(_ => false.B))
dataModule.io.wen := VecInit((0 until enqnum).map(_ => false.B))
dataModule.io.waddr := DontCare
dataModule.io.wdata := VecInit(io.enq.req.map(_.bits))
for (i <- 0 until enqnum) {
when (io.enq.req(i).valid && canActualEnqueue) {
when(io.enq.req(i).valid && canActualEnqueue) {
dataModule.io.wen(i) := true.B
val sel = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
dataModule.io.waddr(i) := tailPtr(sel).value
......@@ -97,10 +98,10 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
// dequeue: from s_valid to s_dispatched
for (i <- 0 until deqnum) {
when (io.deq(i).fire() && !io.redirect.valid) {
when(io.deq(i).fire() && !io.redirect.valid) {
stateEntries(headPtr(i).value) := s_invalid
// XSError(stateEntries(headPtr(i).value) =/= s_valid, "state of the dispatch entry is not s_valid\n")
// XSError(stateEntries(headPtr(i).value) =/= s_valid, "state of the dispatch entry is not s_valid\n")
}
}
......@@ -109,7 +110,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
for (i <- 0 until size) {
needCancel(i) := stateEntries(i) =/= s_invalid && robIdxEntries(i).needFlush(io.redirect)
when (needCancel(i)) {
when(needCancel(i)) {
stateEntries(i) := s_invalid
}
......@@ -119,16 +120,16 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
}
/**
* Part 2: update indices
*
* tail: (1) enqueue; (2) redirect
* head: dequeue
*/
* Part 2: update indices
*
* tail: (1) enqueue; (2) redirect
* head: dequeue
*/
// dequeue
val currentValidCounter = distanceBetween(tailPtr(0), headPtr(0))
val numDeqTry = Mux(currentValidCounter > deqnum.U, deqnum.U, currentValidCounter)
val numDeqFire = PriorityEncoder(io.deq.zipWithIndex.map{case (deq, i) =>
val numDeqFire = PriorityEncoder(io.deq.zipWithIndex.map { case (deq, i) =>
// For dequeue, the first entry should never be s_invalid
// Otherwise, there should be a redirect and tail walks back
// in this case, we set numDeq to 0
......@@ -185,8 +186,8 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
allowEnqueue := Mux(currentValidCounter > (size - enqnum).U, false.B, numEnq <= (size - enqnum).U - currentValidCounter)
/**
* Part 3: set output and input
*/
* Part 3: set output and input
*/
// TODO: remove this when replay moves to rob
dataModule.io.raddr := VecInit(nextHeadPtr.map(_.value))
for (i <- 0 until deqnum) {
......@@ -213,7 +214,7 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
}
XSDebug(false, true.B, "\n")
// XSError(isAfter(headPtr(0), tailPtr(0)), p"assert greaterOrEqualThan(tailPtr: ${tailPtr(0)}, headPtr: ${headPtr(0)}) failed\n")
// XSError(isAfter(headPtr(0), tailPtr(0)), p"assert greaterOrEqualThan(tailPtr: ${tailPtr(0)}, headPtr: ${headPtr(0)}) failed\n")
QueuePerf(size, PopCount(stateEntries.map(_ =/= s_invalid)), !canEnqueue)
io.dqFull := !canEnqueue
XSPerfAccumulate("in", numEnq)
......@@ -222,21 +223,15 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, name: String)(implicit
val fake_block = currentValidCounter <= (size - enqnum).U && !canEnqueue
XSPerfAccumulate("fake_block", fake_block)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(8))
})
val perfEvents = Seq(
("dispatchq_in ", numEnq ),
("dispatchq_out ", PopCount(io.deq.map(_.fire())) ),
("dispatchq_out_try ", PopCount(io.deq.map(_.valid)) ),
("dispatchq_fake_block ", fake_block ),
("dispatchq_1/4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) < (size.U/4.U)) ),
("dispatchq_2/4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U/4.U)) & (PopCount(stateEntries.map(_ =/= s_invalid)) <= (size.U/2.U)) ),
("dispatchq_3/4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U/2.U)) & (PopCount(stateEntries.map(_ =/= s_invalid)) <= (size.U*3.U/4.U)) ),
("dispatchq_4/4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U*3.U/4.U)) ),
("dispatchq_in ", numEnq),
("dispatchq_out ", PopCount(io.deq.map(_.fire()))),
("dispatchq_out_try ", PopCount(io.deq.map(_.valid))),
("dispatchq_fake_block", fake_block),
("dispatchq_1_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) < (size.U / 4.U))),
("dispatchq_2_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U / 4.U)) & (PopCount(stateEntries.map(_ =/= s_invalid)) <= (size.U / 2.U))),
("dispatchq_3_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U / 2.U)) & (PopCount(stateEntries.map(_ =/= s_invalid)) <= (size.U * 3.U / 4.U))),
("dispatchq_4_4_valid ", (PopCount(stateEntries.map(_ =/= s_invalid)) > (size.U * 3.U / 4.U))),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
}
generatePerfEvent()
}
......@@ -59,10 +59,10 @@ class FpuCsrIO extends Bundle {
class PerfCounterIO(implicit p: Parameters) extends XSBundle {
val perfEventsFrontend = (new PerfEventsBundle(numCSRPCntFrontend ))
val perfEventsCtrl = (new PerfEventsBundle(numCSRPCntCtrl ))
val perfEventsLsu = (new PerfEventsBundle(numCSRPCntLsu ))
val perfEventsHc = Vec(numPCntHc * coreParams.L2NBanks,(UInt(6.W)))
val perfEventsFrontend = Vec(numCSRPCntFrontend, new PerfEvent)
val perfEventsCtrl = Vec(numCSRPCntCtrl, new PerfEvent)
val perfEventsLsu = Vec(numCSRPCntLsu, new PerfEvent)
val perfEventsHc = Vec(numPCntHc * coreParams.L2NBanks, new PerfEvent)
val retiredInstr = UInt(3.W)
val frontendInfo = new Bundle {
val ibufFull = Bool()
......@@ -572,26 +572,24 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
perfEventscounten(i) := (Cat(perfEvents(i)(62),perfEvents(i)(61),(perfEvents(i)(61,60))) & priviledgeModeOH).orR
}
val hpmEvents = Wire(new PerfEventsBundle(numPCntHc * coreParams.L2NBanks))
for(i <- 0 until numPCntHc * coreParams.L2NBanks) {
hpmEvents.perf_events(i).incr_step := csrio.perf.perfEventsHc(i)
val hpmEvents = Wire(Vec(numPCntHc * coreParams.L2NBanks, new PerfEvent))
for (i <- 0 until numPCntHc * coreParams.L2NBanks) {
hpmEvents(i) := csrio.perf.perfEventsHc(i)
}
val hpm_hc = Module(new HPerfmonitor(numPCntHc * coreParams.L2NBanks,numCSRPCntHc))
val csrevents = perfEvents.slice(24,29)
hpm_hc.io.hpm_event := csrevents
hpm_hc.io.events_sets := hpmEvents
val csrevents = perfEvents.slice(24, 29)
val hpm_hc = HPerfMonitor(csrevents, hpmEvents)
val mcountinhibit = RegInit(0.U(XLEN.W))
val mcycle = RegInit(0.U(XLEN.W))
mcycle := mcycle + 1.U
val minstret = RegInit(0.U(XLEN.W))
val perf_events = csrio.perf.perfEventsFrontend.perf_events ++
csrio.perf.perfEventsCtrl.perf_events ++
csrio.perf.perfEventsLsu.perf_events ++
hpm_hc.io.events_selected.perf_events
val perf_events = csrio.perf.perfEventsFrontend ++
csrio.perf.perfEventsCtrl ++
csrio.perf.perfEventsLsu ++
hpm_hc.getPerf
minstret := minstret + RegNext(csrio.perf.retiredInstr)
for(i <- 0 until 29){
perfCnts(i) := Mux((mcountinhibit(i+3) | !perfEventscounten(i)), perfCnts(i), (perfCnts(i) + perf_events(i).incr_step))
perfCnts(i) := Mux(mcountinhibit(i+3) | !perfEventscounten(i), perfCnts(i), perfCnts(i) + perf_events(i).value)
}
// CSR reg map
......
......@@ -126,15 +126,13 @@ class ReservationStationWrapper(implicit p: Parameters) extends LazyModule with
def wbFpPriority: Int = params.exuCfg.get.wbFpPriority
override def toString: String = params.toString
// for better timing, we limits the size of RS to 2-deq
val maxRsDeq = 2
def numRS = (params.numDeq + (maxRsDeq - 1)) / maxRsDeq
lazy val module = new LazyModuleImp(this) {
// for better timing, we limits the size of RS to 2-deq
val maxRsDeq = 2
// split rs to 2-deq
lazy val module = new LazyModuleImp(this) with HasPerfEvents {
require(params.numEnq < params.numDeq || params.numEnq % params.numDeq == 0)
require(params.numEntries % params.numDeq == 0)
val numRS = (params.numDeq + (maxRsDeq - 1)) / maxRsDeq
val rs = (0 until numRS).map(i => {
val numDeq = Seq(params.numDeq - maxRsDeq * i, maxRsDeq).min
val numEnq = params.numEnq / numRS
......@@ -154,7 +152,6 @@ class ReservationStationWrapper(implicit p: Parameters) extends LazyModule with
)
})
val io = IO(new ReservationStationIO(params)(updatedP))
val perf = IO(Vec(rs.length, Output(new RsPerfCounter)))
rs.foreach(_.io.redirect <> io.redirect)
io.numExist <> rs.map(_.io.numExist).reduce(_ +& _)
......@@ -185,7 +182,9 @@ class ReservationStationWrapper(implicit p: Parameters) extends LazyModule with
if (io.fmaMid.isDefined) {
io.fmaMid.get <> rs.flatMap(_.io.fmaMid.get)
}
perf <> rs.map(_.perf)
val perfEvents = rs.flatMap(_.getPerfEvents)
generatePerfEvent()
}
var fastWakeupIdx = 0
......@@ -237,13 +236,8 @@ class ReservationStationIO(params: RSParams)(implicit p: Parameters) extends XSB
new ReservationStationIO(params).asInstanceOf[this.type]
}
class RsPerfCounter(implicit p: Parameters) extends XSBundle {
val full = Bool()
}
class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSModule {
class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new ReservationStationIO(params))
val perf = IO(Output(new RsPerfCounter))
val statusArray = Module(new StatusArray(params))
val select = Module(new SelectPolicy(params))
......@@ -253,7 +247,10 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
val s2_deq = Wire(io.deq.cloneType)
io.numExist := PopCount(statusArray.io.isValid)
perf.full := RegNext(statusArray.io.isValid.andR)
val perfEvents = Seq(("full", statusArray.io.isValid.andR))
generatePerfEvent()
statusArray.io.redirect := io.redirect
/**
......
......@@ -27,7 +27,7 @@ class BusyTableReadIO(implicit p: Parameters) extends XSBundle {
val resp = Output(Bool())
}
class BusyTable(numReadPorts: Int, numWritePorts: Int)(implicit p: Parameters) extends XSModule {
class BusyTable(numReadPorts: Int, numWritePorts: Int)(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new Bundle() {
// set preg state to busy
val allocPregs = Vec(RenameWidth, Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
......@@ -62,17 +62,12 @@ class BusyTable(numReadPorts: Int, numWritePorts: Int)(implicit p: Parameters) e
}
XSPerfAccumulate("busy_count", PopCount(table))
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(4))
})
val perfEvents = Seq(
("std_freelist_1/4_valid ", (PopCount(table) < (NRPhyRegs.U/4.U)) ),
("std_freelist_2/4_valid ", (PopCount(table) > (NRPhyRegs.U/4.U)) & (PopCount(table) <= (NRPhyRegs.U/2.U)) ),
("std_freelist_3/4_valid ", (PopCount(table) > (NRPhyRegs.U/2.U)) & (PopCount(table) <= (NRPhyRegs.U*3.U/4.U))),
("std_freelist_4/4_valid ", (PopCount(table) > (NRPhyRegs.U*3.U/4.U)) ),
("std_freelist_1_4_valid", (PopCount(table) < (NRPhyRegs.U/4.U)) ),
("std_freelist_2_4_valid", (PopCount(table) > (NRPhyRegs.U/4.U)) & (PopCount(table) <= (NRPhyRegs.U/2.U)) ),
("std_freelist_3_4_valid", (PopCount(table) > (NRPhyRegs.U/2.U)) & (PopCount(table) <= (NRPhyRegs.U*3.U/4.U))),
("std_freelist_4_4_valid", (PopCount(table) > (NRPhyRegs.U*3.U/4.U)) )
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -25,7 +25,7 @@ import xiangshan.backend.rob.RobPtr
import xiangshan.backend.rename.freelist._
import xiangshan.mem.mdp._
class Rename(implicit p: Parameters) extends XSModule {
class Rename(implicit p: Parameters) extends XSModule with HasPerfEvents {
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
val robCommits = Flipped(new RobCommitIO)
......@@ -300,28 +300,16 @@ class Rename(implicit p: Parameters) extends XSModule {
XSPerfAccumulate("move_instr_count", PopCount(io.out.map(out => out.fire() && out.bits.ctrl.isMove)))
val intfl_perf = intFreeList.perfEvents.map(_._1).zip(intFreeList.perfinfo.perfEvents.perf_events)
val fpfl_perf = fpFreeList.perfEvents.map(_._1).zip(fpFreeList.perfinfo.perfEvents.perf_events)
val perf_list = Wire(new PerfEventsBundle(6))
val perf_seq = Seq(
("rename_in ", PopCount(io.in.map(_.valid & io.in(0).ready )) ),
("rename_waitinstr ", PopCount((0 until RenameWidth).map(i => io.in(i).valid && !io.in(i).ready)) ),
("rename_stall_cycle_dispatch ", hasValid && !io.out(0).ready && fpFreeList.io.canAllocate && intFreeList.io.canAllocate && !io.robCommits.isWalk ),
("rename_stall_cycle_fp ", hasValid && io.out(0).ready && !fpFreeList.io.canAllocate && intFreeList.io.canAllocate && !io.robCommits.isWalk ),
("rename_stall_cycle_int ", hasValid && io.out(0).ready && fpFreeList.io.canAllocate && !intFreeList.io.canAllocate && !io.robCommits.isWalk ),
("rename_stall_cycle_walk ", hasValid && io.out(0).ready && fpFreeList.io.canAllocate && intFreeList.io.canAllocate && io.robCommits.isWalk ),
)
for (((perf_out,(perf_name,perf)),i) <- perf_list.perf_events.zip(perf_seq).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
val perfEvents_list = perf_list.perf_events ++
intFreeList.asInstanceOf[freelist.MEFreeList].perfinfo.perfEvents.perf_events ++
fpFreeList.perfinfo.perfEvents.perf_events
val perfEvents = perf_seq ++ intfl_perf ++ fpfl_perf
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(perfEvents_list.length))
})
perfinfo.perfEvents.perf_events := perfEvents_list
val renamePerf = Seq(
("rename_in ", PopCount(io.in.map(_.valid & io.in(0).ready )) ),
("rename_waitinstr ", PopCount((0 until RenameWidth).map(i => io.in(i).valid && !io.in(i).ready)) ),
("rename_stall_cycle_dispatch", hasValid && !io.out(0).ready && fpFreeList.io.canAllocate && intFreeList.io.canAllocate && !io.robCommits.isWalk),
("rename_stall_cycle_fp ", hasValid && io.out(0).ready && !fpFreeList.io.canAllocate && intFreeList.io.canAllocate && !io.robCommits.isWalk),
("rename_stall_cycle_int ", hasValid && io.out(0).ready && fpFreeList.io.canAllocate && !intFreeList.io.canAllocate && !io.robCommits.isWalk),
("rename_stall_cycle_walk ", hasValid && io.out(0).ready && fpFreeList.io.canAllocate && intFreeList.io.canAllocate && io.robCommits.isWalk)
)
val intFlPerf = intFreeList.getPerfEvents
val fpFlPerf = fpFreeList.getPerfEvents
val perfEvents = renamePerf ++ intFlPerf ++ fpFlPerf
generatePerfEvent()
}
......@@ -23,7 +23,7 @@ import xiangshan._
import utils._
class MEFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) {
class MEFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) with HasPerfEvents {
val freeList = Mem(size, UInt(PhyRegIdxWidth.W))
// head and tail pointer
......@@ -68,17 +68,11 @@ class MEFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) {
val freeRegCnt = Mux(doRename, distanceBetween(tailPtrNext, headPtrNext), distanceBetween(tailPtrNext, headPtr))
io.canAllocate := RegNext(freeRegCnt) >= RenameWidth.U
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(4))
})
val perfEvents = Seq(
("me_freelist_1/4_valid ", (freeRegCnt < ((NRPhyRegs-32).U/4.U)) ),
("me_freelist_2/4_valid ", (freeRegCnt > ((NRPhyRegs-32).U/4.U)) & (freeRegCnt <= ((NRPhyRegs-32).U/2.U)) ),
("me_freelist_3/4_valid ", (freeRegCnt > ((NRPhyRegs-32).U/2.U)) & (freeRegCnt <= ((NRPhyRegs-32).U*3.U/4.U))),
("me_freelist_4/4_valid ", (freeRegCnt > ((NRPhyRegs-32).U*3.U/4.U)) ),
("me_freelist_1_4_valid", (freeRegCnt < ((NRPhyRegs-32).U/4.U)) ),
("me_freelist_2_4_valid", (freeRegCnt > ((NRPhyRegs-32).U/4.U)) & (freeRegCnt <= ((NRPhyRegs-32).U/2.U)) ),
("me_freelist_3_4_valid", (freeRegCnt > ((NRPhyRegs-32).U/2.U)) & (freeRegCnt <= ((NRPhyRegs-32).U*3.U/4.U))),
("me_freelist_4_4_valid", (freeRegCnt > ((NRPhyRegs-32).U*3.U/4.U)) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -23,7 +23,7 @@ import xiangshan._
import utils._
class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) {
class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) with HasPerfEvents {
val freeList = RegInit(VecInit(Seq.tabulate(size)( i => (i + 32).U(PhyRegIdxWidth.W) )))
val headPtr = RegInit(FreeListPtr(false.B, 0.U))
......@@ -84,17 +84,12 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size)
XSPerfAccumulate("utilization", freeRegCnt)
XSPerfAccumulate("allocation_blocked", !io.canAllocate)
XSPerfAccumulate("can_alloc_wrong", !io.canAllocate && freeRegCnt >= RenameWidth.U)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(4))
})
val perfEvents = Seq(
("std_freelist_1_4_valid", (freeRegCnt < (size / 4).U) ),
("std_freelist_2_4_valid", (freeRegCnt > (size / 4).U) & (freeRegCnt <= (size / 2).U) ),
("std_freelist_3_4_valid", (freeRegCnt > (size / 2).U) & (freeRegCnt <= (size * 3 / 4).U)),
("std_freelist_4_4_valid", (freeRegCnt > (size * 3 / 4).U) )
)
for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -277,7 +277,7 @@ class Rob(implicit p: Parameters) extends LazyModule with HasWritebackSink with
}
class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
with HasXSParameter with HasCircularQueuePtrHelper {
with HasXSParameter with HasCircularQueuePtrHelper with HasPerfEvents {
val wbExuConfigs = outer.writebackSinksParams.map(_.exuConfigs)
val numWbPorts = wbExuConfigs.map(_.length)
......@@ -1094,9 +1094,6 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
difftest.io.instrCnt := instrCnt
}
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(18))
})
val perfEvents = Seq(
("rob_interrupt_num ", io.flushOut.valid && intrEnable ),
("rob_exception_num ", io.flushOut.valid && exceptionEnable ),
......@@ -1112,13 +1109,10 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer)
("rob_commitInstrStore ", ifCommit(PopCount(io.commits.valid.zip(commitIsStore).map{ case (v, t) => v && t })) ),
("rob_walkInstr ", Mux(io.commits.isWalk, PopCount(io.commits.valid), 0.U) ),
("rob_walkCycle ", (state === s_walk || state === s_extrawalk) ),
("rob_1/4_valid ", (PopCount((0 until RobSize).map(valid(_))) < (RobSize.U/4.U)) ),
("rob_2/4_valid ", (PopCount((0 until RobSize).map(valid(_))) > (RobSize.U/4.U)) & (PopCount((0 until RobSize).map(valid(_))) <= (RobSize.U/2.U)) ),
("rob_3/4_valid ", (PopCount((0 until RobSize).map(valid(_))) > (RobSize.U/2.U)) & (PopCount((0 until RobSize).map(valid(_))) <= (RobSize.U*3.U/4.U))),
("rob_4/4_valid ", (PopCount((0 until RobSize).map(valid(_))) > (RobSize.U*3.U/4.U)) ),
("rob_1_4_valid ", (PopCount((0 until RobSize).map(valid(_))) < (RobSize.U/4.U)) ),
("rob_2_4_valid ", (PopCount((0 until RobSize).map(valid(_))) > (RobSize.U/4.U)) & (PopCount((0 until RobSize).map(valid(_))) <= (RobSize.U/2.U)) ),
("rob_3_4_valid ", (PopCount((0 until RobSize).map(valid(_))) > (RobSize.U/2.U)) & (PopCount((0 until RobSize).map(valid(_))) <= (RobSize.U*3.U/4.U))),
("rob_4_4_valid ", (PopCount((0 until RobSize).map(valid(_))) > (RobSize.U*3.U/4.U)) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -385,7 +385,7 @@ class DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParamete
}
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters {
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasPerfEvents {
val io = IO(new DCacheIO)
......@@ -703,22 +703,8 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
}
XSPerfAccumulate("access_early_replace", PopCount(Cat(access_early_replace)))
val wb_perf = wb.perfEvents.map(_._1).zip(wb.perfinfo.perfEvents.perf_events)
val mainp_perf = mainPipe.perfEvents.map(_._1).zip(mainPipe.perfinfo.perfEvents.perf_events)
val missq_perf = missQueue.perfEvents.map(_._1).zip(missQueue.perfinfo.perfEvents.perf_events)
val probq_perf = probeQueue.perfEvents.map(_._1).zip(probeQueue.perfinfo.perfEvents.perf_events)
val ldu_0_perf = ldu(0).perfEvents.map(_._1).zip(ldu(0).perfinfo.perfEvents.perf_events)
val ldu_1_perf = ldu(1).perfEvents.map(_._1).zip(ldu(1).perfinfo.perfEvents.perf_events)
val perfEvents = wb_perf ++ mainp_perf ++ missq_perf ++ probq_perf ++ ldu_0_perf ++ ldu_1_perf
val perflist = wb.perfinfo.perfEvents.perf_events ++ mainPipe.perfinfo.perfEvents.perf_events ++
missQueue.perfinfo.perfEvents.perf_events ++ probeQueue.perfinfo.perfEvents.perf_events ++
ldu(0).perfinfo.perfEvents.perf_events ++ ldu(1).perfinfo.perfEvents.perf_events
val perf_length = perflist.length
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(perflist.length))
})
perfinfo.perfEvents.perf_events := perflist
val perfEvents = (Seq(wb, mainPipe, missQueue, probeQueue) ++ ldu).flatMap(_.getPerfEvents)
generatePerfEvent()
}
class AMOHelper() extends ExtModule {
......@@ -740,20 +726,18 @@ class DCacheWrapper()(implicit p: Parameters) extends LazyModule with HasXSParam
clientNode := dcache.clientNode
}
lazy val module = new LazyModuleImp(this) {
lazy val module = new LazyModuleImp(this) with HasPerfEvents {
val io = IO(new DCacheIO)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(dcache.asInstanceOf[DCache].module.perf_length))
})
val perfEvents = dcache.asInstanceOf[DCache].module.perfEvents.map(_._1).zip(dcache.asInstanceOf[DCache].module.perfinfo.perfEvents.perf_events)
if (!useDcache) {
val perfEvents = if (!useDcache) {
// a fake dcache which uses dpi-c to access memory, only for debug usage!
val fake_dcache = Module(new FakeDCache())
io <> fake_dcache.io
Seq()
}
else {
io <> dcache.module.io
perfinfo := dcache.asInstanceOf[DCache].module.perfinfo
dcache.module.getPerfEvents
}
generatePerfEvent()
}
}
......@@ -20,9 +20,9 @@ import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.ClientMetadata
import utils.{XSDebug, XSPerfAccumulate, PerfEventsBundle}
import utils.{HasPerfEvents, XSDebug, XSPerfAccumulate}
class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule {
class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
def metaBits = (new Meta).getWidth
def encMetaBits = cacheParams.tagCode.width((new MetaAndTag).getWidth) - tagBits
def getMeta(encMeta: UInt): UInt = {
......@@ -300,18 +300,12 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule {
XSPerfAccumulate("actual_ld_fast_wakeup", s1_fire && s1_tag_match && !io.disable_ld_fast_wakeup)
XSPerfAccumulate("ideal_ld_fast_wakeup", io.banked_data_read.fire() && s1_tag_match)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(5))
})
val perfEvents = Seq(
("load_req ", io.lsu.req.fire() ),
("load_replay ", io.lsu.resp.fire() && resp.bits.replay ),
("load_replay_for_data_nack ", io.lsu.resp.fire() && resp.bits.replay && s2_nack_data ),
("load_replay_for_no_mshr ", io.lsu.resp.fire() && resp.bits.replay && s2_nack_no_mshr ),
("load_replay_for_conflict ", io.lsu.resp.fire() && resp.bits.replay && io.bank_conflict_slow ),
("load_req ", io.lsu.req.fire() ),
("load_replay ", io.lsu.resp.fire() && resp.bits.replay ),
("load_replay_for_data_nack", io.lsu.resp.fire() && resp.bits.replay && s2_nack_data ),
("load_replay_for_no_mshr ", io.lsu.resp.fire() && resp.bits.replay && s2_nack_no_mshr ),
("load_replay_for_conflict ", io.lsu.resp.fire() && resp.bits.replay && io.bank_conflict_slow ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -84,7 +84,7 @@ class MainPipeReq(implicit p: Parameters) extends DCacheBundle {
}
}
class MainPipe(implicit p: Parameters) extends DCacheModule {
class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
val metaBits = (new Meta).getWidth
val encMetaBits = cacheParams.tagCode.width((new MetaAndTag).getWidth) - tagBits
......@@ -669,15 +669,9 @@ class MainPipe(implicit p: Parameters) extends DCacheModule {
io.status.s3.bits.set := s3_idx
io.status.s3.bits.way_en := s3_way_en
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(2))
})
val perfEvents = Seq(
("dcache_mp_req ", s0_fire ),
("dcache_mp_total_penalty ", (PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid)))) ),
("dcache_mp_req ", s0_fire ),
("dcache_mp_total_penalty", PopCount(VecInit(Seq(s0_fire, s1_valid, s2_valid, s3_valid))))
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -443,7 +443,7 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
XSPerfHistogram("a_to_d_penalty", a_to_d_penalty, a_to_d_penalty_sample, 20, 100, 10, true, false)
}
class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
val io = IO(new Bundle {
val hartId = Input(UInt(8.W))
val req = Flipped(DecoupledIO(new MissReq))
......@@ -582,18 +582,13 @@ class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
QueuePerf(cfg.nMissEntries, num_valids, num_valids === cfg.nMissEntries.U)
io.full := num_valids === cfg.nMissEntries.U
XSPerfHistogram("num_valids", num_valids, true.B, 0, cfg.nMissEntries, 1)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(5))
})
val perfEvents = Seq(
("dcache_missq_req ", io.req.fire() ),
("dcache_missq_1/4_valid ", (PopCount(entries.map(entry => (!entry.io.primary_ready))) < (cfg.nMissEntries.U/4.U)) ),
("dcache_missq_2/4_valid ", (PopCount(entries.map(entry => (!entry.io.primary_ready))) > (cfg.nMissEntries.U/4.U)) & (PopCount(entries.map(entry => (!entry.io.primary_ready))) <= (cfg.nMissEntries.U/2.U)) ),
("dcache_missq_3/4_valid ", (PopCount(entries.map(entry => (!entry.io.primary_ready))) > (cfg.nMissEntries.U/2.U)) & (PopCount(entries.map(entry => (!entry.io.primary_ready))) <= (cfg.nMissEntries.U*3.U/4.U))),
("dcache_missq_4/4_valid ", (PopCount(entries.map(entry => (!entry.io.primary_ready))) > (cfg.nMissEntries.U*3.U/4.U)) ),
("dcache_missq_req ", io.req.fire() ),
("dcache_missq_1_4_valid", (PopCount(entries.map(entry => (!entry.io.primary_ready))) < (cfg.nMissEntries.U/4.U)) ),
("dcache_missq_2_4_valid", (PopCount(entries.map(entry => (!entry.io.primary_ready))) > (cfg.nMissEntries.U/4.U)) & (PopCount(entries.map(entry => (!entry.io.primary_ready))) <= (cfg.nMissEntries.U/2.U)) ),
("dcache_missq_3_4_valid", (PopCount(entries.map(entry => (!entry.io.primary_ready))) > (cfg.nMissEntries.U/2.U)) & (PopCount(entries.map(entry => (!entry.io.primary_ready))) <= (cfg.nMissEntries.U*3.U/4.U))),
("dcache_missq_4_4_valid", (PopCount(entries.map(entry => (!entry.io.primary_ready))) > (cfg.nMissEntries.U*3.U/4.U)) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -19,10 +19,8 @@ package xiangshan.cache
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
......@@ -128,7 +126,7 @@ class ProbeEntry(implicit p: Parameters) extends DCacheModule {
XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
val io = IO(new Bundle {
val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
......@@ -231,18 +229,13 @@ class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule w
when (io.lrsc_locked_block.valid) {
XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
}
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(5))
})
val perfEvents = Seq(
("dcache_probq_req ", io.pipe_req.fire() ),
("dcache_probq_1/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nProbeEntries.U/4.U)) ),
("dcache_probq_2/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U/2.U)) ),
("dcache_probq_3/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U*3.U/4.U))),
("dcache_probq_4/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U*3.U/4.U)) ),
("dcache_probq_req ", io.pipe_req.fire() ),
("dcache_probq_1_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nProbeEntries.U/4.U)) ),
("dcache_probq_2_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U/2.U)) ),
("dcache_probq_3_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U*3.U/4.U))),
("dcache_probq_4_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U*3.U/4.U)) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -19,10 +19,10 @@ package xiangshan.cache
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import freechips.rocketchip.tilelink.TLPermissions._
import huancun.{DirtyField, DirtyKey}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import huancun.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}
class WritebackReq(implicit p: Parameters) extends DCacheBundle {
val addr = UInt(PAddrBits.W)
......@@ -379,7 +379,7 @@ class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModu
XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}
class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump {
class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(new WritebackReq))
val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
......@@ -475,18 +475,12 @@ class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModu
// performance counters
XSPerfAccumulate("wb_req", io.req.fire())
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(5))
})
val perfEvents = Seq(
("dcache_wbq_req ", io.req.fire() ),
("dcache_wbq_1/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U/4.U)) ),
("dcache_wbq_2/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U/2.U)) ),
("dcache_wbq_3/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U*3.U/4.U)) ),
("dcache_wbq_4/4_valid ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U*3.U/4.U)) ),
("dcache_wbq_req ", io.req.fire() ),
("dcache_wbq_1_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U/4.U)) ),
("dcache_wbq_2_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U/2.U)) ),
("dcache_wbq_3_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U*3.U/4.U)) ),
("dcache_wbq_4_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U*3.U/4.U)) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -71,7 +71,7 @@ class L2TlbMQIO(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwConst
}
@chiselName
class L2TlbMissQueue(implicit p: Parameters) extends XSModule with HasPtwConst {
class L2TlbMissQueue(implicit p: Parameters) extends XSModule with HasPtwConst with HasPerfEvents {
require(MSHRSize >= (2 + l2tlbParams.filterSize))
val io = IO(new L2TlbMQIO())
......@@ -226,17 +226,11 @@ class L2TlbMissQueue(implicit p: Parameters) extends XSModule with HasPtwConst {
TimeOutAssert(state(i) =/= state_idle, timeOutThreshold, s"missqueue time out no out ${i}")
}
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(4))
})
val perfEvents = Seq(
("tlbmissq_incount ", io.in.fire() ),
("tlbmissq_inblock ", io.in.valid && !io.in.ready),
("tlbmissq_memcount ", io.mem.req.fire() ),
("tlbmissq_memcycle ", PopCount(is_waiting) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -677,7 +677,6 @@ class PtwIO(implicit p: Parameters) extends PtwBundle {
val tlb = Input(new TlbCsrBundle)
val distribute_csr = Flipped(new DistributedCSRIO)
}
val perfEvents = Output(new PerfEventsBundle(numPCntPtw))
}
class L2TlbMemReqBundle(implicit p: Parameters) extends PtwBundle {
......
......@@ -42,7 +42,7 @@ class PTW()(implicit p: Parameters) extends LazyModule with HasPtwConst {
}
@chiselName
class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) with HasCSRConst {
class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) with HasCSRConst with HasPerfEvents {
val (mem, edge) = outer.node.out.head
......@@ -327,22 +327,8 @@ class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) with H
}
if(print_perfcounter){
val missq_perf = missQueue.perfEvents.map(_._1).zip(missQueue.perfinfo.perfEvents.perf_events)
val cache_perf = cache.perfEvents.map(_._1).zip(cache.perfinfo.perfEvents.perf_events)
val fsm_perf = fsm.perfEvents.map(_._1).zip(fsm.perfinfo.perfEvents.perf_events)
val perfEvents = missq_perf ++ cache_perf ++ fsm_perf
for (((perf_name,perf),i) <- perfEvents.zipWithIndex) {
println(s"ptw perf $i: $perf_name")
}
}
val perf_list = missQueue.perfinfo.perfEvents.perf_events ++ cache.perfinfo.perfEvents.perf_events ++ fsm.perfinfo.perfEvents.perf_events
val perf_length = perf_list.length
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(perf_list.length))
})
perfinfo.perfEvents.perf_events := perf_list
val perfEvents = Seq(missQueue, cache, fsm).flatMap(_.getPerfEvents)
generatePerfEvent()
}
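Parent modules aggregate the counters of their children in the same way: each child already registers its own events, and the parent re-exports them through its own perfEvents sequence, as PTWImp does above with missQueue, cache and fsm. A hedged sketch of this aggregation, reusing the hypothetical DummyQueue from the earlier sketch:

class ParentWithChildren(implicit p: Parameters) extends XSModule with HasPerfEvents {
  // subA and subB are hypothetical submodules that also mix in HasPerfEvents.
  val subA = Module(new DummyQueue)
  val subB = Module(new DummyQueue)
  // getPerfEvents pairs each child event name with the child's registered io_perf value.
  val perfEvents = Seq(subA, subB).flatMap(_.getPerfEvents)
  generatePerfEvent()
}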
class PTEHelper() extends ExtModule {
......@@ -389,18 +375,17 @@ class PTWWrapper()(implicit p: Parameters) extends LazyModule with HasXSParamete
node := ptw.node
}
lazy val module = new LazyModuleImp(this) {
lazy val module = new LazyModuleImp(this) with HasPerfEvents {
val io = IO(new PtwIO)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(ptw.asInstanceOf[PTW].module.perf_length))
})
if(useSoftPTW) {
val perfEvents = if (useSoftPTW) {
val fake_ptw = Module(new FakePTW())
io <> fake_ptw.io
Seq()
}
else {
io <> ptw.module.io
perfinfo := ptw.asInstanceOf[PTW].module.perfinfo
ptw.module.getPerfEvents
}
generatePerfEvent()
}
}
......@@ -85,7 +85,7 @@ class PtwCacheIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwCo
}
@chiselName
class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst {
class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst with HasPerfEvents {
val io = IO(new PtwCacheIO)
val ecc = Code.fromString(l2tlbParams.ecc)
......@@ -659,9 +659,6 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst {
XSDebug(RegNext(sfence.valid), p"[sfence] l3g:${Binary(l3g)}\n")
XSDebug(RegNext(sfence.valid), p"[sfence] spv:${Binary(spv)}\n")
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(8))
})
val perfEvents = Seq(
("access ", base_valid_access_0 ),
("l1_hit ", l1Hit ),
......@@ -672,8 +669,5 @@ class PtwCache()(implicit p: Parameters) extends XSModule with HasPtwConst {
("rwHarzad ", io.req.valid && !io.req.ready ),
("out_blocked ", io.resp.valid && !io.resp.ready),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -59,7 +59,7 @@ class PtwFsmIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwCons
}
@chiselName
class PtwFsm()(implicit p: Parameters) extends XSModule with HasPtwConst {
class PtwFsm()(implicit p: Parameters) extends XSModule with HasPtwConst with HasPerfEvents {
val io = IO(new PtwFsmIO)
val sfence = io.sfence
......@@ -193,9 +193,6 @@ class PtwFsm()(implicit p: Parameters) extends XSModule with HasPtwConst {
TimeOutAssert(state =/= s_idle, timeOutThreshold, "page table walker time out")
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(7))
})
val perfEvents = Seq(
("fsm_count ", io.req.fire() ),
("fsm_busy ", state =/= s_idle ),
......@@ -205,8 +202,5 @@ class PtwFsm()(implicit p: Parameters) extends XSModule with HasPtwConst {
("mem_cycle ", BoolStopWatch(mem.req.fire, mem.resp.fire(), true)),
("mem_blocked ", mem.req.valid && !mem.req.ready ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -29,7 +29,7 @@ import xiangshan.backend.fu.util.HasCSRConst
@chiselName
class TLB(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModule with HasCSRConst {
class TLB(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModule with HasCSRConst with HasPerfEvents {
val io = IO(new TlbIO(Width, q))
require(q.superAssociative == "fa")
......@@ -276,26 +276,19 @@ class TLB(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModul
// // NOTE: just for simple tlb debug, comment it after tlb's debug
// assert(!io.ptw.resp.valid || io.ptw.resp.bits.entry.tag === io.ptw.resp.bits.entry.ppn, "Simple tlb debug requires vpn === ppn")
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(2))
})
if(!q.shouldBlock) {
val perfEvents = Seq(
("access ", PopCount((0 until Width).map(i => vmEnable && validRegVec(i))) ),
("miss ", PopCount((0 until Width).map(i => vmEnable && validRegVec(i) && missVec(i))) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
} else {
val perfEvents = Seq(
("access ", PopCount((0 until Width).map(i => io.requestor(i).req.fire())) ),
("miss ", PopCount((0 until Width).map(i => ptw.req(i).fire())) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
}
val perfEvents = if(!q.shouldBlock) {
Seq(
("access", PopCount((0 until Width).map(i => vmEnable && validRegVec(i))) ),
("miss ", PopCount((0 until Width).map(i => vmEnable && validRegVec(i) && missVec(i)))),
)
} else {
Seq(
("access", PopCount((0 until Width).map(i => io.requestor(i).req.fire()))),
("miss ", PopCount((0 until Width).map(i => ptw.req(i).fire())) ),
)
}
generatePerfEvent()
}
class TlbReplace(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModule {
......
......@@ -18,10 +18,8 @@ package xiangshan.cache.mmu
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import chisel3.experimental.chiselName
import freechips.rocketchip.util.SRAMAnnotation
import xiangshan._
import chisel3.util._
import utils._
@chiselName
......@@ -34,7 +32,7 @@ class TLBFA(
saveLevel: Boolean = false,
normalPage: Boolean,
superPage: Boolean
)(implicit p: Parameters) extends TlbModule{
)(implicit p: Parameters) extends TlbModule with HasPerfEvents {
require(!(sameCycle && saveLevel))
val io = IO(new TlbStorageIO(nSets, nWays, ports))
......@@ -143,17 +141,11 @@ class TLBFA(
XSPerfAccumulate(s"refill${i}", io.w.valid && io.w.bits.wayIdx === i.U)
}
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(2))
})
val perfEvents = Seq(
("tlbstore_access ", io.r.resp.map(_.valid.asUInt()).fold(0.U)(_ + _) ),
("tlbstore_hit ", io.r.resp.map(a => a.valid && a.bits.hit).fold(0.U)(_.asUInt() + _.asUInt())),
("tlbstore_access", io.r.resp.map(_.valid.asUInt()).fold(0.U)(_ + _) ),
("tlbstore_hit ", io.r.resp.map(a => a.valid && a.bits.hit).fold(0.U)(_.asUInt() + _.asUInt())),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
println(s"tlb_fa: nSets${nSets} nWays:${nWays}")
}
......
......@@ -295,7 +295,7 @@ class FakeBPU(implicit p: Parameters) extends XSModule with HasBPUConst {
}
@chiselName
class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents {
val io = IO(new PredictorIO)
val predictors = Module(if (useBPD) new Composer else new FakePredictor)
......@@ -658,10 +658,6 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst {
XSPerfAccumulate("s2_redirect", s2_redirect)
XSPerfAccumulate("s3_redirect", s3_redirect)
val perfEvents = predictors.asInstanceOf[Composer].perfEvents.map(_._1).zip(predictors.asInstanceOf[Composer].perfinfo.perfEvents.perf_events)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(predictors.asInstanceOf[Composer].perfinfo.perfEvents.perf_events.length))
})
perfinfo.perfEvents := predictors.asInstanceOf[Composer].perfinfo.perfEvents
val perfEvents = predictors.asInstanceOf[Composer].getPerfEvents
generatePerfEvent()
}
......@@ -24,7 +24,7 @@ import xiangshan._
import utils._
@chiselName
class Composer(implicit p: Parameters) extends BasePredictor with HasBPUConst {
class Composer(implicit p: Parameters) extends BasePredictor with HasBPUConst with HasPerfEvents {
val (components, resp) = getBPDComponents(io.in.bits.resp_in(0), p)
io.out.resp := resp
......@@ -77,15 +77,9 @@ class Composer(implicit p: Parameters) extends BasePredictor with HasBPUConst {
override def getFoldedHistoryInfo = Some(components.map(_.getFoldedHistoryInfo.getOrElse(Set())).reduce(_++_))
val comp_1_perf = components(1).asInstanceOf[MicroBTB].perfEvents.map(_._1).zip(components(1).asInstanceOf[MicroBTB].perfinfo.perfEvents.perf_events)
val comp_2_perf = components(2).asInstanceOf[Tage_SC].perfEvents.map(_._1).zip(components(2).asInstanceOf[Tage_SC].perfinfo.perfEvents.perf_events)
val comp_3_perf = components(3).asInstanceOf[FTB].perfEvents.map(_._1).zip(components(3).asInstanceOf[FTB].perfinfo.perfEvents.perf_events)
val comp_1_perf = components(1).asInstanceOf[MicroBTB].getPerfEvents
val comp_2_perf = components(2).asInstanceOf[Tage_SC].getPerfEvents
val comp_3_perf = components(3).asInstanceOf[FTB].getPerfEvents
val perfEvents = comp_1_perf ++ comp_2_perf ++ comp_3_perf
val perf_list = components(1).asInstanceOf[MicroBTB].perfinfo.perfEvents.perf_events ++
components(2).asInstanceOf[Tage_SC].perfinfo.perfEvents.perf_events ++
components(3).asInstanceOf[FTB].perfinfo.perfEvents.perf_events
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(perf_list.length))
})
perfinfo.perfEvents.perf_events := perf_list
generatePerfEvent()
}
......@@ -265,7 +265,8 @@ object FTBMeta {
// }
// }
class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUUtils with HasCircularQueuePtrHelper {
class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUUtils
with HasCircularQueuePtrHelper with HasPerfEvents {
override val meta_size = WireInit(0.U.asTypeOf(new FTBMeta)).getWidth
val ftbAddr = new TableAddr(log2Up(numSets), 1)
......@@ -503,15 +504,9 @@ class FTB(implicit p: Parameters) extends BasePredictor with FTBParams with BPUU
XSPerfAccumulate("ftb_update_ignored", io.update.valid && io.update.bits.old_entry)
XSPerfAccumulate("ftb_updated", u_valid)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(2))
})
val perfEvents = Seq(
("ftb_commit_hits ", u_valid && update.preds.hit),
("ftb_commit_misses ", u_valid && !update.preds.hit),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -37,6 +37,7 @@ class Frontend()(implicit p: Parameters) extends LazyModule with HasXSParameter{
class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
with HasXSParameter
with HasPerfEvents
{
val io = IO(new Bundle() {
val fencei = Input(Bool())
......@@ -64,10 +65,8 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
val ibuffer = Module(new Ibuffer)
val ftq = Module(new Ftq)
//PFEvent
val pfevent = Module(new PFEvent)
val tlbCsr = RegNext(io.tlbCsr)
pfevent.io.distribute_csr := io.csrCtrl.distribute_csr
// trigger
ifu.io.frontendTrigger := io.csrCtrl.frontend_trigger
......@@ -137,29 +136,12 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
XSPerfAccumulate("FrontendBubble", frontendBubble)
io.frontendInfo.ibufFull := RegNext(ibuffer.io.full)
if(print_perfcounter){
val ifu_perf = ifu.perfEvents.map(_._1).zip(ifu.perfinfo.perfEvents.perf_events)
val ibuffer_perf = ibuffer.perfEvents.map(_._1).zip(ibuffer.perfinfo.perfEvents.perf_events)
val icache_perf = icache.perfEvents.map(_._1).zip(icache.perfinfo.perfEvents.perf_events)
val ftq_perf = ftq.perfEvents.map(_._1).zip(ftq.perfinfo.perfEvents.perf_events)
val bpu_perf = bpu.perfEvents.map(_._1).zip(bpu.perfinfo.perfEvents.perf_events)
val perfEvents = ifu_perf ++ ibuffer_perf ++ icache_perf ++ ftq_perf ++ bpu_perf
for (((perf_name,perf),i) <- perfEvents.zipWithIndex) {
println(s"frontend perf $i: $perf_name")
}
}
// PFEvent
val pfevent = Module(new PFEvent)
pfevent.io.distribute_csr := io.csrCtrl.distribute_csr
val csrevents = pfevent.io.hpmevent.take(8)
val hpmEvents = ifu.perfinfo.perfEvents.perf_events ++ ibuffer.perfinfo.perfEvents.perf_events ++
icache.perfinfo.perfEvents.perf_events ++ ftq.perfinfo.perfEvents.perf_events ++
bpu.perfinfo.perfEvents.perf_events
val perf_length = hpmEvents.length
val csrevents = pfevent.io.hpmevent.slice(0,8)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(csrevents.length))
})
val hpm_frontend = Module(new HPerfmonitor(perf_length,csrevents.length))
hpm_frontend.io.hpm_event := csrevents
hpm_frontend.io.events_sets.perf_events := hpmEvents
perfinfo.perfEvents := RegNext(hpm_frontend.io.events_selected)
val allPerfEvents = Seq(ifu, ibuffer, icache, ftq, bpu).flatMap(_.getPerf)
override val perfEvents = HPerfMonitor(csrevents, allPerfEvents).getPerfEvents
generatePerfEvent()
}
......@@ -104,7 +104,7 @@ class IfuToPreDecode(implicit p: Parameters) extends XSBundle {
}
class NewIFU(implicit p: Parameters) extends XSModule with HasICacheParameters with HasIFUConst
with HasCircularQueuePtrHelper
with HasCircularQueuePtrHelper with HasPerfEvents
{
println(s"icache ways: ${nWays} sets:${nSets}")
val io = IO(new NewIFUIO)
......@@ -429,6 +429,7 @@ with HasCircularQueuePtrHelper
val predecodeFlush = preDecoderOut.misOffset.valid && f3_valid
val predecodeFlushReg = RegNext(predecodeFlush && !(f2_fire && !f2_flush))
f3_redirect := (!predecodeFlushReg && predecodeFlush && !f3_req_is_mmio) || (f3_mmio_req_commit && f3_mmio_use_seq_pc)
/** performance counter */
......@@ -438,11 +439,6 @@ with HasCircularQueuePtrHelper
val f3_hit_0 = io.toIbuffer.fire() && f3_perf_info.bank_hit(0)
val f3_hit_1 = io.toIbuffer.fire() && f3_doubleLine & f3_perf_info.bank_hit(1)
val f3_hit = f3_perf_info.hit
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(15))
})
val perfEvents = Seq(
("frontendFlush ", f3_redirect ),
("ifu_req ", io.toIbuffer.fire() ),
......@@ -460,12 +456,7 @@ with HasCircularQueuePtrHelper
("cross_line_block ", io.toIbuffer.fire() && f3_situation(0) ),
("fall_through_is_cacheline_end", io.toIbuffer.fire() && f3_situation(1) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
f3_redirect := (!predecodeFlushReg && predecodeFlush && !f3_req_is_mmio) || (f3_mmio_req_commit && f3_mmio_use_seq_pc)
generatePerfEvent()
XSPerfAccumulate("ifu_req", io.toIbuffer.fire() )
XSPerfAccumulate("ifu_miss", io.toIbuffer.fire() && !f3_hit )
......@@ -473,7 +464,7 @@ with HasCircularQueuePtrHelper
XSPerfAccumulate("ifu_req_cacheline_1", f3_req_1 )
XSPerfAccumulate("ifu_req_cacheline_0_hit", f3_hit_0 )
XSPerfAccumulate("ifu_req_cacheline_1_hit", f3_hit_1 )
XSPerfAccumulate("frontendFlush", f3_redirect )
XSPerfAccumulate("frontendFlush", f3_redirect )
XSPerfAccumulate("only_0_hit", f3_perf_info.only_0_hit && io.toIbuffer.fire() )
XSPerfAccumulate("only_0_miss", f3_perf_info.only_0_miss && io.toIbuffer.fire() )
XSPerfAccumulate("hit_0_hit_1", f3_perf_info.hit_0_hit_1 && io.toIbuffer.fire() )
......
......@@ -36,7 +36,7 @@ class IBufferIO(implicit p: Parameters) extends XSBundle {
val full = Output(Bool())
}
class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
val io = IO(new IBufferIO)
class IBufEntry(implicit p: Parameters) extends XSBundle {
......@@ -189,21 +189,16 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
QueuePerf(IBufSize, validEntries, !allowEnq)
XSPerfAccumulate("flush", io.flush)
XSPerfAccumulate("hungry", instrHungry)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(8))
})
val perfEvents = Seq(
("IBuffer_Flushed ", io.flush ),
("IBuffer_hungry ", instrHungry ),
("IBuffer_1/4_valid ", (validEntries > (0*(IBufSize/4)).U) & (validEntries < (1*(IBufSize/4)).U) ),
("IBuffer_2/4_valid ", (validEntries >= (1*(IBufSize/4)).U) & (validEntries < (2*(IBufSize/4)).U) ),
("IBuffer_3/4_valid ", (validEntries >= (2*(IBufSize/4)).U) & (validEntries < (3*(IBufSize/4)).U) ),
("IBuffer_4/4_valid ", (validEntries >= (3*(IBufSize/4)).U) & (validEntries < (4*(IBufSize/4)).U) ),
("IBuffer_full ", validEntries.andR ),
("Front_Bubble ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid))),
("IBuffer_Flushed ", io.flush ),
("IBuffer_hungry ", instrHungry ),
("IBuffer_1_4_valid", (validEntries > (0*(IBufSize/4)).U) & (validEntries < (1*(IBufSize/4)).U) ),
("IBuffer_2_4_valid", (validEntries >= (1*(IBufSize/4)).U) & (validEntries < (2*(IBufSize/4)).U) ),
("IBuffer_3_4_valid", (validEntries >= (2*(IBufSize/4)).U) & (validEntries < (3*(IBufSize/4)).U) ),
("IBuffer_4_4_valid", (validEntries >= (3*(IBufSize/4)).U) & (validEntries < (4*(IBufSize/4)).U) ),
("IBuffer_full ", validEntries.andR ),
("Front_Bubble ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid)))
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -19,12 +19,9 @@ package xiangshan.frontend
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
import utils._
import xiangshan._
import scala.tools.nsc.doc.model.Val
import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
import xiangshan.backend.{CtrlToFtqIO}
import firrtl.annotations.MemoryLoadFileType
import xiangshan.backend.CtrlToFtqIO
class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
p => p(XSCoreParamsKey).FtqSize
......@@ -444,7 +441,7 @@ class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedire
}
class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents {
val io = IO(new Bundle {
val fromBpu = Flipped(new BpuToFtqIO)
val fromIfu = Flipped(new IfuToFtqIO)
......@@ -1199,9 +1196,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
// val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
// val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(22))
})
val perfEvents = Seq(
("bpu_s2_redirect ", bpu_s2_redirect ),
("bpu_s3_redirect ", bpu_s3_redirect ),
......@@ -1228,8 +1223,5 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
("ftb_false_hit ", PopCount(ftb_false_hit) ),
("ftb_hit ", PopCount(ftb_hit) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -174,7 +174,7 @@ object SCThreshold {
}
trait HasSC extends HasSCParameter { this: Tage =>
trait HasSC extends HasSCParameter with HasPerfEvents { this: Tage =>
val update_on_mispred, update_on_unconf = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
var sc_fh_info = Set[FoldedHistoryInfo]()
if (EnableSC) {
......@@ -343,16 +343,10 @@ trait HasSC extends HasSCParameter { this: Tage =>
override def getFoldedHistoryInfo = Some(tage_fh_info ++ sc_fh_info)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(3))
})
val perfEvents = Seq(
("tage_tht_hit ", updateMetas(1).provider.valid + updateMetas(0).provider.valid),
("sc_update_on_mispred ", PopCount(update_on_mispred) ),
("sc_update_on_unconf ", PopCount(update_on_unconf) ),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -444,7 +444,7 @@ class ICache()(implicit p: Parameters) extends LazyModule with HasICacheParamete
lazy val module = new ICacheImp(this)
}
class ICacheImp(outer: ICache) extends LazyModuleImp(outer) with HasICacheParameters {
class ICacheImp(outer: ICache) extends LazyModuleImp(outer) with HasICacheParameters with HasPerfEvents {
val io = IO(new ICacheIO)
val (bus, edge) = outer.clientNode.out.head
......@@ -576,15 +576,13 @@ class ICacheImp(outer: ICache) extends LazyModuleImp(outer) with HasICacheParame
assert (!bus.d.fire())
}
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(2))
})
val perfEvents = Seq(
("icache_miss_cnt ", false.B ),
("icache_miss_penty ", BoolStopWatch(start = false.B, stop = false.B || false.B, startHighPriority = true) ),
("icache_miss_cnt ", false.B),
("icache_miss_penty", BoolStopWatch(start = false.B, stop = false.B || false.B, startHighPriority = true)),
)
generatePerfEvent()
// Customized csr cache op support
// Customized csr cache op support
val cacheOpDecoder = Module(new CSRCacheOpDecoder("icache", CacheInstrucion.COP_ID_ICACHE))
cacheOpDecoder.io.csr <> io.csr
dataArray.io.cacheOp.req := cacheOpDecoder.io.cache.req
......
......@@ -33,7 +33,7 @@ trait MicroBTBParams extends HasXSParameter {
@chiselName
class MicroBTB(implicit p: Parameters) extends BasePredictor
with MicroBTBParams
with MicroBTBParams with HasPerfEvents
{
val ubtbAddr = new TableAddr(log2Up(numWays), 1)
......@@ -129,15 +129,9 @@ class MicroBTB(implicit p: Parameters) extends BasePredictor
XSPerfAccumulate("ubtb_commit_hits", u_valid && u_meta.hit)
XSPerfAccumulate("ubtb_commit_misses", u_valid && !u_meta.hit)
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(2))
})
val perfEvents = Seq(
("ubtb_commit_hits ", u_valid && u_meta.hit),
("ubtb_commit_misse ", u_valid && !u_meta.hit),
("ubtb_commit_hit ", u_valid && u_meta.hit),
("ubtb_commit_miss ", u_valid && !u_meta.hit),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
}
......@@ -52,7 +52,7 @@ class LsqEnqIO(implicit p: Parameters) extends XSBundle {
}
// Load / Store Queue Wrapper for XiangShan Out of Order LSU
class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParameters {
class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasPerfEvents {
val io = IO(new Bundle() {
val hartId = Input(UInt(8.W))
val enq = new LsqEnqIO
......@@ -191,12 +191,6 @@ class LsqWrappper(implicit p: Parameters) extends XSModule with HasDCacheParamet
io.lqFull := loadQueue.io.lqFull
io.sqFull := storeQueue.io.sqFull
val ldq_perf = loadQueue.perfEvents.map(_._1).zip(loadQueue.perfinfo.perfEvents.perf_events)
val stq_perf = storeQueue.perfEvents.map(_._1).zip(storeQueue.perfinfo.perfEvents.perf_events)
val perfEvents = ldq_perf ++ stq_perf
val perf_list = storeQueue.perfinfo.perfEvents.perf_events ++ loadQueue.perfinfo.perfEvents.perf_events
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(perf_list.length))
})
perfinfo.perfEvents.perf_events := perf_list
}
val perfEvents = Seq(loadQueue, storeQueue).flatMap(_.getPerfEvents)
generatePerfEvent()
}
......@@ -75,6 +75,7 @@ class LoadQueue(implicit p: Parameters) extends XSModule
with HasDCacheParameters
with HasCircularQueuePtrHelper
with HasLoadHelper
with HasPerfEvents
{
val io = IO(new Bundle() {
val enq = new LqEnqIO
......@@ -772,25 +773,20 @@ class LoadQueue(implicit p: Parameters) extends XSModule
XSPerfAccumulate("writeback_blocked", PopCount(VecInit(io.ldout.map(i => i.valid && !i.ready))))
XSPerfAccumulate("utilization_miss", PopCount((0 until LoadQueueSize).map(i => allocated(i) && miss(i))))
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(10))
})
val perfEvents = Seq(
("rollback ", io.rollback.valid ),
("mmioCycle ", uncacheState =/= s_idle ),
("mmio_Cnt ", io.uncache.req.fire() ),
("refill ", io.dcache.valid ),
("writeback_success ", PopCount(VecInit(io.ldout.map(i => i.fire()))) ),
("writeback_blocked ", PopCount(VecInit(io.ldout.map(i => i.valid && !i.ready))) ),
("ltq_1/4_valid ", (validCount < (LoadQueueSize.U/4.U)) ),
("ltq_2/4_valid ", (validCount > (LoadQueueSize.U/4.U)) & (validCount <= (LoadQueueSize.U/2.U)) ),
("ltq_3/4_valid ", (validCount > (LoadQueueSize.U/2.U)) & (validCount <= (LoadQueueSize.U*3.U/4.U))),
("ltq_4/4_valid ", (validCount > (LoadQueueSize.U*3.U/4.U)) ),
("rollback ", io.rollback.valid ),
("mmioCycle ", uncacheState =/= s_idle ),
("mmio_Cnt ", io.uncache.req.fire() ),
("refill ", io.dcache.valid ),
("writeback_success", PopCount(VecInit(io.ldout.map(i => i.fire()))) ),
("writeback_blocked", PopCount(VecInit(io.ldout.map(i => i.valid && !i.ready))) ),
("ltq_1_4_valid ", (validCount < (LoadQueueSize.U/4.U)) ),
("ltq_2_4_valid ", (validCount > (LoadQueueSize.U/4.U)) & (validCount <= (LoadQueueSize.U/2.U)) ),
("ltq_3_4_valid ", (validCount > (LoadQueueSize.U/2.U)) & (validCount <= (LoadQueueSize.U*3.U/4.U))),
("ltq_4_4_valid ", (validCount > (LoadQueueSize.U*3.U/4.U)) )
)
generatePerfEvent()
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
// debug info
XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)
......
......@@ -60,7 +60,8 @@ class DataBufferEntry (implicit p: Parameters) extends DCacheBundle {
}
// Store Queue
class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
class StoreQueue(implicit p: Parameters) extends XSModule
with HasDCacheParameters with HasCircularQueuePtrHelper with HasPerfEvents {
val io = IO(new Bundle() {
val hartId = Input(UInt(8.W))
val enq = new SqEnqIO
......@@ -630,23 +631,18 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
XSPerfAccumulate("cmtEntryCnt", distanceBetween(cmtPtrExt(0), deqPtrExt(0)))
XSPerfAccumulate("nCmtEntryCnt", distanceBetween(enqPtrExt(0), cmtPtrExt(0)))
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(8))
})
val perfEvents = Seq(
("mmioCycle ", uncacheState =/= s_idle ),
("mmioCnt ", io.uncache.req.fire() ),
("mmio_wb_success ", io.mmioStout.fire() ),
("mmio_wb_blocked ", io.mmioStout.valid && !io.mmioStout.ready ),
("stq_1/4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) < (StoreQueueSize.U/4.U)) ),
("stq_2/4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) > (StoreQueueSize.U/4.U)) & (distanceBetween(enqPtrExt(0), deqPtrExt(0)) <= (StoreQueueSize.U/2.U)) ),
("stq_3/4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) > (StoreQueueSize.U/2.U)) & (distanceBetween(enqPtrExt(0), deqPtrExt(0)) <= (StoreQueueSize.U*3.U/4.U))),
("stq_4/4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) > (StoreQueueSize.U*3.U/4.U)) ),
("mmioCycle ", uncacheState =/= s_idle ),
("mmioCnt ", io.uncache.req.fire() ),
("mmio_wb_success", io.mmioStout.fire() ),
("mmio_wb_blocked", io.mmioStout.valid && !io.mmioStout.ready ),
("stq_1_4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) < (StoreQueueSize.U/4.U)) ),
("stq_2_4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) > (StoreQueueSize.U/4.U)) & (distanceBetween(enqPtrExt(0), deqPtrExt(0)) <= (StoreQueueSize.U/2.U)) ),
("stq_3_4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) > (StoreQueueSize.U/2.U)) & (distanceBetween(enqPtrExt(0), deqPtrExt(0)) <= (StoreQueueSize.U*3.U/4.U))),
("stq_4_4_valid ", (distanceBetween(enqPtrExt(0), deqPtrExt(0)) > (StoreQueueSize.U*3.U/4.U)) ),
)
generatePerfEvent()
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
// debug info
XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt(0).flag, deqPtr)
......
......@@ -443,7 +443,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
XSPerfAccumulate("replay_from_fetch_load_vio", io.out.valid && ldldVioReplay)
}
class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper with HasPerfEvents {
val io = IO(new Bundle() {
val ldin = Flipped(Decoupled(new ExuInput))
val ldout = Decoupled(new ExuOutput)
......@@ -579,10 +579,6 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
io.lsq.ldout.ready := !hitLoadOut.valid
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(12))
})
val perfEvents = Seq(
("load_s0_in_fire ", load_s0.io.in.fire() ),
("load_to_load_forward ", load_s0.io.loadFastMatch.orR && load_s0.io.in.fire() ),
......@@ -597,10 +593,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
("load_s2_replay_tlb_miss ", load_s2.io.rsFeedback.valid && !load_s2.io.rsFeedback.bits.hit && load_s2.io.in.bits.tlbMiss ),
("load_s2_replay_cache ", load_s2.io.rsFeedback.valid && !load_s2.io.rsFeedback.bits.hit && !load_s2.io.in.bits.tlbMiss && load_s2.io.dcacheResp.bits.miss),
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
generatePerfEvent()
when(io.ldout.fire()){
XSDebug("ldout %x\n", io.ldout.bits.uop.cf.pc)
......
......@@ -99,7 +99,7 @@ class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst
io.dataOut := data
}
class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst {
class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst with HasPerfEvents {
val io = IO(new Bundle() {
val hartId = Input(UInt(8.W))
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReqWithVaddr))) //Todo: store logic only support Width == 2 now
......@@ -630,9 +630,6 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
// XSPerfHistogram("store_latency", store_latency, store_latency_sample, 0, 100, 10)
// XSPerfAccumulate("store_req", io.lsu.req.fire())
val perfinfo = IO(new Bundle(){
val perfEvents = Output(new PerfEventsBundle(10))
})
val perfEvents = Seq(
("sbuffer_req_valid ", PopCount(VecInit(io.in.map(_.valid)).asUInt) ),
("sbuffer_req_fire ", PopCount(VecInit(io.in.map(_.fire())).asUInt) ),
......@@ -647,13 +644,11 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
("refill_resp_valid ", io.dcache.refill_hit_resp.fire() ),
("replay_resp_valid ", io.dcache.replay_resp.fire() ),
("coh_timeout ", cohHasTimeOut ),
("sbuffer_1/4_valid ", (perf_valid_entry_count < (StoreBufferSize.U/4.U)) ),
("sbuffer_2/4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/4.U)) & (perf_valid_entry_count <= (StoreBufferSize.U/2.U)) ),
("sbuffer_3/4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/2.U)) & (perf_valid_entry_count <= (StoreBufferSize.U*3.U/4.U))),
("sbuffer_1_4_valid ", (perf_valid_entry_count < (StoreBufferSize.U/4.U)) ),
("sbuffer_2_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/4.U)) & (perf_valid_entry_count <= (StoreBufferSize.U/2.U)) ),
("sbuffer_3_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/2.U)) & (perf_valid_entry_count <= (StoreBufferSize.U*3.U/4.U))),
("sbuffer_full_valid", (perf_valid_entry_count > (StoreBufferSize.U*3.U/4.U)))
)
generatePerfEvent()
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
}