Commit e69babf9 authored by Lingrui98

Merge remote-tracking branch 'origin/master' into bpu-timing

@@ -362,8 +362,6 @@ trait HasXSParameter {
val PhyRegIdxWidth = log2Up(NRPhyRegs)
val RobSize = coreParams.RobSize
val IntRefCounterWidth = log2Ceil(RobSize)
val StdFreeListSize = NRPhyRegs - 32
val MEFreeListSize = NRPhyRegs
val LoadQueueSize = coreParams.LoadQueueSize
val StoreQueueSize = coreParams.StoreQueueSize
val dpParams = coreParams.dpParams
......
@@ -23,7 +23,7 @@ import chisel3.util._
import xiangshan._
import utils._
abstract class BaseFusionCase(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]] = None)(implicit p: Parameters)
abstract class BaseFusionCase(pair: Seq[Valid[UInt]])(implicit p: Parameters)
extends DecodeUnitConstants {
require(pair.length == 2)
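For context on the new style: the base class no longer receives the decoded CtrlSignals, so fusion cases match raw instruction bits against BitPats. A minimal sketch of that kind of matcher, with hypothetical stand-ins for the generated Instructions patterns (encodings follow the RISC-V spec):

```scala
import chisel3._
import chisel3.util.BitPat

// Hypothetical stand-ins for the generated instruction patterns
object Patterns {
  val ADDW = BitPat("b0000000??????????000?????0111011")
  val ANDI = BitPat("b?????????????????111?????0010011")
}

class RawPairMatch extends Module {
  val io = IO(new Bundle {
    val instr = Input(Vec(2, UInt(32.W)))
    val hit   = Output(Bool())
  })
  // BitPat === UInt compares only the pattern's fixed bits,
  // so no decoded control signals are needed on this path
  io.hit := Patterns.ADDW === io.instr(0) && Patterns.ANDI === io.instr(1)
}
```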
@@ -393,11 +393,10 @@ class FusedOddaddw(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseF
// Case: addw and extract its lower 8 bits (fused into addwbyte)
class FusedAddwbyte(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
extends BaseFusionCase(pair, csPair) {
require(csPair.isDefined)
// the first instruction is an addw
def inst1Cond = csPair.get(0).fuType === FuType.alu && ALUOpType.isAddw(csPair.get(0).fuOpType)
extends BaseFusionCase(pair) {
// the first instruction is an ALUOpType.addw
// According to DecodeUnit.scala, only ADDIW and ADDW are ALUOpType.addw, which are used for inst1Cond.
def inst1Cond = instr(0) === Instructions.ADDIW || instr(0) === Instructions.ADDW
def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 0xff.U
def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1
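isValid also uses two register-overlap helpers inherited from BaseFusionCase. A hedged sketch of what withSameDest and destToRs1 plausibly compute, using the standard RISC-V field positions (rd = bits 11:7, rs1 = bits 19:15):

```scala
import chisel3._

// Sketch only: field extraction per the RISC-V base encoding
class DestRs1Check extends Module {
  val io = IO(new Bundle {
    val instr        = Input(Vec(2, UInt(32.W)))
    val withSameDest = Output(Bool())
    val destToRs1    = Output(Bool())
  })
  def rd (i: UInt) = i(11, 7)
  def rs1(i: UInt) = i(19, 15)
  // both instructions write the same destination register
  io.withSameDest := rd(io.instr(0)) === rd(io.instr(1))
  // the first instruction's result feeds the second one's rs1
  io.destToRs1 := rd(io.instr(0)) === rs1(io.instr(1))
}
```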
@@ -457,12 +456,22 @@ class FusedAddwsexth(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(i
}
// Case: logic operation and extract its LSB
class FusedLogiclsb(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
extends BaseFusionCase(pair, csPair) {
extends BaseFusionCase(pair) {
require(csPair.isDefined)
// the first instruction is a logic
def inst1Cond = csPair.get(0).fuType === FuType.alu && ALUOpType.isSimpleLogic(csPair.get(0).fuOpType)
// the first instruction is a logic (and, or, xor, orcb)
// (1) def ANDI = BitPat("b?????????????????111?????0010011")
// (2) def AND = BitPat("b0000000??????????111?????0110011")
// (3) def ORI = BitPat("b?????????????????110?????0010011")
// (4) def OR = BitPat("b0000000??????????110?????0110011")
// (5) def XORI = BitPat("b?????????????????100?????0010011")
// (6) def XOR = BitPat("b0000000??????????100?????0110011")
// (7) def ORC_B = BitPat("b001010000111?????101?????0010011")
val logicInstrList = Seq(Instructions.ANDI, Instructions.AND, Instructions.ORI, Instructions.OR,
Instructions.XORI, Instructions.XOR, Instructions.ORC_B)
def inst1Cond = VecInit(logicInstrList.map(_ === instr(0))).asUInt.orR
def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 1.U
def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1
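The list-based inst1Cond above is an instance of a common Chisel idiom: one Bool per candidate pattern, packed and OR-reduced. The same idiom in isolation (a sketch, not the XiangShan helper):

```scala
import chisel3._
import chisel3.util.BitPat

object MatchAny {
  // true when instr matches any of the given patterns
  def apply(instr: UInt, pats: Seq[BitPat]): Bool =
    VecInit(pats.map(p => p === instr)).asUInt.orR
}
// usage: def inst1Cond = MatchAny(instr(0), logicInstrList)
```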
@@ -512,17 +521,15 @@ class FusedOrh48(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFus
// Case: mul 7bit data with 32-bit data
// Source: `andi r1, r0, 127` + `mulw r1, r1, r2`
// Target: `mulw7 r1, r0, r2`
class FusedMulw7(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
extends BaseFusionCase(pair, csPair) {
require(csPair.isDefined)
class FusedMulw7(pair: Seq[Valid[UInt]])(implicit p: Parameters)
extends BaseFusionCase(pair) {
def inst1Cond = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 127.U
def inst2Cond = instr(1) === Instructions.MULW
def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 || destToRs2)
def target: CtrlSignals = {
// use MULW as the base
val cs = WireInit(csPair.get(1))
val cs = getBaseCS(Instructions.MULW)
// replace the fuOpType with mulw7
cs.fuOpType := MDUOpType.mulw7
cs.lsrc(0) := instr1Rs1
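getBaseCS apparently derives fresh control signals from a canonical instruction (MULW) rather than reusing the neighbour's decoded bundle; take the name as the diff presents it. The surrounding copy-then-patch pattern looks like this sketch, where CtrlSig and the opcode value are stand-ins, not XiangShan's CtrlSignals:

```scala
import chisel3._

class CtrlSig extends Bundle {
  val fuOpType = UInt(7.W)
  val lsrc0    = UInt(5.W)
}

class PatchedTarget extends Module {
  val io = IO(new Bundle {
    val base = Input(new CtrlSig)
    val out  = Output(new CtrlSig)
  })
  val cs = WireInit(io.base) // start from the base instruction's signals
  cs.fuOpType := 0x5a.U      // placeholder: override only the fused-op fields
  io.out := cs
}
```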
@@ -568,14 +575,14 @@ class FusionDecoder(implicit p: Parameters) extends XSModule {
new FusedSr32add(pair),
new FusedOddadd(pair),
new FusedOddaddw(pair),
new FusedOrh48(pair),
new FusedMulw7(pair),
new FusedAddwbyte(pair, Some(cs)),
new FusedAddwbit(pair, Some(cs)),
new FusedAddwzexth(pair, Some(cs)),
new FusedAddwsexth(pair, Some(cs)),
new FusedLogiclsb(pair, Some(cs)),
new FusedLogicZexth(pair, Some(cs)),
new FusedOrh48(pair),
new FusedMulw7(pair, Some(cs))
new FusedLogicZexth(pair, Some(cs))
)
val pairValid = VecInit(pair.map(_.valid)).asUInt().andR
val thisCleared = io.clear(i)
......
@@ -638,56 +638,35 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
val priviledgeModeOH = UIntToOH(priviledgeMode)
val perfEventscounten = RegInit(0.U.asTypeOf(Vec(nrPerfCnts, Bool())))
val perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W)))
val perfEvents = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W)))
val perfEvents = List.fill(8)(RegInit("h0000000000".U(XLEN.W))) ++
List.fill(8)(RegInit("h4010040100".U(XLEN.W))) ++
List.fill(8)(RegInit("h8020080200".U(XLEN.W))) ++
List.fill(5)(RegInit("hc0300c0300".U(XLEN.W)))
for (i <-0 until nrPerfCnts) {
perfEventscounten(i) := (Cat(perfEvents(i)(62),perfEvents(i)(61),(perfEvents(i)(61,60))) & priviledgeModeOH).orR
}
val hpmEvents = Wire(new PerfEventsBundle(numPCntHc * coreParams.L2NBanks))
val pfevent = Module(new PFEvent)
pfevent.io.distribute_csr := csrio.customCtrl.distribute_csr
for(i <- 0 until numPCntHc * coreParams.L2NBanks) {
hpmEvents.perf_events(i).incr_step := csrio.perf.perfEventsHc(i)
}
val hpm_hc = Module(new HPerfmonitor(numPCntHc * coreParams.L2NBanks,numCSRPCntHc))
val csrevents = pfevent.io.hpmevent.slice(24,29)
val csrevents = perfEvents.slice(24,29)
hpm_hc.io.hpm_event := csrevents
hpm_hc.io.events_sets := hpmEvents
val mcountinhibit = RegInit(0.U(XLEN.W))
val mcycle = RegInit(0.U(XLEN.W))
mcycle := mcycle + 1.U
val minstret = RegInit(0.U(XLEN.W))
val perf_events = csrio.perf.perfEventsFrontend.perf_events ++
csrio.perf.perfEventsCtrl.perf_events ++
csrio.perf.perfEventsLsu.perf_events ++
hpm_hc.io.events_selected.perf_events
minstret := minstret + RegNext(csrio.perf.retiredInstr)
perfCnts( 0) := Mux((mcountinhibit( 3) | perfEventscounten( 0)),perfCnts( 0) , (perfCnts( 0) + RegNext(csrio.perf.perfEventsFrontend.perf_events(0 ).incr_step)))
perfCnts( 1) := Mux((mcountinhibit( 4) | perfEventscounten( 1)),perfCnts( 1) , (perfCnts( 1) + RegNext(csrio.perf.perfEventsFrontend.perf_events(1 ).incr_step)))
perfCnts( 2) := Mux((mcountinhibit( 5) | perfEventscounten( 2)),perfCnts( 2) , (perfCnts( 2) + RegNext(csrio.perf.perfEventsFrontend.perf_events(2 ).incr_step)))
perfCnts( 3) := Mux((mcountinhibit( 6) | perfEventscounten( 3)),perfCnts( 3) , (perfCnts( 3) + RegNext(csrio.perf.perfEventsFrontend.perf_events(3 ).incr_step)))
perfCnts( 4) := Mux((mcountinhibit( 7) | perfEventscounten( 4)),perfCnts( 4) , (perfCnts( 4) + RegNext(csrio.perf.perfEventsFrontend.perf_events(4 ).incr_step)))
perfCnts( 5) := Mux((mcountinhibit( 8) | perfEventscounten( 5)),perfCnts( 5) , (perfCnts( 5) + RegNext(csrio.perf.perfEventsFrontend.perf_events(5 ).incr_step)))
perfCnts( 6) := Mux((mcountinhibit( 9) | perfEventscounten( 6)),perfCnts( 6) , (perfCnts( 6) + RegNext(csrio.perf.perfEventsFrontend.perf_events(6 ).incr_step)))
perfCnts( 7) := Mux((mcountinhibit(10) | perfEventscounten( 7)),perfCnts( 7) , (perfCnts( 7) + RegNext(csrio.perf.perfEventsFrontend.perf_events(7 ).incr_step)))
perfCnts( 8) := Mux((mcountinhibit(11) | perfEventscounten( 8)),perfCnts( 8) , (perfCnts( 8) + RegNext(csrio.perf.perfEventsCtrl.perf_events(0 ).incr_step)))
perfCnts( 9) := Mux((mcountinhibit(12) | perfEventscounten( 9)),perfCnts( 9) , (perfCnts( 9) + RegNext(csrio.perf.perfEventsCtrl.perf_events(1 ).incr_step)))
perfCnts(10) := Mux((mcountinhibit(13) | perfEventscounten(10)),perfCnts(10) , (perfCnts(10) + RegNext(csrio.perf.perfEventsCtrl.perf_events(2 ).incr_step)))
perfCnts(11) := Mux((mcountinhibit(14) | perfEventscounten(11)),perfCnts(11) , (perfCnts(11) + RegNext(csrio.perf.perfEventsCtrl.perf_events(3 ).incr_step)))
perfCnts(12) := Mux((mcountinhibit(15) | perfEventscounten(12)),perfCnts(12) , (perfCnts(12) + RegNext(csrio.perf.perfEventsCtrl.perf_events(4 ).incr_step)))
perfCnts(13) := Mux((mcountinhibit(16) | perfEventscounten(13)),perfCnts(13) , (perfCnts(13) + RegNext(csrio.perf.perfEventsCtrl.perf_events(5 ).incr_step)))
perfCnts(14) := Mux((mcountinhibit(17) | perfEventscounten(14)),perfCnts(14) , (perfCnts(14) + RegNext(csrio.perf.perfEventsCtrl.perf_events(6 ).incr_step)))
perfCnts(15) := Mux((mcountinhibit(18) | perfEventscounten(15)),perfCnts(15) , (perfCnts(15) + RegNext(csrio.perf.perfEventsCtrl.perf_events(7 ).incr_step)))
perfCnts(16) := Mux((mcountinhibit(19) | perfEventscounten(16)),perfCnts(16) , (perfCnts(16) + RegNext(csrio.perf.perfEventsLsu.perf_events(0 ).incr_step)))
perfCnts(17) := Mux((mcountinhibit(20) | perfEventscounten(17)),perfCnts(17) , (perfCnts(17) + RegNext(csrio.perf.perfEventsLsu.perf_events(1 ).incr_step)))
perfCnts(18) := Mux((mcountinhibit(21) | perfEventscounten(18)),perfCnts(18) , (perfCnts(18) + RegNext(csrio.perf.perfEventsLsu.perf_events(2 ).incr_step)))
perfCnts(19) := Mux((mcountinhibit(22) | perfEventscounten(19)),perfCnts(19) , (perfCnts(19) + RegNext(csrio.perf.perfEventsLsu.perf_events(3 ).incr_step)))
perfCnts(20) := Mux((mcountinhibit(23) | perfEventscounten(20)),perfCnts(20) , (perfCnts(20) + RegNext(csrio.perf.perfEventsLsu.perf_events(4 ).incr_step)))
perfCnts(21) := Mux((mcountinhibit(24) | perfEventscounten(21)),perfCnts(21) , (perfCnts(21) + RegNext(csrio.perf.perfEventsLsu.perf_events(5 ).incr_step)))
perfCnts(22) := Mux((mcountinhibit(25) | perfEventscounten(22)),perfCnts(22) , (perfCnts(22) + RegNext(csrio.perf.perfEventsLsu.perf_events(6 ).incr_step)))
perfCnts(23) := Mux((mcountinhibit(26) | perfEventscounten(23)),perfCnts(23) , (perfCnts(23) + RegNext(csrio.perf.perfEventsLsu.perf_events(7 ).incr_step)))
perfCnts(24) := Mux((mcountinhibit(27) | perfEventscounten(24)),perfCnts(24) , (perfCnts(24) + RegNext(hpm_hc.io.events_selected.perf_events(0 ).incr_step)))
perfCnts(25) := Mux((mcountinhibit(28) | perfEventscounten(25)),perfCnts(25) , (perfCnts(25) + RegNext(hpm_hc.io.events_selected.perf_events(1 ).incr_step)))
perfCnts(26) := Mux((mcountinhibit(29) | perfEventscounten(26)),perfCnts(26) , (perfCnts(26) + RegNext(hpm_hc.io.events_selected.perf_events(2 ).incr_step)))
perfCnts(27) := Mux((mcountinhibit(30) | perfEventscounten(27)),perfCnts(27) , (perfCnts(27) + RegNext(hpm_hc.io.events_selected.perf_events(3 ).incr_step)))
perfCnts(28) := Mux((mcountinhibit(31) | perfEventscounten(28)),perfCnts(28) , (perfCnts(28) + RegNext(hpm_hc.io.events_selected.perf_events(4 ).incr_step)))
for(i <- 0 until 29){
perfCnts(i) := Mux((mcountinhibit(i+3) | !perfEventscounten(i)), perfCnts(i), (perfCnts(i) + perf_events(i).incr_step))
}
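The loop replaces 29 hand-unrolled counter updates and also adds the missing negation on perfEventscounten(i): a counter now advances only when its mcountinhibit bit is clear and counting is enabled for the current privilege mode. The same update rule as a self-contained sketch (widths are illustrative):

```scala
import chisel3._

class HpmCounters(n: Int, xlen: Int = 64) extends Module {
  val io = IO(new Bundle {
    val inhibit = Input(UInt(32.W))        // mcountinhibit CSR
    val counten = Input(Vec(n, Bool()))    // per-counter privilege-mode enable
    val incr    = Input(Vec(n, UInt(6.W))) // per-event increment step
    val cnt     = Output(Vec(n, UInt(xlen.W)))
  })
  val cnts = RegInit(VecInit(Seq.fill(n)(0.U(xlen.W))))
  for (i <- 0 until n) {
    // mcountinhibit bit i+3 gates mhpmcounter(i+3); count only when
    // not inhibited and enabled for the current privilege mode
    when (!io.inhibit(i + 3) && io.counten(i)) {
      cnts(i) := cnts(i) + io.incr(i)
    }
  }
  io.cnt := cnts
}
```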
// CSR reg map
val basicPrivMapping = Map(
@@ -768,72 +747,19 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
MaskedRegMap(Dcsr, dcsr, dcsrMask, dcsrUpdateSideEffect),
MaskedRegMap(Dpc, dpc),
MaskedRegMap(Dscratch, dscratch),
MaskedRegMap(Dscratch1, dscratch1)
)
var perfCntMapping = Map(
MaskedRegMap(Dscratch1, dscratch1),
MaskedRegMap(Mcountinhibit, mcountinhibit),
MaskedRegMap(Mcycle, mcycle),
MaskedRegMap(Minstret, minstret),
MaskedRegMap(Mhpmevent3 , perfEvents( 0)),
MaskedRegMap(Mhpmevent4 , perfEvents( 1)),
MaskedRegMap(Mhpmevent5 , perfEvents( 2)),
MaskedRegMap(Mhpmevent6 , perfEvents( 3)),
MaskedRegMap(Mhpmevent7 , perfEvents( 4)),
MaskedRegMap(Mhpmevent8 , perfEvents( 5)),
MaskedRegMap(Mhpmevent9 , perfEvents( 6)),
MaskedRegMap(Mhpmevent10, perfEvents( 7)),
MaskedRegMap(Mhpmevent11, perfEvents( 8)),
MaskedRegMap(Mhpmevent12, perfEvents( 9)),
MaskedRegMap(Mhpmevent13, perfEvents(10)),
MaskedRegMap(Mhpmevent14, perfEvents(11)),
MaskedRegMap(Mhpmevent15, perfEvents(12)),
MaskedRegMap(Mhpmevent16, perfEvents(13)),
MaskedRegMap(Mhpmevent17, perfEvents(14)),
MaskedRegMap(Mhpmevent18, perfEvents(15)),
MaskedRegMap(Mhpmevent19, perfEvents(16)),
MaskedRegMap(Mhpmevent20, perfEvents(17)),
MaskedRegMap(Mhpmevent21, perfEvents(18)),
MaskedRegMap(Mhpmevent22, perfEvents(19)),
MaskedRegMap(Mhpmevent23, perfEvents(20)),
MaskedRegMap(Mhpmevent24, perfEvents(21)),
MaskedRegMap(Mhpmevent25, perfEvents(22)),
MaskedRegMap(Mhpmevent26, perfEvents(23)),
MaskedRegMap(Mhpmevent27, perfEvents(24)),
MaskedRegMap(Mhpmevent28, perfEvents(25)),
MaskedRegMap(Mhpmevent29, perfEvents(26)),
MaskedRegMap(Mhpmevent30, perfEvents(27)),
MaskedRegMap(Mhpmevent31, perfEvents(28)),
MaskedRegMap(Mhpmcounter3 , perfCnts( 0)),
MaskedRegMap(Mhpmcounter4 , perfCnts( 1)),
MaskedRegMap(Mhpmcounter5 , perfCnts( 2)),
MaskedRegMap(Mhpmcounter6 , perfCnts( 3)),
MaskedRegMap(Mhpmcounter7 , perfCnts( 4)),
MaskedRegMap(Mhpmcounter8 , perfCnts( 5)),
MaskedRegMap(Mhpmcounter9 , perfCnts( 6)),
MaskedRegMap(Mhpmcounter10, perfCnts( 7)),
MaskedRegMap(Mhpmcounter11, perfCnts( 8)),
MaskedRegMap(Mhpmcounter12, perfCnts( 9)),
MaskedRegMap(Mhpmcounter13, perfCnts(10)),
MaskedRegMap(Mhpmcounter14, perfCnts(11)),
MaskedRegMap(Mhpmcounter15, perfCnts(12)),
MaskedRegMap(Mhpmcounter16, perfCnts(13)),
MaskedRegMap(Mhpmcounter17, perfCnts(14)),
MaskedRegMap(Mhpmcounter18, perfCnts(15)),
MaskedRegMap(Mhpmcounter19, perfCnts(16)),
MaskedRegMap(Mhpmcounter20, perfCnts(17)),
MaskedRegMap(Mhpmcounter21, perfCnts(18)),
MaskedRegMap(Mhpmcounter22, perfCnts(19)),
MaskedRegMap(Mhpmcounter23, perfCnts(20)),
MaskedRegMap(Mhpmcounter24, perfCnts(21)),
MaskedRegMap(Mhpmcounter25, perfCnts(22)),
MaskedRegMap(Mhpmcounter26, perfCnts(23)),
MaskedRegMap(Mhpmcounter27, perfCnts(24)),
MaskedRegMap(Mhpmcounter28, perfCnts(25)),
MaskedRegMap(Mhpmcounter29, perfCnts(26)),
MaskedRegMap(Mhpmcounter30, perfCnts(27)),
MaskedRegMap(Mhpmcounter31, perfCnts(28)),
)
val perfCntMapping = (0 until 29).map(i => {Map(
MaskedRegMap(addr = Mhpmevent3 +i,
reg = perfEvents(i),
wmask = "hf87fff3fcff3fcff".U(XLEN.W)),
MaskedRegMap(addr = Mhpmcounter3 +i,
reg = perfCnts(i))
)}).fold(Map())((a,b) => a ++ b)
// TODO: mechanism should be implemented later
// val MhpmcounterStart = Mhpmcounter3
// val MhpmeventStart = Mhpmevent3
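The Scala-level construction above, in isolation: build one small Map per index and merge them. CSR numbers are consecutive, so Mhpmevent3 + i and Mhpmcounter3 + i walk mhpmevent3..31 (0x323..0x33F) and mhpmcounter3..31 (0xB03..0xB1F):

```scala
object PerfMapSketch {
  val Mhpmcounter3 = 0xB03 // base CSR address, per the privileged spec
  val mapping: Map[Int, String] =
    (0 until 29).map(i => Map(Mhpmcounter3 + i -> s"perfCnts($i)"))
                .fold(Map.empty[Int, String])(_ ++ _)
  // mapping(0xB03) == "perfCnts(0)", mapping(0xB1F) == "perfCnts(28)"
}
```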
@@ -1258,99 +1184,21 @@ class PFEvent(implicit p: Parameters) extends XSModule with HasCSRConst {
val w = io.distribute_csr.w
//val csrevents = Vec(29,RegInit(UInt(XLEN.W), 0.U))
val csrevent3 = RegInit(UInt(XLEN.W), 0.U)
val csrevent4 = RegInit(UInt(XLEN.W), 0.U)
val csrevent5 = RegInit(UInt(XLEN.W), 0.U)
val csrevent6 = RegInit(UInt(XLEN.W), 0.U)
val csrevent7 = RegInit(UInt(XLEN.W), 0.U)
val csrevent8 = RegInit(UInt(XLEN.W), 0.U)
val csrevent9 = RegInit(UInt(XLEN.W), 0.U)
val csrevent10 = RegInit(UInt(XLEN.W), 0.U)
val csrevent11 = RegInit(UInt(XLEN.W), 0.U)
val csrevent12 = RegInit(UInt(XLEN.W), 0.U)
val csrevent13 = RegInit(UInt(XLEN.W), 0.U)
val csrevent14 = RegInit(UInt(XLEN.W), 0.U)
val csrevent15 = RegInit(UInt(XLEN.W), 0.U)
val csrevent16 = RegInit(UInt(XLEN.W), 0.U)
val csrevent17 = RegInit(UInt(XLEN.W), 0.U)
val csrevent18 = RegInit(UInt(XLEN.W), 0.U)
val csrevent19 = RegInit(UInt(XLEN.W), 0.U)
val csrevent20 = RegInit(UInt(XLEN.W), 0.U)
val csrevent21 = RegInit(UInt(XLEN.W), 0.U)
val csrevent22 = RegInit(UInt(XLEN.W), 0.U)
val csrevent23 = RegInit(UInt(XLEN.W), 0.U)
val csrevent24 = RegInit(UInt(XLEN.W), 0.U)
val csrevent25 = RegInit(UInt(XLEN.W), 0.U)
val csrevent26 = RegInit(UInt(XLEN.W), 0.U)
val csrevent27 = RegInit(UInt(XLEN.W), 0.U)
val csrevent28 = RegInit(UInt(XLEN.W), 0.U)
val csrevent29 = RegInit(UInt(XLEN.W), 0.U)
val csrevent30 = RegInit(UInt(XLEN.W), 0.U)
val csrevent31 = RegInit(UInt(XLEN.W), 0.U)
var perfEventMapping = Map(
MaskedRegMap(Mhpmevent3, csrevent3 ),
MaskedRegMap(Mhpmevent4, csrevent4 ),
MaskedRegMap(Mhpmevent5, csrevent5 ),
MaskedRegMap(Mhpmevent6, csrevent6 ),
MaskedRegMap(Mhpmevent7, csrevent7 ),
MaskedRegMap(Mhpmevent8, csrevent8 ),
MaskedRegMap(Mhpmevent9, csrevent9 ),
MaskedRegMap(Mhpmevent10,csrevent10),
MaskedRegMap(Mhpmevent11,csrevent11),
MaskedRegMap(Mhpmevent12,csrevent12),
MaskedRegMap(Mhpmevent13,csrevent13),
MaskedRegMap(Mhpmevent14,csrevent14),
MaskedRegMap(Mhpmevent15,csrevent15),
MaskedRegMap(Mhpmevent16,csrevent16),
MaskedRegMap(Mhpmevent17,csrevent17),
MaskedRegMap(Mhpmevent18,csrevent18),
MaskedRegMap(Mhpmevent19,csrevent19),
MaskedRegMap(Mhpmevent20,csrevent20),
MaskedRegMap(Mhpmevent21,csrevent21),
MaskedRegMap(Mhpmevent22,csrevent22),
MaskedRegMap(Mhpmevent23,csrevent23),
MaskedRegMap(Mhpmevent24,csrevent24),
MaskedRegMap(Mhpmevent25,csrevent25),
MaskedRegMap(Mhpmevent26,csrevent26),
MaskedRegMap(Mhpmevent27,csrevent27),
MaskedRegMap(Mhpmevent28,csrevent28),
MaskedRegMap(Mhpmevent29,csrevent29),
MaskedRegMap(Mhpmevent30,csrevent30),
MaskedRegMap(Mhpmevent31,csrevent31),
)
val perfEvents = List.fill(8)(RegInit("h0000000000".U(XLEN.W))) ++
List.fill(8)(RegInit("h4010040100".U(XLEN.W))) ++
List.fill(8)(RegInit("h8020080200".U(XLEN.W))) ++
List.fill(5)(RegInit("hc0300c0300".U(XLEN.W)))
val perfEventMapping = (0 until 29).map(i => {Map(
MaskedRegMap(addr = Mhpmevent3 +i,
reg = perfEvents(i),
wmask = "hf87fff3fcff3fcff".U(XLEN.W))
)}).fold(Map())((a,b) => a ++ b)
val rdata = Wire(UInt(XLEN.W))
MaskedRegMap.generate(perfEventMapping, w.bits.addr, rdata, w.valid, w.bits.data)
io.hpmevent( 0) := csrevent3
io.hpmevent( 1) := csrevent4
io.hpmevent( 2) := csrevent5
io.hpmevent( 3) := csrevent6
io.hpmevent( 4) := csrevent7
io.hpmevent( 5) := csrevent8
io.hpmevent( 6) := csrevent9
io.hpmevent( 7) := csrevent10
io.hpmevent( 8) := csrevent11
io.hpmevent( 9) := csrevent12
io.hpmevent(10) := csrevent13
io.hpmevent(11) := csrevent14
io.hpmevent(12) := csrevent15
io.hpmevent(13) := csrevent16
io.hpmevent(14) := csrevent17
io.hpmevent(15) := csrevent18
io.hpmevent(16) := csrevent19
io.hpmevent(17) := csrevent20
io.hpmevent(18) := csrevent21
io.hpmevent(19) := csrevent22
io.hpmevent(20) := csrevent23
io.hpmevent(21) := csrevent24
io.hpmevent(22) := csrevent25
io.hpmevent(23) := csrevent26
io.hpmevent(24) := csrevent27
io.hpmevent(25) := csrevent28
io.hpmevent(26) := csrevent29
io.hpmevent(27) := csrevent30
io.hpmevent(28) := csrevent31
for(i <- 0 until 29){
io.hpmevent(i) := perfEvents(i)
}
}
@@ -45,9 +45,9 @@ class Rename(implicit p: Parameters) extends XSModule {
})
// create free list and rat
val intFreeList = Module(new MEFreeList(MEFreeListSize))
val intRefCounter = Module(new RefCounter(MEFreeListSize))
val fpFreeList = Module(new StdFreeList(StdFreeListSize))
val intFreeList = Module(new MEFreeList(NRPhyRegs))
val intRefCounter = Module(new RefCounter(NRPhyRegs))
val fpFreeList = Module(new StdFreeList(NRPhyRegs - 32))
// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RobCommitInfo: from rob)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
......
@@ -33,22 +33,29 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule {
val allocate = RegNext(io.allocate)
val deallocate = RegNext(io.deallocate)
// record the number of references to each physical register
// refCounter: increase at rename; decrease at walk/commit
// Registers 0-31 start out with a reference count of one.
val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(NRPhyRegs - 32)(0.U(IntRefCounterWidth.W))))
val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(size - 32)(0.U(IntRefCounterWidth.W))))
val refCounterInc = WireInit(refCounter)
val refCounterDec = WireInit(refCounter)
val refCounterNext = WireInit(refCounter)
// One-hot Encoding for allocation and de-allocation
val allocateOH = allocate.map(alloc => UIntToOH(alloc.bits))
val deallocateOH = deallocate.map(dealloc => UIntToOH(dealloc.bits))
/**
* Deallocation: when refCounter becomes zero, the register can be released to freelist
* De-allocation: when refCounter becomes zero, the register can be released to freelist
*/
for ((de, i) <- deallocate.zipWithIndex) {
val isNonZero = de.valid && refCounter(de.bits) =/= 0.U
val hasDuplicate = deallocate.take(i).map(de => de.valid && de.bits === deallocate(i).bits)
val blockedByDup = if (i == 0) false.B else VecInit(hasDuplicate).asUInt.orR
val isFreed = refCounter(RegNext(de.bits)) === 0.U
io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && isFreed
val isFreed = refCounter(de.bits) + refCounterInc(de.bits) === refCounterDec(de.bits)
io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && RegNext(isFreed)
val isFreed1 = refCounter(RegNext(de.bits)) === 0.U
XSError(RegNext(isFreed) =/= isFreed1, p"why isFreed ${RegNext(isFreed)} $isFreed1\n")
io.freeRegs(i).bits := RegNext(deallocate(i).bits)
}
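The new isFreed looks like a timing optimization: instead of registering de.bits and reading the already-updated counter a cycle later (the old form survives as isFreed1 inside an XSError cross-check), it predicts next-cycle emptiness from this cycle's counter and its pending increments and decrements, then registers one bit. Since refCounterNext = refCounter + inc - dec, the next value is zero exactly when refCounter + inc === dec. The trick in isolation:

```scala
import chisel3._

class FreedEarly(w: Int) extends Module {
  val io = IO(new Bundle {
    val cnt, inc, dec = Input(UInt(w.W))
    val freed         = Output(Bool())
  })
  // cnt + inc - dec == 0  <=>  cnt + inc == dec
  // register the 1-bit result instead of an indexed read next cycle
  io.freed := RegNext(io.cnt + io.inc === io.dec)
}
```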
@@ -68,17 +75,23 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule {
* We don't count the number of references for physical register 0.
* It should never be released to freelist.
*/
for (i <- 1 until NRPhyRegs) {
val numAlloc = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U))
val numDealloc = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U))
refCounterNext(i) := refCounter(i) + numAlloc - numDealloc
XSError(RegNext(refCounter(i) + numAlloc < numDealloc), p"why $i?\n")
for (i <- 1 until size) {
refCounterInc(i) := PopCount(allocate.zip(allocateOH).map(alloc => alloc._1.valid && alloc._2(i)))
refCounterDec(i) := PopCount(deallocate.zip(deallocateOH).map(dealloc => dealloc._1.valid && dealloc._2(i)))
val numAlloc1 = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U))
val numDealloc1 = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U))
XSError(refCounterInc(i) =/= numAlloc1, p"why numAlloc ${refCounterInc(i)} $numAlloc1??")
XSError(refCounterDec(i) =/= numDealloc1, p"why numDealloc ${refCounterDec(i)} $numDealloc1??")
refCounterNext(i) := refCounter(i) + refCounterInc(i) - refCounterDec(i)
XSError(RegNext(refCounter(i) + refCounterInc(i) < refCounterDec(i)), p"why $i?\n")
refCounter(i) := refCounterNext(i)
}
for (i <- 0 until RobSize) {
val numCounters = PopCount(refCounter.map(_ === i.U))
XSPerfAccumulate(s"ref_counter_$i", numCounters)
}
for (i <- 0 until size) {
val isFreed = io.freeRegs.map(f => f.valid && f.bits === i.U)
XSPerfAccumulate(s"free_reg_$i", VecInit(isFreed).asUInt.orR)
}
......
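Similarly, the loop above swaps per-(port, register) equality comparators for a single UIntToOH decode per port followed by PopCount over pre-decoded bits; the old PopCount forms are kept only as XSError cross-checks. A standalone sketch of the rewrite:

```scala
import chisel3._
import chisel3.util.{PopCount, UIntToOH, log2Ceil}

class CountPerReg(nPorts: Int, nRegs: Int) extends Module {
  val io = IO(new Bundle {
    val valid = Input(Vec(nPorts, Bool()))
    val idx   = Input(Vec(nPorts, UInt(log2Ceil(nRegs).W)))
    val cnt   = Output(Vec(nRegs, UInt(log2Ceil(nPorts + 1).W)))
  })
  // one decoder per port, shared by all registers
  val oh = io.idx.map(UIntToOH(_, nRegs))
  for (i <- 0 until nRegs) {
    // per register: AND a valid with a pre-decoded bit, then count
    io.cnt(i) := PopCount(io.valid.zip(oh).map { case (v, o) => v && o(i) })
  }
}
```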
@@ -88,13 +88,13 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size)
val perfEvents = Output(new PerfEventsBundle(4))
})
val perfEvents = Seq(
("std_freelist_1/4_valid ", (freeRegCnt < (StdFreeListSize.U/4.U)) ),
("std_freelist_2/4_valid ", (freeRegCnt > (StdFreeListSize.U/4.U)) & (freeRegCnt <= (StdFreeListSize.U/2.U)) ),
("std_freelist_3/4_valid ", (freeRegCnt > (StdFreeListSize.U/2.U)) & (freeRegCnt <= (StdFreeListSize.U*3.U/4.U))),
("std_freelist_4/4_valid ", (freeRegCnt > (StdFreeListSize.U*3.U/4.U)) ),
("std_freelist_1_4_valid", (freeRegCnt < (size / 4).U) ),
("std_freelist_2_4_valid", (freeRegCnt > (size / 4).U) & (freeRegCnt <= (size / 2).U) ),
("std_freelist_3_4_valid", (freeRegCnt > (size / 2).U) & (freeRegCnt <= (size * 3 / 4).U)),
("std_freelist_4_4_valid", (freeRegCnt > (size * 3 / 4).U) )
)
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf)
}
}
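Besides replacing '/' in the event names (awkward in downstream tooling) and dropping the removed StdFreeListSize parameter, the thresholds now use Scala integer arithmetic: (size / 4).U is a literal fixed at elaboration, while StdFreeListSize.U / 4.U described a (constant-foldable) divide on hardware UInts. The distinction in miniature:

```scala
import chisel3._

class Thresholds(size: Int) extends Module {
  val io = IO(new Bundle {
    val cnt      = Input(UInt(8.W))
    val lowWater = Output(Bool())
  })
  // (size / 4) is evaluated by Scala during elaboration and emitted
  // as a constant; no division appears in the generated hardware
  io.lowWater := io.cnt < (size / 4).U
}
```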
@@ -33,19 +33,11 @@ class FetchRequestBundle(implicit p: Parameters) extends XSBundle {
val target = UInt(VAddrBits.W)
val oversize = Bool()
def fallThroughError() = {
def carryPos = instOffsetBits+log2Ceil(PredictWidth)+1
def getLower(pc: UInt) = pc(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits)
val carry = (startAddr(carryPos) =/= fallThruAddr(carryPos)).asUInt
val startLower = Cat(0.U(1.W), getLower(startAddr))
val endLowerwithCarry = Cat(carry, getLower(fallThruAddr))
require(startLower.getWidth == log2Ceil(PredictWidth)+2)
require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
}
def fromFtqPcBundle(b: Ftq_RF_Components) = {
val ftError = b.fallThroughError()
this.startAddr := b.startAddr
this.fallThruAddr := b.getFallThrough()
this.fallThruError := ftError
this.fallThruAddr := Mux(ftError, b.nextRangeAddr, b.getFallThrough())
this.oversize := b.oversize
this
}
......
@@ -98,11 +98,12 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
inWire.ipf := io.in.bits.ipf(i)
inWire.acf := io.in.bits.acf(i)
inWire.crossPageIPFFix := io.in.bits.crossPageIPFFix(i)
inWire.triggered := io.in.bits.triggered(i)
for(k<-0 until 10){
inWire.triggered.triggerHitVec(k) := false.B
}
inWire.triggered := io.in.bits.triggered(i)
dontTouch(inWire.triggered.triggerHitVec)
ibuf.io.waddr(i) := tail_vec(offset(i)).value
ibuf.io.wdata(i) := inWire
......
@@ -76,20 +76,28 @@ class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
val oversize = Bool()
val carry = Bool()
def getPc(offset: UInt) = {
def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
}
def getFallThrough() = {
getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
val startHigher = getHigher(startAddr)
val nextHigher = getHigher(nextRangeAddr)
val higher = Mux(carry, nextHigher, startHigher)
Cat(higher, pftAddr, 0.U(instOffsetBits.W))
}
def fallThroughError() = {
!carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
val startLower = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
val endLowerwithCarry = Cat(carry, pftAddr)
require(startLower.getWidth == log2Ceil(PredictWidth)+2)
require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
}
def fromBranchPrediction(resp: BranchPredictionBundle) = {
this.startAddr := resp.pc
this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
this.pftAddr :=
Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
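fallThroughError, moved here from FetchRequestBundle, validates a predicted fall-through using only the low address bits plus a carry. A self-contained sketch with the widths spelled out, assuming PredictWidth = 16 and instOffsetBits = 1 (XiangShan's values at the time; treat them as assumptions here):

```scala
import chisel3._
import chisel3.util.Cat

object FallThruCheck {
  // startAddr: full PC; pftAddr: 5-bit partial fall-through address;
  // carry: set when the fall-through lies in the next aligned region
  def apply(startAddr: UInt, carry: Bool, pftAddr: UInt): Bool = {
    val startLower        = Cat(0.U(1.W), startAddr(5, 1)) // 6 bits
    val endLowerwithCarry = Cat(carry, pftAddr)            // 6 bits
    // error if the range is empty or backwards, or spans more than a
    // full fetch block (PredictWidth + 1 = 17 slots)
    startLower >= endLowerwithCarry ||
      (endLowerwithCarry - startLower) > 17.U
  }
}
```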
@@ -553,7 +561,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
}
io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
bpuPtr := bpu_s2_resp.ftq_idx + 1.U
@@ -563,7 +571,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
}
}
io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
bpuPtr := bpu_s3_resp.ftq_idx + 1.U
@@ -588,41 +596,34 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value
val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))
toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
toIfuReq.bits.ftqIdx := ifuPtr
toIfuReq.bits.target := update_target(ifuPtr.value)
toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
toIfuReq.bits.fallThruError := false.B
io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
io.toIfu.req.bits.ftqIdx := ifuPtr
io.toIfu.req.bits.target := update_target(ifuPtr.value)
io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
}.elsewhen (last_cycle_to_ifu_fire) {
toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
}.otherwise {
toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
}
io.toIfu.req <> toIfuReq
// when the fall-through address is smaller than the start address, there must be a false hit
when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
when (io.toIfu.req.fire &&
!(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
!(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
) {
entry_hit_status(ifuPtr.value) := h_false_hit
XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
}
io.toIfu.req.bits.fallThruAddr := toIfuReq.bits.startAddr + (FetchWidth*4).U
io.toIfu.req.bits.fallThruError := true.B
XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
}
val ifu_req_should_be_flushed =
io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)
io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
entry_fetch_status(ifuPtr.value) := f_sent
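The rewrite drops the intermediate toIfuReq wire and drives io.toIfu.req directly, relying on Chisel's last-connect semantics: the when chain and the later fallThruError override simply win over earlier assignments. A minimal illustration:

```scala
import chisel3._

class LastConnect extends Module {
  val io = IO(new Bundle {
    val sel = Input(Bool())
    val out = Output(UInt(8.W))
  })
  io.out := 1.U   // unconditional default
  when (io.sel) {
    io.out := 2.U // a later connection overrides the earlier one
  }
}
```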
@@ -815,31 +816,13 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
// **************************** flush ptr and state queue ****************************
// ***********************************************************************************
class RedirectInfo extends Bundle {
val valid = Bool()
val ftqIdx = new FtqPtr
val ftqOffset = UInt(log2Ceil(PredictWidth).W)
val flushItSelf = Bool()
def apply(redirect: Valid[Redirect]) = {
this.valid := redirect.valid
this.ftqIdx := redirect.bits.ftqIdx
this.ftqOffset := redirect.bits.ftqOffset
this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
this
}
}
val redirectVec = Wire(Vec(3, new RedirectInfo))
val robRedirect = robFlush
redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
case (ve, r) => ve(r)
}
val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)
// when redirect, we should reset ptrs and status queues
when(redirectVec.map(r => r.valid).reduce(_||_)){
val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
val next = idx + 1.U
bpuPtr := next
ifuPtr := next
......
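With the RedirectInfo wrapper gone, redirectVec is a plain Vec of Valid redirects and PriorityMux picks the first valid one, so the order (robFlush, stage2Redirect, fromIfuRedirect) encodes priority. The selection idiom in isolation:

```scala
import chisel3._
import chisel3.util.PriorityMux

class PickFirstValid extends Module {
  val io = IO(new Bundle {
    val valids = Input(Vec(3, Bool()))
    val idxs   = Input(Vec(3, UInt(6.W)))
    val sel    = Output(UInt(6.W))
  })
  // earliest valid entry wins; later entries are ignored
  io.sel := PriorityMux(io.valids.zip(io.idxs))
}
```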