diff --git a/src/main/scala/xiangshan/Parameters.scala b/src/main/scala/xiangshan/Parameters.scala index 17906eb092734ac0de2b974f4986edc6c18f2df0..56760ae3a788c665d3be4b775382e103fa1a52c6 100644 --- a/src/main/scala/xiangshan/Parameters.scala +++ b/src/main/scala/xiangshan/Parameters.scala @@ -362,8 +362,6 @@ trait HasXSParameter { val PhyRegIdxWidth = log2Up(NRPhyRegs) val RobSize = coreParams.RobSize val IntRefCounterWidth = log2Ceil(RobSize) - val StdFreeListSize = NRPhyRegs - 32 - val MEFreeListSize = NRPhyRegs val LoadQueueSize = coreParams.LoadQueueSize val StoreQueueSize = coreParams.StoreQueueSize val dpParams = coreParams.dpParams diff --git a/src/main/scala/xiangshan/backend/decode/FusionDecoder.scala b/src/main/scala/xiangshan/backend/decode/FusionDecoder.scala index 11f6219adbf79b5e0376b5798c1ae42c1637996b..6f9df8632462f217ca9a1d32149ad5f9ec673f53 100644 --- a/src/main/scala/xiangshan/backend/decode/FusionDecoder.scala +++ b/src/main/scala/xiangshan/backend/decode/FusionDecoder.scala @@ -23,7 +23,7 @@ import chisel3.util._ import xiangshan._ import utils._ -abstract class BaseFusionCase(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]] = None)(implicit p: Parameters) +abstract class BaseFusionCase(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends DecodeUnitConstants { require(pair.length == 2) @@ -393,11 +393,10 @@ class FusedOddaddw(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseF // Case: addw and extract its lower 8 bits (fused into addwbyte) class FusedAddwbyte(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters) - extends BaseFusionCase(pair, csPair) { - require(csPair.isDefined) - - // the first instruction is a addw - def inst1Cond = csPair.get(0).fuType === FuType.alu && ALUOpType.isAddw(csPair.get(0).fuOpType) + extends BaseFusionCase(pair) { + // the first instruction is a ALUOpType.addw + // According to DecodeUnit.scala, only ADDIW and ADDW are ALUOpType.addw, which are used for 
inst1Cond. + def inst1Cond = instr(0) === Instructions.ADDIW || instr(0) === Instructions.ADDW def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 0xff.U def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1 @@ -457,12 +456,22 @@ class FusedAddwsexth(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(i } // Case: logic operation and extract its LSB + class FusedLogiclsb(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters) - extends BaseFusionCase(pair, csPair) { + extends BaseFusionCase(pair) { require(csPair.isDefined) - // the first instruction is a logic - def inst1Cond = csPair.get(0).fuType === FuType.alu && ALUOpType.isSimpleLogic(csPair.get(0).fuOpType) + // the first instruction is a logic (and, or, xor, orcb) + // (1) def ANDI = BitPat("b?????????????????111?????0010011") + // (2) def AND = BitPat("b0000000??????????111?????0110011") + // (3) def ORI = BitPat("b?????????????????110?????0010011") + // (4) def OR = BitPat("b0000000??????????110?????0110011") + // (5) def XORI = BitPat("b?????????????????100?????0010011") + // (6) def XOR = BitPat("b0000000??????????100?????0110011") + // (7) def ORC_B = BitPat("b001010000111?????101?????0010011") + val logicInstrList = Seq(Instructions.ANDI, Instructions.AND, Instructions.ORI, Instructions.OR, + Instructions.XORI, Instructions.XOR, Instructions.ORC_B) + def inst1Cond = VecInit(logicInstrList.map(_ === instr(0))).asUInt.orR def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 1.U def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1 @@ -512,17 +521,15 @@ class FusedOrh48(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFus // Case: mul 7bit data with 32-bit data // Source: `andi r1, r0, 127`` + `mulw r1, r1, r2` // Target: `mulw7 r1, r0, r2` -class FusedMulw7(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters) - extends BaseFusionCase(pair, csPair) { - 
require(csPair.isDefined) - +class FusedMulw7(pair: Seq[Valid[UInt]])(implicit p: Parameters) + extends BaseFusionCase(pair) { def inst1Cond = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 127.U def inst2Cond = instr(1) === Instructions.MULW def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 || destToRs2) def target: CtrlSignals = { // use MULW as the base - val cs = WireInit(csPair.get(1)) + val cs = getBaseCS(Instructions.MULW) // replace the fuOpType with mulw7 cs.fuOpType := MDUOpType.mulw7 cs.lsrc(0) := instr1Rs1 @@ -568,14 +575,14 @@ class FusionDecoder(implicit p: Parameters) extends XSModule { new FusedSr32add(pair), new FusedOddadd(pair), new FusedOddaddw(pair), + new FusedOrh48(pair), + new FusedMulw7(pair), new FusedAddwbyte(pair, Some(cs)), new FusedAddwbit(pair, Some(cs)), new FusedAddwzexth(pair, Some(cs)), new FusedAddwsexth(pair, Some(cs)), new FusedLogiclsb(pair, Some(cs)), - new FusedLogicZexth(pair, Some(cs)), - new FusedOrh48(pair), - new FusedMulw7(pair, Some(cs)) + new FusedLogicZexth(pair, Some(cs)) ) val pairValid = VecInit(pair.map(_.valid)).asUInt().andR val thisCleared = io.clear(i) diff --git a/src/main/scala/xiangshan/backend/fu/CSR.scala b/src/main/scala/xiangshan/backend/fu/CSR.scala index 1eca31228469c7f7ce3fda7ab3ca31ac88a31778..671f938bfa6595f235ddbd694bbeaa2c68b28e24 100644 --- a/src/main/scala/xiangshan/backend/fu/CSR.scala +++ b/src/main/scala/xiangshan/backend/fu/CSR.scala @@ -638,56 +638,35 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP val priviledgeModeOH = UIntToOH(priviledgeMode) val perfEventscounten = RegInit(0.U.asTypeOf(Vec(nrPerfCnts, Bool()))) val perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) - val perfEvents = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) + val perfEvents = List.fill(8)(RegInit("h0000000000".U(XLEN.W))) ++ + List.fill(8)(RegInit("h4010040100".U(XLEN.W))) ++ + List.fill(8)(RegInit("h8020080200".U(XLEN.W))) ++ + 
List.fill(5)(RegInit("hc0300c0300".U(XLEN.W))) for (i <-0 until nrPerfCnts) { perfEventscounten(i) := (Cat(perfEvents(i)(62),perfEvents(i)(61),(perfEvents(i)(61,60))) & priviledgeModeOH).orR } val hpmEvents = Wire(new PerfEventsBundle(numPCntHc * coreParams.L2NBanks)) - val pfevent = Module(new PFEvent) - pfevent.io.distribute_csr := csrio.customCtrl.distribute_csr for(i <- 0 until numPCntHc * coreParams.L2NBanks) { hpmEvents.perf_events(i).incr_step := csrio.perf.perfEventsHc(i) } val hpm_hc = Module(new HPerfmonitor(numPCntHc * coreParams.L2NBanks,numCSRPCntHc)) - val csrevents = pfevent.io.hpmevent.slice(24,29) + val csrevents = perfEvents.slice(24,29) hpm_hc.io.hpm_event := csrevents hpm_hc.io.events_sets := hpmEvents val mcountinhibit = RegInit(0.U(XLEN.W)) val mcycle = RegInit(0.U(XLEN.W)) mcycle := mcycle + 1.U val minstret = RegInit(0.U(XLEN.W)) + val perf_events = csrio.perf.perfEventsFrontend.perf_events ++ + csrio.perf.perfEventsCtrl.perf_events ++ + csrio.perf.perfEventsLsu.perf_events ++ + hpm_hc.io.events_selected.perf_events minstret := minstret + RegNext(csrio.perf.retiredInstr) - perfCnts( 0) := Mux((mcountinhibit( 3) | perfEventscounten( 0)),perfCnts( 0) , (perfCnts( 0) + RegNext(csrio.perf.perfEventsFrontend.perf_events(0 ).incr_step))) - perfCnts( 1) := Mux((mcountinhibit( 4) | perfEventscounten( 1)),perfCnts( 1) , (perfCnts( 1) + RegNext(csrio.perf.perfEventsFrontend.perf_events(1 ).incr_step))) - perfCnts( 2) := Mux((mcountinhibit( 5) | perfEventscounten( 2)),perfCnts( 2) , (perfCnts( 2) + RegNext(csrio.perf.perfEventsFrontend.perf_events(2 ).incr_step))) - perfCnts( 3) := Mux((mcountinhibit( 6) | perfEventscounten( 3)),perfCnts( 3) , (perfCnts( 3) + RegNext(csrio.perf.perfEventsFrontend.perf_events(3 ).incr_step))) - perfCnts( 4) := Mux((mcountinhibit( 7) | perfEventscounten( 4)),perfCnts( 4) , (perfCnts( 4) + RegNext(csrio.perf.perfEventsFrontend.perf_events(4 ).incr_step))) - perfCnts( 5) := Mux((mcountinhibit( 8) | perfEventscounten( 
5)),perfCnts( 5) , (perfCnts( 5) + RegNext(csrio.perf.perfEventsFrontend.perf_events(5 ).incr_step))) - perfCnts( 6) := Mux((mcountinhibit( 9) | perfEventscounten( 6)),perfCnts( 6) , (perfCnts( 6) + RegNext(csrio.perf.perfEventsFrontend.perf_events(6 ).incr_step))) - perfCnts( 7) := Mux((mcountinhibit(10) | perfEventscounten( 7)),perfCnts( 7) , (perfCnts( 7) + RegNext(csrio.perf.perfEventsFrontend.perf_events(7 ).incr_step))) - perfCnts( 8) := Mux((mcountinhibit(11) | perfEventscounten( 8)),perfCnts( 8) , (perfCnts( 8) + RegNext(csrio.perf.perfEventsCtrl.perf_events(0 ).incr_step))) - perfCnts( 9) := Mux((mcountinhibit(12) | perfEventscounten( 9)),perfCnts( 9) , (perfCnts( 9) + RegNext(csrio.perf.perfEventsCtrl.perf_events(1 ).incr_step))) - perfCnts(10) := Mux((mcountinhibit(13) | perfEventscounten(10)),perfCnts(10) , (perfCnts(10) + RegNext(csrio.perf.perfEventsCtrl.perf_events(2 ).incr_step))) - perfCnts(11) := Mux((mcountinhibit(14) | perfEventscounten(11)),perfCnts(11) , (perfCnts(11) + RegNext(csrio.perf.perfEventsCtrl.perf_events(3 ).incr_step))) - perfCnts(12) := Mux((mcountinhibit(15) | perfEventscounten(12)),perfCnts(12) , (perfCnts(12) + RegNext(csrio.perf.perfEventsCtrl.perf_events(4 ).incr_step))) - perfCnts(13) := Mux((mcountinhibit(16) | perfEventscounten(13)),perfCnts(13) , (perfCnts(13) + RegNext(csrio.perf.perfEventsCtrl.perf_events(5 ).incr_step))) - perfCnts(14) := Mux((mcountinhibit(17) | perfEventscounten(14)),perfCnts(14) , (perfCnts(14) + RegNext(csrio.perf.perfEventsCtrl.perf_events(6 ).incr_step))) - perfCnts(15) := Mux((mcountinhibit(18) | perfEventscounten(15)),perfCnts(15) , (perfCnts(15) + RegNext(csrio.perf.perfEventsCtrl.perf_events(7 ).incr_step))) - perfCnts(16) := Mux((mcountinhibit(19) | perfEventscounten(16)),perfCnts(16) , (perfCnts(16) + RegNext(csrio.perf.perfEventsLsu.perf_events(0 ).incr_step))) - perfCnts(17) := Mux((mcountinhibit(20) | perfEventscounten(17)),perfCnts(17) , (perfCnts(17) + 
RegNext(csrio.perf.perfEventsLsu.perf_events(1 ).incr_step))) - perfCnts(18) := Mux((mcountinhibit(21) | perfEventscounten(18)),perfCnts(18) , (perfCnts(18) + RegNext(csrio.perf.perfEventsLsu.perf_events(2 ).incr_step))) - perfCnts(19) := Mux((mcountinhibit(22) | perfEventscounten(19)),perfCnts(19) , (perfCnts(19) + RegNext(csrio.perf.perfEventsLsu.perf_events(3 ).incr_step))) - perfCnts(20) := Mux((mcountinhibit(23) | perfEventscounten(20)),perfCnts(20) , (perfCnts(20) + RegNext(csrio.perf.perfEventsLsu.perf_events(4 ).incr_step))) - perfCnts(21) := Mux((mcountinhibit(24) | perfEventscounten(21)),perfCnts(21) , (perfCnts(21) + RegNext(csrio.perf.perfEventsLsu.perf_events(5 ).incr_step))) - perfCnts(22) := Mux((mcountinhibit(25) | perfEventscounten(22)),perfCnts(22) , (perfCnts(22) + RegNext(csrio.perf.perfEventsLsu.perf_events(6 ).incr_step))) - perfCnts(23) := Mux((mcountinhibit(26) | perfEventscounten(23)),perfCnts(23) , (perfCnts(23) + RegNext(csrio.perf.perfEventsLsu.perf_events(7 ).incr_step))) - perfCnts(24) := Mux((mcountinhibit(27) | perfEventscounten(24)),perfCnts(24) , (perfCnts(24) + RegNext(hpm_hc.io.events_selected.perf_events(0 ).incr_step))) - perfCnts(25) := Mux((mcountinhibit(28) | perfEventscounten(25)),perfCnts(25) , (perfCnts(25) + RegNext(hpm_hc.io.events_selected.perf_events(1 ).incr_step))) - perfCnts(26) := Mux((mcountinhibit(29) | perfEventscounten(26)),perfCnts(26) , (perfCnts(26) + RegNext(hpm_hc.io.events_selected.perf_events(2 ).incr_step))) - perfCnts(27) := Mux((mcountinhibit(30) | perfEventscounten(27)),perfCnts(27) , (perfCnts(27) + RegNext(hpm_hc.io.events_selected.perf_events(3 ).incr_step))) - perfCnts(28) := Mux((mcountinhibit(31) | perfEventscounten(28)),perfCnts(28) , (perfCnts(28) + RegNext(hpm_hc.io.events_selected.perf_events(4 ).incr_step))) + for(i <- 0 until 29){ + perfCnts(i) := Mux((mcountinhibit(i+3) | !perfEventscounten(i)), perfCnts(i), (perfCnts(i) + perf_events(i).incr_step)) + } // CSR reg map val 
basicPrivMapping = Map( @@ -768,72 +747,19 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP MaskedRegMap(Dcsr, dcsr, dcsrMask, dcsrUpdateSideEffect), MaskedRegMap(Dpc, dpc), MaskedRegMap(Dscratch, dscratch), - MaskedRegMap(Dscratch1, dscratch1) - ) - - var perfCntMapping = Map( + MaskedRegMap(Dscratch1, dscratch1), MaskedRegMap(Mcountinhibit, mcountinhibit), MaskedRegMap(Mcycle, mcycle), MaskedRegMap(Minstret, minstret), - MaskedRegMap(Mhpmevent3 , perfEvents( 0)), - MaskedRegMap(Mhpmevent4 , perfEvents( 1)), - MaskedRegMap(Mhpmevent5 , perfEvents( 2)), - MaskedRegMap(Mhpmevent6 , perfEvents( 3)), - MaskedRegMap(Mhpmevent7 , perfEvents( 4)), - MaskedRegMap(Mhpmevent8 , perfEvents( 5)), - MaskedRegMap(Mhpmevent9 , perfEvents( 6)), - MaskedRegMap(Mhpmevent10, perfEvents( 7)), - MaskedRegMap(Mhpmevent11, perfEvents( 8)), - MaskedRegMap(Mhpmevent12, perfEvents( 9)), - MaskedRegMap(Mhpmevent13, perfEvents(10)), - MaskedRegMap(Mhpmevent14, perfEvents(11)), - MaskedRegMap(Mhpmevent15, perfEvents(12)), - MaskedRegMap(Mhpmevent16, perfEvents(13)), - MaskedRegMap(Mhpmevent17, perfEvents(14)), - MaskedRegMap(Mhpmevent18, perfEvents(15)), - MaskedRegMap(Mhpmevent19, perfEvents(16)), - MaskedRegMap(Mhpmevent20, perfEvents(17)), - MaskedRegMap(Mhpmevent21, perfEvents(18)), - MaskedRegMap(Mhpmevent22, perfEvents(19)), - MaskedRegMap(Mhpmevent23, perfEvents(20)), - MaskedRegMap(Mhpmevent24, perfEvents(21)), - MaskedRegMap(Mhpmevent25, perfEvents(22)), - MaskedRegMap(Mhpmevent26, perfEvents(23)), - MaskedRegMap(Mhpmevent27, perfEvents(24)), - MaskedRegMap(Mhpmevent28, perfEvents(25)), - MaskedRegMap(Mhpmevent29, perfEvents(26)), - MaskedRegMap(Mhpmevent30, perfEvents(27)), - MaskedRegMap(Mhpmevent31, perfEvents(28)), - MaskedRegMap(Mhpmcounter3 , perfCnts( 0)), - MaskedRegMap(Mhpmcounter4 , perfCnts( 1)), - MaskedRegMap(Mhpmcounter5 , perfCnts( 2)), - MaskedRegMap(Mhpmcounter6 , perfCnts( 3)), - MaskedRegMap(Mhpmcounter7 , perfCnts( 4)), - 
MaskedRegMap(Mhpmcounter8 , perfCnts( 5)), - MaskedRegMap(Mhpmcounter9 , perfCnts( 6)), - MaskedRegMap(Mhpmcounter10, perfCnts( 7)), - MaskedRegMap(Mhpmcounter11, perfCnts( 8)), - MaskedRegMap(Mhpmcounter12, perfCnts( 9)), - MaskedRegMap(Mhpmcounter13, perfCnts(10)), - MaskedRegMap(Mhpmcounter14, perfCnts(11)), - MaskedRegMap(Mhpmcounter15, perfCnts(12)), - MaskedRegMap(Mhpmcounter16, perfCnts(13)), - MaskedRegMap(Mhpmcounter17, perfCnts(14)), - MaskedRegMap(Mhpmcounter18, perfCnts(15)), - MaskedRegMap(Mhpmcounter19, perfCnts(16)), - MaskedRegMap(Mhpmcounter20, perfCnts(17)), - MaskedRegMap(Mhpmcounter21, perfCnts(18)), - MaskedRegMap(Mhpmcounter22, perfCnts(19)), - MaskedRegMap(Mhpmcounter23, perfCnts(20)), - MaskedRegMap(Mhpmcounter24, perfCnts(21)), - MaskedRegMap(Mhpmcounter25, perfCnts(22)), - MaskedRegMap(Mhpmcounter26, perfCnts(23)), - MaskedRegMap(Mhpmcounter27, perfCnts(24)), - MaskedRegMap(Mhpmcounter28, perfCnts(25)), - MaskedRegMap(Mhpmcounter29, perfCnts(26)), - MaskedRegMap(Mhpmcounter30, perfCnts(27)), - MaskedRegMap(Mhpmcounter31, perfCnts(28)), ) + + val perfCntMapping = (0 until 29).map(i => {Map( + MaskedRegMap(addr = Mhpmevent3 +i, + reg = perfEvents(i), + wmask = "hf87fff3fcff3fcff".U(XLEN.W)), + MaskedRegMap(addr = Mhpmcounter3 +i, + reg = perfCnts(i)) + )}).fold(Map())((a,b) => a ++ b) // TODO: mechanism should be implemented later // val MhpmcounterStart = Mhpmcounter3 // val MhpmeventStart = Mhpmevent3 @@ -1258,99 +1184,21 @@ class PFEvent(implicit p: Parameters) extends XSModule with HasCSRConst { val w = io.distribute_csr.w - //val csrevents = Vec(29,RegInit(UInt(XLEN.W), 0.U)) - val csrevent3 = RegInit(UInt(XLEN.W), 0.U) - val csrevent4 = RegInit(UInt(XLEN.W), 0.U) - val csrevent5 = RegInit(UInt(XLEN.W), 0.U) - val csrevent6 = RegInit(UInt(XLEN.W), 0.U) - val csrevent7 = RegInit(UInt(XLEN.W), 0.U) - val csrevent8 = RegInit(UInt(XLEN.W), 0.U) - val csrevent9 = RegInit(UInt(XLEN.W), 0.U) - val csrevent10 = RegInit(UInt(XLEN.W), 0.U) - val 
csrevent11 = RegInit(UInt(XLEN.W), 0.U) - val csrevent12 = RegInit(UInt(XLEN.W), 0.U) - val csrevent13 = RegInit(UInt(XLEN.W), 0.U) - val csrevent14 = RegInit(UInt(XLEN.W), 0.U) - val csrevent15 = RegInit(UInt(XLEN.W), 0.U) - val csrevent16 = RegInit(UInt(XLEN.W), 0.U) - val csrevent17 = RegInit(UInt(XLEN.W), 0.U) - val csrevent18 = RegInit(UInt(XLEN.W), 0.U) - val csrevent19 = RegInit(UInt(XLEN.W), 0.U) - val csrevent20 = RegInit(UInt(XLEN.W), 0.U) - val csrevent21 = RegInit(UInt(XLEN.W), 0.U) - val csrevent22 = RegInit(UInt(XLEN.W), 0.U) - val csrevent23 = RegInit(UInt(XLEN.W), 0.U) - val csrevent24 = RegInit(UInt(XLEN.W), 0.U) - val csrevent25 = RegInit(UInt(XLEN.W), 0.U) - val csrevent26 = RegInit(UInt(XLEN.W), 0.U) - val csrevent27 = RegInit(UInt(XLEN.W), 0.U) - val csrevent28 = RegInit(UInt(XLEN.W), 0.U) - val csrevent29 = RegInit(UInt(XLEN.W), 0.U) - val csrevent30 = RegInit(UInt(XLEN.W), 0.U) - val csrevent31 = RegInit(UInt(XLEN.W), 0.U) - - var perfEventMapping = Map( - MaskedRegMap(Mhpmevent3, csrevent3 ), - MaskedRegMap(Mhpmevent4, csrevent4 ), - MaskedRegMap(Mhpmevent5, csrevent5 ), - MaskedRegMap(Mhpmevent6, csrevent6 ), - MaskedRegMap(Mhpmevent7, csrevent7 ), - MaskedRegMap(Mhpmevent8, csrevent8 ), - MaskedRegMap(Mhpmevent9, csrevent9 ), - MaskedRegMap(Mhpmevent10,csrevent10), - MaskedRegMap(Mhpmevent11,csrevent11), - MaskedRegMap(Mhpmevent12,csrevent12), - MaskedRegMap(Mhpmevent13,csrevent13), - MaskedRegMap(Mhpmevent14,csrevent14), - MaskedRegMap(Mhpmevent15,csrevent15), - MaskedRegMap(Mhpmevent16,csrevent16), - MaskedRegMap(Mhpmevent17,csrevent17), - MaskedRegMap(Mhpmevent18,csrevent18), - MaskedRegMap(Mhpmevent19,csrevent19), - MaskedRegMap(Mhpmevent20,csrevent20), - MaskedRegMap(Mhpmevent21,csrevent21), - MaskedRegMap(Mhpmevent22,csrevent22), - MaskedRegMap(Mhpmevent23,csrevent23), - MaskedRegMap(Mhpmevent24,csrevent24), - MaskedRegMap(Mhpmevent25,csrevent25), - MaskedRegMap(Mhpmevent26,csrevent26), - MaskedRegMap(Mhpmevent27,csrevent27), - 
MaskedRegMap(Mhpmevent28,csrevent28), - MaskedRegMap(Mhpmevent29,csrevent29), - MaskedRegMap(Mhpmevent30,csrevent30), - MaskedRegMap(Mhpmevent31,csrevent31), - ) + val perfEvents = List.fill(8)(RegInit("h0000000000".U(XLEN.W))) ++ + List.fill(8)(RegInit("h4010040100".U(XLEN.W))) ++ + List.fill(8)(RegInit("h8020080200".U(XLEN.W))) ++ + List.fill(5)(RegInit("hc0300c0300".U(XLEN.W))) + + val perfEventMapping = (0 until 29).map(i => {Map( + MaskedRegMap(addr = Mhpmevent3 +i, + reg = perfEvents(i), + wmask = "hf87fff3fcff3fcff".U(XLEN.W)) + )}).fold(Map())((a,b) => a ++ b) val rdata = Wire(UInt(XLEN.W)) MaskedRegMap.generate(perfEventMapping, w.bits.addr, rdata, w.valid, w.bits.data) - io.hpmevent( 0) := csrevent3 - io.hpmevent( 1) := csrevent4 - io.hpmevent( 2) := csrevent5 - io.hpmevent( 3) := csrevent6 - io.hpmevent( 4) := csrevent7 - io.hpmevent( 5) := csrevent8 - io.hpmevent( 6) := csrevent9 - io.hpmevent( 7) := csrevent10 - io.hpmevent( 8) := csrevent11 - io.hpmevent( 9) := csrevent12 - io.hpmevent(10) := csrevent13 - io.hpmevent(11) := csrevent14 - io.hpmevent(12) := csrevent15 - io.hpmevent(13) := csrevent16 - io.hpmevent(14) := csrevent17 - io.hpmevent(15) := csrevent18 - io.hpmevent(16) := csrevent19 - io.hpmevent(17) := csrevent20 - io.hpmevent(18) := csrevent21 - io.hpmevent(19) := csrevent22 - io.hpmevent(20) := csrevent23 - io.hpmevent(21) := csrevent24 - io.hpmevent(22) := csrevent25 - io.hpmevent(23) := csrevent26 - io.hpmevent(24) := csrevent27 - io.hpmevent(25) := csrevent28 - io.hpmevent(26) := csrevent29 - io.hpmevent(27) := csrevent30 - io.hpmevent(28) := csrevent31 + for(i <- 0 until 29){ + io.hpmevent(i) := perfEvents(i) + } } diff --git a/src/main/scala/xiangshan/backend/rename/Rename.scala b/src/main/scala/xiangshan/backend/rename/Rename.scala index 99f79922d27ccf5a75e773bab3ea6bd3799974cf..454a037f4d6a6470476a86b0d9ef6abdf7bed6eb 100644 --- a/src/main/scala/xiangshan/backend/rename/Rename.scala +++ 
b/src/main/scala/xiangshan/backend/rename/Rename.scala @@ -45,9 +45,9 @@ class Rename(implicit p: Parameters) extends XSModule { }) // create free list and rat - val intFreeList = Module(new MEFreeList(MEFreeListSize)) - val intRefCounter = Module(new RefCounter(MEFreeListSize)) - val fpFreeList = Module(new StdFreeList(StdFreeListSize)) + val intFreeList = Module(new MEFreeList(NRPhyRegs)) + val intRefCounter = Module(new RefCounter(NRPhyRegs)) + val fpFreeList = Module(new StdFreeList(NRPhyRegs - 32)) // decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RobCommitInfo: from rob) def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = { diff --git a/src/main/scala/xiangshan/backend/rename/freelist/RefCounter.scala b/src/main/scala/xiangshan/backend/rename/freelist/RefCounter.scala index fc29f7ca084dab2fbf8a74eb304fbb0f56160d93..551b4fe595d1406571cb47f1db53740d5dccaa40 100644 --- a/src/main/scala/xiangshan/backend/rename/freelist/RefCounter.scala +++ b/src/main/scala/xiangshan/backend/rename/freelist/RefCounter.scala @@ -33,22 +33,29 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule { val allocate = RegNext(io.allocate) val deallocate = RegNext(io.deallocate) - // recording referenced times of each physical registers // refCounter: increase at rename; decrease at walk/commit // Originally 0-31 registers have counters of ones. 
- val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(NRPhyRegs - 32)(0.U(IntRefCounterWidth.W)))) + val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(size - 32)(0.U(IntRefCounterWidth.W)))) + val refCounterInc = WireInit(refCounter) + val refCounterDec = WireInit(refCounter) val refCounterNext = WireInit(refCounter) + // One-hot Encoding for allocation and de-allocation + val allocateOH = allocate.map(alloc => UIntToOH(alloc.bits)) + val deallocateOH = deallocate.map(dealloc => UIntToOH(dealloc.bits)) + /** - * Deallocation: when refCounter becomes zero, the register can be released to freelist + * De-allocation: when refCounter becomes zero, the register can be released to freelist */ for ((de, i) <- deallocate.zipWithIndex) { val isNonZero = de.valid && refCounter(de.bits) =/= 0.U val hasDuplicate = deallocate.take(i).map(de => de.valid && de.bits === deallocate(i).bits) val blockedByDup = if (i == 0) false.B else VecInit(hasDuplicate).asUInt.orR - val isFreed = refCounter(RegNext(de.bits)) === 0.U - io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && isFreed + val isFreed = refCounter(de.bits) + refCounterInc(de.bits) === refCounterDec(de.bits) + io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && RegNext(isFreed) + val isFreed1 = refCounter(RegNext(de.bits)) === 0.U + XSError(RegNext(isFreed) =/= isFreed1, p"why isFreed ${RegNext(isFreed)} $isFreed1\n") io.freeRegs(i).bits := RegNext(deallocate(i).bits) } @@ -68,17 +75,23 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule { * We don't count the number of references for physical register 0. * It should never be released to freelist. 
*/ - for (i <- 1 until NRPhyRegs) { - val numAlloc = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U)) - val numDealloc = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U)) - refCounterNext(i) := refCounter(i) + numAlloc - numDealloc - XSError(RegNext(refCounter(i) + numAlloc < numDealloc), p"why $i?\n") + for (i <- 1 until size) { + refCounterInc(i) := PopCount(allocate.zip(allocateOH).map(alloc => alloc._1.valid && alloc._2(i))) + refCounterDec(i) := PopCount(deallocate.zip(deallocateOH).map(dealloc => dealloc._1.valid && dealloc._2(i))) + val numAlloc1 = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U)) + val numDealloc1 = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U)) + XSError(refCounterInc(i) =/= numAlloc1, p"why numAlloc ${refCounterInc(i)} $numAlloc1??") + XSError(refCounterDec(i) =/= numDealloc1, p"why numDealloc ${refCounterDec(i)} $numDealloc1??") + refCounterNext(i) := refCounter(i) + refCounterInc(i) - refCounterDec(i) + XSError(RegNext(refCounter(i) + refCounterInc(i) < refCounterDec(i)), p"why $i?\n") refCounter(i) := refCounterNext(i) } for (i <- 0 until RobSize) { val numCounters = PopCount(refCounter.map(_ === i.U)) XSPerfAccumulate(s"ref_counter_$i", numCounters) + } + for (i <- 0 until size) { val isFreed = io.freeRegs.map(f => f.valid && f.bits === i.U) XSPerfAccumulate(s"free_reg_$i", VecInit(isFreed).asUInt.orR) } diff --git a/src/main/scala/xiangshan/backend/rename/freelist/StdFreeList.scala b/src/main/scala/xiangshan/backend/rename/freelist/StdFreeList.scala index d34fcc99b50b3659fe82445bae27517bdf341cbb..5369166cd794fb8e6e7e4d65bf1a3bdc2d1e2c6d 100644 --- a/src/main/scala/xiangshan/backend/rename/freelist/StdFreeList.scala +++ b/src/main/scala/xiangshan/backend/rename/freelist/StdFreeList.scala @@ -88,13 +88,13 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) val perfEvents = Output(new PerfEventsBundle(4)) }) val 
perfEvents = Seq( - ("std_freelist_1/4_valid ", (freeRegCnt < (StdFreeListSize.U/4.U)) ), - ("std_freelist_2/4_valid ", (freeRegCnt > (StdFreeListSize.U/4.U)) & (freeRegCnt <= (StdFreeListSize.U/2.U)) ), - ("std_freelist_3/4_valid ", (freeRegCnt > (StdFreeListSize.U/2.U)) & (freeRegCnt <= (StdFreeListSize.U*3.U/4.U))), - ("std_freelist_4/4_valid ", (freeRegCnt > (StdFreeListSize.U*3.U/4.U)) ), + ("std_freelist_1_4_valid", (freeRegCnt < (size / 4).U) ), + ("std_freelist_2_4_valid", (freeRegCnt > (size / 4).U) & (freeRegCnt <= (size / 2).U) ), + ("std_freelist_3_4_valid", (freeRegCnt > (size / 2).U) & (freeRegCnt <= (size * 3 / 4).U)), + ("std_freelist_4_4_valid", (freeRegCnt > (size * 3 / 4).U) ) ) - for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) { + for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) { perf_out.incr_step := RegNext(perf) } } diff --git a/src/main/scala/xiangshan/frontend/FrontendBundle.scala b/src/main/scala/xiangshan/frontend/FrontendBundle.scala index 041d872b19b28ae3a3566e43eab8216556ea4e8b..b7a28f0d4df185e3a7d924e60689a7da5e4387bd 100644 --- a/src/main/scala/xiangshan/frontend/FrontendBundle.scala +++ b/src/main/scala/xiangshan/frontend/FrontendBundle.scala @@ -33,19 +33,11 @@ class FetchRequestBundle(implicit p: Parameters) extends XSBundle { val target = UInt(VAddrBits.W) val oversize = Bool() - def fallThroughError() = { - def carryPos = instOffsetBits+log2Ceil(PredictWidth)+1 - def getLower(pc: UInt) = pc(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) - val carry = (startAddr(carryPos) =/= fallThruAddr(carryPos)).asUInt - val startLower = Cat(0.U(1.W), getLower(startAddr)) - val endLowerwithCarry = Cat(carry, getLower(fallThruAddr)) - require(startLower.getWidth == log2Ceil(PredictWidth)+2) - require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2) - startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > 
(PredictWidth+1).U - } def fromFtqPcBundle(b: Ftq_RF_Components) = { + val ftError = b.fallThroughError() this.startAddr := b.startAddr - this.fallThruAddr := b.getFallThrough() + this.fallThruError := ftError + this.fallThruAddr := Mux(ftError, b.nextRangeAddr, b.getFallThrough()) this.oversize := b.oversize this } diff --git a/src/main/scala/xiangshan/frontend/Ibuffer.scala b/src/main/scala/xiangshan/frontend/Ibuffer.scala index 72eaea7f58ec644b1d255f863a9d1f8f478c16d1..b8c2ad9aec9184aeb677c8286609686ff9b8fa02 100644 --- a/src/main/scala/xiangshan/frontend/Ibuffer.scala +++ b/src/main/scala/xiangshan/frontend/Ibuffer.scala @@ -98,11 +98,12 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH inWire.ipf := io.in.bits.ipf(i) inWire.acf := io.in.bits.acf(i) inWire.crossPageIPFFix := io.in.bits.crossPageIPFFix(i) - inWire.triggered := io.in.bits.triggered(i) for(k<-0 until 10){ inWire.triggered.triggerHitVec(k) := false.B } + inWire.triggered := io.in.bits.triggered(i) + dontTouch(inWire.triggered.triggerHitVec) ibuf.io.waddr(i) := tail_vec(offset(i)).value ibuf.io.wdata(i) := inWire diff --git a/src/main/scala/xiangshan/frontend/NewFtq.scala b/src/main/scala/xiangshan/frontend/NewFtq.scala index 3ed3801bb6889d94d392576feef33153a6db6b59..485ff8f615e0d82d05fc62f76fe5300bfdab4045 100644 --- a/src/main/scala/xiangshan/frontend/NewFtq.scala +++ b/src/main/scala/xiangshan/frontend/NewFtq.scala @@ -76,20 +76,28 @@ class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils { val oversize = Bool() val carry = Bool() def getPc(offset: UInt) = { - def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits) - def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits) - Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)), + def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1) + def getOffset(pc: UInt) = 
pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits) + Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)), getOffset(startAddr)+offset, 0.U(instOffsetBits.W)) } def getFallThrough() = { - getFallThroughAddr(this.startAddr, this.carry, this.pftAddr) + def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1) + val startHigher = getHigher(startAddr) + val nextHigher = getHigher(nextRangeAddr) + val higher = Mux(carry, nextHigher, startHigher) + Cat(higher, pftAddr, 0.U(instOffsetBits.W)) } def fallThroughError() = { - !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr + val startLower = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits)) + val endLowerwithCarry = Cat(carry, pftAddr) + require(startLower.getWidth == log2Ceil(PredictWidth)+2) + require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2) + startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U } def fromBranchPrediction(resp: BranchPredictionBundle) = { this.startAddr := resp.pc - this.nextRangeAddr := resp.pc + (FetchWidth * 4).U + this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U this.pftAddr := Mux(resp.preds.hit, resp.ftb_entry.pftAddr, resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U) @@ -553,7 +561,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe } - io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect + io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) { bpuPtr := bpu_s2_resp.ftq_idx + 1.U @@ -563,7 +571,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe } } - io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect + 
io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) { bpuPtr := bpu_s3_resp.ftq_idx + 1.U @@ -588,41 +596,34 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value - val toIfuReq = Wire(chiselTypeOf(io.toIfu.req)) - - toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr - toIfuReq.bits.ftqIdx := ifuPtr - toIfuReq.bits.target := update_target(ifuPtr.value) - toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value) - toIfuReq.bits.fallThruError := false.B + io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr + io.toIfu.req.bits.ftqIdx := ifuPtr + io.toIfu.req.bits.target := update_target(ifuPtr.value) + io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value) when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) { - toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf) + io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf) }.elsewhen (last_cycle_to_ifu_fire) { - toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last) + io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last) }.otherwise { - toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last) + io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last) } - io.toIfu.req <> toIfuReq - // when fall through is smaller in value than start address, there must be a false hit - when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) { + when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) { when (io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr) ) { entry_hit_status(ifuPtr.value) := h_false_hit - XSDebug(true.B, "FTB false hit by 
fallThroughError, startAddr: %x, fallTHru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr) + XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr) } - io.toIfu.req.bits.fallThruAddr := toIfuReq.bits.startAddr + (FetchWidth*4).U - io.toIfu.req.bits.fallThruError := true.B - XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr) + XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr) } val ifu_req_should_be_flushed = - io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) || - io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx) + io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) || + io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx) when (io.toIfu.req.fire && !ifu_req_should_be_flushed) { entry_fetch_status(ifuPtr.value) := f_sent @@ -815,31 +816,13 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe // **************************** flush ptr and state queue **************************** // *********************************************************************************** - class RedirectInfo extends Bundle { - val valid = Bool() - val ftqIdx = new FtqPtr - val ftqOffset = UInt(log2Ceil(PredictWidth).W) - val flushItSelf = Bool() - def apply(redirect: Valid[Redirect]) = { - this.valid := redirect.valid - this.ftqIdx := redirect.bits.ftqIdx - this.ftqOffset := redirect.bits.ftqOffset - this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level) - this - } - } - val redirectVec = Wire(Vec(3, new RedirectInfo)) - val robRedirect = robFlush - - redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map { - case (ve, r) => ve(r) - } + val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect) // when redirect, we should reset ptrs and 
status queues when(redirectVec.map(r => r.valid).reduce(_||_)){ - val r = PriorityMux(redirectVec.map(r => (r.valid -> r))) + val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits))) val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_) - val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf) + val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level)) val next = idx + 1.U bpuPtr := next ifuPtr := next