提交 e69babf9 编写于 作者: L Lingrui98

Merge remote-tracking branch 'origin/master' into bpu-timing

...@@ -362,8 +362,6 @@ trait HasXSParameter { ...@@ -362,8 +362,6 @@ trait HasXSParameter {
val PhyRegIdxWidth = log2Up(NRPhyRegs) val PhyRegIdxWidth = log2Up(NRPhyRegs)
val RobSize = coreParams.RobSize val RobSize = coreParams.RobSize
val IntRefCounterWidth = log2Ceil(RobSize) val IntRefCounterWidth = log2Ceil(RobSize)
val StdFreeListSize = NRPhyRegs - 32
val MEFreeListSize = NRPhyRegs
val LoadQueueSize = coreParams.LoadQueueSize val LoadQueueSize = coreParams.LoadQueueSize
val StoreQueueSize = coreParams.StoreQueueSize val StoreQueueSize = coreParams.StoreQueueSize
val dpParams = coreParams.dpParams val dpParams = coreParams.dpParams
......
...@@ -23,7 +23,7 @@ import chisel3.util._ ...@@ -23,7 +23,7 @@ import chisel3.util._
import xiangshan._ import xiangshan._
import utils._ import utils._
abstract class BaseFusionCase(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]] = None)(implicit p: Parameters) abstract class BaseFusionCase(pair: Seq[Valid[UInt]])(implicit p: Parameters)
extends DecodeUnitConstants { extends DecodeUnitConstants {
require(pair.length == 2) require(pair.length == 2)
...@@ -393,11 +393,10 @@ class FusedOddaddw(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseF ...@@ -393,11 +393,10 @@ class FusedOddaddw(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseF
// Case: addw and extract its lower 8 bits (fused into addwbyte) // Case: addw and extract its lower 8 bits (fused into addwbyte)
class FusedAddwbyte(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters) class FusedAddwbyte(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
extends BaseFusionCase(pair, csPair) { extends BaseFusionCase(pair) {
require(csPair.isDefined) // the first instruction is a ALUOpType.addw
// According to DecodeUnit.scala, only ADDIW and ADDW are ALUOpType.addw, which are used for inst1Cond.
// the first instruction is a addw def inst1Cond = instr(0) === Instructions.ADDIW || instr(0) === Instructions.ADDIW
def inst1Cond = csPair.get(0).fuType === FuType.alu && ALUOpType.isAddw(csPair.get(0).fuOpType)
def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 0xff.U def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 0xff.U
def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1 def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1
...@@ -457,12 +456,22 @@ class FusedAddwsexth(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(i ...@@ -457,12 +456,22 @@ class FusedAddwsexth(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(i
} }
// Case: logic operation and extract its LSB // Case: logic operation and extract its LSB
class FusedLogiclsb(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters) class FusedLogiclsb(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters)
extends BaseFusionCase(pair, csPair) { extends BaseFusionCase(pair) {
require(csPair.isDefined) require(csPair.isDefined)
// the first instruction is a logic // the first instruction is a logic (and, or, xor, orcb)
def inst1Cond = csPair.get(0).fuType === FuType.alu && ALUOpType.isSimpleLogic(csPair.get(0).fuOpType) // (1) def ANDI = BitPat("b?????????????????111?????0010011")
// (2) def AND = BitPat("b0000000??????????111?????0110011")
// (3) def ORI = BitPat("b?????????????????110?????0010011")
// (4) def OR = BitPat("b0000000??????????110?????0110011")
// (5) def XORI = BitPat("b?????????????????100?????0010011")
// (6) def XOR = BitPat("b0000000??????????100?????0110011")
// (7) def ORC_B = BitPat("b001010000111?????101?????0010011")
val logicInstrList = Seq(Instructions.ANDI, Instructions.AND, Instructions.ORI, Instructions.OR,
Instructions.XORI, Instructions.XOR, Instructions.ORC_B)
def inst1Cond = VecInit(logicInstrList.map(_ === instr(0))).asUInt.orR
def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 1.U def inst2Cond = instr(1) === Instructions.ANDI && instr(1)(31, 20) === 1.U
def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1 def isValid: Bool = inst1Cond && inst2Cond && withSameDest && destToRs1
...@@ -512,17 +521,15 @@ class FusedOrh48(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFus ...@@ -512,17 +521,15 @@ class FusedOrh48(pair: Seq[Valid[UInt]])(implicit p: Parameters) extends BaseFus
// Case: mul 7bit data with 32-bit data // Case: mul 7bit data with 32-bit data
// Source: `andi r1, r0, 127` + `mulw r1, r1, r2` // Source: `andi r1, r0, 127` + `mulw r1, r1, r2`
// Target: `mulw7 r1, r0, r2` // Target: `mulw7 r1, r0, r2`
class FusedMulw7(pair: Seq[Valid[UInt]], csPair: Option[Seq[CtrlSignals]])(implicit p: Parameters) class FusedMulw7(pair: Seq[Valid[UInt]])(implicit p: Parameters)
extends BaseFusionCase(pair, csPair) { extends BaseFusionCase(pair) {
require(csPair.isDefined)
def inst1Cond = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 127.U def inst1Cond = instr(0) === Instructions.ANDI && instr(0)(31, 20) === 127.U
def inst2Cond = instr(1) === Instructions.MULW def inst2Cond = instr(1) === Instructions.MULW
def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 || destToRs2) def isValid: Bool = inst1Cond && inst2Cond && withSameDest && (destToRs1 || destToRs2)
def target: CtrlSignals = { def target: CtrlSignals = {
// use MULW as the base // use MULW as the base
val cs = WireInit(csPair.get(1)) val cs = getBaseCS(Instructions.MULW)
// replace the fuOpType with mulw7 // replace the fuOpType with mulw7
cs.fuOpType := MDUOpType.mulw7 cs.fuOpType := MDUOpType.mulw7
cs.lsrc(0) := instr1Rs1 cs.lsrc(0) := instr1Rs1
...@@ -568,14 +575,14 @@ class FusionDecoder(implicit p: Parameters) extends XSModule { ...@@ -568,14 +575,14 @@ class FusionDecoder(implicit p: Parameters) extends XSModule {
new FusedSr32add(pair), new FusedSr32add(pair),
new FusedOddadd(pair), new FusedOddadd(pair),
new FusedOddaddw(pair), new FusedOddaddw(pair),
new FusedOrh48(pair),
new FusedMulw7(pair),
new FusedAddwbyte(pair, Some(cs)), new FusedAddwbyte(pair, Some(cs)),
new FusedAddwbit(pair, Some(cs)), new FusedAddwbit(pair, Some(cs)),
new FusedAddwzexth(pair, Some(cs)), new FusedAddwzexth(pair, Some(cs)),
new FusedAddwsexth(pair, Some(cs)), new FusedAddwsexth(pair, Some(cs)),
new FusedLogiclsb(pair, Some(cs)), new FusedLogiclsb(pair, Some(cs)),
new FusedLogicZexth(pair, Some(cs)), new FusedLogicZexth(pair, Some(cs))
new FusedOrh48(pair),
new FusedMulw7(pair, Some(cs))
) )
val pairValid = VecInit(pair.map(_.valid)).asUInt().andR val pairValid = VecInit(pair.map(_.valid)).asUInt().andR
val thisCleared = io.clear(i) val thisCleared = io.clear(i)
......
...@@ -638,56 +638,35 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP ...@@ -638,56 +638,35 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
val priviledgeModeOH = UIntToOH(priviledgeMode) val priviledgeModeOH = UIntToOH(priviledgeMode)
val perfEventscounten = RegInit(0.U.asTypeOf(Vec(nrPerfCnts, Bool()))) val perfEventscounten = RegInit(0.U.asTypeOf(Vec(nrPerfCnts, Bool())))
val perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) val perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W)))
val perfEvents = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) val perfEvents = List.fill(8)(RegInit("h0000000000".U(XLEN.W))) ++
List.fill(8)(RegInit("h4010040100".U(XLEN.W))) ++
List.fill(8)(RegInit("h8020080200".U(XLEN.W))) ++
List.fill(5)(RegInit("hc0300c0300".U(XLEN.W)))
for (i <-0 until nrPerfCnts) { for (i <-0 until nrPerfCnts) {
perfEventscounten(i) := (Cat(perfEvents(i)(62),perfEvents(i)(61),(perfEvents(i)(61,60))) & priviledgeModeOH).orR perfEventscounten(i) := (Cat(perfEvents(i)(62),perfEvents(i)(61),(perfEvents(i)(61,60))) & priviledgeModeOH).orR
} }
val hpmEvents = Wire(new PerfEventsBundle(numPCntHc * coreParams.L2NBanks)) val hpmEvents = Wire(new PerfEventsBundle(numPCntHc * coreParams.L2NBanks))
val pfevent = Module(new PFEvent)
pfevent.io.distribute_csr := csrio.customCtrl.distribute_csr
for(i <- 0 until numPCntHc * coreParams.L2NBanks) { for(i <- 0 until numPCntHc * coreParams.L2NBanks) {
hpmEvents.perf_events(i).incr_step := csrio.perf.perfEventsHc(i) hpmEvents.perf_events(i).incr_step := csrio.perf.perfEventsHc(i)
} }
val hpm_hc = Module(new HPerfmonitor(numPCntHc * coreParams.L2NBanks,numCSRPCntHc)) val hpm_hc = Module(new HPerfmonitor(numPCntHc * coreParams.L2NBanks,numCSRPCntHc))
val csrevents = pfevent.io.hpmevent.slice(24,29) val csrevents = perfEvents.slice(24,29)
hpm_hc.io.hpm_event := csrevents hpm_hc.io.hpm_event := csrevents
hpm_hc.io.events_sets := hpmEvents hpm_hc.io.events_sets := hpmEvents
val mcountinhibit = RegInit(0.U(XLEN.W)) val mcountinhibit = RegInit(0.U(XLEN.W))
val mcycle = RegInit(0.U(XLEN.W)) val mcycle = RegInit(0.U(XLEN.W))
mcycle := mcycle + 1.U mcycle := mcycle + 1.U
val minstret = RegInit(0.U(XLEN.W)) val minstret = RegInit(0.U(XLEN.W))
val perf_events = csrio.perf.perfEventsFrontend.perf_events ++
csrio.perf.perfEventsCtrl.perf_events ++
csrio.perf.perfEventsLsu.perf_events ++
hpm_hc.io.events_selected.perf_events
minstret := minstret + RegNext(csrio.perf.retiredInstr) minstret := minstret + RegNext(csrio.perf.retiredInstr)
perfCnts( 0) := Mux((mcountinhibit( 3) | perfEventscounten( 0)),perfCnts( 0) , (perfCnts( 0) + RegNext(csrio.perf.perfEventsFrontend.perf_events(0 ).incr_step))) for(i <- 0 until 29){
perfCnts( 1) := Mux((mcountinhibit( 4) | perfEventscounten( 1)),perfCnts( 1) , (perfCnts( 1) + RegNext(csrio.perf.perfEventsFrontend.perf_events(1 ).incr_step))) perfCnts(i) := Mux((mcountinhibit(i+3) | !perfEventscounten(i)), perfCnts(i), (perfCnts(i) + perf_events(i).incr_step))
perfCnts( 2) := Mux((mcountinhibit( 5) | perfEventscounten( 2)),perfCnts( 2) , (perfCnts( 2) + RegNext(csrio.perf.perfEventsFrontend.perf_events(2 ).incr_step))) }
perfCnts( 3) := Mux((mcountinhibit( 6) | perfEventscounten( 3)),perfCnts( 3) , (perfCnts( 3) + RegNext(csrio.perf.perfEventsFrontend.perf_events(3 ).incr_step)))
perfCnts( 4) := Mux((mcountinhibit( 7) | perfEventscounten( 4)),perfCnts( 4) , (perfCnts( 4) + RegNext(csrio.perf.perfEventsFrontend.perf_events(4 ).incr_step)))
perfCnts( 5) := Mux((mcountinhibit( 8) | perfEventscounten( 5)),perfCnts( 5) , (perfCnts( 5) + RegNext(csrio.perf.perfEventsFrontend.perf_events(5 ).incr_step)))
perfCnts( 6) := Mux((mcountinhibit( 9) | perfEventscounten( 6)),perfCnts( 6) , (perfCnts( 6) + RegNext(csrio.perf.perfEventsFrontend.perf_events(6 ).incr_step)))
perfCnts( 7) := Mux((mcountinhibit(10) | perfEventscounten( 7)),perfCnts( 7) , (perfCnts( 7) + RegNext(csrio.perf.perfEventsFrontend.perf_events(7 ).incr_step)))
perfCnts( 8) := Mux((mcountinhibit(11) | perfEventscounten( 8)),perfCnts( 8) , (perfCnts( 8) + RegNext(csrio.perf.perfEventsCtrl.perf_events(0 ).incr_step)))
perfCnts( 9) := Mux((mcountinhibit(12) | perfEventscounten( 9)),perfCnts( 9) , (perfCnts( 9) + RegNext(csrio.perf.perfEventsCtrl.perf_events(1 ).incr_step)))
perfCnts(10) := Mux((mcountinhibit(13) | perfEventscounten(10)),perfCnts(10) , (perfCnts(10) + RegNext(csrio.perf.perfEventsCtrl.perf_events(2 ).incr_step)))
perfCnts(11) := Mux((mcountinhibit(14) | perfEventscounten(11)),perfCnts(11) , (perfCnts(11) + RegNext(csrio.perf.perfEventsCtrl.perf_events(3 ).incr_step)))
perfCnts(12) := Mux((mcountinhibit(15) | perfEventscounten(12)),perfCnts(12) , (perfCnts(12) + RegNext(csrio.perf.perfEventsCtrl.perf_events(4 ).incr_step)))
perfCnts(13) := Mux((mcountinhibit(16) | perfEventscounten(13)),perfCnts(13) , (perfCnts(13) + RegNext(csrio.perf.perfEventsCtrl.perf_events(5 ).incr_step)))
perfCnts(14) := Mux((mcountinhibit(17) | perfEventscounten(14)),perfCnts(14) , (perfCnts(14) + RegNext(csrio.perf.perfEventsCtrl.perf_events(6 ).incr_step)))
perfCnts(15) := Mux((mcountinhibit(18) | perfEventscounten(15)),perfCnts(15) , (perfCnts(15) + RegNext(csrio.perf.perfEventsCtrl.perf_events(7 ).incr_step)))
perfCnts(16) := Mux((mcountinhibit(19) | perfEventscounten(16)),perfCnts(16) , (perfCnts(16) + RegNext(csrio.perf.perfEventsLsu.perf_events(0 ).incr_step)))
perfCnts(17) := Mux((mcountinhibit(20) | perfEventscounten(17)),perfCnts(17) , (perfCnts(17) + RegNext(csrio.perf.perfEventsLsu.perf_events(1 ).incr_step)))
perfCnts(18) := Mux((mcountinhibit(21) | perfEventscounten(18)),perfCnts(18) , (perfCnts(18) + RegNext(csrio.perf.perfEventsLsu.perf_events(2 ).incr_step)))
perfCnts(19) := Mux((mcountinhibit(22) | perfEventscounten(19)),perfCnts(19) , (perfCnts(19) + RegNext(csrio.perf.perfEventsLsu.perf_events(3 ).incr_step)))
perfCnts(20) := Mux((mcountinhibit(23) | perfEventscounten(20)),perfCnts(20) , (perfCnts(20) + RegNext(csrio.perf.perfEventsLsu.perf_events(4 ).incr_step)))
perfCnts(21) := Mux((mcountinhibit(24) | perfEventscounten(21)),perfCnts(21) , (perfCnts(21) + RegNext(csrio.perf.perfEventsLsu.perf_events(5 ).incr_step)))
perfCnts(22) := Mux((mcountinhibit(25) | perfEventscounten(22)),perfCnts(22) , (perfCnts(22) + RegNext(csrio.perf.perfEventsLsu.perf_events(6 ).incr_step)))
perfCnts(23) := Mux((mcountinhibit(26) | perfEventscounten(23)),perfCnts(23) , (perfCnts(23) + RegNext(csrio.perf.perfEventsLsu.perf_events(7 ).incr_step)))
perfCnts(24) := Mux((mcountinhibit(27) | perfEventscounten(24)),perfCnts(24) , (perfCnts(24) + RegNext(hpm_hc.io.events_selected.perf_events(0 ).incr_step)))
perfCnts(25) := Mux((mcountinhibit(28) | perfEventscounten(25)),perfCnts(25) , (perfCnts(25) + RegNext(hpm_hc.io.events_selected.perf_events(1 ).incr_step)))
perfCnts(26) := Mux((mcountinhibit(29) | perfEventscounten(26)),perfCnts(26) , (perfCnts(26) + RegNext(hpm_hc.io.events_selected.perf_events(2 ).incr_step)))
perfCnts(27) := Mux((mcountinhibit(30) | perfEventscounten(27)),perfCnts(27) , (perfCnts(27) + RegNext(hpm_hc.io.events_selected.perf_events(3 ).incr_step)))
perfCnts(28) := Mux((mcountinhibit(31) | perfEventscounten(28)),perfCnts(28) , (perfCnts(28) + RegNext(hpm_hc.io.events_selected.perf_events(4 ).incr_step)))
// CSR reg map // CSR reg map
val basicPrivMapping = Map( val basicPrivMapping = Map(
...@@ -768,72 +747,19 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP ...@@ -768,72 +747,19 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
MaskedRegMap(Dcsr, dcsr, dcsrMask, dcsrUpdateSideEffect), MaskedRegMap(Dcsr, dcsr, dcsrMask, dcsrUpdateSideEffect),
MaskedRegMap(Dpc, dpc), MaskedRegMap(Dpc, dpc),
MaskedRegMap(Dscratch, dscratch), MaskedRegMap(Dscratch, dscratch),
MaskedRegMap(Dscratch1, dscratch1) MaskedRegMap(Dscratch1, dscratch1),
)
var perfCntMapping = Map(
MaskedRegMap(Mcountinhibit, mcountinhibit), MaskedRegMap(Mcountinhibit, mcountinhibit),
MaskedRegMap(Mcycle, mcycle), MaskedRegMap(Mcycle, mcycle),
MaskedRegMap(Minstret, minstret), MaskedRegMap(Minstret, minstret),
MaskedRegMap(Mhpmevent3 , perfEvents( 0)),
MaskedRegMap(Mhpmevent4 , perfEvents( 1)),
MaskedRegMap(Mhpmevent5 , perfEvents( 2)),
MaskedRegMap(Mhpmevent6 , perfEvents( 3)),
MaskedRegMap(Mhpmevent7 , perfEvents( 4)),
MaskedRegMap(Mhpmevent8 , perfEvents( 5)),
MaskedRegMap(Mhpmevent9 , perfEvents( 6)),
MaskedRegMap(Mhpmevent10, perfEvents( 7)),
MaskedRegMap(Mhpmevent11, perfEvents( 8)),
MaskedRegMap(Mhpmevent12, perfEvents( 9)),
MaskedRegMap(Mhpmevent13, perfEvents(10)),
MaskedRegMap(Mhpmevent14, perfEvents(11)),
MaskedRegMap(Mhpmevent15, perfEvents(12)),
MaskedRegMap(Mhpmevent16, perfEvents(13)),
MaskedRegMap(Mhpmevent17, perfEvents(14)),
MaskedRegMap(Mhpmevent18, perfEvents(15)),
MaskedRegMap(Mhpmevent19, perfEvents(16)),
MaskedRegMap(Mhpmevent20, perfEvents(17)),
MaskedRegMap(Mhpmevent21, perfEvents(18)),
MaskedRegMap(Mhpmevent22, perfEvents(19)),
MaskedRegMap(Mhpmevent23, perfEvents(20)),
MaskedRegMap(Mhpmevent24, perfEvents(21)),
MaskedRegMap(Mhpmevent25, perfEvents(22)),
MaskedRegMap(Mhpmevent26, perfEvents(23)),
MaskedRegMap(Mhpmevent27, perfEvents(24)),
MaskedRegMap(Mhpmevent28, perfEvents(25)),
MaskedRegMap(Mhpmevent29, perfEvents(26)),
MaskedRegMap(Mhpmevent30, perfEvents(27)),
MaskedRegMap(Mhpmevent31, perfEvents(28)),
MaskedRegMap(Mhpmcounter3 , perfCnts( 0)),
MaskedRegMap(Mhpmcounter4 , perfCnts( 1)),
MaskedRegMap(Mhpmcounter5 , perfCnts( 2)),
MaskedRegMap(Mhpmcounter6 , perfCnts( 3)),
MaskedRegMap(Mhpmcounter7 , perfCnts( 4)),
MaskedRegMap(Mhpmcounter8 , perfCnts( 5)),
MaskedRegMap(Mhpmcounter9 , perfCnts( 6)),
MaskedRegMap(Mhpmcounter10, perfCnts( 7)),
MaskedRegMap(Mhpmcounter11, perfCnts( 8)),
MaskedRegMap(Mhpmcounter12, perfCnts( 9)),
MaskedRegMap(Mhpmcounter13, perfCnts(10)),
MaskedRegMap(Mhpmcounter14, perfCnts(11)),
MaskedRegMap(Mhpmcounter15, perfCnts(12)),
MaskedRegMap(Mhpmcounter16, perfCnts(13)),
MaskedRegMap(Mhpmcounter17, perfCnts(14)),
MaskedRegMap(Mhpmcounter18, perfCnts(15)),
MaskedRegMap(Mhpmcounter19, perfCnts(16)),
MaskedRegMap(Mhpmcounter20, perfCnts(17)),
MaskedRegMap(Mhpmcounter21, perfCnts(18)),
MaskedRegMap(Mhpmcounter22, perfCnts(19)),
MaskedRegMap(Mhpmcounter23, perfCnts(20)),
MaskedRegMap(Mhpmcounter24, perfCnts(21)),
MaskedRegMap(Mhpmcounter25, perfCnts(22)),
MaskedRegMap(Mhpmcounter26, perfCnts(23)),
MaskedRegMap(Mhpmcounter27, perfCnts(24)),
MaskedRegMap(Mhpmcounter28, perfCnts(25)),
MaskedRegMap(Mhpmcounter29, perfCnts(26)),
MaskedRegMap(Mhpmcounter30, perfCnts(27)),
MaskedRegMap(Mhpmcounter31, perfCnts(28)),
) )
val perfCntMapping = (0 until 29).map(i => {Map(
MaskedRegMap(addr = Mhpmevent3 +i,
reg = perfEvents(i),
wmask = "hf87fff3fcff3fcff".U(XLEN.W)),
MaskedRegMap(addr = Mhpmcounter3 +i,
reg = perfCnts(i))
)}).fold(Map())((a,b) => a ++ b)
// TODO: mechanism should be implemented later // TODO: mechanism should be implemented later
// val MhpmcounterStart = Mhpmcounter3 // val MhpmcounterStart = Mhpmcounter3
// val MhpmeventStart = Mhpmevent3 // val MhpmeventStart = Mhpmevent3
...@@ -1258,99 +1184,21 @@ class PFEvent(implicit p: Parameters) extends XSModule with HasCSRConst { ...@@ -1258,99 +1184,21 @@ class PFEvent(implicit p: Parameters) extends XSModule with HasCSRConst {
val w = io.distribute_csr.w val w = io.distribute_csr.w
//val csrevents = Vec(29,RegInit(UInt(XLEN.W), 0.U)) val perfEvents = List.fill(8)(RegInit("h0000000000".U(XLEN.W))) ++
val csrevent3 = RegInit(UInt(XLEN.W), 0.U) List.fill(8)(RegInit("h4010040100".U(XLEN.W))) ++
val csrevent4 = RegInit(UInt(XLEN.W), 0.U) List.fill(8)(RegInit("h8020080200".U(XLEN.W))) ++
val csrevent5 = RegInit(UInt(XLEN.W), 0.U) List.fill(5)(RegInit("hc0300c0300".U(XLEN.W)))
val csrevent6 = RegInit(UInt(XLEN.W), 0.U)
val csrevent7 = RegInit(UInt(XLEN.W), 0.U) val perfEventMapping = (0 until 29).map(i => {Map(
val csrevent8 = RegInit(UInt(XLEN.W), 0.U) MaskedRegMap(addr = Mhpmevent3 +i,
val csrevent9 = RegInit(UInt(XLEN.W), 0.U) reg = perfEvents(i),
val csrevent10 = RegInit(UInt(XLEN.W), 0.U) wmask = "hf87fff3fcff3fcff".U(XLEN.W))
val csrevent11 = RegInit(UInt(XLEN.W), 0.U) )}).fold(Map())((a,b) => a ++ b)
val csrevent12 = RegInit(UInt(XLEN.W), 0.U)
val csrevent13 = RegInit(UInt(XLEN.W), 0.U)
val csrevent14 = RegInit(UInt(XLEN.W), 0.U)
val csrevent15 = RegInit(UInt(XLEN.W), 0.U)
val csrevent16 = RegInit(UInt(XLEN.W), 0.U)
val csrevent17 = RegInit(UInt(XLEN.W), 0.U)
val csrevent18 = RegInit(UInt(XLEN.W), 0.U)
val csrevent19 = RegInit(UInt(XLEN.W), 0.U)
val csrevent20 = RegInit(UInt(XLEN.W), 0.U)
val csrevent21 = RegInit(UInt(XLEN.W), 0.U)
val csrevent22 = RegInit(UInt(XLEN.W), 0.U)
val csrevent23 = RegInit(UInt(XLEN.W), 0.U)
val csrevent24 = RegInit(UInt(XLEN.W), 0.U)
val csrevent25 = RegInit(UInt(XLEN.W), 0.U)
val csrevent26 = RegInit(UInt(XLEN.W), 0.U)
val csrevent27 = RegInit(UInt(XLEN.W), 0.U)
val csrevent28 = RegInit(UInt(XLEN.W), 0.U)
val csrevent29 = RegInit(UInt(XLEN.W), 0.U)
val csrevent30 = RegInit(UInt(XLEN.W), 0.U)
val csrevent31 = RegInit(UInt(XLEN.W), 0.U)
var perfEventMapping = Map(
MaskedRegMap(Mhpmevent3, csrevent3 ),
MaskedRegMap(Mhpmevent4, csrevent4 ),
MaskedRegMap(Mhpmevent5, csrevent5 ),
MaskedRegMap(Mhpmevent6, csrevent6 ),
MaskedRegMap(Mhpmevent7, csrevent7 ),
MaskedRegMap(Mhpmevent8, csrevent8 ),
MaskedRegMap(Mhpmevent9, csrevent9 ),
MaskedRegMap(Mhpmevent10,csrevent10),
MaskedRegMap(Mhpmevent11,csrevent11),
MaskedRegMap(Mhpmevent12,csrevent12),
MaskedRegMap(Mhpmevent13,csrevent13),
MaskedRegMap(Mhpmevent14,csrevent14),
MaskedRegMap(Mhpmevent15,csrevent15),
MaskedRegMap(Mhpmevent16,csrevent16),
MaskedRegMap(Mhpmevent17,csrevent17),
MaskedRegMap(Mhpmevent18,csrevent18),
MaskedRegMap(Mhpmevent19,csrevent19),
MaskedRegMap(Mhpmevent20,csrevent20),
MaskedRegMap(Mhpmevent21,csrevent21),
MaskedRegMap(Mhpmevent22,csrevent22),
MaskedRegMap(Mhpmevent23,csrevent23),
MaskedRegMap(Mhpmevent24,csrevent24),
MaskedRegMap(Mhpmevent25,csrevent25),
MaskedRegMap(Mhpmevent26,csrevent26),
MaskedRegMap(Mhpmevent27,csrevent27),
MaskedRegMap(Mhpmevent28,csrevent28),
MaskedRegMap(Mhpmevent29,csrevent29),
MaskedRegMap(Mhpmevent30,csrevent30),
MaskedRegMap(Mhpmevent31,csrevent31),
)
val rdata = Wire(UInt(XLEN.W)) val rdata = Wire(UInt(XLEN.W))
MaskedRegMap.generate(perfEventMapping, w.bits.addr, rdata, w.valid, w.bits.data) MaskedRegMap.generate(perfEventMapping, w.bits.addr, rdata, w.valid, w.bits.data)
io.hpmevent( 0) := csrevent3 for(i <- 0 until 29){
io.hpmevent( 1) := csrevent4 io.hpmevent(i) := perfEvents(i)
io.hpmevent( 2) := csrevent5 }
io.hpmevent( 3) := csrevent6
io.hpmevent( 4) := csrevent7
io.hpmevent( 5) := csrevent8
io.hpmevent( 6) := csrevent9
io.hpmevent( 7) := csrevent10
io.hpmevent( 8) := csrevent11
io.hpmevent( 9) := csrevent12
io.hpmevent(10) := csrevent13
io.hpmevent(11) := csrevent14
io.hpmevent(12) := csrevent15
io.hpmevent(13) := csrevent16
io.hpmevent(14) := csrevent17
io.hpmevent(15) := csrevent18
io.hpmevent(16) := csrevent19
io.hpmevent(17) := csrevent20
io.hpmevent(18) := csrevent21
io.hpmevent(19) := csrevent22
io.hpmevent(20) := csrevent23
io.hpmevent(21) := csrevent24
io.hpmevent(22) := csrevent25
io.hpmevent(23) := csrevent26
io.hpmevent(24) := csrevent27
io.hpmevent(25) := csrevent28
io.hpmevent(26) := csrevent29
io.hpmevent(27) := csrevent30
io.hpmevent(28) := csrevent31
} }
...@@ -45,9 +45,9 @@ class Rename(implicit p: Parameters) extends XSModule { ...@@ -45,9 +45,9 @@ class Rename(implicit p: Parameters) extends XSModule {
}) })
// create free list and rat // create free list and rat
val intFreeList = Module(new MEFreeList(MEFreeListSize)) val intFreeList = Module(new MEFreeList(NRPhyRegs))
val intRefCounter = Module(new RefCounter(MEFreeListSize)) val intRefCounter = Module(new RefCounter(NRPhyRegs))
val fpFreeList = Module(new StdFreeList(StdFreeListSize)) val fpFreeList = Module(new StdFreeList(NRPhyRegs - 32))
// decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RobCommitInfo: from rob) // decide if given instruction needs allocating a new physical register (CfCtrl: from decode; RobCommitInfo: from rob)
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = { def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
......
...@@ -33,22 +33,29 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule { ...@@ -33,22 +33,29 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule {
val allocate = RegNext(io.allocate) val allocate = RegNext(io.allocate)
val deallocate = RegNext(io.deallocate) val deallocate = RegNext(io.deallocate)
// records how many times each physical register is referenced // records how many times each physical register is referenced
// refCounter: increase at rename; decrease at walk/commit // refCounter: increase at rename; decrease at walk/commit
// Initially, registers 0-31 each have a counter value of one. // Initially, registers 0-31 each have a counter value of one.
val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(NRPhyRegs - 32)(0.U(IntRefCounterWidth.W)))) val refCounter = RegInit(VecInit(Seq.fill(32)(1.U(IntRefCounterWidth.W)) ++ Seq.fill(size - 32)(0.U(IntRefCounterWidth.W))))
val refCounterInc = WireInit(refCounter)
val refCounterDec = WireInit(refCounter)
val refCounterNext = WireInit(refCounter) val refCounterNext = WireInit(refCounter)
// One-hot Encoding for allocation and de-allocation
val allocateOH = allocate.map(alloc => UIntToOH(alloc.bits))
val deallocateOH = deallocate.map(dealloc => UIntToOH(dealloc.bits))
/** /**
* Deallocation: when refCounter becomes zero, the register can be released to freelist * De-allocation: when refCounter becomes zero, the register can be released to freelist
*/ */
for ((de, i) <- deallocate.zipWithIndex) { for ((de, i) <- deallocate.zipWithIndex) {
val isNonZero = de.valid && refCounter(de.bits) =/= 0.U val isNonZero = de.valid && refCounter(de.bits) =/= 0.U
val hasDuplicate = deallocate.take(i).map(de => de.valid && de.bits === deallocate(i).bits) val hasDuplicate = deallocate.take(i).map(de => de.valid && de.bits === deallocate(i).bits)
val blockedByDup = if (i == 0) false.B else VecInit(hasDuplicate).asUInt.orR val blockedByDup = if (i == 0) false.B else VecInit(hasDuplicate).asUInt.orR
val isFreed = refCounter(RegNext(de.bits)) === 0.U val isFreed = refCounter(de.bits) + refCounterInc(de.bits) === refCounterDec(de.bits)
io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && isFreed io.freeRegs(i).valid := RegNext(isNonZero && !blockedByDup) && RegNext(isFreed)
val isFreed1 = refCounter(RegNext(de.bits)) === 0.U
XSError(RegNext(isFreed) =/= isFreed1, p"why isFreed ${RegNext(isFreed)} $isFreed1\n")
io.freeRegs(i).bits := RegNext(deallocate(i).bits) io.freeRegs(i).bits := RegNext(deallocate(i).bits)
} }
...@@ -68,17 +75,23 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule { ...@@ -68,17 +75,23 @@ class RefCounter(size: Int)(implicit p: Parameters) extends XSModule {
* We don't count the number of references for physical register 0. * We don't count the number of references for physical register 0.
* It should never be released to freelist. * It should never be released to freelist.
*/ */
for (i <- 1 until NRPhyRegs) { for (i <- 1 until size) {
val numAlloc = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U)) refCounterInc(i) := PopCount(allocate.zip(allocateOH).map(alloc => alloc._1.valid && alloc._2(i)))
val numDealloc = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U)) refCounterDec(i) := PopCount(deallocate.zip(deallocateOH).map(dealloc => dealloc._1.valid && dealloc._2(i)))
refCounterNext(i) := refCounter(i) + numAlloc - numDealloc val numAlloc1 = PopCount(allocate.map(alloc => alloc.valid && alloc.bits === i.U))
XSError(RegNext(refCounter(i) + numAlloc < numDealloc), p"why $i?\n") val numDealloc1 = PopCount(deallocate.map(dealloc => dealloc.valid && dealloc.bits === i.U))
XSError(refCounterInc(i) =/= numAlloc1, p"why numAlloc ${refCounterInc(i)} $numAlloc1??")
XSError(refCounterDec(i) =/= numDealloc1, p"why numDealloc ${refCounterDec(i)} $numDealloc1??")
refCounterNext(i) := refCounter(i) + refCounterInc(i) - refCounterDec(i)
XSError(RegNext(refCounter(i) + refCounterInc(i) < refCounterDec(i)), p"why $i?\n")
refCounter(i) := refCounterNext(i) refCounter(i) := refCounterNext(i)
} }
for (i <- 0 until RobSize) { for (i <- 0 until RobSize) {
val numCounters = PopCount(refCounter.map(_ === i.U)) val numCounters = PopCount(refCounter.map(_ === i.U))
XSPerfAccumulate(s"ref_counter_$i", numCounters) XSPerfAccumulate(s"ref_counter_$i", numCounters)
}
for (i <- 0 until size) {
val isFreed = io.freeRegs.map(f => f.valid && f.bits === i.U) val isFreed = io.freeRegs.map(f => f.valid && f.bits === i.U)
XSPerfAccumulate(s"free_reg_$i", VecInit(isFreed).asUInt.orR) XSPerfAccumulate(s"free_reg_$i", VecInit(isFreed).asUInt.orR)
} }
......
...@@ -88,13 +88,13 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size) ...@@ -88,13 +88,13 @@ class StdFreeList(size: Int)(implicit p: Parameters) extends BaseFreeList(size)
val perfEvents = Output(new PerfEventsBundle(4)) val perfEvents = Output(new PerfEventsBundle(4))
}) })
val perfEvents = Seq( val perfEvents = Seq(
("std_freelist_1/4_valid ", (freeRegCnt < (StdFreeListSize.U/4.U)) ), ("std_freelist_1_4_valid", (freeRegCnt < (size / 4).U) ),
("std_freelist_2/4_valid ", (freeRegCnt > (StdFreeListSize.U/4.U)) & (freeRegCnt <= (StdFreeListSize.U/2.U)) ), ("std_freelist_2_4_valid", (freeRegCnt > (size / 4).U) & (freeRegCnt <= (size / 2).U) ),
("std_freelist_3/4_valid ", (freeRegCnt > (StdFreeListSize.U/2.U)) & (freeRegCnt <= (StdFreeListSize.U*3.U/4.U))), ("std_freelist_3_4_valid", (freeRegCnt > (size / 2).U) & (freeRegCnt <= (size * 3 / 4).U)),
("std_freelist_4/4_valid ", (freeRegCnt > (StdFreeListSize.U*3.U/4.U)) ), ("std_freelist_4_4_valid", (freeRegCnt > (size * 3 / 4).U) )
) )
for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) { for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
perf_out.incr_step := RegNext(perf) perf_out.incr_step := RegNext(perf)
} }
} }
...@@ -33,19 +33,11 @@ class FetchRequestBundle(implicit p: Parameters) extends XSBundle { ...@@ -33,19 +33,11 @@ class FetchRequestBundle(implicit p: Parameters) extends XSBundle {
val target = UInt(VAddrBits.W) val target = UInt(VAddrBits.W)
val oversize = Bool() val oversize = Bool()
def fallThroughError() = {
def carryPos = instOffsetBits+log2Ceil(PredictWidth)+1
def getLower(pc: UInt) = pc(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits)
val carry = (startAddr(carryPos) =/= fallThruAddr(carryPos)).asUInt
val startLower = Cat(0.U(1.W), getLower(startAddr))
val endLowerwithCarry = Cat(carry, getLower(fallThruAddr))
require(startLower.getWidth == log2Ceil(PredictWidth)+2)
require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
}
def fromFtqPcBundle(b: Ftq_RF_Components) = { def fromFtqPcBundle(b: Ftq_RF_Components) = {
val ftError = b.fallThroughError()
this.startAddr := b.startAddr this.startAddr := b.startAddr
this.fallThruAddr := b.getFallThrough() this.fallThruError := ftError
this.fallThruAddr := Mux(ftError, b.nextRangeAddr, b.getFallThrough())
this.oversize := b.oversize this.oversize := b.oversize
this this
} }
......
...@@ -98,11 +98,12 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH ...@@ -98,11 +98,12 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
inWire.ipf := io.in.bits.ipf(i) inWire.ipf := io.in.bits.ipf(i)
inWire.acf := io.in.bits.acf(i) inWire.acf := io.in.bits.acf(i)
inWire.crossPageIPFFix := io.in.bits.crossPageIPFFix(i) inWire.crossPageIPFFix := io.in.bits.crossPageIPFFix(i)
inWire.triggered := io.in.bits.triggered(i)
for(k<-0 until 10){ for(k<-0 until 10){
inWire.triggered.triggerHitVec(k) := false.B inWire.triggered.triggerHitVec(k) := false.B
} }
inWire.triggered := io.in.bits.triggered(i)
dontTouch(inWire.triggered.triggerHitVec)
ibuf.io.waddr(i) := tail_vec(offset(i)).value ibuf.io.waddr(i) := tail_vec(offset(i)).value
ibuf.io.wdata(i) := inWire ibuf.io.wdata(i) := inWire
......
...@@ -76,20 +76,28 @@ class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils { ...@@ -76,20 +76,28 @@ class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
val oversize = Bool() val oversize = Bool()
val carry = Bool() val carry = Bool()
def getPc(offset: UInt) = { def getPc(offset: UInt) = {
def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits) def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits) def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)), Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
getOffset(startAddr)+offset, 0.U(instOffsetBits.W)) getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
} }
def getFallThrough() = { def getFallThrough() = {
getFallThroughAddr(this.startAddr, this.carry, this.pftAddr) def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
val startHigher = getHigher(startAddr)
val nextHigher = getHigher(nextRangeAddr)
val higher = Mux(carry, nextHigher, startHigher)
Cat(higher, pftAddr, 0.U(instOffsetBits.W))
} }
def fallThroughError() = { def fallThroughError() = {
!carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr val startLower = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
val endLowerwithCarry = Cat(carry, pftAddr)
require(startLower.getWidth == log2Ceil(PredictWidth)+2)
require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
} }
def fromBranchPrediction(resp: BranchPredictionBundle) = { def fromBranchPrediction(resp: BranchPredictionBundle) = {
this.startAddr := resp.pc this.startAddr := resp.pc
this.nextRangeAddr := resp.pc + (FetchWidth * 4).U this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
this.pftAddr := this.pftAddr :=
Mux(resp.preds.hit, resp.ftb_entry.pftAddr, Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U) resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
...@@ -553,7 +561,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe ...@@ -553,7 +561,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
} }
io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) { when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
bpuPtr := bpu_s2_resp.ftq_idx + 1.U bpuPtr := bpu_s2_resp.ftq_idx + 1.U
...@@ -563,7 +571,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe ...@@ -563,7 +571,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
} }
} }
io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) { when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
bpuPtr := bpu_s3_resp.ftq_idx + 1.U bpuPtr := bpu_s3_resp.ftq_idx + 1.U
...@@ -588,41 +596,34 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe ...@@ -588,41 +596,34 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value
val toIfuReq = Wire(chiselTypeOf(io.toIfu.req)) io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
io.toIfu.req.bits.ftqIdx := ifuPtr
toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr io.toIfu.req.bits.target := update_target(ifuPtr.value)
toIfuReq.bits.ftqIdx := ifuPtr io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
toIfuReq.bits.target := update_target(ifuPtr.value)
toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
toIfuReq.bits.fallThruError := false.B
when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) { when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf) io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
}.elsewhen (last_cycle_to_ifu_fire) { }.elsewhen (last_cycle_to_ifu_fire) {
toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last) io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
}.otherwise { }.otherwise {
toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last) io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
} }
io.toIfu.req <> toIfuReq
// when fall through is smaller in value than start address, there must be a false hit // when fall through is smaller in value than start address, there must be a false hit
when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) { when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
when (io.toIfu.req.fire && when (io.toIfu.req.fire &&
!(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
!(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr) !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
) { ) {
entry_hit_status(ifuPtr.value) := h_false_hit entry_hit_status(ifuPtr.value) := h_false_hit
XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr) XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
} }
io.toIfu.req.bits.fallThruAddr := toIfuReq.bits.startAddr + (FetchWidth*4).U XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
io.toIfu.req.bits.fallThruError := true.B
XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
} }
val ifu_req_should_be_flushed = val ifu_req_should_be_flushed =
io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) || io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx) io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
when (io.toIfu.req.fire && !ifu_req_should_be_flushed) { when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
entry_fetch_status(ifuPtr.value) := f_sent entry_fetch_status(ifuPtr.value) := f_sent
...@@ -815,31 +816,13 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe ...@@ -815,31 +816,13 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
// **************************** flush ptr and state queue **************************** // **************************** flush ptr and state queue ****************************
// *********************************************************************************** // ***********************************************************************************
class RedirectInfo extends Bundle { val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)
val valid = Bool()
val ftqIdx = new FtqPtr
val ftqOffset = UInt(log2Ceil(PredictWidth).W)
val flushItSelf = Bool()
def apply(redirect: Valid[Redirect]) = {
this.valid := redirect.valid
this.ftqIdx := redirect.bits.ftqIdx
this.ftqOffset := redirect.bits.ftqOffset
this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
this
}
}
val redirectVec = Wire(Vec(3, new RedirectInfo))
val robRedirect = robFlush
redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
case (ve, r) => ve(r)
}
// when redirect, we should reset ptrs and status queues // when redirect, we should reset ptrs and status queues
when(redirectVec.map(r => r.valid).reduce(_||_)){ when(redirectVec.map(r => r.valid).reduce(_||_)){
val r = PriorityMux(redirectVec.map(r => (r.valid -> r))) val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_) val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf) val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
val next = idx + 1.U val next = idx + 1.U
bpuPtr := next bpuPtr := next
ifuPtr := next ifuPtr := next
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册