未验证 提交 c7160cd3 编写于 作者: W William Wang 提交者: GitHub

mem: update block load logic (#1035)

* mem: update block load logic

A load is now selected for issue as soon as the store it depends on
(as predicted by Store Sets) is ready

* mem: opt block load logic

A load blocked by invalid store data (std) waits for that std to issue
A load blocked by a load violation waits for that sta to issue

* csr: add 2 extra storeset config bits

Following bits were added to slvpredctl:
- storeset_wait_store
- storeset_no_fast_wakeup

* storeset: fix waitForSqIdx generate logic

The correct waitForSqIdx is now generated when the earlier store is in the
same dispatch bundle
上级 a8a94ddc
......@@ -109,6 +109,7 @@ class CtrlFlow(implicit p: Parameters) extends XSBundle {
val pred_taken = Bool()
val crossPageIPFFix = Bool()
val storeSetHit = Bool() // inst has been allocated a store set
val waitForSqIdx = new SqPtr // store set predicted previous store sqIdx
val loadWaitBit = Bool() // load inst should not be executed until all former store addr calculated
val ssid = UInt(SSIDWidth.W)
val ftqPtr = new FtqPtr
......@@ -347,6 +348,7 @@ class RSFeedback(implicit p: Parameters) extends XSBundle {
val hit = Bool()
val flushState = Bool()
val sourceType = RSFeedbackType()
val dataInvalidSqIdx = new SqPtr
}
class MemRSFeedbackIO(implicit p: Parameters) extends XSBundle {
......@@ -421,7 +423,9 @@ class CustomCSRCtrlIO(implicit p: Parameters) extends XSBundle {
// Load violation predictor
val lvpred_disable = Output(Bool())
val no_spec_load = Output(Bool())
val waittable_timeout = Output(UInt(5.W))
val storeset_wait_store = Output(Bool())
val storeset_no_fast_wakeup = Output(Bool())
val lvpred_timeout = Output(UInt(5.W))
// Branch predictor
val bp_ctrl = Output(new BPUCtrl)
// Memory Block
......
......@@ -254,6 +254,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.issue.map(_.bits.uop.clearExceptions())
exuBlocks(0).io.scheExtra.loadFastMatch.get <> memBlock.io.loadFastMatch
val stdIssue = exuBlocks(0).io.issue.get.takeRight(exuParameters.StuCnt)
exuBlocks.map(_.io).foreach { exu =>
exu.redirect <> ctrlBlock.io.redirect
exu.flush <> ctrlBlock.io.flush
......@@ -265,6 +266,14 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
exu.scheExtra.stIssuePtr <> memBlock.io.stIssuePtr
exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
exu.scheExtra.memWaitUpdateReq.staIssue.zip(memBlock.io.stIn).foreach{case (sink, src) => {
sink.bits := src.bits
sink.valid := src.valid && !csrioIn.customCtrl.storeset_no_fast_wakeup
}}
exu.scheExtra.memWaitUpdateReq.stdIssue.zip(stdIssue).foreach{case (sink, src) => {
sink.valid := src.valid
sink.bits := src.bits
}}
}
XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)
......
......@@ -251,7 +251,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
stdExeUnits(i).io.redirect <> io.redirect
stdExeUnits(i).io.flush <> io.flush
stdExeUnits(i).io.fromInt <> io.issue(i + 4)
stdExeUnits(i).io.fromInt <> io.issue(i + exuParameters.LduCnt + exuParameters.StuCnt)
stdExeUnits(i).io.fromFp := DontCare
stdExeUnits(i).io.out := DontCare
......@@ -273,7 +273,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// Lsq to load unit's rs
lsq.io.storeDataIn(i) := stData(i)
// sync issue info to store set LFST
// 1. sync issue info to store set LFST
// 2. when store issue, broadcast issued sqPtr to wake up the following insts
io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits
......
......@@ -29,7 +29,7 @@ import xiangshan.backend.fu.fpu.FMAMidResultIO
import xiangshan.backend.issue.{ReservationStation, ReservationStationWrapper}
import xiangshan.backend.regfile.{Regfile, RfReadPort, RfWritePort}
import xiangshan.backend.rename.{BusyTable, BusyTableReadIO}
import xiangshan.mem.{SqPtr, StoreDataBundle}
import xiangshan.mem.{SqPtr, StoreDataBundle, MemWaitUpdateReq}
import scala.collection.mutable.ArrayBuffer
......@@ -204,6 +204,8 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
val jumpPc = Input(UInt(VAddrBits.W))
val jalr_target = Input(UInt(VAddrBits.W))
val stIssuePtr = Input(new SqPtr())
// special ports for load / store rs
val memWaitUpdateReq = Flipped(new MemWaitUpdateReq)
// debug
val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
......@@ -227,7 +229,7 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
// wakeup-related ports
val writeback = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new ExuOutput)))
val fastUopIn = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new MicroOp)))
// feedback ports
// misc ports
val extra = new SchedulerExtraIO
val fmaMid = if (numFma > 0) Some(Vec(numFma, Flipped(new FMAMidResultIO))) else None
})
......@@ -348,6 +350,7 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
}
if (rs.io.checkwait.isDefined) {
rs.io.checkwait.get.stIssuePtr <> io.extra.stIssuePtr
rs.io.checkwait.get.memWaitUpdateReq <> io.extra.memWaitUpdateReq
}
if (rs.io.feedback.isDefined) {
val width = rs.io.feedback.get.length
......
......@@ -129,7 +129,7 @@ class SSIT(implicit p: Parameters) extends XSModule {
XSPerfAccumulate("ssit_update_lysy", memPredUpdateReqValid && loadAssigned && storeAssigned)
// reset period: ResetTimeMax2Pow
when(resetCounter(ResetTimeMax2Pow-1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.waittable_timeout))) {
when(resetCounter(ResetTimeMax2Pow-1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.lvpred_timeout))) {
for (j <- 0 until SSITSize) {
valid(j) := 0.U
}
......@@ -163,6 +163,7 @@ class LookupLFST(implicit p: Parameters) extends XSBundle {
val raddr = Vec(DecodeWidth, Input(UInt(SSIDWidth.W))) // use ssid to lookup LFST
val ren = Vec(DecodeWidth, Input(Bool())) // ren iff uop.cf.storeSetHit
val rdata = Vec(DecodeWidth, Output(Bool()))
val sqIdx = Vec(DecodeWidth, Output(new SqPtr))
}
// Last Fetched Store Table
......@@ -183,6 +184,7 @@ class LFST(implicit p: Parameters) extends XSModule {
// TODO: use MemTemplate
val validVec = RegInit(VecInit(Seq.fill(LFSTSize)(VecInit(Seq.fill(LFSTWidth)(false.B)))))
val sqIdxVec = Reg(Vec(LFSTSize, Vec(LFSTWidth, new SqPtr)))
val lastSqIdx = Reg(Vec(LFSTSize, new SqPtr))
val robIdxVec = Reg(Vec(LFSTSize, Vec(LFSTWidth, new RobPtr)))
val allocPtr = RegInit(VecInit(Seq.fill(LFSTSize)(0.U(log2Up(LFSTWidth).W))))
val valid = Wire(Vec(LFSTSize, Bool()))
......@@ -193,25 +195,34 @@ class LFST(implicit p: Parameters) extends XSModule {
// read LFST in rename stage
for (i <- 0 until DecodeWidth) {
// If store-load pair is in the same dispatch bundle, loadWaitBit should also be set for load
val hitInDispatchBundle = if(i > 0){
(0 until i).map(j =>
val hitInDispatchBundleVec = if(i > 0){
WireInit(VecInit((0 until i).map(j =>
io.dispatch(j).valid && io.dispatch(j).bits.ssid === io.lookup.raddr(i)
).reduce(_||_)
)))
} else {
false.B
WireInit(VecInit(Seq(false.B))) // DontCare
}
val hitInDispatchBundle = hitInDispatchBundleVec.asUInt.orR
// Check if store set is valid in LFST
io.lookup.rdata(i) := (
(valid(io.lookup.raddr(i)) || hitInDispatchBundle) && io.lookup.ren(i) ||
io.csrCtrl.no_spec_load // set loadWaitBit for all loads
) && !io.csrCtrl.lvpred_disable
io.lookup.sqIdx(i) := lastSqIdx(io.lookup.raddr(i))
if(i > 0){
(0 until i).map(j =>
when(hitInDispatchBundleVec(j)){
io.lookup.sqIdx(i) := io.dispatch(j).bits.sqIdx
}
)
}
}
// when store is issued, mark it as invalid
(0 until exuParameters.StuCnt).map(i => {
// TODO: opt timing
(0 until LFSTWidth).map(j => {
when(io.storeIssue(i).valid && io.storeIssue(i).bits.uop.sqIdx.asUInt === sqIdxVec(io.storeIssue(i).bits.uop.cf.ssid)(j).asUInt){
when(io.storeIssue(i).valid && io.storeIssue(i).bits.uop.sqIdx.value === sqIdxVec(io.storeIssue(i).bits.uop.cf.ssid)(j).value){
validVec(io.storeIssue(i).bits.uop.cf.ssid)(j) := false.B
}
})
......@@ -226,6 +237,7 @@ class LFST(implicit p: Parameters) extends XSModule {
validVec(waddr)(wptr) := true.B
sqIdxVec(waddr)(wptr) := io.dispatch(i).bits.sqIdx
robIdxVec(waddr)(wptr) := io.dispatch(i).bits.robIdx
lastSqIdx(waddr) := io.dispatch(i).bits.sqIdx
}
})
......
......@@ -49,7 +49,7 @@ class WaitTable(implicit p: Parameters) extends XSModule {
// reset period: ResetTimeMax2Pow
when(resetCounter(ResetTimeMax2Pow-1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.waittable_timeout))) {
when(resetCounter(ResetTimeMax2Pow-1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.lvpred_timeout))) {
for (j <- 0 until WaitTableSize) {
data(j) := 0.U
}
......
......@@ -184,15 +184,15 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasExceptionNO {
// override load delay ctrl signal with store set result
if(StoreSetEnable) {
// updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) // classic store set
updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) && !isStore(i) // store set lite
// updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) && io.fromRename(i).bits.cf.loadWaitBit && !isStore(i) // 2-bit store set
updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) &&
(!isStore(i) || io.csrCtrl.storeset_wait_store)
updatedUop(i).cf.waitForSqIdx := lfst.io.lookup.sqIdx(i)
} else {
updatedUop(i).cf.loadWaitBit := io.fromRename(i).bits.cf.loadWaitBit && !isStore(i) // wait table does not require store to be delayed
updatedUop(i).cf.waitForSqIdx := DontCare
}
// update store set LFST
io.lfst(i).valid := io.fromRename(i).valid && updatedUop(i).cf.storeSetHit && isStore(i)
io.lfst(i).valid := io.fromRename(i).fire() && updatedUop(i).cf.storeSetHit && isStore(i)
// or io.fromRename(i).ready && updatedUop(i).cf.storeSetHit && isStore(i), which is much slower
io.lfst(i).bits.robIdx := updatedUop(i).robIdx
io.lfst(i).bits.sqIdx := updatedUop(i).sqIdx
......
......@@ -470,7 +470,9 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
val slvpredctl = RegInit(UInt(XLEN.W), "h70".U) // default reset period: 2^17
csrio.customCtrl.lvpred_disable := slvpredctl(0)
csrio.customCtrl.no_spec_load := slvpredctl(1)
csrio.customCtrl.waittable_timeout := slvpredctl(8, 4)
csrio.customCtrl.storeset_wait_store := slvpredctl(2)
csrio.customCtrl.storeset_no_fast_wakeup := slvpredctl(3)
csrio.customCtrl.lvpred_timeout := slvpredctl(8, 4)
// smblockctl: memory block configurations
// bits 0-3: store buffer flush threshold (default: 8 entries)
......
......@@ -24,8 +24,8 @@ import xiangshan._
import utils._
import xiangshan.backend.exu.ExuConfig
import xiangshan.backend.fu.FuConfig
import xiangshan.mem.{SqPtr, StoreDataBundle, MemWaitUpdateReq}
import xiangshan.backend.fu.fpu.{FMAMidResult, FMAMidResultIO}
import xiangshan.mem.{SqPtr, StoreDataBundle}
import scala.math.max
......@@ -227,6 +227,8 @@ class ReservationStationIO(params: RSParams)(implicit p: Parameters) extends XSB
)) else None
val checkwait = if (params.checkWaitBit) Some(new Bundle {
val stIssuePtr = Input(new SqPtr())
val stIssue = Flipped(Vec(exuParameters.StuCnt, ValidIO(new ExuInput)))
val memWaitUpdateReq = Flipped(new MemWaitUpdateReq)
}) else None
val store = if (params.isStore) Some(new Bundle {
val stData = Vec(params.numDeq, ValidIO(new StoreDataBundle))
......@@ -279,6 +281,8 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
statusArray.io.update(i).data.srcType := VecInit(io.fromDispatch(i).bits.ctrl.srcType.take(params.numSrc))
statusArray.io.update(i).data.robIdx := io.fromDispatch(i).bits.robIdx
statusArray.io.update(i).data.sqIdx := io.fromDispatch(i).bits.sqIdx
statusArray.io.update(i).data.waitForSqIdx := io.fromDispatch(i).bits.cf.waitForSqIdx
statusArray.io.update(i).data.waitForStoreData := false.B
statusArray.io.update(i).data.isFirstIssue := true.B
// for better power, we don't write payload array when there's a redirect
payloadArray.io.write(i).enable := doEnqueue(i)
......@@ -290,6 +294,7 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
// when config.checkWaitBit is set, we need to block issue until the corresponding store issues
if (params.checkWaitBit) {
statusArray.io.stIssuePtr := io.checkwait.get.stIssuePtr
statusArray.io.memWaitUpdateReq := io.checkwait.get.memWaitUpdateReq
}
// wakeup from other RS or function units
val wakeupValid = io.fastUopsIn.map(_.valid) ++ io.slowPorts.map(_.valid)
......@@ -363,11 +368,13 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
statusArray.io.deqResp(2*i).bits.rsMask := UIntToOH(io.feedback.get(i).feedbackSlow.bits.rsIdx)
statusArray.io.deqResp(2*i).bits.success := io.feedback.get(i).feedbackSlow.bits.hit
statusArray.io.deqResp(2*i).bits.resptype := io.feedback.get(i).feedbackSlow.bits.sourceType
statusArray.io.deqResp(2*i).bits.dataInvalidSqIdx := io.feedback.get(i).feedbackSlow.bits.dataInvalidSqIdx
// feedbackFast, for load pipeline only
statusArray.io.deqResp(2*i+1).valid := io.feedback.get(i).feedbackFast.valid
statusArray.io.deqResp(2*i+1).bits.rsMask := UIntToOH(io.feedback.get(i).feedbackFast.bits.rsIdx)
statusArray.io.deqResp(2*i+1).bits.success := io.feedback.get(i).feedbackFast.bits.hit
statusArray.io.deqResp(2*i+1).bits.resptype := io.feedback.get(i).feedbackFast.bits.sourceType
statusArray.io.deqResp(2*i+1).bits.dataInvalidSqIdx := DontCare
} else {
// For FMAs that can be scheduled multiple times, only when
// all source operands are ready we dequeue the instruction.
......@@ -375,6 +382,7 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
statusArray.io.deqResp(i).bits.rsMask := issueVec(i).bits
statusArray.io.deqResp(i).bits.success := s2_deq(i).ready
statusArray.io.deqResp(i).bits.resptype := DontCare
statusArray.io.deqResp(i).bits.dataInvalidSqIdx := DontCare
}
if (io.fastWakeup.isDefined) {
......
......@@ -22,7 +22,7 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.rob.RobPtr
import xiangshan.mem.SqPtr
import xiangshan.mem.{SqPtr, MemWaitUpdateReq}
class StatusArrayUpdateIO(params: RSParams)(implicit p: Parameters) extends Bundle {
val enable = Input(Bool())
......@@ -50,6 +50,8 @@ class StatusEntry(params: RSParams)(implicit p: Parameters) extends XSBundle {
val psrc = Vec(params.numSrc, UInt(params.dataIdBits.W))
val srcType = Vec(params.numSrc, SrcType())
val robIdx = new RobPtr
val waitForSqIdx = new SqPtr // generated by store set
val waitForStoreData = Bool()
val sqIdx = new SqPtr
// misc
val isFirstIssue = Bool()
......@@ -97,8 +99,10 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
val rsMask = UInt(params.numEntries.W)
val success = Bool()
val resptype = RSFeedbackType() // update credit if needs replay
val dataInvalidSqIdx = new SqPtr
})))
val stIssuePtr = if (params.checkWaitBit) Input(new SqPtr()) else null
val memWaitUpdateReq = if (params.checkWaitBit) Flipped(new MemWaitUpdateReq) else null
})
val statusArray = Reg(Vec(params.numEntries, new StatusEntry(params)))
......@@ -138,7 +142,7 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
(stateMatch, dataMatch)
}
def deqRespSel(i: Int) : (Bool, Bool, UInt) = {
def deqRespSel(i: Int) : (Bool, Bool, UInt, SqPtr) = {
val mask = VecInit(io.deqResp.map(resp => resp.valid && resp.bits.rsMask(i)))
XSError(PopCount(mask) > 1.U, p"feedbackVec ${Binary(mask.asUInt)} should be one-hot\n")
val deqValid = mask.asUInt.orR
......@@ -148,7 +152,8 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
}
val successVec = io.deqResp.map(_.bits.success)
val respTypeVec = io.deqResp.map(_.bits.resptype)
(mask.asUInt.orR, Mux1H(mask, successVec), Mux1H(mask, respTypeVec))
val dataInvalidSqIdxVec = io.deqResp.map(_.bits.dataInvalidSqIdx)
(mask.asUInt.orR, Mux1H(mask, successVec), Mux1H(mask, respTypeVec), Mux1H(mask, dataInvalidSqIdxVec))
}
def enqUpdate(i: Int): (Bool, StatusEntry) = {
......@@ -167,7 +172,7 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
// valid: when the entry holds a valid instruction, mark it true.
// Set when (1) not (flushed or deq); AND (2) update.
val isFlushed = status.valid && status.robIdx.needFlush(io.redirect, io.flush)
val (deqRespValid, deqRespSucc, deqRespType) = deqResp(i)
val (deqRespValid, deqRespSucc, deqRespType, deqRespDataInvalidSqIdx) = deqResp(i)
flushedVec(i) := isFlushed || (deqRespValid && deqRespSucc)
val realUpdateValid = updateValid(i) && !io.redirect.valid && !io.flush
statusNext.valid := !flushedVec(i) && (realUpdateValid || status.valid)
......@@ -190,10 +195,23 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
// blocked: indicate whether the entry is blocked for issue until certain conditions meet.
statusNext.blocked := false.B
if (params.checkWaitBit) {
val blockReleased = isAfter(statusNext.sqIdx, io.stIssuePtr)
statusNext.blocked := Mux(updateValid(i), updateVal(i).blocked, status.blocked) && blockReleased
val blockNotReleased = isAfter(statusNext.sqIdx, io.stIssuePtr)
val storeAddrWaitforIsIssuing = VecInit((0 until StorePipelineWidth).map(i => {
io.memWaitUpdateReq.staIssue(i).valid &&
io.memWaitUpdateReq.staIssue(i).bits.uop.sqIdx.value === statusNext.waitForSqIdx.value
})).asUInt.orR && !statusNext.waitForStoreData // is waiting for store addr ready
val storeDataWaitforIsIssuing = VecInit((0 until StorePipelineWidth).map(i => {
io.memWaitUpdateReq.stdIssue(i).valid &&
io.memWaitUpdateReq.stdIssue(i).bits.uop.sqIdx.value === statusNext.waitForSqIdx.value
})).asUInt.orR && statusNext.waitForStoreData
statusNext.blocked := Mux(updateValid(i), updateVal(i).blocked, status.blocked) &&
!storeAddrWaitforIsIssuing &&
!storeDataWaitforIsIssuing &&
blockNotReleased
when (deqNotGranted && deqRespType === RSFeedbackType.dataInvalid) {
statusNext.blocked := true.B
statusNext.waitForSqIdx := deqRespDataInvalidSqIdx
statusNext.waitForStoreData := true.B
XSError(status.valid && !isAfter(status.sqIdx, RegNext(RegNext(io.stIssuePtr))),
"Previous store instructions are all issued. Should not trigger dataInvalid.\n")
}
......
......@@ -126,6 +126,7 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
io.out(i).bits.crossPageIPFFix := outWire.crossPageIPFFix
io.out(i).bits.foldpc := outWire.foldpc
io.out(i).bits.loadWaitBit := DontCare
io.out(i).bits.waitForSqIdx := DontCare
io.out(i).bits.storeSetHit := DontCare
io.out(i).bits.ssid := DontCare
io.out(i).bits.replayInst := false.B
......
......@@ -117,4 +117,11 @@ class PipeLoadForwardQueryIO(implicit p: Parameters) extends LoadForwardQueryIO
// dataInvalid: addr match, but data is not valid for now
val dataInvalidFast = Input(Bool()) // resp to load_s1
// val dataInvalid = Input(Bool()) // resp to load_s2
val dataInvalidSqIdx = Input(UInt(log2Up(StoreQueueSize).W)) // resp to load_s2, sqIdx value
}
// Bundle for load / store wait waking up.
// Carries store issue information that the reservation station's status array
// compares against each entry's waitForSqIdx to release a blocked instruction.
// Both vectors are sized by exuParameters.StuCnt.
class MemWaitUpdateReq(implicit p: Parameters) extends XSBundle {
// store-address (sta) issue info: a matching uop.sqIdx unblocks entries that are
// not waiting for store data
val staIssue = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
// store-data (std) issue info: a matching uop.sqIdx unblocks entries that have
// waitForStoreData set
val stdIssue = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
}
\ No newline at end of file
......@@ -335,14 +335,16 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
io.forward(i).forwardData := dataModule.io.forwardData(i)
// If addr match, data not ready, mark it as dataInvalid
// load_s1: generate dataInvalid in load_s1 to set fastUop to
// load_s1: generate dataInvalid in load_s1 to set fastUop
io.forward(i).dataInvalidFast := (addrValidVec.asUInt & ~dataValidVec.asUInt & vaddrModule.io.forwardMmask(i).asUInt & needForward).orR
val dataInvalidSqIdxReg = RegNext(OHToUInt(addrValidVec.asUInt & ~dataValidVec.asUInt & vaddrModule.io.forwardMmask(i).asUInt & needForward))
// load_s2
io.forward(i).dataInvalid := RegNext(io.forward(i).dataInvalidFast)
// load_s2
// check if vaddr forward mismatched
io.forward(i).matchInvalid := vaddrMatchFailed
io.forward(i).dataInvalidSqIdx := dataInvalidSqIdxReg
}
/**
......
......@@ -108,6 +108,7 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant
io.feedbackSlow.bits.rsIdx := RegEnable(io.rsIdx, io.in.valid)
io.feedbackSlow.bits.flushState := DontCare
io.feedbackSlow.bits.sourceType := DontCare
io.feedbackSlow.bits.dataInvalidSqIdx := DontCare
// tlb translation, manipulating signals && deal with exception
when (state === s_tlb) {
......
......@@ -218,6 +218,7 @@ class LoadUnit_S1(implicit p: Parameters) extends XSModule {
io.rsFeedback.bits.rsIdx := io.in.bits.rsIdx
io.rsFeedback.bits.flushState := io.in.bits.ptwBack
io.rsFeedback.bits.sourceType := RSFeedbackType.bankConflict
io.rsFeedback.bits.dataInvalidSqIdx := DontCare
io.out.valid := io.in.valid && !s1_bank_conflict // if bank conflict, load inst will be canceled immediately
io.out.bits.paddr := s1_paddr
......@@ -257,6 +258,7 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val pmpResp = Input(new PMPRespBundle())
val lsq = new LoadForwardQueryIO
val dataInvalidSqIdx = Input(UInt())
val sbuffer = new LoadForwardQueryIO
val dataForwarded = Output(Bool())
val needReplayFromRS = Output(Bool())
......@@ -383,6 +385,8 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
RSFeedbackType.mshrFull
)
)
io.rsFeedback.bits.dataInvalidSqIdx.value := io.dataInvalidSqIdx
io.rsFeedback.bits.dataInvalidSqIdx.flag := DontCare
// s2_cache_replay is quite slow to generate, send it separately to LQ
io.needReplayFromRS := s2_cache_replay && !fullForward
......@@ -471,6 +475,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
load_s2.io.sbuffer.matchInvalid <> io.sbuffer.matchInvalid
load_s2.io.dataForwarded <> io.lsq.loadDataForwarded
load_s2.io.fastpath <> io.fastpathOut
load_s2.io.dataInvalidSqIdx := io.lsq.forward.dataInvalidSqIdx // provide dataInvalidSqIdx to make wakeup faster
io.lsq.needReplayFromRS := load_s2.io.needReplayFromRS
// feedback tlb miss / dcache miss queue full
......
......@@ -118,6 +118,7 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule {
io.rsFeedback.bits.hit,
io.rsFeedback.bits.rsIdx
)
io.rsFeedback.bits.dataInvalidSqIdx := DontCare
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册