Commit b3d33343 authored by William Wang

LoadUnit: add naive load after load check

Parent 0ca353c7
......@@ -2,6 +2,7 @@ package xiangshan.backend
import chisel3._
import chisel3.util._
import utils._
import chipsalliance.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tile.HasFPUParameters
......@@ -246,6 +247,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.needReplayFromRS(i) <> loadUnits(i).io.lsq.needReplayFromRS
}
XSPerf("loadWbCnt", PopCount(loadUnits.map(i => i.io.ldout.fire())), acc = true)
// StoreUnit
for (i <- 0 until exuParameters.StuCnt) {
val stu = storeUnits(i)
......
......@@ -74,4 +74,6 @@ class MaskedLoadForwardQueryIO extends XSBundle {
val sqIdx = Output(new SqPtr) // for debug
// sqIdxMask is calculated in an earlier stage for better timing
val sqIdxMask = Output(UInt(StoreQueueSize.W))
val lqIdx = Output(new LqPtr) // for load-after-load (lal) violation check
}
......@@ -11,7 +11,7 @@ import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqLsqIO
import xiangshan.backend.fu.HasExceptionNO
import chisel3.experimental.chiselName
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }
......@@ -56,6 +56,7 @@ class LqEnqIO extends XSBundle {
}
// Load Queue
@chiselName
class LoadQueue extends XSModule
with HasDCacheParameters
with HasCircularQueuePtrHelper
......@@ -77,6 +78,7 @@ class LoadQueue extends XSModule
val dcache = Flipped(ValidIO(new Refill))
val uncache = new DCacheWordIO
val exceptionAddr = new ExceptionAddrIO
val lalViolation = Vec(LoadPipelineWidth, Output(Bool()))
})
val uop = Reg(Vec(LoadQueueSize, new MicroOp))
......@@ -356,18 +358,18 @@ class LoadQueue extends XSModule
}
/**
* Memory violation detection
* Memory violation detection: store
*
* When store writes back, it searches LoadQueue for younger load instructions
* with the same physical address. They loaded wrong data and need re-execution.
*
* Cycle 0: Store Writeback
* Generate match vector for store address with rangeMask(stPtr, enqPtr).
* Cycle 0: Store Writeback (Store Pipeline store_s1)
* Generate match vector for store address with rangeMask(lqIdx, enqPtr).
* Besides, load instructions in LoadUnit_S1 and S2 are also checked.
* Cycle 1: Redirect Generation
* Cycle 1: Redirect Generation (Store Pipeline store_s2)
* There're three possible types of violations, up to 6 possible redirect requests.
* Choose the oldest load (part 1). (4 + 2) -> (1 + 2)
* Cycle 2: Redirect Fire
* Cycle 2: Redirect Fire (Store Pipeline store_s3)
* Choose the oldest load (part 2). (3 -> 1)
* Prepare redirect request according to the detected violation.
* Fire redirect request (if valid)
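*
* A minimal sketch of the range-mask idea (mirroring the load-side check added below;
* "stLqIdx" is a placeholder for the store's lqIdx, not an actual signal name):
*   val stIdxMask   = UIntToMask(stLqIdx.value, LoadQueueSize)
*   val xorMask     = stIdxMask ^ enqMask
*   val sameFlag    = stLqIdx.flag === enqPtrExt(0).flag
*   val youngerMask = Mux(sameFlag, xorMask, ~xorMask) // LQ entries younger than the store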
......@@ -532,6 +534,53 @@ class LoadQueue extends XSModule
// XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.cfi, io.rollback.bits.roqIdx.asUInt)
}
/**
* Memory violation detection: load
*
* When the load's paddr is ready, it searches the LoadQueue for younger load instructions
* with the same physical address. They may have loaded wrong data and need re-execution.
*
* Cycle 0: Addr Match (Load Pipeline load_s1)
* Generate match vector for load address with rangeMask(lqIdx, enqPtr).
* Cycle 1: Redirect Generation (Load Pipeline load_s2)
* Generate violation check result based on match result
*
* A violation exception will be added to the load inst; the redirect will actually be triggered
* when it reaches the end of the roq
*/
for (i <- 0 until LoadPipelineWidth) {
// Memory violation detection: load check starts in load_s1, returns its result in load_s2
// If violation exists, the processor should rerun from this load
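// Select LQ entries from this load's lqIdx up to enqPtr (the load itself and younger loads);
// the flag compare below handles wrap-around of the circular queue.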
val startIndex = io.load_s1(i).lqIdx.value
val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
val xorMask = lqIdxMask ^ enqMask
val sameFlag = io.load_s1(i).lqIdx.flag === enqPtrExt(0).flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if a load already in the lq needs to be rolled back
dataModule.io.violation(StorePipelineWidth + i).paddr := io.load_s1(i).paddr
dataModule.io.violation(StorePipelineWidth + i).mask := io.load_s1(i).mask
val addrMaskMatch = RegNext(dataModule.io.violation(StorePipelineWidth + i).violationMask)
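// An entry needs checking only if it is allocated, falls in the younger range above,
// and has either already received its data or has an outstanding dcache miss.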
val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
})))
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
addrMaskMatch(j) && entryNeedCheck(j)
}))
val lqViolation = lqViolationVec.asUInt().orR() && io.load_s1(i).valid
val debug_lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
// check if rollback is needed for the load in load_s2
val l2Violation = io.loadIn(i).valid && RegNext(io.loadIn(i).valid) &&
isAfter(io.loadIn(i).bits.uop.roqIdx, RegNext(io.loadIn(i).bits.uop.roqIdx)) && // load in s1 is younger than load in s2
io.loadIn(i).bits.paddr(PAddrBits - 1, 3) === RegNext(io.loadIn(i).bits.paddr(PAddrBits - 1, 3)) && // Paddr Match
(io.loadIn(i).bits.mask & RegNext(io.loadIn(i).bits.mask)).orR // Mask overlap
io.lalViolation(i) := l2Violation || lqViolation
XSPerf("lalViolation", PopCount(io.lalViolation), acc = true)
}
/**
* Memory mapped IO / other uncached operations
*
......
......@@ -27,8 +27,8 @@ class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModu
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
val violationMdata = Input(Vec(2, UInt((PAddrBits).W)))
val violationMmask = Output(Vec(2, Vec(numEntries, Bool())))
val violationMdata = Input(Vec(StorePipelineWidth + LoadPipelineWidth, UInt((PAddrBits).W)))
val violationMmask = Output(Vec(StorePipelineWidth + LoadPipelineWidth, Vec(numEntries, Bool())))
val refillMdata = Input(UInt((PAddrBits).W))
val refillMmask = Output(Vec(numEntries, Bool()))
})
......@@ -48,7 +48,7 @@ class LQPaddrModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModu
}
// content addressed match
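// paddr compare drops bits [2:0]: matches are detected at 8-byte (doubleword) granularity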
for (i <- 0 until 2) {
for (i <- 0 until StorePipelineWidth + LoadPipelineWidth) {
for (j <- 0 until numEntries) {
io.violationMmask(i)(j) := io.violationMdata(i)(PAddrBits-1, 3) === data(j)(PAddrBits-1, 3)
}
......@@ -73,8 +73,8 @@ class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt(8.W)))
val violationMdata = Input(Vec(2, UInt((PAddrBits).W)))
val violationMmask = Output(Vec(2, Vec(numEntries, Bool())))
val violationMdata = Input(Vec(StorePipelineWidth + LoadPipelineWidth, UInt((PAddrBits).W)))
val violationMmask = Output(Vec(StorePipelineWidth + LoadPipelineWidth, Vec(numEntries, Bool())))
})
val data = Reg(Vec(numEntries, UInt(8.W)))
......@@ -92,7 +92,7 @@ class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule
}
// content addressed match
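// two accesses conflict when their byte masks overlap in at least one byte (bitwise AND is non-zero)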
for (i <- 0 until 2) {
for (i <- 0 until StorePipelineWidth + LoadPipelineWidth) {
for (j <- 0 until numEntries) {
io.violationMmask(i)(j) := (io.violationMdata(i) & data(j)).orR
}
......@@ -260,7 +260,7 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
val refillMask = Input(Vec(size, Bool()))
val matchMask = Output(Vec(size, Bool()))
}
val violation = Vec(StorePipelineWidth, new Bundle() {
val violation = Vec(StorePipelineWidth + LoadPipelineWidth, new Bundle() {
val paddr = Input(UInt(PAddrBits.W))
val mask = Input(UInt(8.W))
val violationMask = Output(Vec(size, Bool()))
......@@ -355,7 +355,7 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
coredataModule.io.wdata(wbNumWrite) := io.uncache.wdata
// mem access violation check, gen violationMask
(0 until StorePipelineWidth).map(i => {
(0 until StorePipelineWidth + LoadPipelineWidth).map(i => {
paddrModule.io.violationMdata(i) := io.violation(i).paddr
maskModule.io.violationMdata(i) := io.violation(i).mask
io.violation(i).violationMask := (paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt).asBools
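// an entry is flagged only when both its doubleword address matches and its byte mask overlaps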
......
......@@ -121,6 +121,7 @@ class LoadUnit_S1 extends XSModule {
io.lsq.paddr := s1_paddr
io.lsq.uop := s1_uop
io.lsq.sqIdx := s1_uop.sqIdx
io.lsq.lqIdx := s1_uop.lqIdx
io.lsq.sqIdxMask := DontCare // will be overwritten by sqIdxMask pre-generated in s0
io.lsq.mask := s1_mask
io.lsq.pc := s1_uop.cf.pc // FIXME: remove it
......