未验证 提交 d1fe0262 编写于 作者: W William Wang 提交者: GitHub

Add strict mode to reduce mdp mispredict (#1113)

* storeset: fix waitForSqIdx generation logic

Now the correct waitForSqIdx is generated for an earlier store in the same
dispatch bundle.

* mdp: add strict wait mode

When loadWaitStrict && loadWaitBit, the load will wait in the RS until the
address calculation of all older stores has finished.

* chore: add storeset_load_strict_wait counter
上级 ed26abcf
......@@ -110,7 +110,12 @@ class CtrlFlow(implicit p: Parameters) extends XSBundle {
val crossPageIPFFix = Bool()
val storeSetHit = Bool() // inst has been allocated an store set
val waitForSqIdx = new SqPtr // store set predicted previous store sqIdx
val loadWaitBit = Bool() // load inst should not be executed until all former store addr calcuated
// Load wait is needed:
// the load inst will not be executed until the addr of the former store (predicted by mdp) has been calculated
val loadWaitBit = Bool()
// If (loadWaitBit && loadWaitStrict), strict load wait is needed:
// the load inst will not be executed until the addrs of ALL former stores have been calculated
val loadWaitStrict = Bool()
val ssid = UInt(SSIDWidth.W)
val ftqPtr = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
......
......@@ -51,6 +51,7 @@ class DecodeStage(implicit p: Parameters) extends XSModule {
// read SSIT, get SSID
ssit.io.raddr(i) := io.in(i).bits.foldpc
decoders(i).io.enq.ctrl_flow.storeSetHit := ssit.io.rdata(i).valid
decoders(i).io.enq.ctrl_flow.loadWaitStrict := ssit.io.rdata(i).strict
decoders(i).io.enq.ctrl_flow.ssid := ssit.io.rdata(i).ssid
io.out(i).valid := io.in(i).valid
......
......@@ -32,6 +32,7 @@ class SSITEntry(implicit p: Parameters) extends XSBundle {
val valid = Bool()
val isload = Bool()
val ssid = UInt(SSIDWidth.W) // store set identifier
val strict = Bool() // strict load wait is needed
}
// Store Set Identifier Table
......@@ -47,6 +48,7 @@ class SSIT(implicit p: Parameters) extends XSModule {
val valid = RegInit(VecInit(Seq.fill(SSITSize)(false.B)))
val isload = Reg(Vec(SSITSize, Bool()))
val ssid = Reg(Vec(SSITSize, UInt(SSIDWidth.W)))
val strict = Reg(Vec(SSITSize, Bool()))
val resetCounter = RegInit(0.U(ResetTimeMax2Pow.W))
resetCounter := resetCounter + 1.U
......@@ -57,6 +59,7 @@ class SSIT(implicit p: Parameters) extends XSModule {
io.rdata(i).valid := valid(io.raddr(i))
io.rdata(i).isload := isload(io.raddr(i))
io.rdata(i).ssid := ssid(io.raddr(i))
io.rdata(i).strict := strict(io.raddr(i)) && valid(io.raddr(i))
}
// update SSIT if load violation redirect is detected
......@@ -78,6 +81,7 @@ class SSIT(implicit p: Parameters) extends XSModule {
// both the load and the store have already been assigned store sets
// but load's store set ID is smaller
val winnerSSID = Mux(loadOldSSID < storeOldSSID, loadOldSSID, storeOldSSID)
val ssidIsSame = loadOldSSID === storeOldSSID
// for now we just use lowest bits of ldpc as store set id
val ssidAllocate = memPredUpdateReqReg.ldpc(SSIDWidth-1, 0)
......@@ -91,9 +95,11 @@ class SSIT(implicit p: Parameters) extends XSModule {
valid(memPredUpdateReqReg.ldpc) := true.B
isload(memPredUpdateReqReg.ldpc) := true.B
ssid(memPredUpdateReqReg.ldpc) := ssidAllocate
strict(memPredUpdateReqReg.ldpc) := false.B
valid(memPredUpdateReqReg.stpc) := true.B
isload(memPredUpdateReqReg.stpc) := false.B
ssid(memPredUpdateReqReg.stpc) := ssidAllocate
strict(memPredUpdateReqReg.stpc) := false.B
}
// 2. "If the load has been assigned a store set, but the store has not,
// the store is assigned the load’s store set."
......@@ -101,6 +107,7 @@ class SSIT(implicit p: Parameters) extends XSModule {
valid(memPredUpdateReqReg.stpc) := true.B
isload(memPredUpdateReqReg.stpc) := false.B
ssid(memPredUpdateReqReg.stpc) := loadOldSSID
strict(memPredUpdateReqReg.stpc) := false.B
}
// 3. "If the store has been assigned a store set, but the load has not,
// the load is assigned the store’s store set."
......@@ -108,6 +115,7 @@ class SSIT(implicit p: Parameters) extends XSModule {
valid(memPredUpdateReqReg.ldpc) := true.B
isload(memPredUpdateReqReg.ldpc) := true.B
ssid(memPredUpdateReqReg.ldpc) := storeOldSSID
strict(memPredUpdateReqReg.ldpc) := false.B
}
// 4. "If both the load and the store have already been assigned store sets,
// one of the two store sets is declared the "winner".
......@@ -119,6 +127,9 @@ class SSIT(implicit p: Parameters) extends XSModule {
valid(memPredUpdateReqReg.stpc) := true.B
isload(memPredUpdateReqReg.stpc) := false.B
ssid(memPredUpdateReqReg.stpc) := winnerSSID
when(ssidIsSame){
strict(memPredUpdateReqReg.ldpc) := true.B
}
}
}
}
......@@ -127,6 +138,10 @@ class SSIT(implicit p: Parameters) extends XSModule {
XSPerfAccumulate("ssit_update_lysx", memPredUpdateReqValid && loadAssigned && !storeAssigned)
XSPerfAccumulate("ssit_update_lxsy", memPredUpdateReqValid && !loadAssigned && storeAssigned)
XSPerfAccumulate("ssit_update_lysy", memPredUpdateReqValid && loadAssigned && storeAssigned)
XSPerfAccumulate("ssit_update_should_strict", memPredUpdateReqValid && ssidIsSame && loadAssigned && storeAssigned)
XSPerfAccumulate("ssit_update_strict_failed",
memPredUpdateReqValid && ssidIsSame && strict(memPredUpdateReqReg.ldpc) && loadAssigned && storeAssigned
) // should be zero
// reset period: ResetTimeMax2Pow
when(resetCounter(ResetTimeMax2Pow-1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.lvpred_timeout))) {
......
......@@ -260,25 +260,28 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasExceptionNO {
XSPerfAccumulate("storeset_load_wait", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && !isStore(i) && isLs(i)
)))
XSPerfAccumulate("storeset_load_strict_wait", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && updatedUop(i).cf.loadWaitStrict && !isStore(i) && isLs(i)
)))
XSPerfAccumulate("storeset_store_wait", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && isStore(i)
)))
XSPerfAccumulate("loadwait_diffmat_sywy", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
XSPerfAccumulate("loadwait_diffmat_sywx", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && !io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
XSPerfAccumulate("loadwait_diffmat_sxwy", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && !updatedUop(i).cf.loadWaitBit && io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
XSPerfAccumulate("loadwait_diffmat_sxwx", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && !updatedUop(i).cf.loadWaitBit && !io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
// XSPerfAccumulate("loadwait_diffmat_sywy", PopCount((0 until RenameWidth).map(i =>
// io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && io.fromRename(i).bits.cf.loadWaitBit &&
// !isStore(i) && isLs(i)
// )))
// XSPerfAccumulate("loadwait_diffmat_sywx", PopCount((0 until RenameWidth).map(i =>
// io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && !io.fromRename(i).bits.cf.loadWaitBit &&
// !isStore(i) && isLs(i)
// )))
// XSPerfAccumulate("loadwait_diffmat_sxwy", PopCount((0 until RenameWidth).map(i =>
// io.fromRename(i).fire() && !updatedUop(i).cf.loadWaitBit && io.fromRename(i).bits.cf.loadWaitBit &&
// !isStore(i) && isLs(i)
// )))
// XSPerfAccumulate("loadwait_diffmat_sxwx", PopCount((0 until RenameWidth).map(i =>
// io.fromRename(i).fire() && !updatedUop(i).cf.loadWaitBit && !io.fromRename(i).bits.cf.loadWaitBit &&
// !isStore(i) && isLs(i)
// )))
/**
* Part 3:
......
......@@ -291,6 +291,7 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
statusArray.io.update(i).data.sqIdx := io.fromDispatch(i).bits.sqIdx
statusArray.io.update(i).data.waitForSqIdx := io.fromDispatch(i).bits.cf.waitForSqIdx
statusArray.io.update(i).data.waitForStoreData := false.B
statusArray.io.update(i).data.strictWait := io.fromDispatch(i).bits.cf.loadWaitStrict
statusArray.io.update(i).data.isFirstIssue := true.B
// for better power, we don't write payload array when there's a redirect
payloadArray.io.write(i).enable := doEnqueue(i)
......
......@@ -52,6 +52,7 @@ class StatusEntry(params: RSParams)(implicit p: Parameters) extends XSBundle {
val robIdx = new RobPtr
val waitForSqIdx = new SqPtr // generated by store set
val waitForStoreData = Bool()
val strictWait = Bool()
val sqIdx = new SqPtr
// misc
val isFirstIssue = Bool()
......@@ -199,7 +200,7 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
val storeAddrWaitforIsIssuing = VecInit((0 until StorePipelineWidth).map(i => {
io.memWaitUpdateReq.staIssue(i).valid &&
io.memWaitUpdateReq.staIssue(i).bits.uop.sqIdx.value === statusNext.waitForSqIdx.value
})).asUInt.orR && !statusNext.waitForStoreData // is waiting for stroe addr ready
})).asUInt.orR && !statusNext.waitForStoreData && !statusNext.strictWait // is waiting for store addr ready
val storeDataWaitforIsIssuing = VecInit((0 until StorePipelineWidth).map(i => {
io.memWaitUpdateReq.stdIssue(i).valid &&
io.memWaitUpdateReq.stdIssue(i).bits.uop.sqIdx.value === statusNext.waitForSqIdx.value
......@@ -208,6 +209,11 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule
!storeAddrWaitforIsIssuing &&
!storeDataWaitforIsIssuing &&
blockNotReleased
when(updateValid(i)) {
statusNext.strictWait := updateVal(i).strictWait
statusNext.waitForStoreData := updateVal(i).waitForStoreData
assert(updateVal(i).waitForStoreData === false.B)
}
when (deqNotGranted && deqRespType === RSFeedbackType.dataInvalid) {
statusNext.blocked := true.B
statusNext.waitForSqIdx := deqRespDataInvalidSqIdx
......
......@@ -128,6 +128,7 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
io.out(i).bits.loadWaitBit := DontCare
io.out(i).bits.waitForSqIdx := DontCare
io.out(i).bits.storeSetHit := DontCare
io.out(i).bits.loadWaitStrict := DontCare
io.out(i).bits.ssid := DontCare
io.out(i).bits.replayInst := false.B
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册