Unverified commit de169c67 authored by William Wang, committed by GitHub

backend,mem: add Store Sets memory dependence predictor (#796)

* LoadQueue: send stFtqIdx via rollback request

* It makes it possible for the store set predictor to update its SSIT

* StoreSet: setup store set update req

* StoreSet: add store set identifier table (SSIT)

* StoreSet: add last fetched store table (LFST)

* StoreSet: put SSIT into decode stage

* StoreSet: put LFST into dispatch1

* Future work: optimize timing

* RS: store rs now supports delayed issue

* StoreSet: add perf counter

* StoreSet: fix SSIT update logic

* StoreSet: delay LFST update input for 1 cycle

* StoreSet: fix LFST update logic

* StoreSet: fix LFST raddr width

* StoreSet: do not force stores in the same store set to issue in order

The classic store set scheme requires that stores in the same store set
issue in sequence. However, in the current micro-architecture, such a
restriction leads to severe performance loss. We choose to disable it
until we find another way to fix it (a behavioral sketch of the resulting
"lite" policy follows this commit message).

* StoreSet: support out-of-order stores in the same store set

* StoreSet: fix store set merge logic

* StoreSet: check earlier stores when reading the LFST

* If a store-load pair is in the same dispatch bundle, loadWaitBit should
also be set for the load

* StoreSet: increase default SSIT flush period

* StoreSet: fix LFST read logic

* Fix commit c0e541d1

* StoreSet: add StoreSetEnable parameter

* RSFeedback: add source type

* StoreQueue: split store addr and store data

* StoreQueue: update ls forward logic

* Now it supports split addr and data

* Chore: force assign name for load/store unit

* RS: add RS support for store addr/data split

* StoreQueue: fix stlf logic

* StoreQueue: fix addr wb sq update logic

* AtomicsUnit: support split addr/data

* Parameters: disable store set by default

* WaitTable: wait table will not cause store delay

* WaitTable: recover default reset period to 2^17

* Fix dev-stad merge conflict

* StoreSet: enable storeset

* RS: disable store rs delay logic

CI perf shows that the current delay logic causes perf loss. Disabling
the unnecessary delay logic helps.

To be more specific, `io.readyVec` caused the problem. It will be
updated in future commits.

* RS: opt select logic with load delay (ldWait)

* StoreSet: disable 2-bit lwt

Co-authored-by: ZhangZifei <zhangzifei20z@ict.ac.cn>
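As context for the bullets above, here is a minimal, hedged software model of the store-set flow this patch wires up: SSIT hit at decode, LFST pending-store check at dispatch, and training on a load violation. It models the "lite" policy (only loads are delayed). Names like `foldPc`, `trainViolation`, and `loadMustWait` are illustrative, not the RTL's API, and the hash and pending-count handling are assumptions.

```scala
object StoreSetSketch extends App {
  val ssitSize = 1024
  val lfstSize = 32
  val ssit = Array.fill[Option[Int]](ssitSize)(None)   // folded pc -> ssid
  val lfst = Array.fill(lfstSize)(0)                   // ssid -> pending store count

  // assumed hash: xor-fold the pc down to log2(ssitSize) bits
  def foldPc(pc: Long): Int = (((pc >> 1) ^ (pc >> 11)).toInt) & (ssitSize - 1)

  // train on a load violation: put both pcs into one set
  def trainViolation(loadPc: Long, storePc: Long): Unit = {
    val ssid = foldPc(loadPc) & (lfstSize - 1)         // low bits of ldpc, like ssidAllocate
    ssit(foldPc(loadPc))  = Some(ssid)
    ssit(foldPc(storePc)) = Some(ssid)
  }
  def storeDispatched(pc: Long): Unit = ssit(foldPc(pc)).foreach(id => lfst(id) += 1)
  def storeIssued(pc: Long): Unit     = ssit(foldPc(pc)).foreach(id => lfst(id) = 0 max (lfst(id) - 1))
  // store-set-lite behavior: only loads are delayed, stores issue freely
  def loadMustWait(pc: Long): Boolean = ssit(foldPc(pc)).exists(id => lfst(id) > 0)

  trainViolation(0x80000010L, 0x80000008L)
  storeDispatched(0x80000008L)
  println(loadMustWait(0x80000010L))                   // true: producing store still pending
  storeIssued(0x80000008L)
  println(loadMustWait(0x80000010L))                   // false: safe to issue the load
}
```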
Parent 7ebd1b2c
......@@ -3,7 +3,7 @@ package xiangshan
import chisel3._
import chisel3.util._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.decode.{ImmUnion, WaitTableParameters, XDecode}
import xiangshan.backend.decode.{ImmUnion, XDecode}
import xiangshan.mem.{LqPtr, SqPtr}
import xiangshan.frontend.PreDecodeInfoForDebug
import xiangshan.frontend.PreDecodeInfo
......@@ -23,13 +23,13 @@ import chipsalliance.rocketchip.config.Parameters
import xiangshan.backend.ftq.FtqPtr
// Fetch FetchWidth x 32-bit insts from Icache
class FetchPacket(implicit p: Parameters) extends XSBundle with WaitTableParameters {
class FetchPacket(implicit p: Parameters) extends XSBundle {
val instrs = Vec(PredictWidth, UInt(32.W))
val mask = UInt(PredictWidth.W)
val pdmask = UInt(PredictWidth.W)
// val pc = UInt(VAddrBits.W)
val pc = Vec(PredictWidth, UInt(VAddrBits.W))
val foldpc = Vec(PredictWidth, UInt(WaitTableAddrWidth.W))
val foldpc = Vec(PredictWidth, UInt(MemPredPCWidth.W))
val pd = Vec(PredictWidth, new PreDecodeInfo)
val ipf = Bool()
val acf = Bool()
......@@ -179,16 +179,18 @@ class CfiUpdateInfo(implicit p: Parameters) extends XSBundle with HasBPUParamete
}
// Dequeue DecodeWidth insts from Ibuffer
class CtrlFlow(implicit p: Parameters) extends XSBundle with WaitTableParameters {
class CtrlFlow(implicit p: Parameters) extends XSBundle {
val instr = UInt(32.W)
val pc = UInt(VAddrBits.W)
val foldpc = UInt(WaitTableAddrWidth.W)
val foldpc = UInt(MemPredPCWidth.W)
val exceptionVec = ExceptionVec()
val intrVec = Vec(12, Bool())
val pd = new PreDecodeInfo
val pred_taken = Bool()
val crossPageIPFFix = Bool()
val storeSetHit = Bool() // inst has been allocated a store set
val loadWaitBit = Bool() // load inst should not be executed until all older store addresses are calculated
val ssid = UInt(SSIDWidth.W)
val ftqPtr = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
}
......@@ -325,6 +327,11 @@ class MicroOp(implicit p: Parameters) extends CfCtrl {
}
}
class MicroOpRbExt(implicit p: Parameters) extends XSBundle {
val uop = new MicroOp
val flag = UInt(1.W)
}
class Redirect(implicit p: Parameters) extends XSBundle {
val roqIdx = new RoqPtr
val ftqIdx = new FtqPtr
......@@ -333,6 +340,8 @@ class Redirect(implicit p: Parameters) extends XSBundle {
val interrupt = Bool()
val cfiUpdate = new CfiUpdateInfo
val stFtqIdx = new FtqPtr // for load violation predict
val stFtqOffset = UInt(log2Up(PredictWidth).W)
// def isUnconditional() = RedirectLevel.isUnconditional(level)
def flushItself() = RedirectLevel.flushItself(level)
......@@ -467,10 +476,18 @@ class SfenceBundle(implicit p: Parameters) extends XSBundle {
}
}
class WaitTableUpdateReq(implicit p: Parameters) extends XSBundle with WaitTableParameters {
// Bundle for load violation predictor updating
class MemPredUpdateReq(implicit p: Parameters) extends XSBundle {
val valid = Bool()
val waddr = UInt(WaitTableAddrWidth.W)
// wait table update
val waddr = UInt(MemPredPCWidth.W)
val wdata = Bool() // true.B by default
// store set update
// by default, ldpc/stpc should be xor folded
val ldpc = UInt(MemPredPCWidth.W)
val stpc = UInt(MemPredPCWidth.W)
}
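The comment in the bundle above notes that ldpc/stpc arrive XOR-folded. As a hedged illustration (the exact `utils.XORFold` implementation may differ in detail), folding can be modeled as XOR-ing successive `resWidth`-bit slices of `pc(VAddrBits-1, 1)` together:

```scala
object XorFoldSketch extends App {
  // XOR successive resWidth-bit slices of the input together (assumed behavior)
  def xorFold(value: BigInt, inWidth: Int, resWidth: Int): BigInt = {
    val mask = (BigInt(1) << resWidth) - 1
    (0 until inWidth by resWidth).map(sh => (value >> sh) & mask).reduce(_ ^ _)
  }
  // fold pc(38, 1) of a 39-bit VA down to a 10-bit MemPredPCWidth index
  println(xorFold(BigInt("80000010", 16) >> 1, 38, 10))
}
```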
class PerfInfoIO extends Bundle {
......
......@@ -285,7 +285,21 @@ trait HasXSParameter {
blockBytes = L2BlockSize,
nEntries = dcacheParameters.nMissEntries * 2 // TODO: this is too large
),
)
)
// load violation predict
val ResetTimeMax2Pow = 20 //1048576
val ResetTimeMin2Pow = 10 //1024
// wait table parameters
val WaitTableSize = 1024
val MemPredPCWidth = log2Up(WaitTableSize)
val LWTUse2BitCounter = true
// store set parameters
val SSITSize = WaitTableSize
val LFSTSize = 32
val SSIDWidth = log2Up(LFSTSize)
val LFSTWidth = 4
val StoreSetEnable = true // LWT will be disabled if SS is enabled
val loadExuConfigs = coreParams.loadExuConfigs
val storeExuConfigs = coreParams.storeExuConfigs
......
......@@ -5,7 +5,7 @@ import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.decode.{DecodeStage, ImmUnion, WaitTableParameters}
import xiangshan.backend.decode.{DecodeStage, ImmUnion}
import xiangshan.backend.rename.{BusyTable, Rename}
import xiangshan.backend.dispatch.Dispatch
import xiangshan.backend.exu._
......@@ -38,13 +38,13 @@ class CtrlToFpBlockIO(implicit p: Parameters) extends XSBundle {
class CtrlToLsBlockIO(implicit p: Parameters) extends XSBundle {
val enqIqCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
val enqLsq = Flipped(new LsqEnqIO)
val waitTableUpdate = Vec(StorePipelineWidth, Input(new WaitTableUpdateReq))
val memPredUpdate = Vec(StorePipelineWidth, Input(new MemPredUpdateReq))
val redirect = ValidIO(new Redirect)
val flush = Output(Bool())
}
class RedirectGenerator(implicit p: Parameters) extends XSModule
with HasCircularQueuePtrHelper with WaitTableParameters with HasFtqHelper {
with HasCircularQueuePtrHelper with HasFtqHelper {
val numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt
val io = IO(new Bundle() {
val exuMispredict = Vec(numRedirect, Flipped(ValidIO(new ExuOutput)))
......@@ -54,7 +54,8 @@ class RedirectGenerator(implicit p: Parameters) extends XSModule
val stage2FtqRead = new FtqRead
val stage2Redirect = ValidIO(new Redirect)
val stage3Redirect = ValidIO(new Redirect)
val waitTableUpdate = Output(new WaitTableUpdateReq)
val memPredUpdate = Output(new MemPredUpdateReq)
val memPredFtqRead = new FtqRead // read req sent from stage 2
})
/*
LoadQueue Jump ALU0 ALU1 ALU2 ALU3 exception Stage1
......@@ -143,12 +144,26 @@ class RedirectGenerator(implicit p: Parameters) extends XSModule
)
)
// update waittable if load violation redirect triggered
io.waitTableUpdate.valid := RegNext(s1_isReplay && s1_redirect_valid_reg, init = false.B)
io.waitTableUpdate.waddr := RegNext(XORFold(real_pc(VAddrBits-1, 1), WaitTableAddrWidth))
io.waitTableUpdate.wdata := true.B
// get pc from ftq
io.memPredFtqRead.ptr := s1_redirect_bits_reg.stFtqIdx
// valid only if redirect is caused by load violation
// store_pc is used to update store set
val memPredFtqRead = io.memPredFtqRead.entry
val store_pc = GetPcByFtq(memPredFtqRead.ftqPC, RegNext(s1_redirect_bits_reg).stFtqOffset,
memPredFtqRead.lastPacketPC.valid,
memPredFtqRead.lastPacketPC.bits
)
// update load violation predictor if load violation redirect triggered
io.memPredUpdate.valid := RegNext(s1_isReplay && s1_redirect_valid_reg, init = false.B)
// update wait table
io.memPredUpdate.waddr := RegNext(XORFold(real_pc(VAddrBits-1, 1), MemPredPCWidth))
io.memPredUpdate.wdata := true.B
// update store set
io.memPredUpdate.ldpc := RegNext(XORFold(real_pc(VAddrBits-1, 1), MemPredPCWidth))
// store pc is ready 1 cycle after s1_isReplay is judged
io.memPredUpdate.stpc := XORFold(store_pc(VAddrBits-1, 1), MemPredPCWidth)
io.stage2FtqRead.ptr := s1_redirect_bits_reg.ftqIdx
val s2_br_mask = RegEnable(ftqRead.br_mask, enable = s1_redirect_valid_reg)
val s2_sawNotTakenBranch = RegEnable(VecInit((0 until PredictWidth).map{ i =>
......@@ -242,7 +257,8 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
init = false.B
)
loadReplay.bits := RegEnable(io.fromLsBlock.replay.bits, io.fromLsBlock.replay.valid)
VecInit(ftq.io.ftqRead.tail.dropRight(1)) <> redirectGen.io.stage1FtqRead
VecInit(ftq.io.ftqRead.tail.dropRight(2)) <> redirectGen.io.stage1FtqRead
ftq.io.ftqRead.dropRight(1).last <> redirectGen.io.memPredFtqRead
ftq.io.cfiRead <> redirectGen.io.stage2FtqRead
redirectGen.io.exuMispredict <> exuRedirect
redirectGen.io.loadReplay <> loadReplay
......@@ -288,10 +304,10 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
decode.io.in <> io.frontend.cfVec
// currently, we only update wait table when isReplay
decode.io.waitTableUpdate(0) <> RegNext(redirectGen.io.waitTableUpdate)
decode.io.waitTableUpdate(1) := DontCare
decode.io.waitTableUpdate(1).valid := false.B
// decode.io.waitTableUpdate <> io.toLsBlock.waitTableUpdate
decode.io.memPredUpdate(0) <> RegNext(redirectGen.io.memPredUpdate)
decode.io.memPredUpdate(1) := DontCare
decode.io.memPredUpdate(1).valid := false.B
// decode.io.memPredUpdate <> io.toLsBlock.memPredUpdate
decode.io.csrCtrl := RegNext(io.csrCtrl)
......@@ -335,6 +351,8 @@ class CtrlBlock(implicit p: Parameters) extends XSModule
dispatch.io.numExist <> io.fromIntBlock.numExist ++ io.fromFpBlock.numExist ++ io.fromLsBlock.numExist
dispatch.io.enqIQCtrl <> io.toIntBlock.enqIqCtrl ++ io.toFpBlock.enqIqCtrl ++ io.toLsBlock.enqIqCtrl
// dispatch.io.enqIQData <> io.toIntBlock.enqIqData ++ io.toFpBlock.enqIqData ++ io.toLsBlock.enqIqData
dispatch.io.csrCtrl <> io.csrCtrl
dispatch.io.storeIssue <> io.fromLsBlock.stIn
fpBusyTable.io.flush := flushReg
......
......@@ -17,6 +17,7 @@ import xiangshan.backend.regfile.RfReadPort
import utils._
class LsBlockToCtrlIO(implicit p: Parameters) extends XSBundle {
val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput))
val numExist = Vec(exuParameters.LsExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
val replay = ValidIO(new Redirect)
......@@ -246,7 +247,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// update waittable
// TODO: read pc
io.fromCtrlBlock.waitTableUpdate(i) := DontCare
io.fromCtrlBlock.memPredUpdate(i) := DontCare
lsq.io.needReplayFromRS(i) <> loadUnits(i).io.lsq.needReplayFromRS
}
......@@ -265,6 +266,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
stu.io.stin <> rs.io.deq
stu.io.lsq <> lsq.io.storeIn(i)
// Lsq to load unit's rs
rs.io.stIssuePtr := lsq.io.issuePtrExt
// rs.io.storeData <> lsq.io.storeDataIn(i)
lsq.io.storeDataIn(i) := rs.io.stData
......@@ -272,6 +275,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.storeIssue(i).valid := rs.io.deq.valid
lsq.io.storeIssue(i).bits := rs.io.deq.bits
// sync issue info to store set LFST
io.toCtrlBlock.stIn(i).valid := rs.io.deq.valid
io.toCtrlBlock.stIn(i).bits := rs.io.deq.bits
io.toCtrlBlock.stOut(i).valid := stu.io.stout.valid
io.toCtrlBlock.stOut(i).bits := stu.io.stout.bits
stu.io.stout.ready := true.B
......
......@@ -11,7 +11,7 @@ class DecodeStage(implicit p: Parameters) extends XSModule {
// from Ibuffer
val in = Vec(DecodeWidth, Flipped(DecoupledIO(new CtrlFlow)))
// from memblock
val waitTableUpdate = Vec(StorePipelineWidth, Input(new WaitTableUpdateReq))
val memPredUpdate = Vec(StorePipelineWidth, Input(new MemPredUpdateReq))
// to DecBuffer
val out = Vec(DecodeWidth, DecoupledIO(new CfCtrl))
// waitable ctrl
......@@ -19,7 +19,12 @@ class DecodeStage(implicit p: Parameters) extends XSModule {
})
val decoders = Seq.fill(DecodeWidth)(Module(new DecodeUnit))
// basic wait table load violation predictor (for debug only)
val waittable = Module(new WaitTable)
// store set load violation predictor stage 1: SSIT look up
val ssit = Module(new SSIT)
for (i <- 0 until DecodeWidth) {
decoders(i).io.enq.ctrl_flow <> io.in(i).bits
......@@ -27,18 +32,26 @@ class DecodeStage(implicit p: Parameters) extends XSModule {
waittable.io.raddr(i) := io.in(i).bits.foldpc
decoders(i).io.enq.ctrl_flow.loadWaitBit := waittable.io.rdata(i)
// read SSIT, get SSID
ssit.io.raddr(i) := io.in(i).bits.foldpc
decoders(i).io.enq.ctrl_flow.storeSetHit := ssit.io.rdata(i).valid
decoders(i).io.enq.ctrl_flow.ssid := ssit.io.rdata(i).ssid
io.out(i).valid := io.in(i).valid
io.out(i).bits := decoders(i).io.deq.cf_ctrl
io.in(i).ready := io.out(i).ready
}
for (i <- 0 until StorePipelineWidth) {
waittable.io.update(i) <> RegNext(io.waitTableUpdate(i))
waittable.io.update(i) <> RegNext(io.memPredUpdate(i))
}
waittable.io.csrCtrl <> io.csrCtrl
ssit.io.update <> RegNext(io.memPredUpdate(0))
ssit.io.csrCtrl <> io.csrCtrl
val loadWaitBitSet = PopCount(io.out.map(o => o.fire() && o.bits.cf.loadWaitBit))
XSPerfAccumulate("loadWaitBitSet", loadWaitBitSet)
val storeSetHit = PopCount(io.out.map(o => o.fire() && o.bits.cf.storeSetHit))
XSPerfAccumulate("storeset_ssit_hit", storeSetHit)
val hasValid = VecInit(io.in.map(_.valid)).asUInt.orR
XSPerfAccumulate("utilization", PopCount(io.in.map(_.valid)))
......
package xiangshan.backend.decode
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.mem.{LqPtr, SqPtr}
import xiangshan.backend.roq.RoqPtr
// store set load violation predictor
// See "Memory Dependence Prediction using Store Sets" for details
// Store Set Identifier Table Entry
class SSITEntry(implicit p: Parameters) extends XSBundle {
val valid = Bool()
val isload = Bool()
val ssid = UInt(SSIDWidth.W) // store set identifier
}
// Store Set Identifier Table
class SSIT(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle {
val raddr = Vec(DecodeWidth, Input(UInt(MemPredPCWidth.W))) // xor hashed decode pc(VaddrBits-1, 1)
val rdata = Vec(DecodeWidth, Output(new SSITEntry))
val update = Input(new MemPredUpdateReq) // RegNext should be added outside
val csrCtrl = Input(new CustomCSRCtrlIO)
})
// TODO: use MemTemplate
val valid = RegInit(VecInit(Seq.fill(SSITSize)(false.B)))
val isload = Reg(Vec(SSITSize, Bool()))
val ssid = Reg(Vec(SSITSize, UInt(SSIDWidth.W)))
val resetCounter = RegInit(0.U(ResetTimeMax2Pow.W))
resetCounter := resetCounter + 1.U
// read SSIT in decode stage
for (i <- 0 until DecodeWidth) {
// io.rdata(i) := (data(io.raddr(i))(1) || io.csrCtrl.no_spec_load) && !io.csrCtrl.lvpred_disable
io.rdata(i).valid := valid(io.raddr(i))
io.rdata(i).isload := isload(io.raddr(i))
io.rdata(i).ssid := ssid(io.raddr(i))
}
// update SSIT if load violation redirect is detected
// update stage -1
// when io.update.valid, we should RegNext() it for at least 1 cycle
// outside of SSIT.
// update stage 0
// RegNext(io.update) while reading SSIT entry for necessary information
val memPredUpdateReqValid = RegNext(io.update.valid)
val memPredUpdateReqReg = RegEnable(io.update, enable = io.update.valid)
// whether the load has already been assigned a store set
val loadAssigned = RegNext(valid(io.update.ldpc))
val loadOldSSID = RegNext(ssid(io.update.ldpc))
// whether the store has already been assigned a store set
val storeAssigned = RegNext(valid(io.update.stpc))
val storeOldSSID = RegNext(ssid(io.update.stpc))
// when both the load and the store have already been assigned store sets,
// the one with the smaller store set ID is the winner
val winnerSSID = Mux(loadOldSSID < storeOldSSID, loadOldSSID, storeOldSSID)
// for now we just use lowest bits of ldpc as store set id
val ssidAllocate = memPredUpdateReqReg.ldpc(SSIDWidth-1, 0)
// update stage 1
when(memPredUpdateReqValid){
switch (Cat(loadAssigned, storeAssigned)) {
// 1. "If neither the load nor the store has been assigned a store set,
// one is allocated and assigned to both instructions."
is ("b00".U(2.W)) {
valid(memPredUpdateReqReg.ldpc) := true.B
isload(memPredUpdateReqReg.ldpc) := true.B
ssid(memPredUpdateReqReg.ldpc) := ssidAllocate
valid(memPredUpdateReqReg.stpc) := true.B
isload(memPredUpdateReqReg.stpc) := false.B
ssid(memPredUpdateReqReg.stpc) := ssidAllocate
}
// 2. "If the load has been assigned a store set, but the store has not,
// the store is assigned the load’s store set."
is ("b10".U(2.W)) {
valid(memPredUpdateReqReg.stpc) := true.B
isload(memPredUpdateReqReg.stpc) := false.B
ssid(memPredUpdateReqReg.stpc) := loadOldSSID
}
// 3. "If the store has been assigned a store set, but the load has not,
// the load is assigned the store’s store set."
is ("b01".U(2.W)) {
valid(memPredUpdateReqReg.ldpc) := true.B
isload(memPredUpdateReqReg.ldpc) := true.B
ssid(memPredUpdateReqReg.ldpc) := storeOldSSID
}
// 4. "If both the load and the store have already been assigned store sets,
// one of the two store sets is declared the "winner".
// The instruction belonging to the loser’s store set is assigned the winner’s store set."
is ("b11".U(2.W)) {
valid(memPredUpdateReqReg.ldpc) := true.B
isload(memPredUpdateReqReg.ldpc) := true.B
ssid(memPredUpdateReqReg.ldpc) := winnerSSID
valid(memPredUpdateReqReg.stpc) := true.B
isload(memPredUpdateReqReg.stpc) := false.B
ssid(memPredUpdateReqReg.stpc) := winnerSSID
}
}
}
XSPerfAccumulate("ssit_update_lxsx", memPredUpdateReqValid && !loadAssigned && !storeAssigned)
XSPerfAccumulate("ssit_update_lysx", memPredUpdateReqValid && loadAssigned && !storeAssigned)
XSPerfAccumulate("ssit_update_lxsy", memPredUpdateReqValid && !loadAssigned && storeAssigned)
XSPerfAccumulate("ssit_update_lysy", memPredUpdateReqValid && loadAssigned && storeAssigned)
// reset period: ResetTimeMax2Pow
when(resetCounter(ResetTimeMax2Pow-1, ResetTimeMin2Pow)(RegNext(io.csrCtrl.waittable_timeout))) {
for (j <- 0 until SSITSize) {
valid(j) := false.B
}
resetCounter := 0.U
}
// debug
for (i <- 0 until StorePipelineWidth) {
when (memPredUpdateReqReg.valid) {
XSDebug("%d: SSIT update: load pc %x store pc %x\n", GTimer(), memPredUpdateReqReg.ldpc, memPredUpdateReqReg.stpc)
XSDebug("%d: SSIT update: load valid %b ssid %x store valid %b ssid %x\n", GTimer(), loadAssigned, loadOldSSID, storeAssigned,storeOldSSID)
}
}
}
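The four quoted rules from the paper map directly onto a small software model. This is a hedged behavioral sketch only: it ignores the two-stage update pipeline, the reset counter, and SRAM timing, and `update` is an illustrative name; ldpc/stpc are taken as already-folded table indices, matching how the RTL uses them.

```scala
object SsitUpdateSketch extends App {
  case class Entry(var valid: Boolean = false, var isload: Boolean = false, var ssid: Int = 0)
  val ssit = Array.fill(1024)(Entry())

  def update(ldpc: Int, stpc: Int): Unit = {
    val (l, s) = (ssit(ldpc), ssit(stpc))
    val alloc = ldpc & 31                              // low bits of ldpc, like ssidAllocate
    (l.valid, s.valid) match {
      case (false, false) =>                           // rule 1: allocate one set for both
        l.valid = true; l.isload = true;  l.ssid = alloc
        s.valid = true; s.isload = false; s.ssid = alloc
      case (true, false) =>                            // rule 2: store joins the load's set
        s.valid = true; s.isload = false; s.ssid = l.ssid
      case (false, true) =>                            // rule 3: load joins the store's set
        l.valid = true; l.isload = true; l.ssid = s.ssid
      case (true, true) =>                             // rule 4: smaller SSID wins, both join it
        val winner = l.ssid min s.ssid
        l.ssid = winner; s.ssid = winner
        l.isload = true; s.isload = false
    }
  }
  update(5, 9); update(5, 13)
  println(ssit(9).ssid == ssit(13).ssid)               // true: both stores merged into one set
}
```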
// Last Fetched Store Table Entry
class LFSTEntry(implicit p: Parameters) extends XSBundle {
val valid = Bool()
val sqIdx = new SqPtr
val roqIdx = new RoqPtr
}
class DispatchToLFST(implicit p: Parameters) extends XSBundle {
val sqIdx = new SqPtr
val roqIdx = new RoqPtr
val ssid = UInt(SSIDWidth.W)
}
class LookupLFST(implicit p: Parameters) extends XSBundle {
val raddr = Vec(DecodeWidth, Input(UInt(SSIDWidth.W))) // use ssid to look up LFST
val ren = Vec(DecodeWidth, Input(Bool())) // ren iff uop.cf.storeSetHit
val rdata = Vec(DecodeWidth, Output(Bool()))
}
// Last Fetched Store Table
class LFST(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle {
val lookup = new LookupLFST
// val update = Input(new MemPredUpdateReq) // RegNext should be added outside
// when redirect, mark canceled store as invalid
val redirect = Input(Valid(new Redirect))
val flush = Input(Bool())
// when store is dispatched, mark it as valid
val dispatch = Vec(RenameWidth, Flipped(Valid(new DispatchToLFST)))
// when store issued, mark store as invalid
val storeIssue = Vec(exuParameters.StuCnt, Flipped(Valid(new ExuInput)))
val csrCtrl = Input(new CustomCSRCtrlIO)
})
// TODO: use MemTemplate
val validVec = RegInit(VecInit(Seq.fill(LFSTSize)(VecInit(Seq.fill(LFSTWidth)(false.B)))))
val sqIdxVec = Reg(Vec(LFSTSize, Vec(LFSTWidth, new SqPtr)))
val roqIdxVec = Reg(Vec(LFSTSize, Vec(LFSTWidth, new RoqPtr)))
val allocPtr = RegInit(VecInit(Seq.fill(LFSTSize)(0.U(log2Up(LFSTWidth).W))))
val valid = Wire(Vec(LFSTSize, Bool()))
(0 until LFSTSize).map(i => {
valid(i) := validVec(i).asUInt.orR
})
// read LFST in rename stage
for (i <- 0 until DecodeWidth) {
// If store-load pair is in the same dispatch bundle, loadWaitBit should also be set for load
val hitInDispatchBundle = if(i > 0){
(0 until i).map(j =>
io.dispatch(j).valid && io.dispatch(j).bits.ssid === io.lookup.raddr(i)
).reduce(_||_)
} else {
false.B
}
// Check if store set is valid in LFST
io.lookup.rdata(i) := (
(valid(io.lookup.raddr(i)) || hitInDispatchBundle) && io.lookup.ren(i) ||
io.csrCtrl.no_spec_load // set loadWaitBit for all loads
) && !io.csrCtrl.lvpred_disable
}
// when store is issued, mark it as invalid
(0 until exuParameters.StuCnt).map(i => {
// TODO: opt timing
(0 until LFSTWidth).map(j => {
when(io.storeIssue(i).valid && io.storeIssue(i).bits.uop.sqIdx.asUInt === sqIdxVec(io.storeIssue(i).bits.uop.cf.ssid)(j).asUInt){
validVec(io.storeIssue(i).bits.uop.cf.ssid)(j) := false.B
}
})
})
// when store is dispatched, mark it as valid
(0 until RenameWidth).map(i => {
when(io.dispatch(i).valid){
val waddr = io.dispatch(i).bits.ssid
val wptr = allocPtr(waddr)
allocPtr(waddr) := allocPtr(waddr) + 1.U
validVec(waddr)(wptr) := true.B
sqIdxVec(waddr)(wptr) := io.dispatch(i).bits.sqIdx
roqIdxVec(waddr)(wptr) := io.dispatch(i).bits.roqIdx
}
})
// when a redirect fires, cancel the influenced stores
(0 until LFSTSize).map(i => {
(0 until LFSTWidth).map(j => {
when(roqIdxVec(i)(j).needFlush(io.redirect, io.flush)){
validVec(i)(j) := false.B
}
})
})
}
\ No newline at end of file
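The `hitInDispatchBundle` check in the LFST above exists because a store dispatched in an earlier slot of the same bundle is not yet visible in the LFST registers (they only update next cycle). A hedged sketch of that slot-ordering logic, with illustrative names:

```scala
object SameBundleSketch extends App {
  case class Slot(storeValid: Boolean, ssid: Int)
  // slot i must also consider stores in earlier slots of the same bundle
  def hitInBundle(slots: Seq[Slot], i: Int, loadSsid: Int): Boolean =
    slots.take(i).exists(s => s.storeValid && s.ssid == loadSsid)

  val bundle = Seq(Slot(storeValid = true, ssid = 3), Slot(storeValid = false, ssid = 3))
  println(hitInBundle(bundle, 1, loadSsid = 3))        // true: slot 0's store blocks slot 1's load
}
```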
......@@ -6,19 +6,12 @@ import chisel3.util._
import xiangshan._
import utils._
trait WaitTableParameters {
val WaitTableSize = 1024
val WaitTableAddrWidth = log2Up(WaitTableSize)
val ResetTimeMax2Pow = 20 //1078576
val ResetTimeMin2Pow = 10 //1024
}
// 21264-like wait table
class WaitTable(implicit p: Parameters) extends XSModule with WaitTableParameters {
class WaitTable(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle {
val raddr = Vec(DecodeWidth, Input(UInt(WaitTableAddrWidth.W))) // decode pc(VaddrBits-1, 1)
val raddr = Vec(DecodeWidth, Input(UInt(MemPredPCWidth.W))) // decode pc(VaddrBits-1, 1)
val rdata = Vec(DecodeWidth, Output(Bool())) // loadWaitBit
val update = Vec(StorePipelineWidth, Input(new WaitTableUpdateReq)) // RegNext should be added outside
val update = Vec(StorePipelineWidth, Input(new MemPredUpdateReq)) // RegNext should be added outside
val csrCtrl = Input(new CustomCSRCtrlIO)
})
......@@ -28,7 +21,7 @@ class WaitTable(implicit p: Parameters) extends XSModule with WaitTableParameter
// read ports
for (i <- 0 until DecodeWidth) {
io.rdata(i) := (data(io.raddr(i))(1) || io.csrCtrl.no_spec_load) && !io.csrCtrl.lvpred_disable
io.rdata(i) := (data(io.raddr(i))(LWTUse2BitCounter.B.asUInt) || io.csrCtrl.no_spec_load) && !io.csrCtrl.lvpred_disable
}
// write ports (with priority)
......
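For comparison with the store set predictor, the 21264-style wait table above reduces to a table of small saturating entries indexed by folded PC, read as either a 1-bit or 2-bit predictor depending on `LWTUse2BitCounter`. A hedged model, assuming each entry counts up on a violation and the whole table is cleared every reset period (the actual write-port logic is elided in this diff):

```scala
object WaitTableSketch extends App {
  val table = Array.fill(1024)(0)                      // 2-bit entries, 0..3
  def train(foldpc: Int): Unit = table(foldpc) = (table(foldpc) + 1) min 3
  // 2-bit mode waits after two violations; 1-bit mode after the first
  def loadWaitBit(foldpc: Int, use2Bit: Boolean): Boolean =
    if (use2Bit) table(foldpc) >= 2 else table(foldpc) >= 1
  def periodicReset(): Unit = java.util.Arrays.fill(table, 0)

  train(42)
  println(loadWaitBit(42, use2Bit = true))             // false after one violation
  train(42)
  println(loadWaitBit(42, use2Bit = true))             // true after two
}
```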
......@@ -50,6 +50,9 @@ class Dispatch(implicit p: Parameters) extends XSModule {
val fpIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
// ls: hardwired to (0, 1, 2, 4)
}
val csrCtrl = Input(new CustomCSRCtrlIO)
// LFST state sync
val storeIssue = Vec(StorePipelineWidth, Flipped(Valid(new ExuInput)))
val ctrlInfo = new Bundle {
val roqFull = Output(Bool())
val intdqFull = Output(Bool())
......@@ -80,6 +83,10 @@ class Dispatch(implicit p: Parameters) extends XSModule {
dispatch1.io.toFpDq <> fpDq.io.enq
dispatch1.io.toLsDq <> lsDq.io.enq
dispatch1.io.allocPregs <> io.allocPregs
dispatch1.io.csrCtrl <> io.csrCtrl
dispatch1.io.storeIssue <> io.storeIssue
dispatch1.io.redirect <> io.redirect
dispatch1.io.flush <> io.flush
// dispatch queue: queue uops and dispatch them to different reservation stations or issue queues
// it may cancel the uops
......
......@@ -9,6 +9,7 @@ import xiangshan.backend.roq.{RoqEnqIO, RoqPtr}
import xiangshan.backend.rename.RenameBypassInfo
import xiangshan.mem.LsqEnqIO
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.backend.decode.{LFST, DispatchToLFST, LookupLFST}
class PreDispatchInfo(implicit p: Parameters) extends XSBundle {
......@@ -44,9 +45,30 @@ class Dispatch1(implicit p: Parameters) extends XSModule with HasExceptionNO {
val needAlloc = Vec(RenameWidth, Output(Bool()))
val req = Vec(RenameWidth, ValidIO(new MicroOp))
}
// to store set LFST
val lfst = Vec(RenameWidth, Valid(new DispatchToLFST))
// flush or replay, for LFST
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
// LFST ctrl
val csrCtrl = Input(new CustomCSRCtrlIO)
// LFST state sync
val storeIssue = Vec(StorePipelineWidth, Flipped(Valid(new ExuInput)))
})
/**
* Store set LFST lookup
*/
// store set LFST lookup may start from rename for better timing
val lfst = Module(new LFST)
lfst.io.redirect <> RegNext(io.redirect)
lfst.io.flush <> RegNext(io.flush)
lfst.io.storeIssue <> RegNext(io.storeIssue)
lfst.io.csrCtrl <> RegNext(io.csrCtrl)
lfst.io.dispatch := io.lfst
/**
* Part 1: choose the target dispatch queue and the corresponding write ports
*/
......@@ -124,8 +146,54 @@ class Dispatch1(implicit p: Parameters) extends XSModule with HasExceptionNO {
// XSError(io.fromRename(i).valid && updatedUop(i).roqIdx.asUInt =/= io.enqRoq.resp(i).asUInt, "they should equal")
updatedUop(i).lqIdx := io.enqLsq.resp(i).lqIdx
updatedUop(i).sqIdx := io.enqLsq.resp(i).sqIdx
// lookup store set LFST
lfst.io.lookup.raddr(i) := updatedUop(i).cf.ssid
lfst.io.lookup.ren(i) := updatedUop(i).cf.storeSetHit
// override load delay ctrl signal with store set result
if(StoreSetEnable) {
// updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) // classic store set
updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) && !isStore(i) // store set lite
// updatedUop(i).cf.loadWaitBit := lfst.io.lookup.rdata(i) && io.fromRename(i).bits.cf.loadWaitBit && !isStore(i) // 2-bit store set
} else {
updatedUop(i).cf.loadWaitBit := io.fromRename(i).bits.cf.loadWaitBit && !isStore(i) // wait table does not require store to be delayed
}
// update store set LFST
io.lfst(i).valid := io.fromRename(i).valid && updatedUop(i).cf.storeSetHit && isStore(i)
// or io.fromRename(i).ready && updatedUop(i).cf.storeSetHit && isStore(i), which is much slower
io.lfst(i).bits.roqIdx := updatedUop(i).roqIdx
io.lfst(i).bits.sqIdx := updatedUop(i).sqIdx
io.lfst(i).bits.ssid := updatedUop(i).cf.ssid
}
// store set perf count
XSPerfAccumulate("waittable_load_wait", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && io.fromRename(i).bits.cf.loadWaitBit && !isStore(i) && isLs(i)
)))
XSPerfAccumulate("storeset_load_wait", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && !isStore(i) && isLs(i)
)))
XSPerfAccumulate("storeset_store_wait", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && isStore(i)
)))
XSPerfAccumulate("loadwait_diffmat_sywy", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
XSPerfAccumulate("loadwait_diffmat_sywx", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && updatedUop(i).cf.loadWaitBit && !io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
XSPerfAccumulate("loadwait_diffmat_sxwy", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && !updatedUop(i).cf.loadWaitBit && io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
XSPerfAccumulate("loadwait_diffmat_sxwx", PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire() && !updatedUop(i).cf.loadWaitBit && !io.fromRename(i).bits.cf.loadWaitBit &&
!isStore(i) && isLs(i)
)))
/**
* Part 3:
......
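The three variants in Dispatch1 above (classic store set, store set lite, 2-bit store set) differ only in the boolean that finally drives `loadWaitBit`. A hedged side-by-side, with `lfstHit` standing for `lfst.io.lookup.rdata(i)` and `lwtBit` for the wait-table bit carried from decode:

```scala
object LoadWaitPolicies extends App {
  def classic(lfstHit: Boolean): Boolean = lfstHit
  def lite(lfstHit: Boolean, isStore: Boolean): Boolean = lfstHit && !isStore
  def twoBit(lfstHit: Boolean, lwtBit: Boolean, isStore: Boolean): Boolean =
    lfstHit && lwtBit && !isStore
  // lite (the enabled variant) never delays stores themselves:
  println(lite(lfstHit = true, isStore = true))        // false
}
```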
......@@ -108,8 +108,8 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
val frontendRedirect = Flipped(ValidIO(new Redirect))
// exu write back, update info
val exuWriteback = Vec(exuParameters.JmpCnt + exuParameters.AluCnt, Flipped(ValidIO(new ExuOutput)))
// pc read reqs (0: jump/auipc 1~6: mispredict/load replay 7: exceptions)
val ftqRead = Vec(1 + 6 + 1, Flipped(new FtqRead))
// pc read reqs (0: jump/auipc 1~6: mispredict/load replay 7: store pc for store set update 8: exceptions)
val ftqRead = Vec(1 + 6 + 1 + 1, Flipped(new FtqRead))
val cfiRead = Flipped(new FtqRead)
val bpuInfo = new Bundle {
val bpRight = Output(UInt(XLEN.W))
......@@ -131,7 +131,7 @@ class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelpe
val real_fire = io.enq.fire() && !stage2Flush && !stage3Flush
val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_4R_SRAMEntry, FtqSize, 9, 1))
val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_4R_SRAMEntry, FtqSize, 10, 1))
ftq_pc_mem.io.wen(0) := real_fire
ftq_pc_mem.io.waddr(0) := tailPtr.value
ftq_pc_mem.io.wdata(0).ftqPC := io.enq.bits.ftqPC
......
......@@ -105,7 +105,7 @@ class ReservationStation
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new StoreDataBundle) else null
val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W)))
val stIssuePtr = if (exuCfg == LdExeUnitCfg) Input(new SqPtr()) else null
val stIssuePtr = if (exuCfg == LdExeUnitCfg || exuCfg == StExeUnitCfg) Input(new SqPtr()) else null
val fpRegValue = if (exuCfg == StExeUnitCfg) Input(UInt(srcLen.W)) else null
val jumpPc = if(exuCfg == JumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
......@@ -165,7 +165,7 @@ class ReservationStation
c.valid := i.valid
c.bits := i.bits.uop
}
if (exuCfg == LdExeUnitCfg) {
if (exuCfg == LdExeUnitCfg || exuCfg == StExeUnitCfg) {
ctrl.io.stIssuePtr := RegNext(io.stIssuePtr)
}
if (exuCfg == StExeUnitCfg) {
......@@ -541,7 +541,7 @@ class ReservationStationCtrl
val listen = Output(Vec(srcNum, Vec(iqSize, Vec(fastPortsCnt + slowPortsCnt, Bool()))))
val enqSrcReady = Output(Vec(srcNum, Bool()))
val stIssuePtr = if (exuCfg == LdExeUnitCfg) Input(new SqPtr()) else null
val stIssuePtr = if (exuCfg == LdExeUnitCfg || exuCfg == StExeUnitCfg) Input(new SqPtr()) else null
})
val selValid = io.sel.valid
......@@ -619,6 +619,7 @@ class ReservationStationCtrl
if (exuCfg == LdExeUnitCfg) {
val ldWait = Reg(Vec(iqSize, Bool()))
val sqIdx = Reg(Vec(iqSize, new SqPtr()))
val ldWaitUpdated = WireInit(ldWait)
ldWait.zip(sqIdx).map{ case (lw, sq) =>
when (!isAfter(sq, io.stIssuePtr)) {
lw := true.B
......@@ -626,11 +627,12 @@ class ReservationStationCtrl
}
when (enqEn) {
ldWait(enqPtr) := !enqUop.cf.loadWaitBit
ldWaitUpdated(enqPtr) := !enqUop.cf.loadWaitBit
sqIdx(enqPtr) := enqUop.sqIdx
}
ldWait.suggestName(s"${this.name}_ldWait")
sqIdx.suggestName(s"${this.name}_sqIdx")
io.readyVec := srcQueueWire.map(Cat(_).andR).zip(ldWait).map{ case (s, l) => s&l }
io.readyVec := srcQueueWire.map(Cat(_).andR).zip(ldWaitUpdated).map{ case (s, l) => s&l }
}
val redirectHit = io.redirectVec(selPtr)
......
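The `ldWaitUpdated` wire above addresses the `io.readyVec` problem mentioned in the commit message: the registered `ldWait` only reflects an enqueue on the next cycle, so readiness is computed from a same-cycle view that overlays this cycle's enqueue write. A hedged model of that bypass, with illustrative names:

```scala
object LdWaitBypassSketch extends App {
  val iqSize = 4
  val ldWaitReg = Array.fill(iqSize)(true)             // registered state from last cycle
  // readiness must see this cycle's enqueue write, not wait for the register
  def readyView(enqValid: Boolean, enqPtr: Int, enqLdWait: Boolean): Seq[Boolean] = {
    val view = ldWaitReg.toSeq
    if (enqValid) view.updated(enqPtr, enqLdWait) else view
  }
  // entry 2 enqueues this cycle with loadWaitBit set, i.e. ldWait = false:
  println(readyView(enqValid = true, enqPtr = 2, enqLdWait = false).mkString(", "))
  // prints: true, true, false, true -- slot 2 must not be selected this cycle
}
```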
......@@ -9,7 +9,6 @@ import xiangshan.cache._
import chisel3.experimental.chiselName
import freechips.rocketchip.tile.HasLazyRoCC
import xiangshan.backend.ftq.FtqPtr
import xiangshan.backend.decode.WaitTableParameters
import system.L1CacheErrorInfo
trait HasInstrMMIOConst extends HasXSParameter with HasIFUConst{
......@@ -99,7 +98,7 @@ class PrevHalfInstr(implicit p: Parameters) extends XSBundle {
}
@chiselName
class IFU(implicit p: Parameters) extends XSModule with HasIFUConst with HasCircularQueuePtrHelper with WaitTableParameters
class IFU(implicit p: Parameters) extends XSModule with HasIFUConst with HasCircularQueuePtrHelper
{
val io = IO(new IFUIO)
val bpu = BPU(EnableBPU)
......@@ -518,7 +517,7 @@ class IFU(implicit p: Parameters) extends XSModule with HasIFUConst with HasCirc
fetchPacketWire.instrs := expandedInstrs
fetchPacketWire.pc := if4_pd.pc
fetchPacketWire.foldpc := if4_pd.pc.map(i => XORFold(i(VAddrBits-1,1), WaitTableAddrWidth))
fetchPacketWire.foldpc := if4_pd.pc.map(i => XORFold(i(VAddrBits-1,1), MemPredPCWidth))
fetchPacketWire.pdmask := if4_pd.mask
fetchPacketWire.pd := if4_pd.pd
......
......@@ -6,7 +6,6 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.ftq.FtqPtr
import xiangshan.backend.decode.WaitTableParameters
class IbufPtr(implicit p: Parameters) extends CircularQueuePtr[IbufPtr](
p => p(XSCoreParamsKey).IBufSize
......@@ -24,10 +23,10 @@ class IBufferIO(implicit p: Parameters) extends XSBundle {
class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
val io = IO(new IBufferIO)
class IBufEntry(implicit p: Parameters) extends XSBundle with WaitTableParameters {
class IBufEntry(implicit p: Parameters) extends XSBundle {
val inst = UInt(32.W)
val pc = UInt(VAddrBits.W)
val foldpc = UInt(WaitTableAddrWidth.W)
val foldpc = UInt(MemPredPCWidth.W)
val pd = new PreDecodeInfo
val ipf = Bool()
val acf = Bool()
......@@ -125,6 +124,8 @@ class Ibuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrH
io.out(i).bits.crossPageIPFFix := outWire.crossPageIPFFix
io.out(i).bits.foldpc := outWire.foldpc
io.out(i).bits.loadWaitBit := DontCare
io.out(i).bits.storeSetHit := DontCare
io.out(i).bits.ssid := DontCare
}
val next_head_vec = VecInit(head_vec.map(_ + numDeq))
ibuf.io.raddr := VecInit(next_head_vec.map(_.value))
......
......@@ -11,6 +11,7 @@ import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbReques
import xiangshan.mem._
import xiangshan.backend.roq.RoqLsqIO
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.backend.ftq.FtqPtr
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
......@@ -455,12 +456,12 @@ class LoadQueue(implicit p: Parameters) extends XSModule
((lqViolation, lqViolationUop), (wbViolation, wbViolationUop), (l1Violation, l1ViolationUop))
}
def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
def rollbackSel(a: Valid[MicroOpRbExt], b: Valid[MicroOpRbExt]): ValidIO[MicroOpRbExt] = {
Mux(
a.valid,
Mux(
b.valid,
Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
Mux(isAfter(a.bits.uop.roqIdx, b.bits.uop.roqIdx), b, a), // a,b both valid, sel oldest
a // sel a
),
b // sel b
......@@ -474,21 +475,29 @@ class LoadQueue(implicit p: Parameters) extends XSModule
// S2: select rollback (part1) and generate rollback request
// rollback check
// Wb/L1 rollback seq check is done in s2
val rollbackWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
val rollbackL1 = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
val rollbackL1Wb = Wire(Vec(StorePipelineWidth*2, Valid(new MicroOp)))
val rollbackWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
val rollbackL1 = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
val rollbackL1Wb = Wire(Vec(StorePipelineWidth*2, Valid(new MicroOpRbExt)))
// Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
val rollbackLq = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
val rollbackLq = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
// store ftq index for store set update
val stFtqIdxS2 = Wire(Vec(StorePipelineWidth, new FtqPtr))
val stFtqOffsetS2 = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
for (i <- 0 until StorePipelineWidth) {
val detectedRollback = detectRollback(i)
rollbackLq(i).valid := detectedRollback._1._1 && RegNext(io.storeIn(i).valid)
rollbackLq(i).bits := detectedRollback._1._2
rollbackLq(i).bits.uop := detectedRollback._1._2
rollbackLq(i).bits.flag := i.U
rollbackWb(i).valid := detectedRollback._2._1 && RegNext(io.storeIn(i).valid)
rollbackWb(i).bits := detectedRollback._2._2
rollbackWb(i).bits.uop := detectedRollback._2._2
rollbackWb(i).bits.flag := i.U
rollbackL1(i).valid := detectedRollback._3._1 && RegNext(io.storeIn(i).valid)
rollbackL1(i).bits := detectedRollback._3._2
rollbackL1(i).bits.uop := detectedRollback._3._2
rollbackL1(i).bits.flag := i.U
rollbackL1Wb(2*i) := rollbackL1(i)
rollbackL1Wb(2*i+1) := rollbackWb(i)
stFtqIdxS2(i) := RegNext(io.storeIn(i).bits.uop.cf.ftqPtr)
stFtqOffsetS2(i) := RegNext(io.storeIn(i).bits.uop.cf.ftqOffset)
}
val rollbackL1WbSelected = ParallelOperation(rollbackL1Wb, rollbackSel)
......@@ -505,18 +514,23 @@ class LoadQueue(implicit p: Parameters) extends XSModule
// FIXME: this is ugly
val rollbackValidVec = Seq(rollbackL1WbVReg, rollbackLq0VReg, rollbackLq1VReg)
val rollbackUopVec = Seq(rollbackL1WbReg, rollbackLq0Reg, rollbackLq1Reg)
val rollbackUopExtVec = Seq(rollbackL1WbReg, rollbackLq0Reg, rollbackLq1Reg)
// select uop in parallel
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val mask = getAfterMask(rollbackValidVec, rollbackUopExtVec.map(i => i.uop))
val oneAfterZero = mask(1)(0)
val rollbackUop = Mux(oneAfterZero && mask(2)(0),
rollbackUopVec(0),
Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))
val rollbackUopExt = Mux(oneAfterZero && mask(2)(0),
rollbackUopExtVec(0),
Mux(!oneAfterZero && mask(2)(1), rollbackUopExtVec(1), rollbackUopExtVec(2)))
val stFtqIdxS3 = RegNext(stFtqIdxS2)
val stFtqOffsetS3 = RegNext(stFtqOffsetS2)
val rollbackUop = rollbackUopExt.uop
val rollbackStFtqIdx = stFtqIdxS3(rollbackUopExt.flag)
val rollbackStFtqOffset = stFtqOffsetS3(rollbackUopExt.flag)
// check if rollback request is still valid in parallel
val rollbackValidVecChecked = Wire(Vec(3, Bool()))
for(((v, uop), idx) <- rollbackValidVec.zip(rollbackUopVec).zipWithIndex) {
for(((v, uop), idx) <- rollbackValidVec.zip(rollbackUopExtVec.map(i => i.uop)).zipWithIndex) {
rollbackValidVecChecked(idx) := v &&
(!lastCycleRedirect.valid || isBefore(uop.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
(!lastlastCycleRedirect.valid || isBefore(uop.roqIdx, lastlastCycleRedirect.bits.roqIdx))
......@@ -524,7 +538,9 @@ class LoadQueue(implicit p: Parameters) extends XSModule
io.rollback.bits.roqIdx := rollbackUop.roqIdx
io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
io.rollback.bits.stFtqIdx := rollbackStFtqIdx
io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
io.rollback.bits.stFtqOffset := rollbackStFtqOffset
io.rollback.bits.level := RedirectLevel.flush
io.rollback.bits.interrupt := DontCare
io.rollback.bits.cfiUpdate := DontCare
......
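The `rollbackSel` function above keeps the older of two candidate rollback requests, and `ParallelOperation` reduces all candidates pairwise; the `flag` added by `MicroOpRbExt` then recovers which store pipeline produced the winner, so its stFtqIdx/stFtqOffset can ride along. A hedged scalar model, with `roqIdx` flattened to a plain Long (ignoring the circular-pointer wraparound that `isAfter` handles in hardware):

```scala
object RollbackSelSketch extends App {
  case class Cand(valid: Boolean, roqIdx: Long, flag: Int)
  def sel(a: Cand, b: Cand): Cand =
    if (!a.valid) b
    else if (!b.valid) a
    else if (a.roqIdx <= b.roqIdx) a else b            // both valid: keep the older
  val cands = Seq(Cand(true, 17, 0), Cand(false, 3, 1), Cand(true, 9, 0))
  println(cands.reduce(sel))                           // Cand(true,9,0): oldest valid request
}
```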