未验证 提交 74515c5a 编写于 作者: Y Yinan Xu 提交者: GitHub

jump: delay pc and jalr_target for one cycle (#1640)

上级 1cee9cb8
......@@ -271,6 +271,7 @@ class MicroOp(implicit p: Parameters) extends CfCtrl {
/** Wakeup check against another uop: pairs each physical source register of
  * `successor` with its source type and hands the pairs to the Seq-based
  * `wakeup` overload together with `exuCfg`.
  */
def wakeup(successor: MicroOp, exuCfg: ExuConfig): Seq[(Bool, Bool)] = {
val successorSrcs = successor.psrc.zip(successor.ctrl.srcType)
wakeup(successorSrcs, exuCfg)
}
// True when this uop's function-unit type is classified as a jump by FuType.isJumpExu.
def isJump: Bool = FuType.isJumpExu(ctrl.fuType)
}
class XSBundleWithMicroOp(implicit p: Parameters) extends XSBundle {
......
......@@ -522,7 +522,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
// Invert the ping-pong selector.
pingpong := !pingpong
// Choose the uop whose FTQ entry is read: dispatch port 2 when pingpong is set
// and there are more than two ALUs, otherwise dispatch port 0.
val jumpInst = Mux(pingpong && (exuParameters.AluCnt > 2).B, io.dispatch(2).bits, io.dispatch(0).bits)
// PC of the selected uop, looked up by its FTQ pointer and offset.
val jumpPcRead = io.frontend.fromFtq.getJumpPcRead
io.jumpPc := jumpPcRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
io.jumpPc := jumpPcRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset).asUInt
// jalr target, looked up with the same FTQ pointer and offset as the PC.
val jumpTargetRead = io.frontend.fromFtq.target_read
io.jalr_target := jumpTargetRead(jumpInst.cf.ftqPtr, jumpInst.cf.ftqOffset)
......
......@@ -417,8 +417,9 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara
issueIdx += issueWidth
// Only reservation stations that expose a jump port get the PC / jalr target wired.
if (rs.io.jump.isDefined) {
rs.io.jump.get.jumpPc := io.extra.jumpPc
rs.io.jump.get.jalr_target := io.extra.jalr_target
// High in the cycle after any dispatch port of this RS fired with a jump uop.
val lastJumpFire = VecInit(rs.io.fromDispatch.map(dp => RegNext(dp.fire && dp.bits.isJump))).asUInt.orR
// Capture the PC and jalr target only when lastJumpFire is high; RegEnable holds
// the previously captured value in all other cycles.
rs.io.jump.get.jumpPc := RegEnable(io.extra.jumpPc, lastJumpFire)
rs.io.jump.get.jalr_target := RegEnable(io.extra.jalr_target, lastJumpFire)
}
if (rs.io.checkwait.isDefined) {
rs.io.checkwait.get.stIssuePtr <> io.extra.stIssuePtr
......
......@@ -42,11 +42,17 @@ class DataArrayMultiWriteIO(numEntries: Int, numSrc: Int, dataBits: Int)(implici
val data = Input(UInt(dataBits.W))
}
/** Delayed write port of the data array, used for source operands whose data is
  * written in a later cycle than the regular enqueue write.
  *
  * @param numEntries number of data-array entries; also the bit width of `addr`
  * @param numSrc     number of source operands per entry
  * @param dataBits   bit width of one source operand
  */
class DataArrayDelayedWriteIO(numEntries: Int, numSrc: Int, dataBits: Int)(implicit p: Parameters) extends XSBundle {
// Per-source write enable: mask(i) gates the write of data(i).
val mask = Vec(numSrc, Input(Bool()))
// Entry select, one bit per entry (presumably one-hot, as it is driven from a
// registered allocate pointer -- confirm against the driver).
val addr = Input(UInt(numEntries.W))
// Per-source write data.
val data = Vec(numSrc, Input(UInt(dataBits.W)))
}
/** Top-level IO of the reservation-station data array.
  *
  * Port counts are taken from `params`; the optional ports are `null` when the
  * corresponding feature is disabled, so users must guard accesses with the
  * same `params` predicate.
  */
class DataArrayIO(params: RSParams)(implicit p: Parameters) extends XSBundle {
// One read port per dequeue port plus one extra read port.
val read = Vec(params.numDeq + 1, new DataArrayReadIO(params.numEntries, params.numSrc, params.dataBits))
// One write port per enqueue port.
val write = Vec(params.numEnq, new DataArrayWriteIO(params.numEntries, params.numSrc, params.dataBits))
// One multi-entry write port per wakeup source.
val multiWrite = Vec(params.numWakeup, new DataArrayMultiWriteIO(params.numEntries, params.numSrc, params.dataBits))
val delayedWrite = if (params.delayedRf) Vec(params.numEnq, Flipped(ValidIO(UInt(params.dataBits.W)))) else null
// Delayed write ports (one per enqueue port); present only when params.delayedSrc is set.
val delayedWrite = if (params.delayedSrc) Vec(params.numEnq, new DataArrayDelayedWriteIO(params.numEntries, params.numSrc, params.dataBits)) else null
// Partial write ports covering numSrc - 1 sources; present only when params.hasMidState is set.
val partialWrite = if (params.hasMidState) Vec(params.numDeq, new DataArrayWriteIO(params.numEntries, params.numSrc - 1, params.dataBits)) else null
}
......@@ -54,10 +60,9 @@ class DataArray(params: RSParams)(implicit p: Parameters) extends XSModule {
val io = IO(new DataArrayIO(params))
for (i <- 0 until params.numSrc) {
// delayed by one more cycle for delayed write ports
val delayedWen = if (params.delayedRf) RegNext(VecInit(io.delayedWrite.map(_.valid))) else Seq()
val delayedWaddr = if (params.delayedRf) RegNext(RegNext(VecInit(io.write.map(_.addr)))) else Seq()
val delayedWdata = if (params.delayedRf) io.delayedWrite.map(_.bits) else Seq()
val delayedWen = if (params.delayedSrc) io.delayedWrite.map(_.mask(i)) else Seq()
val delayedWaddr = if (params.delayedSrc) io.delayedWrite.map(_.addr) else Seq()
val delayedWdata = if (params.delayedSrc) io.delayedWrite.map(_.data(i)) else Seq()
val partialWen = if (i < 2 && params.hasMidState) io.partialWrite.map(_.enable) else Seq()
val partialWaddr = if (i < 2 && params.hasMidState) io.partialWrite.map(_.addr) else Seq()
......@@ -97,13 +102,14 @@ class JumpImmExtractor(implicit p: Parameters) extends ImmExtractor(2, 64) {
val jump_pc = IO(Input(UInt(VAddrBits.W)))
val jalr_target = IO(Input(UInt(VAddrBits.W)))
when (SrcType.isPc(io.uop.ctrl.srcType(0))) {
io.data_out(0) := SignExt(jump_pc, XLEN)
}
// when src1 is reg (like sfence's asid) do not let data_out(1) be the jarl_target
when (!SrcType.isReg(io.uop.ctrl.srcType(1))) {
io.data_out(1) := jalr_target
}
// Jump no longer needs these overrides:
// when (SrcType.isPc(io.uop.ctrl.srcType(0))) {
// io.data_out(0) := SignExt(jump_pc, XLEN)
// }
// when src1 is reg (like sfence's asid) do not let data_out(1) be the jalr_target
// when (SrcType.isPcOrImm(io.uop.ctrl.srcType(1))) {
// io.data_out(1) := jalr_target
// }
}
class AluImmExtractor(implicit p: Parameters) extends ImmExtractor(2, 64) {
......
......@@ -57,9 +57,9 @@ case class RSParams
// oldestFirst: (Enable_or_not, Need_balance, Victim_index)
def oldestFirst: (Boolean, Boolean, Int) = (true, false, 0)
// True only for the reservation station configured with FmacExeUnitCfg.
def hasMidState: Boolean = exuCfg.get == FmacExeUnitCfg
def delayedRf: Boolean = exuCfg.get == StdExeUnitCfg
def needScheduledBit: Boolean = hasFeedback || delayedRf || hasMidState
// Load prefers the first index.
// True only for the reservation station configured with StdExeUnitCfg.
def delayedFpRf: Boolean = exuCfg.get == StdExeUnitCfg
// True when some source operand is written after enqueue: the delayed FP
// register read or the jump PC / jalr target.
def delayedSrc: Boolean = delayedFpRf || isJump
// The scheduled bit is needed when the RS has feedback, delayed sources, or a mid state.
def needScheduledBit: Boolean = hasFeedback || delayedSrc || hasMidState
// Balancing applies to units that request it, except the load unit.
def needBalance: Boolean = exuCfg.get.needLoadBalance && exuCfg.get != LdExeUnitCfg
// Dequeue selections, plus enqueue selections, plus one more when oldest-first is enabled.
def numSelect: Int = numDeq + numEnq + (if (oldestFirst._1) 1 else 0)
// Load, store and store-data reservation stations do not drop on redirect.
def dropOnRedirect: Boolean = !(isLoad || isStore || isStoreData)
......@@ -208,7 +208,7 @@ class ReservationStationIO(params: RSParams)(implicit p: Parameters) extends XSB
// enq
val fromDispatch = Vec(params.numEnq, Flipped(DecoupledIO(new MicroOp)))
val srcRegValue = Vec(params.numEnq, Input(Vec(params.numSrc, UInt(params.dataBits.W))))
val fpRegValue = if (params.delayedRf) Some(Vec(params.numEnq, Input(UInt(params.dataBits.W)))) else None
val fpRegValue = if (params.delayedFpRf) Some(Vec(params.numEnq, Input(UInt(params.dataBits.W)))) else None
// deq
val deq = Vec(params.numDeq, DecoupledIO(new ExuInput))
// wakeup
......@@ -320,6 +320,7 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
val s1_slowPorts = RegNext(io.slowPorts)
val s1_fastUops = RegNext(io.fastUopsIn)
val s1_dispatchUops = Reg(Vec(params.numEnq, Valid(new MicroOp)))
val s1_delayedSrc = Wire(Vec(params.numEnq, Vec(params.numSrc, Bool())))
val s1_allocatePtrOH = RegNext(VecInit(s0_allocatePtrOH.reverse))
val s1_allocatePtr = RegNext(VecInit(s0_allocatePtr.reverse))
val s1_enqWakeup = RegNext(VecInit(s0_enqWakeup.reverse))
......@@ -358,27 +359,42 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
}
// update status and payload array
statusArray.io.redirect := io.redirect
val needFpSource = s1_dispatchUops.map(_.bits.needRfRPort(0, true, false))
for ((statusUpdate, i) <- statusArray.io.update.zipWithIndex) {
statusUpdate.enable := s1_dispatchUops(i).valid
for (((statusUpdate, uop), i) <- statusArray.io.update.zip(s1_dispatchUops).zipWithIndex) {
s1_delayedSrc(i).foreach(_ := false.B)
if (params.delayedFpRf) {
when (uop.bits.needRfRPort(0, true, false)) {
s1_delayedSrc(i)(0) := true.B
}
}
if (params.isJump) {
when (uop.bits.isJump) {
when (SrcType.isPc(uop.bits.ctrl.srcType(0))) {
s1_delayedSrc(i)(0) := true.B
}
when (SrcType.isPcOrImm(uop.bits.ctrl.srcType(1))) {
s1_delayedSrc(i)(1) := true.B
}
}
}
statusUpdate.enable := uop.valid
statusUpdate.addr := s1_allocatePtrOH(i)
statusUpdate.data.valid := true.B
val waitForFpSource = if (params.delayedRf) needFpSource(i) else false.B
statusUpdate.data.scheduled := waitForFpSource
statusUpdate.data.blocked := params.checkWaitBit.B && s1_dispatchUops(i).bits.cf.loadWaitBit
statusUpdate.data.credit := Mux(waitForFpSource, 2.U, 0.U)
statusUpdate.data.scheduled := s1_delayedSrc(i).asUInt.orR
statusUpdate.data.blocked := params.checkWaitBit.B && uop.bits.cf.loadWaitBit
val credit = if (params.delayedFpRf) 2 else 1
statusUpdate.data.credit := Mux(s1_delayedSrc(i).asUInt.orR, credit.U, 0.U)
for (j <- 0 until params.numSrc) {
statusUpdate.data.srcState(j) := s1_dispatchUops(i).bits.srcIsReady(j) || s1_enqWakeup(i)(j).asUInt.orR || s1_fastWakeup(i)(j).asUInt.orR
statusUpdate.data.srcState(j) := uop.bits.srcIsReady(j) || s1_enqWakeup(i)(j).asUInt.orR || s1_fastWakeup(i)(j).asUInt.orR
}
statusUpdate.data.midState := false.B
statusUpdate.data.psrc := s1_dispatchUops(i).bits.psrc.take(params.numSrc)
statusUpdate.data.srcType := s1_dispatchUops(i).bits.ctrl.srcType.take(params.numSrc)
statusUpdate.data.robIdx := s1_dispatchUops(i).bits.robIdx
statusUpdate.data.sqIdx := s1_dispatchUops(i).bits.sqIdx
statusUpdate.data.psrc := uop.bits.psrc.take(params.numSrc)
statusUpdate.data.srcType := uop.bits.ctrl.srcType.take(params.numSrc)
statusUpdate.data.robIdx := uop.bits.robIdx
statusUpdate.data.sqIdx := uop.bits.sqIdx
statusUpdate.data.waitForSqIdx := DontCare // generated by sq, will be updated later
statusUpdate.data.waitForRobIdx := s1_dispatchUops(i).bits.cf.waitForRobIdx // generated by mdp
statusUpdate.data.waitForRobIdx := uop.bits.cf.waitForRobIdx // generated by mdp
statusUpdate.data.waitForStoreData := false.B
statusUpdate.data.strictWait := s1_dispatchUops(i).bits.cf.loadWaitStrict
statusUpdate.data.strictWait := uop.bits.cf.loadWaitStrict
statusUpdate.data.isFirstIssue := true.B
}
// We need to block issue until the corresponding store issues.
......@@ -515,16 +531,26 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
for (i <- 0 until params.numEnq) {
dataArray.io.write(i).enable := s1_dispatchUops(i).valid
dataArray.io.write(i).mask := s1_dispatchUops(i).bits.srcIsReady.take(params.numSrc)
if (params.delayedRf) {
when (needFpSource(i)) {
dataArray.io.write(i).mask.head := false.B
}
}
dataArray.io.write(i).addr := s1_allocatePtrOH(i)
dataArray.io.write(i).data := immBypassedData(i)
if (params.delayedRf) {
dataArray.io.delayedWrite(i).valid := RegNext(s1_dispatchUops(i).valid && needFpSource(i))
dataArray.io.delayedWrite(i).bits := io.fpRegValue.get.reverse(i)
if (params.delayedSrc) {
for (j <- 0 until params.numSrc) {
when (s1_delayedSrc(i)(j)) {
dataArray.io.write(i).mask(j) := false.B
}
dataArray.io.delayedWrite(i).data := DontCare
if (params.delayedFpRf) {
dataArray.io.delayedWrite(i).mask(j) := RegNext(RegNext(s1_dispatchUops(i).valid && s1_delayedSrc(i)(j)))
dataArray.io.delayedWrite(i).addr := RegNext(RegNext(dataArray.io.write(i).addr))
dataArray.io.delayedWrite(i).data(0) := io.fpRegValue.get.reverse(i)
}
if (params.isJump) {
dataArray.io.delayedWrite(i).mask(j) := RegNext(s1_dispatchUops(i).valid && s1_delayedSrc(i)(j))
dataArray.io.delayedWrite(i).addr := RegNext(dataArray.io.write(i).addr)
dataArray.io.delayedWrite(i).data(0) := SignExt(io.jump.get.jumpPc, XLEN)
dataArray.io.delayedWrite(i).data(1) := io.jump.get.jalr_target
}
}
}
}
// data broadcast: from function units (only slow wakeup data are needed)
......@@ -803,14 +829,16 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod
val pcMem = Reg(Vec(params.numEntries, UInt(VAddrBits.W)))
for (i <- 0 until params.numEntries) {
val writeEn = VecInit(dataArray.io.write.map(w => w.enable && w.addr(i))).asUInt.orR
when (writeEn) {
when (RegNext(writeEn)) {
pcMem(i) := io.jump.get.jumpPc
}
}
for (i <- 0 until params.numDeq) {
// currently we assert there's only one dequeue port.
require(params.numDeq == 1, "only one jump now")
val pcRead = Mux(s1_select_bypass_s0.asUInt.orR, io.jump.get.jumpPc, Mux1H(s1_issuePtrOH(i).bits, pcMem))
val oldestPc = Mux1H(s1_in_oldestPtrOH.bits, pcMem)
val issuePc = Mux1H(s1_in_selectPtrOH(i), pcMem)
val pcRead = Mux(s1_issue_oldest(i), oldestPc, issuePc)
io.deq(i).bits.uop.cf.pc := RegEnable(pcRead, s1_out_fire(i))
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册