diff --git a/difftest b/difftest index 7204a60b2e52453fdac0877402a8bbc09bec6e44..bafef94dde8cffc036093ef01544481c4efb3f92 160000 --- a/difftest +++ b/difftest @@ -1 +1 @@ -Subproject commit 7204a60b2e52453fdac0877402a8bbc09bec6e44 +Subproject commit bafef94dde8cffc036093ef01544481c4efb3f92 diff --git a/src/main/scala/utils/PipelineConnect.scala b/src/main/scala/utils/PipelineConnect.scala index 7f91a02b1cca1b27897b4815f1a95bffa1854d0f..2403df23e27647da94ef9e9131c62a90451740ee 100644 --- a/src/main/scala/utils/PipelineConnect.scala +++ b/src/main/scala/utils/PipelineConnect.scala @@ -27,15 +27,7 @@ class PipelineConnectPipe[T <: Data](gen: T) extends Module { val isFlush = Input(Bool()) }) - val valid = RegInit(false.B) - val leftFire = io.in.valid && io.out.ready - when (io.rightOutFire) { valid := false.B } - when (leftFire) { valid := true.B } - when (io.isFlush) { valid := false.B } - - io.in.ready := io.out.ready - io.out.bits := RegEnable(io.in.bits, leftFire) - io.out.valid := valid + PipelineConnect.connect(io.in, io.out, io.rightOutFire, io.isFlush, false.B) } class PipelineConnectBuffer[T <: Data, FlushT <: Data](gen: T, flushGen: FlushT, flushFunc: (T, FlushT) => Bool) @@ -106,6 +98,24 @@ class PipelineConnectBufferWithExtraData[T <: Data, FlushT <: Data, ExtraT <: Da } object PipelineConnect { + def connect[T <: Data]( + left: DecoupledIO[T], + right: DecoupledIO[T], + rightOutFire: Bool, + isFlush: Bool, + block: Bool + ): Unit = { + val valid = RegInit(false.B) + val leftFire = left.valid && right.ready && !block + when (rightOutFire) { valid := false.B } + when (leftFire) { valid := true.B } + when (isFlush) { valid := false.B } + + left.ready := right.ready && !block + right.bits := RegEnable(left.bits, leftFire) + right.valid := valid + } + def apply[T <: Data]( left: DecoupledIO[T], right: DecoupledIO[T], @@ -114,13 +124,19 @@ object PipelineConnect { block: Bool = false.B, moduleName: Option[String] = None ): Unit = { - val pipeline = Module(new PipelineConnectPipe(left.bits)) - if(moduleName.nonEmpty) pipeline.suggestName(moduleName.get) - pipeline.io.in <> left - pipeline.io.rightOutFire := rightOutFire - pipeline.io.isFlush := isFlush - pipeline.io.out <> right - pipeline.io.out.ready := right.ready && !block + if (moduleName.isDefined) { + val pipeline = Module(new PipelineConnectPipe(left.bits)) + pipeline.suggestName(moduleName.get) + pipeline.io.in <> left + pipeline.io.rightOutFire := rightOutFire + pipeline.io.isFlush := isFlush + pipeline.io.out <> right + pipeline.io.out.ready := right.ready && !block + } + else { + // do not use module here to please DCE + connect(left, right, rightOutFire, isFlush, block) + } } def apply[T <: Data, FlushT <: Data]( @@ -137,7 +153,6 @@ object PipelineConnect { pipe_buffer.io.flush := flush } - def apply[T <: Data, FlushT <: Data, ExtraT <: Data]( left: DecoupledIO[T], right: DecoupledIO[T], @@ -161,7 +176,7 @@ object PipelineNext { isFlush: Bool ): DecoupledIO[T] = { val right = Wire(Decoupled(left.bits.cloneType)) - PipelineConnect(left, right, rightOutFire, isFlush, moduleName = Some("pipeline")) + PipelineConnect(left, right, rightOutFire, isFlush) right } diff --git a/src/main/scala/xiangshan/backend/Scheduler.scala b/src/main/scala/xiangshan/backend/Scheduler.scala index b948075846d3d3feb6d363ba338f078cda124bdb..7a5534423ababd3351d3fa211f0e9e6de26fb4a6 100644 --- a/src/main/scala/xiangshan/backend/Scheduler.scala +++ b/src/main/scala/xiangshan/backend/Scheduler.scala @@ -287,6 +287,7 @@ class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSPara }) val dispatch2 = outer.dispatch2.map(_.module) + dispatch2.foreach(_.io.redirect := io.redirect) // dirty code for ls dp dispatch2.foreach(dp => if (dp.io.enqLsq.isDefined) { diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch2Rs.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch2Rs.scala index 8a5d99046ddd3cc23451f9ab1bdb9f69055dfa0d..c49e0da2ebe7d4ec5e17272416d9f4137c4c80e8 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch2Rs.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch2Rs.scala @@ -57,6 +57,7 @@ class Dispatch2RsImp(outer: Dispatch2Rs)(implicit p: Parameters) extends LazyMod val numFpStateRead = outer.numFpStateRead val io = IO(new Bundle() { + val redirect = Flipped(ValidIO(new Redirect)) val in = Flipped(Vec(outer.numIn, DecoupledIO(new MicroOp))) val readIntState = if (numIntStateRead > 0) Some(Vec(numIntStateRead, Flipped(new BusyTableReadIO))) else None val readFpState = if (numFpStateRead > 0) Some(Vec(numFpStateRead, Flipped(new BusyTableReadIO))) else None @@ -64,12 +65,12 @@ class Dispatch2RsImp(outer: Dispatch2Rs)(implicit p: Parameters) extends LazyMod val enqLsq = if (outer.hasLoadStore) Some(Flipped(new LsqEnqIO)) else None }) - val numInFire = PopCount(io.in.map(_.fire())) - val numStaFire = PopCount(io.out.zip(outer.configs).filter(_._2.contains(StaExeUnitCfg)).map(_._1.fire())) - val numStdFire = PopCount(io.out.zip(outer.configs).filter(_._2.contains(StdExeUnitCfg)).map(_._1.fire())) - XSError(numStaFire =/= numStdFire, "sta_fire != std_fire\n") - val numOutFire = PopCount(io.out.map(_.fire())) - numStdFire - XSError(numInFire =/= numOutFire, "in != out\n") + val numInFire = PopCount(io.in.map(_.fire)) + val numStaFire = PopCount(io.out.zip(outer.configs).filter(_._2.contains(StaExeUnitCfg)).map(_._1.fire)) + val numStdFire = PopCount(io.out.zip(outer.configs).filter(_._2.contains(StdExeUnitCfg)).map(_._1.fire)) + // XSError(numStaFire =/= numStdFire, "sta_fire != std_fire\n") + val numOutFire = PopCount(io.out.map(_.fire)) - numStdFire + // XSError(numInFire =/= numOutFire, "in != out\n") XSPerfAccumulate("in_valid", PopCount(io.in.map(_.valid))) XSPerfAccumulate("in_fire", PopCount(io.in.map(_.fire))) @@ -179,6 +180,8 @@ class Dispatch2RsDistinctImp(outer: Dispatch2Rs)(implicit p: Parameters) extends in.foreach(_.ready := false.B) io.in.zip(in).foreach(x => x._1.ready := x._2.ready) + // add one pipeline before out + val s0_out = Wire(io.out.cloneType) // dirty code for lsq enq val is_blocked = WireDefault(VecInit(Seq.fill(io.in.length)(false.B))) if (io.enqLsq.isDefined) { @@ -207,7 +210,7 @@ class Dispatch2RsDistinctImp(outer: Dispatch2Rs)(implicit p: Parameters) extends in(i).bits.lqIdx := enqLsq.resp(i).lqIdx in(i).bits.sqIdx := enqLsq.resp(i).sqIdx - enqLsq.req(i).valid := in(i).valid && VecInit(io.out.map(_.ready)).asUInt.andR + enqLsq.req(i).valid := in(i).valid && VecInit(s0_out.map(_.ready)).asUInt.andR } } } @@ -219,24 +222,49 @@ class Dispatch2RsDistinctImp(outer: Dispatch2Rs)(implicit p: Parameters) extends val select = SelectOne("naive", canAccept, numOfThisExu) for ((idx, j) <- outIndices.zipWithIndex) { val (selectValid, selectIdxOH) = select.getNthOH(j + 1) - io.out(idx).valid := selectValid && !Mux1H(selectIdxOH, is_blocked) - io.out(idx).bits := Mux1H(selectIdxOH, in.map(_.bits)) + s0_out(idx).valid := selectValid && !Mux1H(selectIdxOH, is_blocked) + s0_out(idx).bits := Mux1H(selectIdxOH, in.map(_.bits)) // Special case for STD if (config.contains(StdExeUnitCfg)) { - val sta = io.out(idx - StorePipelineWidth) - sta.valid := io.out(idx).valid - io.out(idx).bits.ctrl.srcType(0) := io.out(idx).bits.ctrl.srcType(1) - io.out(idx).bits.psrc(0) := io.out(idx).bits.psrc(1) - XSPerfAccumulate(s"st_rs_not_ready_$idx", selectValid && (!sta.ready || !io.out(idx).ready)) - XSPerfAccumulate(s"sta_rs_not_ready_$idx", selectValid && !sta.ready && io.out(idx).ready) - XSPerfAccumulate(s"std_rs_not_ready_$idx", selectValid && sta.ready && !io.out(idx).ready) + val sta = s0_out(idx - StorePipelineWidth) + sta.valid := s0_out(idx).valid + s0_out(idx).bits.ctrl.srcType(0) := s0_out(idx).bits.ctrl.srcType(1) + s0_out(idx).bits.psrc(0) := s0_out(idx).bits.psrc(1) + XSPerfAccumulate(s"st_rs_not_ready_$idx", selectValid && (!sta.ready || !s0_out(idx).ready)) + XSPerfAccumulate(s"sta_rs_not_ready_$idx", selectValid && !sta.ready && s0_out(idx).ready) + XSPerfAccumulate(s"std_rs_not_ready_$idx", selectValid && sta.ready && !s0_out(idx).ready) } else { - in.zip(selectIdxOH).foreach{ case (in, v) => when (v) { in.ready := io.out(idx).ready }} + in.zip(selectIdxOH).foreach{ case (in, v) => when (v) { in.ready := s0_out(idx).ready }} } } } + // dispatch is allowed when lsq and rs can accept all the instructions + // TODO: better algorithm here? + if (io.enqLsq.isDefined) { + when (!VecInit(s0_out.map(_.ready)).asUInt.andR || !io.enqLsq.get.canAccept) { + in.foreach(_.ready := false.B) + s0_out.foreach(_.valid := false.B) + } + } + + // agreement with dispatch queue: don't enqueue when io.redirect.valid + when (io.redirect.valid) { + s0_out.foreach(_.valid := false.B) + } + + // Note: the dispatch queue must not dequeue when io.redirect.valid + val s1_rightFire = Wire(Vec(s0_out.length, Bool())) + val s1_flush = Wire(Vec(s0_out.length, Bool())) + val s1_out = io.out.indices.map(i => PipelineNext(s0_out(i), s1_rightFire(i), s1_flush(i))) + for (i <- io.out.indices) { + io.out(i).valid := s1_out(i).valid + io.out(i).bits := s1_out(i).bits + s1_out(i).ready := !s1_out(i).valid || io.out(i).ready + s1_rightFire(i) := io.out(i).ready + s1_flush(i) := s1_out(i).valid && s1_out(i).bits.robIdx.needFlush(io.redirect) + } if (io.readIntState.isDefined) { val stateReadReq = io.out.zip(outer.numIntSrc).flatMap(x => x._1.bits.psrc.take(x._2)) io.readIntState.get.map(_.req).zip(stateReadReq).foreach(x => x._1 := x._2) @@ -258,12 +286,4 @@ class Dispatch2RsDistinctImp(outer: Dispatch2Rs)(implicit p: Parameters) extends } } - // dispatch is allowed when lsq and rs can accept all the instructions - // TODO: better algorithm here? - if (io.enqLsq.isDefined) { - when (!VecInit(io.out.map(_.ready)).asUInt.andR || !io.enqLsq.get.canAccept) { - in.foreach(_.ready := false.B) - io.out.foreach(_.valid := false.B) - } - } } diff --git a/src/main/scala/xiangshan/backend/issue/ReservationStation.scala b/src/main/scala/xiangshan/backend/issue/ReservationStation.scala index 6a772070b73b63577fe26961cd1a725833a7522f..050e531619b650abc4e8b784e9b1d59868a72d22 100644 --- a/src/main/scala/xiangshan/backend/issue/ReservationStation.scala +++ b/src/main/scala/xiangshan/backend/issue/ReservationStation.scala @@ -61,6 +61,7 @@ case class RSParams def needScheduledBit: Boolean = hasFeedback || delayedRf || hasMidState def needBalance: Boolean = exuCfg.get.needLoadBalance def numSelect: Int = numDeq + (if (oldestFirst._1) 1 else 0) + def dropOnRedirect: Boolean = !(isLoad || isStore || isStoreData) override def toString: String = { s"type ${exuCfg.get.name}, size $numEntries, enq $numEnq, deq $numDeq, numSrc $numSrc, fast $numFastWakeup, wakeup $numWakeup" @@ -221,8 +222,8 @@ class ReservationStationIO(params: RSParams)(implicit p: Parameters) extends XSB val jumpPc = Input(UInt(VAddrBits.W)) val jalr_target = Input(UInt(VAddrBits.W)) }) else None - val feedback = if (params.hasFeedback) Some(Vec(params.numDeq, - Flipped(new MemRSFeedbackIO) + val feedback = if (params.hasFeedback) Some(Vec(params.numDeq, + Flipped(new MemRSFeedbackIO) )) else None val checkwait = if (params.checkWaitBit) Some(new Bundle { val stIssuePtr = Input(new SqPtr()) @@ -262,15 +263,20 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod */ // enqueue from dispatch select.io.validVec := statusArray.io.isValid - // agreement with dispatch: don't enqueue when io.redirect.valid - val doEnqueue = VecInit(io.fromDispatch.map(_.fire && !io.redirect.valid)) - val enqShouldNotFlushed = io.fromDispatch.map(d => d.fire && !d.bits.robIdx.needFlush(io.redirect)) - XSPerfAccumulate("wrong_stall", Mux(io.redirect.valid, PopCount(enqShouldNotFlushed), 0.U)) + val doEnqueue = Wire(Vec(params.numEnq, Bool())) + val enqNotFlushed = io.fromDispatch.map(d => d.fire && !d.bits.robIdx.needFlush(io.redirect)) + if (params.dropOnRedirect) { + doEnqueue := io.fromDispatch.map(_.fire && !io.redirect.valid) + XSPerfAccumulate("wrong_stall", Mux(io.redirect.valid, PopCount(enqNotFlushed), 0.U)) + } + else { + doEnqueue := enqNotFlushed + } val needFpSource = io.fromDispatch.map(_.bits.needRfRPort(0, true, false)) for (i <- 0 until params.numEnq) { io.fromDispatch(i).ready := select.io.allocate(i).valid // for better timing, we update statusArray no matter there's a flush or not - statusArray.io.update(i).enable := io.fromDispatch(i).fire() + statusArray.io.update(i).enable := io.fromDispatch(i).fire statusArray.io.update(i).addr := select.io.allocate(i).bits statusArray.io.update(i).data.valid := true.B statusArray.io.update(i).data.scheduled := params.delayedRf.B && needFpSource(i) @@ -764,4 +770,3 @@ class ReservationStation(params: RSParams)(implicit p: Parameters) extends XSMod def size: Int = params.numEntries } - diff --git a/src/main/scala/xiangshan/backend/issue/StatusArray.scala b/src/main/scala/xiangshan/backend/issue/StatusArray.scala index bdafe0f7da36165486aabc50ac5b43bbc95687e5..fdbc7a2a867928e6166498bd364f6094cb968768 100644 --- a/src/main/scala/xiangshan/backend/issue/StatusArray.scala +++ b/src/main/scala/xiangshan/backend/issue/StatusArray.scala @@ -172,7 +172,8 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule val isFlushed = status.valid && status.robIdx.needFlush(io.redirect) val (deqRespValid, deqRespSucc, deqRespType, deqRespDataInvalidSqIdx) = deqResp(i) flushedVec(i) := isFlushed || (deqRespValid && deqRespSucc) - val realUpdateValid = updateValid(i) && !io.redirect.valid + val enqFlushed = if (params.dropOnRedirect) io.redirect.valid else statusNext.robIdx.needFlush(io.redirect) + val realUpdateValid = updateValid(i) && !enqFlushed statusNext.valid := !flushedVec(i) && (realUpdateValid || status.valid) XSError(updateValid(i) && status.valid, p"should not update a valid entry $i\n") @@ -196,14 +197,14 @@ class StatusArray(params: RSParams)(implicit p: Parameters) extends XSModule if (params.checkWaitBit) { val blockNotReleased = isAfter(statusNext.sqIdx, io.stIssuePtr) val storeAddrWaitforIsIssuing = VecInit((0 until StorePipelineWidth).map(i => { - io.memWaitUpdateReq.staIssue(i).valid && + io.memWaitUpdateReq.staIssue(i).valid && io.memWaitUpdateReq.staIssue(i).bits.uop.robIdx.value === statusNext.waitForRobIdx.value })).asUInt.orR && !statusNext.waitForStoreData && !statusNext.strictWait // is waiting for store addr ready val storeDataWaitforIsIssuing = VecInit((0 until StorePipelineWidth).map(i => { - io.memWaitUpdateReq.stdIssue(i).valid && + io.memWaitUpdateReq.stdIssue(i).valid && io.memWaitUpdateReq.stdIssue(i).bits.uop.sqIdx.value === statusNext.waitForSqIdx.value })).asUInt.orR && statusNext.waitForStoreData - statusNext.blocked := Mux(updateValid(i), updateVal(i).blocked, status.blocked) && + statusNext.blocked := Mux(updateValid(i), updateVal(i).blocked, status.blocked) && !storeAddrWaitforIsIssuing && !storeDataWaitforIsIssuing && blockNotReleased diff --git a/src/main/scala/xiangshan/backend/rob/Rob.scala b/src/main/scala/xiangshan/backend/rob/Rob.scala index 830e745a8a2e1de33271427e745d799cb890b877..263ffc7ed967c4bf22cbb472043cec4a6fb3855a 100644 --- a/src/main/scala/xiangshan/backend/rob/Rob.scala +++ b/src/main/scala/xiangshan/backend/rob/Rob.scala @@ -850,7 +850,7 @@ class RobImp(outer: Rob)(implicit p: Parameters) extends LazyModuleImp(outer) exceptionGen.io.enq(i).bits.exceptionVec := ExceptionNO.selectFrontend(io.enq.req(i).bits.cf.exceptionVec) exceptionGen.io.enq(i).bits.flushPipe := io.enq.req(i).bits.ctrl.flushPipe exceptionGen.io.enq(i).bits.replayInst := false.B - assert(io.enq.req(i).bits.ctrl.replayInst === false.B) + XSError(canEnqueue(i) && io.enq.req(i).bits.ctrl.replayInst, "enq should not set replayInst") exceptionGen.io.enq(i).bits.singleStep := io.enq.req(i).bits.ctrl.singleStep exceptionGen.io.enq(i).bits.crossPageIPFFix := io.enq.req(i).bits.cf.crossPageIPFFix exceptionGen.io.enq(i).bits.trigger.clear()