/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.backend

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config.Parameters
import difftest.{DifftestArchFpRegState, DifftestArchIntRegState}
23
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
24 25
import xiangshan._
import utils._
26
import xiangshan.backend.dispatch.Dispatch2Rs
27
import xiangshan.backend.exu.ExuConfig
28
import xiangshan.backend.fu.fpu.FMAMidResultIO
29
import xiangshan.backend.issue.{ReservationStation, ReservationStationWrapper, RsPerfCounter}
30
import xiangshan.backend.regfile.{Regfile, RfReadPort, RfWritePort}
31
import xiangshan.backend.rename.{BusyTable, BusyTableReadIO}
32
import xiangshan.mem.{LsqEnqIO, MemWaitUpdateReq, SqPtr}
33

34 35 36 37 38 39 40 41 42 43
import scala.collection.mutable.ArrayBuffer

/**
  * Fans a single decoupled micro-op stream out to `func.length` decoupled outputs.
  *
  * Every output carries the same payload as the input. Output `i` is valid when the input is
  * valid and `func(i)` evaluates to true for the incoming micro-op. The input is reported ready
  * whenever at least one of the outputs fires.
  *
  * @param func one acceptance predicate per output port, in port order
  */
class DispatchArbiter(func: Seq[MicroOp => Bool])(implicit p: Parameters) extends XSModule {
  val numTarget = func.length

  val io = IO(new Bundle {
    val in = Flipped(DecoupledIO(new MicroOp))
    val out = Vec(numTarget, DecoupledIO(new MicroOp))
  })

  // Broadcast the payload and gate each output's valid with its own predicate.
  for ((port, accepts) <- io.out.zip(func)) {
    port.bits := io.in.bits
    port.valid := io.in.valid && accepts(io.in.bits)
  }

  // The input is consumed as soon as any output completes its handshake.
  io.in.ready := VecInit(io.out.map(_.fire())).asUInt.orR
}

/** Factory that instantiates a [[DispatchArbiter]] and wires up its input. */
object DispatchArbiter {
  /**
    * Creates a `DispatchArbiter` for `func`, bulk-connects `in` to its input port and returns
    * the arbiter's output ports.
    *
    * @param in   decoupled micro-op stream to be routed
    * @param func one acceptance predicate per output port
    * @return the arbiter's output vector (one decoupled port per entry of `func`)
    */
  def apply(in: DecoupledIO[MicroOp], func: Seq[MicroOp => Bool])(implicit p: Parameters) = {
    val arbiter = Module(new DispatchArbiter(func))
    arbiter.io.in <> in
    arbiter.io.out
  }
}

60
/**
  * Helpers for locating execution units among writeback ports and for deriving
  * int-only / fp-only views of uop and writeback channels.
  */
trait HasExuWbHelper {
  /** Returns the indices of the ports in `wb` whose configuration list contains `target`. */
  def findInWbPorts(wb: Seq[Seq[ExuConfig]], target: ExuConfig) : Seq[Int] = {
    wb.zipWithIndex.collect { case (portCfgs, idx) if portCfgs.contains(target) => idx }
  }

  /** Concatenates, in order of `targets`, the port indices found for each individual target. */
  def findInWbPorts(wb: Seq[Seq[ExuConfig]], targets: Seq[ExuConfig]) : Seq[Int] = {
    targets.flatMap(target => findInWbPorts(wb, target))
  }

  /**
    * Builds the fast-wakeup port index list for `cfg`.
    *
    * Integer sources are included only if `cfg.readIntRf`; floating-point sources are included
    * only if `cfg.readFpRf` and are shifted by `offset`. Integer indices come first.
    */
  def getFastWakeupIndex(cfg: ExuConfig, intSource: Seq[Int], fpSource: Seq[Int], offset: Int) : Seq[Int] = {
    val fromInt = if (cfg.readIntRf) intSource else Seq.empty[Int]
    val fromFp = if (cfg.readFpRf) fpSource.map(_ + offset) else Seq.empty[Int]
    fromInt ++ fromFp
  }

  /** Copy of `x` whose valid is additionally qualified by `ctrl.fpWen`. */
  def fpUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
    val uop = WireInit(x)
    uop.valid := x.valid && x.bits.ctrl.fpWen
    uop
  }

  /** Copy of `x` whose valid is additionally qualified by `uop.ctrl.fpWen`. */
  def fpOutValid(x: ValidIO[ExuOutput]): ValidIO[ExuOutput] = {
    val out = WireInit(x)
    out.valid := x.valid && x.bits.uop.ctrl.fpWen
    out
  }

  /**
    * Decoupled variant of `fpOutValid`.
    *
    * @param connectReady when true, `x.ready` is driven from the returned wire's ready
    */
  def fpOutValid(x: DecoupledIO[ExuOutput], connectReady: Boolean = false): DecoupledIO[ExuOutput] = {
    val out = WireInit(x)
    if (connectReady) {
      x.ready := out.ready
    }
    out.valid := x.valid && x.bits.uop.ctrl.fpWen
    out
  }

  /** Copy of `x` whose valid is additionally qualified by `ctrl.rfWen`. */
  def intUopValid(x: ValidIO[MicroOp]): ValidIO[MicroOp] = {
    val uop = WireInit(x)
    uop.valid := x.valid && x.bits.ctrl.rfWen
    uop
  }

  /** Copy of `x` whose valid is additionally qualified by `!uop.ctrl.fpWen`. */
  def intOutValid(x: ValidIO[ExuOutput]): ValidIO[ExuOutput] = {
    val out = WireInit(x)
    out.valid := x.valid && !x.bits.uop.ctrl.fpWen
    out
  }

  /**
    * Decoupled variant of `intOutValid`.
    *
    * @param connectReady when true, `x.ready` is driven from the returned wire's ready
    */
  def intOutValid(x: DecoupledIO[ExuOutput], connectReady: Boolean = false): DecoupledIO[ExuOutput] = {
    val out = WireInit(x)
    if (connectReady) {
      x.ready := out.ready
    }
    out.valid := x.valid && !x.bits.uop.ctrl.fpWen
    out
  }

  /** Creates a Valid wire mirroring `d.valid` and `d.bits`; `d.ready` is not touched. */
  def decoupledIOToValidIO[T <: Data](d: DecoupledIO[T]): Valid[T] = {
    val v = Wire(Valid(d.bits.cloneType))
    v.valid := d.valid
    v.bits := d.bits
    v
  }

  /** Creates a Decoupled wire mirroring `v.valid` and `v.bits`, with ready tied to true. */
  def validIOToDecoupledIO[T <: Data](v: Valid[T]): DecoupledIO[T] = {
    val d = Wire(DecoupledIO(v.bits.cloneType))
    d.valid := v.valid
    d.ready := true.B
    d.bits := v.bits
    d
  }
}

124
/**
  * Diplomatic (LazyModule) half of a scheduler: it elaborates the dispatch stages and the
  * reservation stations and derives all port counts used by [[SchedulerImp]].
  *
  * @param configs           one entry per reservation station: the execution unit config, the
  *                          number of issue (dequeue) ports, and two config lists that are
  *                          matched against other stations' configs to find int / fp fast
  *                          wakeup sources
  * @param dpPorts           for every dispatch port, the (reservation station index,
  *                          dispatch port index inside that station) pairs it feeds
  * @param intRfWbPorts      execution unit configs attached to each integer regfile write port
  * @param fpRfWbPorts       execution unit configs attached to each fp regfile write port
  * @param outFastPorts      per reservation station, indices of fast wakeup ports coming from
  *                          outside this scheduler
  * @param outIntRfReadPorts number of extra integer regfile read ports exported to the outside
  * @param outFpRfReadPorts  number of extra fp regfile read ports exported to the outside
  * @param hasIntRf          whether this scheduler owns an integer regfile
  * @param hasFpRf           whether this scheduler owns a floating-point regfile
  */
class Scheduler(
  val configs: Seq[(ExuConfig, Int, Seq[ExuConfig], Seq[ExuConfig])],
  val dpPorts: Seq[Seq[(Int, Int)]],
  val intRfWbPorts: Seq[Seq[ExuConfig]],
  val fpRfWbPorts: Seq[Seq[ExuConfig]],
  val outFastPorts: Seq[Seq[Int]],
  val outIntRfReadPorts: Int,
  val outFpRfReadPorts: Int,
  val hasIntRf: Boolean,
  val hasFpRf: Boolean
)(implicit p: Parameters) extends LazyModule with HasXSParameter with HasExuWbHelper {
  val numDpPorts = dpPorts.length
  // Execution unit configs reachable from every dispatch port.
  val dpExuConfigs = dpPorts.map(_.map { case (rsIdx, _) => configs(rsIdx)._1 })

  /**
    * Builds the dispatch stage(s). With more dispatch ports than ALUs the ports are split into
    * an integer dispatcher (first `AluCnt` ports) and a load/store dispatcher (the rest);
    * otherwise a single dispatcher serves all ports.
    */
  def getDispatch2 = {
    val numAlu = exuParameters.AluCnt
    if (dpExuConfigs.length > numAlu) {
      val intDispatch = LazyModule(new Dispatch2Rs(dpExuConfigs.take(numAlu)))
      val lsDispatch = LazyModule(new Dispatch2Rs(dpExuConfigs.drop(numAlu)))
      Seq(intDispatch, lsDispatch)
    } else {
      val fpDispatch = LazyModule(new Dispatch2Rs(dpExuConfigs))
      Seq(fpDispatch)
    }
  }
  val dispatch2 = getDispatch2

  // regfile parameters: overall read and write ports
  val numIntRfWritePorts = intRfWbPorts.length
  val numFpRfWritePorts = fpRfWbPorts.length

  // reservation station parameters: dispatch, regfile, issue, wakeup, fastWakeup
  // A station listens to all int write ports if it has int sources, and likewise for fp.
  val wakeupPorts = configs.map { case (config, _, _, _) =>
    val numInt = if (config.intSrcCnt > 0) numIntRfWritePorts else 0
    val numFp = if (config.fpSrcCnt > 0) numFpRfWritePorts else 0
    numInt + numFp
  }
  // For each station: the (config tuple, index) pairs of stations that fast-wake it up.
  val innerIntFastSources = configs.map(_._1).map { cfg =>
    configs.zipWithIndex.filter { case (source, _) => source._3.contains(cfg) && source._1.wakeupFromRS }
  }
  val innerFpFastSources = configs.map(_._1).map { cfg =>
    configs.zipWithIndex.filter { case (source, _) => source._4.contains(cfg) && source._1.wakeupFromRS }
  }
  // Writeback port indices carrying the data of the inner fast wakeup sources (fp ones are
  // offset by the number of int write ports).
  val innerFastPorts = configs.map(_._1).zipWithIndex.map { case (config, i) =>
    val intSource = findInWbPorts(intRfWbPorts, innerIntFastSources(i).map(_._1._1))
    val fpSource = findInWbPorts(fpRfWbPorts, innerFpFastSources(i).map(_._1._1))
    getFastWakeupIndex(config, intSource, fpSource, numIntRfWritePorts)
  }
  println(s"inner fast: $innerFastPorts")
  val numAllFastPorts = innerFastPorts.zip(outFastPorts).map { case (inner, out) => inner.length + out.length }

  // instantiate reservation stations and register their issue / wakeup ports
  val reservationStations = configs.zipWithIndex.map { case ((config, numDeq, _, _), i) =>
    val rs = LazyModule(new ReservationStationWrapper())
    rs.addIssuePort(config, numDeq)
    rs.addWakeup(wakeupPorts(i))
    rs.addEarlyWakeup(numAllFastPorts(i))
    rs
  }
  // connect to dispatch
  val dpFuConfigs = dpPorts.map { port =>
    port.map { case (rsIdx, _) => reservationStations(rsIdx).addDispatchPort() }.reduce(_ ++ _)
  }

  val numIssuePorts = configs.map(_._2).sum
  val numReplayPorts = reservationStations.filter(_.params.hasFeedback).map(_.params.numDeq).sum
  val memRsEntries = reservationStations.filter(_.params.hasFeedback).map(_.params.numEntries)
  // Common entry count of all stations with feedback (0 if there is none).
  val getMemRsEntries = {
    require(memRsEntries.isEmpty || memRsEntries.max == memRsEntries.min, "different indexes not supported")
    if (memRsEntries.isEmpty) 0 else memRsEntries.max
  }
  val numSTDPorts = reservationStations.filter(_.params.exuCfg.get == StdExeUnitCfg).map(_.params.numDeq).sum

  // Each dispatch port needs as many regfile read ports as its most demanding station.
  val numDpPortIntRead = dpPorts.map(_.map { case (rsIdx, _) => configs(rsIdx)._1.intSrcCnt }.max)
  val numIntRfReadPorts = numDpPortIntRead.sum + outIntRfReadPorts
  val numDpPortFpRead = dpPorts.map(_.map { case (rsIdx, _) => configs(rsIdx)._1.fpSrcCnt }.max)
  val numFpRfReadPorts = numDpPortFpRead.sum + outFpRfReadPorts

  lazy val module = new SchedulerImp(this)

  /** True if any execution unit config of this scheduler accepts `fuType`. */
  def canAccept(fuType: UInt): Bool = {
    VecInit(configs.map(_._1.canAccept(fuType))).asUInt.orR
  }
}

/**
  * Hardware implementation of [[Scheduler]].
  *
  * It instantiates the busy tables and register files (when owned by this scheduler), connects
  * dispatch to the reservation stations, routes register read data, slow and fast wakeups to
  * every reservation station, and exposes issue ports plus performance counters.
  *
  * @param outer the diplomatic wrapper holding the elaborated reservation stations and all
  *              derived port counts
  */
class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSParameter {
  val memRsEntries = outer.getMemRsEntries
  // Parameters with IssQueSize overridden by the entry count of the memory reservation stations;
  // used for the feedback ports below.
  val updatedP = p.alter((site, here, up) => {
    case XSCoreParamsKey => up(XSCoreParamsKey).copy(
      IssQueSize = memRsEntries
    )
  })
  val intRfWritePorts = outer.numIntRfWritePorts
  val fpRfWritePorts = outer.numFpRfWritePorts
  // (regfile is instantiated here, number of read ports, number of write ports)
  val intRfConfig = (outer.numIntRfReadPorts > 0 && outer.hasIntRf, outer.numIntRfReadPorts, intRfWritePorts)
  val fpRfConfig = (outer.numFpRfReadPorts > 0 && outer.hasFpRf, outer.numFpRfReadPorts, fpRfWritePorts)

  val rs_all = outer.reservationStations
  val numPerfPorts = outer.reservationStations.map(_.module.perf.length).sum

  // print rs info
  println("Scheduler: ")
  println(s"  number of issue ports: ${outer.numIssuePorts}")
  println(s"  number of replay ports: ${outer.numReplayPorts}")
  println(s"  size of load and store RSes: ${outer.getMemRsEntries}")
  println(s"  number of std ports: ${outer.numSTDPorts}")
  val numLoadPorts = outer.reservationStations.map(_.module.io.load).filter(_.isDefined).map(_.get.fastMatch.length).sum
  println(s"  number of load ports: ${numLoadPorts}")
  println(s"  number of perf ports: ${numPerfPorts}")
  if (intRfConfig._1) {
    println(s"INT Regfile: ${intRfConfig._2}R${intRfConfig._3}W")
  }
  if (fpRfConfig._1) {
    println(s"FP  Regfile: ${fpRfConfig._2}R${fpRfConfig._3}W")
  }
  for ((rs, i) <- rs_all.zipWithIndex) {
    println(s"RS $i: $rs")
    println(s"  innerIntUop: ${outer.innerIntFastSources(i).map(_._2)}")
    println(s"  innerFpUop: ${outer.innerFpFastSources(i).map(_._2)}")
    println(s"  innerFastPorts: ${outer.innerFastPorts(i)}")
    println(s"  outFastPorts: ${outer.outFastPorts(i)}")
    println(s"  loadBalance: ${rs_all(i).params.needBalance}")
  }

  /** Optional and miscellaneous ports; most of them exist only for particular configurations. */
  class SchedulerExtraIO extends XSBundle {
    // feedback ports
    val feedback = if (outer.numReplayPorts > 0) Some(Vec(outer.numReplayPorts, Flipped(new MemRSFeedbackIO()(updatedP)))) else None
    // special ports for RS that needs to read from other schedulers
    // In: read response from other schedulers
    // Out: read request to other schedulers
    val intRfReadIn = if (!outer.hasIntRf && outer.numIntRfReadPorts > 0) Some(Vec(outer.numIntRfReadPorts, Flipped(new RfReadPort(XLEN)))) else None
    val intRfReadOut = if (outer.outIntRfReadPorts > 0) Some(Vec(outer.outIntRfReadPorts, new RfReadPort(XLEN))) else None
    val fpRfReadIn = if (!outer.hasFpRf && outer.numFpRfReadPorts > 0) Some(Vec(outer.numFpRfReadPorts, Flipped(new RfReadPort(XLEN)))) else None
    val fpStateReadIn = if (!outer.hasFpRf && outer.numFpRfReadPorts > 0) Some(Vec(outer.numFpRfReadPorts, Flipped(new BusyTableReadIO))) else None
    val fpRfReadOut = if (outer.outFpRfReadPorts > 0) Some(Vec(outer.outFpRfReadPorts, new RfReadPort(XLEN))) else None
    val fpStateReadOut = if (outer.outFpRfReadPorts > 0) Some(Vec(outer.outFpRfReadPorts, new BusyTableReadIO)) else None
    val loadFastMatch = if (numLoadPorts > 0) Some(Vec(numLoadPorts, Output(UInt(exuParameters.LduCnt.W)))) else None
    // misc
    val jumpPc = Input(UInt(VAddrBits.W))
    val jalr_target = Input(UInt(VAddrBits.W))
    val stIssuePtr = Input(new SqPtr())
    // special ports for load / store rs
    val enqLsq = if (outer.numReplayPorts > 0) Some(Flipped(new LsqEnqIO)) else None
    val memWaitUpdateReq = Flipped(new MemWaitUpdateReq)
    // debug
    val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
    val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
    val perf = Vec(numPerfPorts, Output(new RsPerfCounter))

    override def cloneType: SchedulerExtraIO.this.type =
      new SchedulerExtraIO().asInstanceOf[this.type]
  }

  val numFma = outer.reservationStations.map(_.module.io.fmaMid.getOrElse(Seq()).length).sum

  val io = IO(new Bundle {
    val hartId = Input(UInt(8.W))
    // global control
    val redirect = Flipped(ValidIO(new Redirect))
    // dispatch and issue ports
    // val allocate = Vec(outer.numDpPorts, Flipped(DecoupledIO(new MicroOp)))
    val allocPregs = Vec(RenameWidth, Input(new ResetPregStateReq))
    val in = Vec(dpParams.IntDqDeqWidth * outer.dispatch2.length, Flipped(DecoupledIO(new MicroOp)))
    val issue = Vec(outer.numIssuePorts, DecoupledIO(new ExuInput))
    val fastUopOut = Vec(outer.numIssuePorts, ValidIO(new MicroOp))
    // wakeup-related ports
    val writeback = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new ExuOutput)))
    val fastUopIn = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new MicroOp)))
    // misc ports
    val extra = new SchedulerExtraIO
    val fmaMid = if (numFma > 0) Some(Vec(numFma, Flipped(new FMAMidResultIO))) else None
  })

  val dispatch2 = outer.dispatch2.map(_.module)

  // dirty code for ls dp
  dispatch2.foreach(dp => if (dp.io.enqLsq.isDefined) dp.io.enqLsq.get <> io.extra.enqLsq.get)

  io.in <> dispatch2.flatMap(_.io.in)

  // ---------------------------------------------------------------------------------------------
  // Busy tables
  // ---------------------------------------------------------------------------------------------
  val readIntState = dispatch2.flatMap(_.io.readIntState.getOrElse(Seq()))
  // Default to DontCare; overwritten below only if the corresponding busy table exists.
  val intbtperfEvents = Wire(new PerfEventsBundle(4))
  val fpbtperfEvents = Wire(new PerfEventsBundle(4))
  intbtperfEvents := DontCare
  fpbtperfEvents  := DontCare
  if (readIntState.nonEmpty) {
    val busyTable = Module(new BusyTable(readIntState.length, intRfWritePorts))
    busyTable.io.allocPregs.zip(io.allocPregs).foreach{ case (pregAlloc, allocReq) =>
      pregAlloc.valid := allocReq.isInt
      pregAlloc.bits := allocReq.preg
    }
    busyTable.io.wbPregs.zip(io.writeback.take(intRfWritePorts)).foreach{ case (pregWb, exuWb) =>
      pregWb.valid := exuWb.valid && exuWb.bits.uop.ctrl.rfWen
      pregWb.bits := exuWb.bits.uop.pdest
    }
    intbtperfEvents <> busyTable.perfinfo.perfEvents
    busyTable.io.read <> readIntState
  }
  val readFpState = io.extra.fpStateReadOut.getOrElse(Seq()) ++ dispatch2.flatMap(_.io.readFpState.getOrElse(Seq()))
  if (readFpState.nonEmpty) {
    // Some fp states are read from outside
    val numInFpStateRead = io.extra.fpStateReadIn.getOrElse(Seq()).length
    // The left read requests are serviced by internal busytable
    val numBusyTableRead = readFpState.length - numInFpStateRead
    if (numBusyTableRead > 0) {
      val busyTable = Module(new BusyTable(numBusyTableRead, fpRfWritePorts))
      busyTable.io.allocPregs.zip(io.allocPregs).foreach { case (pregAlloc, allocReq) =>
        pregAlloc.valid := allocReq.isFp
        pregAlloc.bits := allocReq.preg
      }
      busyTable.io.wbPregs.zip(io.writeback.drop(intRfWritePorts)).foreach { case (pregWb, exuWb) =>
        pregWb.valid := exuWb.valid && exuWb.bits.uop.ctrl.fpWen
        pregWb.bits := exuWb.bits.uop.pdest
      }
      // Only the first numBusyTableRead requests belong to the internal busy table. The
      // remaining ones are served through fpStateReadIn below, so the full readFpState must
      // not be connected here (its length differs whenever numInFpStateRead > 0).
      busyTable.io.read <> readFpState.take(numBusyTableRead)
      fpbtperfEvents <> busyTable.perfinfo.perfEvents
    }
    if (io.extra.fpStateReadIn.isDefined) {
      io.extra.fpStateReadIn.get <> readFpState.takeRight(numInFpStateRead)
    }
  }
  val allocate = dispatch2.flatMap(_.io.out)

  if (io.fmaMid.isDefined) {
    io.fmaMid.get <> outer.reservationStations.flatMap(_.module.io.fmaMid.getOrElse(Seq()))
  }

  // ---------------------------------------------------------------------------------------------
  // Register files
  // ---------------------------------------------------------------------------------------------
  /** Read addresses issued by dispatch: the first `numRead(i)` psrc of every dispatch port. */
  def extraReadRf(numRead: Seq[Int]): Seq[UInt] = {
    require(numRead.length == allocate.length)
    allocate.map(_.bits.psrc).zip(numRead).flatMap{ case (src, num) => src.take(num) }
  }
  // Dispatch read addresses first, then the read requests forwarded to other schedulers.
  def readIntRf: Seq[UInt] = extraReadRf(outer.numDpPortIntRead) ++ io.extra.intRfReadOut.getOrElse(Seq()).map(_.addr)
  def readFpRf: Seq[UInt] = extraReadRf(outer.numDpPortFpRead) ++ io.extra.fpRfReadOut.getOrElse(Seq()).map(_.addr)

  /**
    * Instantiates the integer (`isInt`) or floating-point regfile and returns its read data.
    * The debug read data is appended to the normal read data (see the `takeRight(32)` /
    * `dropRight(32)` users below).
    */
  def genRegfile(isInt: Boolean): Seq[UInt] = {
    val wbPorts = if (isInt) io.writeback.take(intRfWritePorts) else io.writeback.drop(intRfWritePorts)
    val waddr = wbPorts.map(_.bits.uop.pdest)
    val wdata = wbPorts.map(_.bits.data)
    val debugRead = if (isInt) io.extra.debug_int_rat else io.extra.debug_fp_rat
    if (isInt) {
      val wen = wbPorts.map(wb => wb.valid && wb.bits.uop.ctrl.rfWen)
      Regfile(NRPhyRegs, readIntRf, wen, waddr, wdata, true, debugRead = Some(debugRead))
    }
    else {
      // For floating-point function units, every instruction writes either int or fp regfile.
      val wen = wbPorts.map(_.valid)
      Regfile(NRPhyRegs, readFpRf, wen, waddr, wdata, false, debugRead = Some(debugRead))
    }
  }

  // Use the local regfile if there is one; otherwise take the read data from outside
  // (fp data from outside is delayed by one cycle).
  val intRfReadData = if (intRfConfig._1) genRegfile(true) else io.extra.intRfReadIn.getOrElse(Seq()).map(_.data)
  val fpRfReadData = if (fpRfConfig._1) genRegfile(false) else DelayN(VecInit(io.extra.fpRfReadIn.getOrElse(Seq()).map(_.data)), 1)

  if (io.extra.intRfReadIn.isDefined) {
    io.extra.intRfReadIn.get.map(_.addr).zip(readIntRf).foreach{ case (r, addr) => r := addr}
    require(io.extra.intRfReadIn.get.length == readIntRf.length)
  }

  if (io.extra.fpRfReadIn.isDefined) {
    io.extra.fpRfReadIn.get.map(_.addr).zip(readFpRf).foreach{ case (r, addr) => r := addr}
    require(io.extra.fpRfReadIn.get.length == readFpRf.length)
  }

  if (io.extra.intRfReadOut.isDefined) {
    // Skip the trailing 32 debug read ports; the exported ports come right before them.
    val extraIntReadData = intRfReadData.dropRight(32).takeRight(outer.outIntRfReadPorts)
    io.extra.intRfReadOut.get.map(_.data).zip(extraIntReadData).foreach{ case (a, b) => a := b }
    require(io.extra.intRfReadOut.get.length == extraIntReadData.length)
  }

  if (io.extra.fpRfReadOut.isDefined) {
    val extraFpReadData = fpRfReadData.dropRight(32).takeRight(outer.outFpRfReadPorts)
    io.extra.fpRfReadOut.get.map(_.data).zip(extraFpReadData).foreach{ case (a, b) => a := b }
    require(io.extra.fpRfReadOut.get.length == extraFpReadData.length)
  }

  // ---------------------------------------------------------------------------------------------
  // Reservation stations: issue, feedback and wakeup connections
  // ---------------------------------------------------------------------------------------------
  var issueIdx = 0
  var feedbackIdx = 0
  var stDataIdx = 0
  var fastUopOutIdx = 0
  // Stations without fast wakeup outputs leave their slice of fastUopOut undriven.
  io.fastUopOut := DontCare
  for (((node, cfg), i) <- rs_all.zip(outer.configs.map(_._1)).zipWithIndex) {
    val rs = node.module

    rs.io.redirect <> io.redirect

    // Issue ports (and the matching fast uop outputs) are allocated consecutively.
    val issueWidth = rs.io.deq.length
    rs.io.deq <> io.issue.slice(issueIdx, issueIdx + issueWidth)
    if (rs.io.fastWakeup.isDefined) {
      rs.io.fastWakeup.get <> io.fastUopOut.slice(issueIdx, issueIdx + issueWidth)
    }
    issueIdx += issueWidth

    if (rs.io.jump.isDefined) {
      rs.io.jump.get.jumpPc := io.extra.jumpPc
      rs.io.jump.get.jalr_target := io.extra.jalr_target
    }
    if (rs.io.checkwait.isDefined) {
      rs.io.checkwait.get.stIssuePtr <> io.extra.stIssuePtr
      rs.io.checkwait.get.memWaitUpdateReq <> io.extra.memWaitUpdateReq
    }
    if (rs.io.feedback.isDefined) {
      val width = rs.io.feedback.get.length
      val feedback = io.extra.feedback.get.slice(feedbackIdx, feedbackIdx + width)
      require(feedback(0).rsIdx.getWidth == rs.io.feedback.get(0).rsIdx.getWidth)
      rs.io.feedback.get.zip(feedback).foreach{ case (r, f) =>
        r.feedbackFast <> f.feedbackFast
        r.feedbackSlow <> f.feedbackSlow
        r.rsIdx <> f.rsIdx
        r.isFirstIssue <> f.isFirstIssue
      }
      feedbackIdx += width
    }

    // Slow (writeback) wakeup: int ports first, then fp ports.
    val intWriteback = io.writeback.take(intRfWritePorts)
    val fpWriteback  = io.writeback.drop(intRfWritePorts)
    (cfg.intSrcCnt > 0, cfg.fpSrcCnt > 0) match {
      case (true,  false) => rs.io.slowPorts := intWriteback
      case (false, true) => rs.io.slowPorts := fpWriteback
      // delay fp for extra one cycle
      case (true,  true) => rs.io.slowPorts := intWriteback ++ RegNext(VecInit(fpWriteback))
      case _ => throw new RuntimeException("unknown wakeup source")
    }

    // Fast wakeup from reservation stations inside this scheduler.
    val innerIntUop = outer.innerIntFastSources(i).map(_._2).map(rs_all(_).module.io.fastWakeup.get).fold(Seq())(_ ++ _)
    val innerFpUop = outer.innerFpFastSources(i).map(_._2).map(rs_all(_).module.io.fastWakeup.get).fold(Seq())(_ ++ _)
    val innerUop = innerIntUop ++ innerFpUop
    val innerData = outer.innerFastPorts(i).map(io.writeback(_).bits.data)
    node.connectFastWakeup(innerUop, innerData)
    require(innerUop.length == innerData.length)

    // Fast wakeup from outside this scheduler.
    val outerUop = outer.outFastPorts(i).map(io.fastUopIn(_))
    val outerData = outer.outFastPorts(i).map(io.writeback(_).bits.data)
    node.connectFastWakeup(outerUop, outerData)
    require(outerUop.length == outerData.length)
  }
  require(issueIdx == io.issue.length)
  if (io.extra.loadFastMatch.isDefined) {
    val allLoadRS = outer.reservationStations.map(_.module.io.load).filter(_.isDefined)
    io.extra.loadFastMatch.get := allLoadRS.map(_.get.fastMatch).fold(Seq())(_ ++ _)
  }
  io.extra.perf <> rs_all.flatMap(_.module.perf)

  // ---------------------------------------------------------------------------------------------
  // Dispatch ports: uop routing and register read data
  // ---------------------------------------------------------------------------------------------
  var intReadPort = 0
  var fpReadPort = 0
  for ((dp, i) <- outer.dpPorts.zipWithIndex) {
    // dp connects only one rs: don't use arbiter
    if (dp.length == 1) {
      rs_all(dp.head._1).module.io.fromDispatch(dp.head._2) <> allocate(i)
    }
    // dp connects more than one rs: use arbiter to route uop to the correct rs
    else {
      val func = dp.map(rs => (op: MicroOp) => rs_all(rs._1).canAccept(op.ctrl.fuType))
      val arbiterOut = DispatchArbiter(allocate(i), func)
      val rsIn = VecInit(dp.map(rs => rs_all(rs._1).module.io.fromDispatch(rs._2)))
      rsIn <> arbiterOut
    }

    // All stations behind this dispatch port share the same integer read ports.
    val numIntRfPorts = dp.map(_._1).map(rs_all(_).intSrcCnt).max
    if (numIntRfPorts > 0) {
      val intRfPorts = VecInit(intRfReadData.slice(intReadPort, intReadPort + numIntRfPorts))
      for ((rs, idx) <- dp) {
        val target = rs_all(rs).module.io.srcRegValue(idx)
        target := intRfPorts.take(target.length)
      }
      intReadPort += numIntRfPorts
    }

    val numFpRfPorts = dp.map(_._1).map(rs_all(_).fpSrcCnt).max
    if (numFpRfPorts > 0) {
      val fpRfPorts = VecInit(fpRfReadData.slice(fpReadPort, fpReadPort + numFpRfPorts))
      for ((rs, idx) <- dp) {
        val mod = rs_all(rs).module
        if (numIntRfPorts > 0) {
          require(numFpRfPorts == 1 && numIntRfPorts == 1)
          // dirty code for store
          mod.io.fpRegValue.get(idx) := fpRfPorts.head
        }
        else {
          // No integer sources on this dispatch port, so srcRegValue was not driven above:
          // the operands always come from the floating-point regfile.
          val target = mod.io.srcRegValue(idx)
          target := fpRfPorts.take(target.length)
        }
      }
      fpReadPort += numFpRfPorts
    }
  }

  // ---------------------------------------------------------------------------------------------
  // Difftest: architectural register state (the last 32 read ports are the debug reads)
  // ---------------------------------------------------------------------------------------------
  if ((env.AlwaysBasicDiff || env.EnableDifftest) && intRfConfig._1) {
    val difftest = Module(new DifftestArchIntRegState)
    difftest.io.clock := clock
    difftest.io.coreid := io.hartId
    difftest.io.gpr := intRfReadData.takeRight(32)
  }
  if ((env.AlwaysBasicDiff || env.EnableDifftest) && fpRfConfig._1) {
    val difftest = Module(new DifftestArchFpRegState)
    difftest.io.clock := clock
    difftest.io.coreid := io.hartId
    difftest.io.fpr := fpRfReadData.takeRight(32)
  }

  // ---------------------------------------------------------------------------------------------
  // Performance counters
  // ---------------------------------------------------------------------------------------------
  XSPerfAccumulate("allocate_valid", PopCount(allocate.map(_.valid)))
  XSPerfAccumulate("allocate_fire", PopCount(allocate.map(_.fire())))
  XSPerfAccumulate("issue_valid", PopCount(io.issue.map(_.valid)))
  XSPerfAccumulate("issue_fire", PopCount(io.issue.map(_.fire)))
  val perfEvents_list = Wire(new PerfEventsBundle(2))
  val perfEvents = Seq(
    ("sche_allocate_fire    ", PopCount(allocate.map(_.fire()))   ),
    ("sche_issue_fire       ", PopCount(io.issue.map(_.fire))     ),
  )
  // Event values are registered once before being exported.
  for ((perf_out, (_, perf)) <- perfEvents_list.perf_events.zip(perfEvents)) {
    perf_out.incr_step := RegNext(perf)
  }

  // Scheduler events first, then the int and fp busy table events.
  val perf_list =  perfEvents_list.perf_events ++ intbtperfEvents.perf_events ++ fpbtperfEvents.perf_events
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(perf_list.length))
  })
  perfinfo.perfEvents.perf_events := perf_list
}