/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.backend

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config.Parameters
import difftest.{DifftestArchFpRegState, DifftestArchIntRegState}
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import xiangshan._
import utils._
import xiangshan.backend.exu.ExuConfig
import xiangshan.backend.issue.{ReservationStation, ReservationStationWrapper}
import xiangshan.backend.regfile.{Regfile, RfReadPort, RfWritePort}
import xiangshan.mem.{SqPtr, StoreDataBundle}

import scala.collection.mutable.ArrayBuffer

/**
  * Fans one incoming uop out to several candidate targets.
  *
  * Output `i` is valid when the input is valid and `func(i)` evaluates to true for the incoming
  * uop. Every output carries the same uop bits. The input is ready in any cycle in which at
  * least one output fires.
  *
  * @param func one acceptance predicate per target, in output order; must not be empty
  */
class DispatchArbiter(func: Seq[MicroOp => Bool])(implicit p: Parameters) extends XSModule {
  // Fail with a readable message instead of the generic empty-Vec error raised by VecInit below.
  require(func.nonEmpty, "DispatchArbiter requires at least one target")

  val numTarget = func.length

  val io = IO(new Bundle {
    val in = Flipped(DecoupledIO(new MicroOp))
    val out = Vec(numTarget, DecoupledIO(new MicroOp))
  })

  // Broadcast the uop; only the valid bit is qualified by each target's predicate.
  io.out.zip(func).foreach { case (target, accepts) =>
    target.valid := io.in.valid && accepts(io.in.bits)
    target.bits := io.in.bits
  }

  // The uop is consumed as soon as any target takes it.
  io.in.ready := VecInit(io.out.map(_.fire())).asUInt.orR
}

/**
  * Factory that instantiates a [[DispatchArbiter]], connects `in` to its input and returns
  * the arbiter's output ports (one per entry of `func`).
  */
object DispatchArbiter {
  def apply(in: DecoupledIO[MicroOp], func: Seq[MicroOp => Bool])(implicit p: Parameters) = {
    val arbiter = Module(new DispatchArbiter(func))
    // Bulk-connect valid/ready/bits between the caller's port and the arbiter input.
    arbiter.io.in <> in
    arbiter.io.out
  }
}

/**
  * Elaboration-time helpers that translate execution-unit configurations into indices of
  * write-back ports. Nothing here creates hardware.
  */
trait HasExuWbMappingHelper {
  /**
    * @param wb     for each write-back port, the execution-unit configs attached to it
    * @param target the config to look for
    * @return indices (in ascending order) of the ports whose list contains `target`
    */
  def findInWbPorts(wb: Seq[Seq[ExuConfig]], target: ExuConfig) : Seq[Int] = {
    wb.zipWithIndex.collect { case (cfgs, idx) if cfgs.contains(target) => idx }
  }

  /**
    * Same as the single-target overload, applied to every element of `targets` and
    * concatenated in the order of `targets` (duplicates are kept).
    */
  def findInWbPorts(wb: Seq[Seq[ExuConfig]], targets: Seq[ExuConfig]) : Seq[Int] = {
    targets.flatMap(target => findInWbPorts(wb, target))
  }

  /**
    * Selects the fast-wakeup source indices relevant for `cfg`.
    *
    * @param cfg       the consumer; `readIntRf` / `readFpRf` decide which sources are kept
    * @param intSource integer-side source indices, used unchanged
    * @param fpSource  floating-point-side source indices, each shifted by `offset`
    * @param offset    value added to every entry of `fpSource`
    * @return the kept integer indices followed by the kept (shifted) floating-point indices
    */
  def getFastWakeupIndex(cfg: ExuConfig, intSource: Seq[Int], fpSource: Seq[Int], offset: Int) : Seq[Int] = {
    val fromInt = if (cfg.readIntRf) intSource else Seq.empty[Int]
    val fromFp = if (cfg.readFpRf) fpSource.map(_ + offset) else Seq.empty[Int]
    fromInt ++ fromFp
  }
}

/**
  * LazyModule that owns the reservation stations of one scheduler and derives, at
  * elaboration time, the port counts needed by [[SchedulerImp]].
  *
  * @param configs          one entry per reservation station: (execution-unit config, number of
  *                         issue ports, configs whose integer side is fast-woken by this entry,
  *                         configs whose floating-point side is fast-woken by this entry).
  *                         NOTE(review): the meaning of the 3rd/4th element is inferred from how
  *                         `innerIntFastSources` / `innerFpFastSources` use them - confirm.
  * @param dpPorts          one entry per dispatch port: the (reservation-station index,
  *                         dispatch-port index inside that station) pairs it can feed
  * @param intRfWbPorts     execution-unit configs attached to each integer write-back port
  * @param fpRfWbPorts      execution-unit configs attached to each floating-point write-back port
  * @param outFastPorts     per reservation station, the indices of external fast-wakeup ports
  * @param outFpRfReadPorts number of extra floating-point register-file read ports exported
  */
class Scheduler(
  val configs: Seq[(ExuConfig, Int, Seq[ExuConfig], Seq[ExuConfig])],
  val dpPorts: Seq[Seq[(Int, Int)]],
  val intRfWbPorts: Seq[Seq[ExuConfig]],
  val fpRfWbPorts: Seq[Seq[ExuConfig]],
  val outFastPorts: Seq[Seq[Int]],
  val outFpRfReadPorts: Int
)(implicit p: Parameters) extends LazyModule with HasXSParameter with HasExuWbMappingHelper {
  val numDpPorts = dpPorts.length

  // regfile parameters: overall read and write ports
  val numIntRfWritePorts = intRfWbPorts.length
  val numFpRfWritePorts = fpRfWbPorts.length

  // reservation station parameters: dispatch, regfile, issue, wakeup, fastWakeup
  // instantiate reservation stations and connect the issue ports

  // Number of (slow) wakeup ports per station: all write-back ports of every regfile it reads.
  val wakeupPorts = configs.map(_._1).map(config => {
    val numInt = if (config.intSrcCnt > 0) numIntRfWritePorts else 0
    val numFp = if (config.fpSrcCnt > 0) numFpRfWritePorts else 0
    numInt + numFp
  })
  // For every station: the (config entry, index) pairs that list it as a fast-wakeup target
  // and have wakeupFromRS set.
  val innerIntFastSources = configs.map(_._1).map(cfg =>
    configs.zipWithIndex.filter(c => c._1._3.contains(cfg) && c._1._1.wakeupFromRS))
  val innerFpFastSources = configs.map(_._1).map(cfg =>
    configs.zipWithIndex.filter(c => c._1._4.contains(cfg) && c._1._1.wakeupFromRS))
  // Write-back port indices of those sources; floating-point indices are offset by the
  // number of integer write-back ports.
  val innerFastPorts = configs.map(_._1).zipWithIndex.map{ case (config, i) =>
    val intSource = findInWbPorts(intRfWbPorts, innerIntFastSources(i).map(_._1._1))
    val fpSource = findInWbPorts(fpRfWbPorts, innerFpFastSources(i).map(_._1._1))
    getFastWakeupIndex(config, intSource, fpSource, numIntRfWritePorts)
  }
  println(s"inner fast: $innerFastPorts")
  val numAllFastPorts = innerFastPorts.zip(outFastPorts).map{ case (i, o) => i.length + o.length }
  val reservationStations = configs.zipWithIndex.map{ case ((config, numDeq, _, _), i) =>
    // Keep the local name `rs`: LazyModule may derive the instance name from it.
    val rs = LazyModule(new ReservationStationWrapper())
    rs.addIssuePort(config, numDeq)
    rs.addWakeup(wakeupPorts(i))
    rs.addEarlyWakeup(numAllFastPorts(i))
    rs
  }
  // connect to dispatch
  // The lambda parameter is deliberately not called `p`, which would shadow the implicit Parameters.
  val dpFuConfigs = dpPorts.map(_.map(port => reservationStations(port._1).addDispatchPort()).reduce(_ ++ _))

  val numIssuePorts = configs.map(_._2).sum

  // Parameters of the reservation stations that use the feedback (replay) interface.
  private val feedbackRsParams = reservationStations.map(_.params).filter(_.hasFeedback)
  val numReplayPorts = feedbackRsParams.map(_.numDeq).sum
  val memRsEntries = feedbackRsParams.map(_.numEntries)
  // Common entry count of all feedback stations, or 0 when there is none.
  val getMemRsEntries = {
    require(memRsEntries.isEmpty || memRsEntries.max == memRsEntries.min, "different indexes not supported")
    if (memRsEntries.isEmpty) 0 else memRsEntries.max
  }
  val numSTDPorts = reservationStations.filter(_.params.exuCfg.get == StdExeUnitCfg).map(_.params.numDeq).sum

  // Each dispatch port needs as many read ports as the most demanding station it can feed.
  val numDpPortIntRead = dpPorts.map(_.map(_._1).map(configs(_)._1.intSrcCnt).max)
  val numIntRfReadPorts = numDpPortIntRead.sum
  val numDpPortFpRead = dpPorts.map(_.map(_._1).map(configs(_)._1.fpSrcCnt).max)
  val numFpRfReadPorts = numDpPortFpRead.sum - numSTDPorts + outFpRfReadPorts

  lazy val module = new SchedulerImp(this)

  /** True when at least one configured execution unit accepts `fuType`. */
  def canAccept(fuType: UInt): Bool = {
    VecInit(configs.map(_._1.canAccept(fuType))).asUInt.orR
  }
}

/**
  * Hardware implementation of [[Scheduler]].
  *
  * Instantiates the integer and floating-point register files (when they have read ports),
  * wires every reservation station to the redirect/flush, issue, feedback, slow-wakeup and
  * fast-wakeup ports, routes dispatched uops to the reservation stations and feeds them the
  * register-file read data.
  *
  * @param outer the LazyModule carrying the elaboration-time configuration
  */
class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSParameter {
  val memRsEntries = outer.getMemRsEntries
  // Parameters with IssQueSize replaced by the memory RS size; used for the RSFeedback bundle.
  val updatedP = p.alter((site, here, up) => {
    case XSCoreParamsKey => up(XSCoreParamsKey).copy(
      IssQueSize = memRsEntries
    )
  })
  val intRfWritePorts = outer.numIntRfWritePorts
  val fpRfWritePorts = outer.numFpRfWritePorts
  // (has read ports, number of read ports, number of write ports)
  val intRfConfig = (outer.numIntRfReadPorts > 0, outer.numIntRfReadPorts, intRfWritePorts)
  val fpRfConfig = (outer.numFpRfReadPorts > 0, outer.numFpRfReadPorts, fpRfWritePorts)

  val rs_all = outer.reservationStations

  // print rs info
  println("Scheduler: ")
  for ((rs, i) <- rs_all.zipWithIndex) {
    println(s"RS $i: $rs")
    println(s"  innerIntUop: ${outer.innerIntFastSources(i).map(_._2)}")
    println(s"  innerFpUop: ${outer.innerFpFastSources(i).map(_._2)}")
    println(s"  innerFastPorts: ${outer.innerFastPorts(i)}")
    println(s"  outFastPorts: ${outer.outFastPorts(i)}")
  }
  println(s"  number of issue ports: ${outer.numIssuePorts}")
  println(s"  number of replay ports: ${outer.numReplayPorts}")
  println(s"  size of load and store RSes: ${outer.getMemRsEntries}")
  println(s"  number of std ports: ${outer.numSTDPorts}")
  if (intRfConfig._1) {
    println(s"INT Regfile: ${intRfConfig._2}R${intRfConfig._3}W")
  }
  if (fpRfConfig._1) {
    println(s"FP  Regfile: ${fpRfConfig._2}R${fpRfConfig._3}W")
  }

  /** Optional and miscellaneous ports; optional groups exist only when their count is non-zero. */
  class SchedulerExtraIO extends XSBundle {
    // feedback ports
    val feedback = if (outer.numReplayPorts > 0) Some(Vec(outer.numReplayPorts, new Bundle {
      val replay = Flipped(ValidIO(new RSFeedback()(updatedP)))
      val rsIdx = Output(UInt(log2Up(memRsEntries).W))
      val isFirstIssue = Output(Bool())
    })) else None
    // special ports for store
    val stData = if (outer.numSTDPorts > 0) Some(Vec(outer.numSTDPorts, ValidIO(new StoreDataBundle))) else None
    val fpRfReadIn = if (outer.numSTDPorts > 0) Some(Vec(outer.numSTDPorts, Flipped(new RfReadPort(XLEN)))) else None
    val fpRfReadOut = if (outer.outFpRfReadPorts > 0) Some(Vec(outer.outFpRfReadPorts, new RfReadPort(XLEN))) else None
    // misc
    val jumpPc = Input(UInt(VAddrBits.W))
    val jalr_target = Input(UInt(VAddrBits.W))
    val stIssuePtr = Input(new SqPtr())
    // debug
    val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
    val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))

    override def cloneType: SchedulerExtraIO.this.type =
      new SchedulerExtraIO().asInstanceOf[this.type]
  }

  val io = IO(new Bundle {
    // global control
    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    // dispatch and issue ports
    val allocate = Vec(outer.numDpPorts, Flipped(DecoupledIO(new MicroOp)))
    val issue = Vec(outer.numIssuePorts, DecoupledIO(new ExuInput))
    val fastUopOut = Vec(outer.numIssuePorts, ValidIO(new MicroOp))
    // wakeup-related ports
    val writeback = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new ExuOutput)))
    val fastUopIn = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new MicroOp)))
    // feedback ports
    val extra = new SchedulerExtraIO
  })

  /**
    * Collects register-file read addresses from the dispatch ports.
    *
    * @param numRead number of leading `psrc` entries to take from each dispatch port; must have
    *                one entry per dispatch port
    * @return the selected `psrc` signals, concatenated in dispatch-port order
    */
  def extraReadRf(numRead: Seq[Int]): Seq[UInt] = {
    require(numRead.length == io.allocate.length)
    val enq = io.allocate.map(_.bits.psrc)
    // TODO: for store, fp is located at the second operand
    // currently use numInt>0 && numFp>0. should make this configurable
    enq.zip(numRead).flatMap { case (src, num) => src.take(num) }
  }
  def readIntRf: Seq[UInt] = extraReadRf(outer.numDpPortIntRead)
  // Dispatch-port addresses first, then the addresses of the exported read ports.
  def readFpRf: Seq[UInt] = extraReadRf(outer.numDpPortFpRead) ++ io.extra.fpRfReadOut.getOrElse(Seq()).map(_.addr)
  def stData: Seq[ValidIO[StoreDataBundle]] = io.extra.stData.getOrElse(Seq())

  /**
    * Instantiates a register file with one read port per address in `raddr` and connects the
    * read addresses. Debug read ports are tied to DontCare here.
    *
    * @return the register file, or None when `raddr` is empty
    */
  def regfile(raddr: Seq[UInt], numWrite: Int, hasZero: Boolean, len: Int): Option[Regfile] = {
    val numReadPorts = raddr.length
    if (numReadPorts > 0) {
      val rf = Module(new Regfile(numReadPorts, numWrite, hasZero, len))
      rf.io.readPorts.map(_.addr).zip(raddr).foreach{ case (r1, r2) => r1 := r2 }
      rf.io.debug_rports := DontCare
      Some(rf)
    }
    else {
      None
    }
  }

  val intRf = regfile(readIntRf, intRfWritePorts, true, XLEN)
  val fpRf = if (outer.numFpRfReadPorts > 0) regfile(readFpRf, fpRfWritePorts, false, XLEN) else None
  val intRfReadData = if (intRf.isDefined) intRf.get.io.readPorts.map(_.data) else Seq()
  // Without a local FP regfile the read data comes from the external fpRfReadIn ports (if any).
  val fpRfReadData = if (fpRf.isDefined) fpRf.get.io.readPorts.map(_.data) else io.extra.fpRfReadIn.getOrElse(Seq()).map(_.data)

  // write ports: 0-3 ALU, 4-5 MUL, 6-7 LOAD
  // regfile write ports
  // io.writeback holds the integer write-back ports first, followed by the floating-point ones.
  if (intRf.isDefined) {
    intRf.get.io.writePorts.zip(io.writeback.take(intRfWritePorts)).foreach {
      case (rf, wb) =>
        rf.wen := wb.valid && wb.bits.uop.ctrl.rfWen
        rf.addr := wb.bits.uop.pdest
        rf.data := wb.bits.data
    }
  }
  if (fpRf.isDefined) {
    fpRf.get.io.writePorts.zip(io.writeback.drop(intRfWritePorts)).foreach {
      case (rf, wb) =>
        rf.wen := wb.valid
        rf.addr := wb.bits.uop.pdest
        rf.data := wb.bits.data
    }
  }

  // Forward FP read addresses to the external regfile ports.
  if (io.extra.fpRfReadIn.isDefined) {
    io.extra.fpRfReadIn.get.map(_.addr).zip(readFpRf).foreach{ case (r, addr) => r := addr}
  }

  // The exported read ports use the last outFpRfReadPorts entries of the FP read data.
  if (io.extra.fpRfReadOut.isDefined) {
    io.extra.fpRfReadOut.get.map(_.data).zip(fpRfReadData.takeRight(outer.outFpRfReadPorts)).foreach{ case (a, b) => a := b}
  }

  // Running offsets into the flattened issue / feedback / store-data port vectors.
  var issueIdx = 0
  var feedbackIdx = 0
  var stDataIdx = 0
  var fastUopOutIdx = 0
  // Entries not driven by a reservation station below stay DontCare.
  io.fastUopOut := DontCare
  for (((node, cfg), i) <- rs_all.zip(outer.configs.map(_._1)).zipWithIndex) {
    val rs = node.module

    rs.io.redirect <> io.redirect
    rs.io.flush <> io.flush

    val issueWidth = rs.io.deq.length
    rs.io.deq <> io.issue.slice(issueIdx, issueIdx + issueWidth)
    if (rs.io.fastWakeup.isDefined) {
      rs.io.fastWakeup.get <> io.fastUopOut.slice(issueIdx, issueIdx + issueWidth)
    }
    issueIdx += issueWidth

    if (rs.io.jump.isDefined) {
      rs.io.jump.get.jumpPc := io.extra.jumpPc
      rs.io.jump.get.jalr_target := io.extra.jalr_target
    }
    if (rs.io.checkwait.isDefined) {
      rs.io.checkwait.get.stIssuePtr <> io.extra.stIssuePtr
    }
    if (rs.io.feedback.isDefined) {
      val width = rs.io.feedback.get.length
      val feedback = io.extra.feedback.get.slice(feedbackIdx, feedbackIdx + width)
      require(feedback(0).rsIdx.getWidth == rs.io.feedback.get(0).rsIdx.getWidth)
      rs.io.feedback.get.zip(feedback).foreach{ case (r, f) =>
        r.memfeedback <> f.replay
        r.rsIdx <> f.rsIdx
        r.isFirstIssue <> f.isFirstIssue
      }
      feedbackIdx += width
    }
    // NOTE(review): this store-data connection is disabled by the constant `false`; it is kept
    // unchanged because the reason for disabling it is not visible in this file.
    if (false && rs.io.store.isDefined) {
      val width = rs.io.store.get.stData.length
      rs.io.store.get.stData <> stData.slice(stDataIdx, stDataIdx + width)
      stDataIdx += width
    }

    // Slow wakeup: listen to the write-back ports of every regfile this unit reads.
    (cfg.intSrcCnt > 0, cfg.fpSrcCnt > 0) match {
      case (true,  false) => rs.io.slowPorts := io.writeback.take(intRfWritePorts)
      case (false, true) => rs.io.slowPorts := io.writeback.drop(intRfWritePorts)
      case (true,  true) => rs.io.slowPorts := io.writeback
      case _ => throw new RuntimeException("unknown wakeup source")
    }

    // Fast wakeup from reservation stations inside this scheduler.
    val innerIntUop = outer.innerIntFastSources(i).map(_._2).map(rs_all(_).module.io.fastWakeup.get).fold(Seq())(_ ++ _)
    val innerFpUop = outer.innerFpFastSources(i).map(_._2).map(rs_all(_).module.io.fastWakeup.get).fold(Seq())(_ ++ _)
    val innerUop = innerIntUop ++ innerFpUop
    val innerData = outer.innerFastPorts(i).map(io.writeback(_).bits.data)
    node.connectFastWakeup(innerUop, innerData)
    require(innerUop.length == innerData.length)

    // Fast wakeup from sources outside this scheduler.
    val outerUop = outer.outFastPorts(i).map(io.fastUopIn(_))
    val outerData = outer.outFastPorts(i).map(io.writeback(_).bits.data)
    node.connectFastWakeup(outerUop, outerData)
    require(outerUop.length == outerData.length)
  }
  require(issueIdx == io.issue.length)

  // Running offsets into the register-file read-data vectors.
  var intReadPort = 0
  var fpReadPort = 0
  for ((dp, i) <- outer.dpPorts.zipWithIndex) {
    // dp connects only one rs: don't use arbiter
    if (dp.length == 1) {
      rs_all(dp.head._1).module.io.fromDispatch(dp.head._2) <> io.allocate(i)
    }
    // dp connects more than one rs: use arbiter to route uop to the correct rs
    else {
      val func = dp.map(rs => (op: MicroOp) => rs_all(rs._1).canAccept(op.ctrl.fuType))
      val arbiterOut = DispatchArbiter(io.allocate(i), func)
      val rsIn = VecInit(dp.map(rs => rs_all(rs._1).module.io.fromDispatch(rs._2)))
      rsIn <> arbiterOut
    }

    val numIntRfPorts = dp.map(_._1).map(rs_all(_).intSrcCnt).max
    if (numIntRfPorts > 0) {
      val intRfPorts = VecInit(intRfReadData.slice(intReadPort, intReadPort + numIntRfPorts))
      for ((rs, idx) <- dp) {
        val target = rs_all(rs).module.io.srcRegValue(idx)
        target := intRfPorts.take(target.length)
      }
      intReadPort += numIntRfPorts
    }

    val numFpRfPorts = dp.map(_._1).map(rs_all(_).fpSrcCnt).max
    if (numFpRfPorts > 0) {
      val fpRfPorts = VecInit(fpRfReadData.slice(fpReadPort, fpReadPort + numFpRfPorts))
      for ((rs, idx) <- dp) {
        val mod = rs_all(rs).module
        val target = mod.io.srcRegValue(idx)
        // dirty code for store
        val isFp = RegNext(mod.io.fromDispatch(idx).bits.ctrl.srcType(0) === SrcType.fp)
        // A port that also reads the integer regfile takes FP data only when the (registered)
        // source type is fp, overriding the integer connection made above. A port that reads
        // only the FP regfile has no other driver for srcRegValue, so it must always take the
        // FP data; using false.B here would leave it undriven.
        val fromFp = if (numIntRfPorts > 0) isFp else true.B
        if (numIntRfPorts > 0) {
          require(numFpRfPorts == 1 && numIntRfPorts == 1)
        }
        when (fromFp) {
          target := fpRfPorts.take(target.length)
        }
      }
      fpReadPort += numFpRfPorts
    }
  }

  // Difftest hooks (skipped on FPGA): read the regfiles through the debug ports, addressed by
  // the debug rename tables, and hand the values to the difftest modules.
  if (!env.FPGAPlatform && intRf.isDefined) {
    for ((rport, rat) <- intRf.get.io.debug_rports.zip(io.extra.debug_int_rat)) {
      rport.addr := rat
    }
    val difftest = Module(new DifftestArchIntRegState)
    difftest.io.clock := clock
    difftest.io.coreid := hardId.U
    difftest.io.gpr := VecInit(intRf.get.io.debug_rports.map(_.data))
  }
  if (!env.FPGAPlatform && fpRf.isDefined) {
    for ((rport, rat) <- fpRf.get.io.debug_rports.zip(io.extra.debug_fp_rat)) {
      rport.addr := rat
    }
    val difftest = Module(new DifftestArchFpRegState)
    difftest.io.clock := clock
    difftest.io.coreid := hardId.U
    difftest.io.fpr := VecInit(fpRf.get.io.debug_rports.map(_.data))
  }
}