/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.backend

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config.Parameters
import difftest.{DifftestArchFpRegState, DifftestArchIntRegState}
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import xiangshan._
import utils._
import xiangshan.backend.exu.ExuConfig
import xiangshan.backend.issue.ReservationStation
import xiangshan.backend.regfile.{Regfile, RfWritePort}
import xiangshan.mem.{SqPtr, StoreDataBundle}

import scala.collection.mutable.ArrayBuffer

/** Routes one incoming micro-op to a set of decoupled outputs.
  *
  * Every entry of `func` is a predicate over the incoming [[MicroOp]]. Output `i` is valid
  * exactly when the input is valid and `func(i)` holds for the input bits; the input bits
  * are broadcast unchanged to every output.
  *
  * The input is accepted (`io.in.ready`) whenever at least one output fires. Consequently a
  * micro-op matched by several predicates is handed to every matching output that is ready
  * in that cycle, and a micro-op matched by no predicate is never accepted.
  *
  * @param func one selection predicate per output port, in output order
  */
class DispatchArbiter(func: Seq[MicroOp => Bool])(implicit p: Parameters) extends XSModule {
  // One output port per predicate.
  val numTarget = func.length

  val io = IO(new Bundle {
    val in = Flipped(DecoupledIO(new MicroOp))
    val out = Vec(numTarget, DecoupledIO(new MicroOp))
  })

  // Broadcast the payload; qualify each output's valid with its own predicate.
  io.out.zip(func).foreach{ case (o, f) =>
    o.valid := io.in.valid && f(io.in.bits)
    o.bits := io.in.bits
  }

  // The input handshake completes as soon as any selected output accepts the micro-op.
  io.in.ready := VecInit(io.out.map(_.fire())).asUInt.orR
}

/** Factory for [[DispatchArbiter]] modules. */
object DispatchArbiter {
  /** Instantiates a [[DispatchArbiter]] for `func`, bulk-connects `in` to its input and
    * returns the arbiter's output ports (one per predicate in `func`).
    *
    * @param in   the decoupled micro-op stream to be routed
    * @param func one selection predicate per output port
    */
  def apply(in: DecoupledIO[MicroOp], func: Seq[MicroOp => Bool])(implicit p: Parameters) = {
    val router = Module(new DispatchArbiter(func))
    val routerIO = router.io
    routerIO.in <> in
    routerIO.out
  }
}

// Helpers for locating execution units among the regfile writeback ports.
/** Index helpers that map execution-unit configurations onto writeback ports. */
trait HasExuWbMappingHelper {
  /** Returns the indices of all entries of `wb` whose configuration list contains `target`. */
  def findInWbPorts(wb: Seq[Seq[ExuConfig]], target: ExuConfig) : Seq[Int] = {
    wb.zipWithIndex.collect { case (group, portIdx) if group.contains(target) => portIdx }
  }

  /** Concatenates, in the order of `targets`, the port indices found for each target.
    * An index shows up once per target that matches it.
    */
  def findInWbPorts(wb: Seq[Seq[ExuConfig]], targets: Seq[ExuConfig]) : Seq[Int] = {
    targets.flatMap(target => findInWbPorts(wb, target))
  }

  /** Builds the fast-wakeup index list for `cfg`.
    *
    * `intSource` is included when `cfg.readIntRf` is set; `fpSource`, with every index
    * shifted by `offset`, is included when `cfg.readFpRf` is set. Integer indices come first.
    */
  def getFastWakeupIndex(cfg: ExuConfig, intSource: Seq[Int], fpSource: Seq[Int], offset: Int) : Seq[Int] = {
    val intPart = if (cfg.readIntRf) intSource else Seq.empty[Int]
    val fpPart = if (cfg.readFpRf) fpSource.map(_ + offset) else Seq.empty[Int]
    intPart ++ fpPart
  }
}

// Scheduler: reservation stations together with the register files they read from.
/** Diplomatic (lazy) description of a scheduler: a group of reservation stations plus the
  * parameters of the register files they read.
  *
  * @param configs      one entry per reservation station:
  *                     `_1` the execution-unit configuration,
  *                     `_2` the number of dequeue ports passed to `addIssuePort`,
  *                     `_3` / `_4` the execution-unit configurations for which this entry is
  *                     listed as an integer / floating-point fast-wakeup source
  * @param dpPorts      one entry per dispatch port; each pair is
  *                     (index into `configs`, dispatch-port index inside that reservation station)
  * @param intRfWbPorts per integer regfile write port, the execution units attached to it
  * @param fpRfWbPorts  per floating-point regfile write port, the execution units attached to it
  * @param outFastPorts per reservation station, the indices of fast-wakeup ports that are
  *                     driven from outside this scheduler
  */
class Scheduler(
  val configs: Seq[(ExuConfig, Int, Seq[ExuConfig], Seq[ExuConfig])],
  val dpPorts: Seq[Seq[(Int, Int)]],
  val intRfWbPorts: Seq[Seq[ExuConfig]],
  val fpRfWbPorts: Seq[Seq[ExuConfig]],
  val outFastPorts: Seq[Seq[Int]]
)(implicit p: Parameters) extends LazyModule with HasXSParameter with HasExuWbMappingHelper {
  val numDpPorts = dpPorts.length

  // regfile parameters: overall read and write ports
  // Each dispatch port needs as many read ports as the most demanding unit behind it.
  val numDpPortIntRead = dpPorts.map(_.map(_._1).map(configs(_)._1.intSrcCnt).max)
  val numIntRfReadPorts = numDpPortIntRead.sum
  val numIntRfWritePorts = intRfWbPorts.length
  val numDpPortFpRead = dpPorts.map(_.map(_._1).map(configs(_)._1.fpSrcCnt).max)
  val numFpRfReadPorts = numDpPortFpRead.sum
  val numFpRfWritePorts = fpRfWbPorts.length

  // reservation station parameters: dispatch, regfile, issue, wakeup, fastWakeup
  // instantiate reservation stations and connect the issue ports
  // A station listens to every write port of each regfile it reads operands from.
  val wakeupPorts = configs.map(_._1).map(config => {
    val numInt = if (config.intSrcCnt > 0) numIntRfWritePorts else 0
    val numFp = if (config.fpSrcCnt > 0) numFpRfWritePorts else 0
    numInt + numFp
  })
  // For every station: the (config entry, index) pairs inside this scheduler that fast-wake it.
  val innerIntFastSources = configs.map(_._1).map(cfg => configs.zipWithIndex.filter(c => c._1._3.contains(cfg) && c._1._1.wakeupFromRS))
  val innerFpFastSources = configs.map(_._1).map(cfg => configs.zipWithIndex.filter(c => c._1._4.contains(cfg) && c._1._1.wakeupFromRS))
  // Writeback-port indices of those sources; FP indices are offset by the number of int write ports.
  val innerFastPorts = configs.map(_._1).zipWithIndex.map{ case (config, i) =>
    val intSource = findInWbPorts(intRfWbPorts, innerIntFastSources(i).map(_._1._1))
    val fpSource = findInWbPorts(fpRfWbPorts, innerFpFastSources(i).map(_._1._1))
    getFastWakeupIndex(config, intSource, fpSource, numIntRfWritePorts)
  }
  println(s"inner fast: $innerFastPorts")
  val numAllFastPorts = innerFastPorts.zip(outFastPorts).map{ case (i, o) => i.length + o.length }
  val reservationStations = configs.zipWithIndex.map{ case ((config, numDeq, _, _), i) =>
    val rs = LazyModule(new ReservationStation())
    rs.addIssuePort(config, numDeq)
    rs.addWakeup(wakeupPorts(i))
    rs.addEarlyWakeup(numAllFastPorts(i))
    rs
  }
  // connect to dispatch
  // The lambda parameter is deliberately not called `p`, which would shadow the implicit Parameters.
  val dpFuConfigs = dpPorts.map(_.map(port => reservationStations(port._1).addDispatchPort()).reduce(_ ++ _))

  val numIssuePorts = configs.map(_._2).sum
  val numReplayPorts = reservationStations.filter(_.params.hasFeedback).map(_.params.numDeq).sum
  val memRsEntries = reservationStations.filter(_.params.hasFeedback).map(_.params.numEntries)
  // All stations with feedback must have the same number of entries (0 if there are none).
  val getMemRsEntries = {
    require(memRsEntries.isEmpty || memRsEntries.max == memRsEntries.min, "different indexes not supported")
    if (memRsEntries.isEmpty) 0 else memRsEntries.max
  }
  val numSTDPorts = reservationStations.filter(_.params.isStore).map(_.params.numDeq).sum

  lazy val module = new SchedulerImp(this)

  /** True when at least one execution-unit configuration of this scheduler accepts `fuType`. */
  def canAccept(fuType: UInt): Bool = {
    VecInit(configs.map(_._1.canAccept(fuType))).asUInt.orR
  }
}

/** Hardware implementation of [[Scheduler]].
  *
  * Instantiates the integer / floating-point register files (when they have read ports),
  * wires regfile write ports and wakeup ports to `io.writeback`, connects every reservation
  * station's issue, feedback, store-data and fast-wakeup ports, and routes `io.allocate`
  * to the reservation stations behind each dispatch port.
  *
  * @param outer the lazy module that carries all elaboration-time parameters
  */
class SchedulerImp(outer: Scheduler) extends LazyModuleImp(outer) with HasXSParameter {
  val memRsEntries = outer.getMemRsEntries
  // Parameters with IssQueSize overridden by the entry count of the feedback-capable stations.
  val updatedP = p.alter((site, here, up) => {
    case XSCoreParamsKey => up(XSCoreParamsKey).copy(
      IssQueSize = memRsEntries
    )
  })
  val intRfWritePorts = outer.numIntRfWritePorts
  val fpRfWritePorts = outer.numFpRfWritePorts
  // (regfile exists, number of read ports, number of write ports)
  val intRfConfig = (outer.numIntRfReadPorts > 0, outer.numIntRfReadPorts, intRfWritePorts)
  val fpRfConfig = (outer.numFpRfReadPorts > 0, outer.numFpRfReadPorts, fpRfWritePorts)

  val rs_all = outer.reservationStations

  // print rs info
  println("Scheduler: ")
  for ((rs, i) <- rs_all.zipWithIndex) {
    println(s"RS $i: $rs")
    println(s"  innerIntUop: ${outer.innerIntFastSources(i).map(_._2)}")
    println(s"  innerFpUop: ${outer.innerFpFastSources(i).map(_._2)}")
    println(s"  innerFastPorts: ${outer.innerFastPorts(i)}")
    println(s"  outFastPorts: ${outer.outFastPorts(i)}")
  }
  println(s"  number of issue ports: ${outer.numIssuePorts}")
  println(s"  number of replay ports: ${outer.numReplayPorts}")
  println(s"  size of load and store RSes: ${outer.getMemRsEntries}")
  println(s"  number of std ports: ${outer.numSTDPorts}")
  if (intRfConfig._1) {
    println(s"INT Regfile: ${intRfConfig._2}R${intRfConfig._3}W")
  }
  if (fpRfConfig._1) {
    println(s"FP  Regfile: ${fpRfConfig._2}R${fpRfConfig._3}W")
  }

  /** Ports that only exist for some configurations, plus miscellaneous and debug inputs. */
  class SchedulerExtraIO extends XSBundle {
    // feedback ports
    val feedback = if (outer.numReplayPorts > 0) Some(Vec(outer.numReplayPorts, new Bundle {
      val replay = Flipped(ValidIO(new RSFeedback()(updatedP)))
      val rsIdx = Output(UInt(log2Up(memRsEntries).W))
      val isFirstIssue = Output(Bool())
    })) else None
    // special ports for store
    val stData = if (outer.numSTDPorts > 0) Some(Vec(outer.numSTDPorts, ValidIO(new StoreDataBundle))) else None
    // misc
    val jumpPc = Input(UInt(VAddrBits.W))
    val jalr_target = Input(UInt(VAddrBits.W))
    val stIssuePtr = Input(new SqPtr())
    // debug
    val debug_int_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))
    val debug_fp_rat = Vec(32, Input(UInt(PhyRegIdxWidth.W)))

    override def cloneType: SchedulerExtraIO.this.type =
      new SchedulerExtraIO().asInstanceOf[this.type]
  }

  val io = IO(new Bundle {
    // global control
    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    // dispatch and issue ports
    val allocate = Vec(outer.numDpPorts, Flipped(DecoupledIO(new MicroOp)))
    val issue = Vec(outer.numIssuePorts, DecoupledIO(new ExuInput))
    val fastUopOut = Vec(outer.numIssuePorts, ValidIO(new MicroOp))
    // wakeup-related ports
    val writeback = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new ExuOutput)))
    val fastUopIn = Vec(intRfWritePorts + fpRfWritePorts, Flipped(ValidIO(new MicroOp)))
    // feedback ports
    val extra = new SchedulerExtraIO
  })

  /** Collects the regfile read addresses for all dispatch ports, in dispatch-port order.
    *
    * Dispatch port `k` contributes the first `numRead(k)` physical source registers of
    * `io.allocate(k)`. The exception is a port whose unit list contains `stuCfg` and that
    * reads exactly one register: it contributes `psrc(1)` instead of `psrc(0)`.
    *
    * @param numRead number of registers read per dispatch port; must have one entry per port
    */
  def extraReadRf(numRead: Seq[Int]): Seq[UInt] = {
    require(numRead.length == io.allocate.length)
    val enq = io.allocate.map(_.bits.psrc)
    // TODO: for store, fp is located at the second operand
    // currently use numInt>0 && numFp>0. should make this configurable
    val containsStore = outer.dpFuConfigs.map(_.contains(stuCfg))
    enq.zip(numRead).zip(containsStore).map{ case ((src, num), hasStore) =>
      if (hasStore && num == 1) Seq(src(num)) else src.take(num)
    }.fold(Seq())(_ ++ _)
  }
  def readIntRf: Seq[UInt] = extraReadRf(outer.numDpPortIntRead)
  def readFpRf: Seq[UInt] = extraReadRf(outer.numDpPortFpRead)
  def stData: Seq[ValidIO[StoreDataBundle]] = io.extra.stData.getOrElse(Seq())

  /** Instantiates a register file with one read port per entry of `raddr` and drives the
    * read addresses from `raddr`. Returns `None` when `raddr` is empty.
    */
  def regfile(raddr: Seq[UInt], numWrite: Int, hasZero: Boolean, len: Int): Option[Regfile] = {
    val numReadPorts = raddr.length
    if (numReadPorts > 0) {
      val rf = Module(new Regfile(numReadPorts, numWrite, hasZero, len))
      rf.io.readPorts.map(_.addr).zip(raddr).foreach{ case (r1, r2) => r1 := r2 }
      // Debug read ports are optionally re-driven at the end of this class.
      rf.io.debug_rports := DontCare
      Some(rf)
    }
    else {
      None
    }
  }

  val intRf = regfile(readIntRf, intRfWritePorts, hasZero = true, len = XLEN)
  val fpRf = regfile(readFpRf, fpRfWritePorts, hasZero = false, len = XLEN)
  val intRfReadData = intRf.map(_.io.readPorts.map(_.data)).getOrElse(Seq())
  val fpRfReadData = fpRf.map(_.io.readPorts.map(_.data)).getOrElse(Seq())

  // write ports: 0-3 ALU, 4-5 MUL, 6-7 LOAD
  // NOTE(review): the layout above is the original author's remark; the actual mapping is
  // determined by the writeback-port configuration passed to the outer Scheduler.
  // regfile write ports
  // io.writeback holds the integer write ports first, followed by the floating-point ones.
  intRf.foreach { rf =>
    rf.io.writePorts.zip(io.writeback.take(intRfWritePorts)).foreach {
      case (port, wb) =>
        port.wen := wb.valid && wb.bits.uop.ctrl.rfWen
        port.addr := wb.bits.uop.pdest
        port.data := wb.bits.data
    }
  }
  fpRf.foreach { rf =>
    rf.io.writePorts.zip(io.writeback.drop(intRfWritePorts)).foreach {
      case (port, wb) =>
        // NOTE(review): unlike the integer path, valid is not qualified by a per-uop
        // write-enable here - confirm that only FP-writing results reach these ports.
        port.wen := wb.valid
        port.addr := wb.bits.uop.pdest
        port.data := wb.bits.data
    }
  }

  // Running offsets into the flattened issue / feedback / store-data port vectors.
  var issueIdx = 0
  var feedbackIdx = 0
  var stDataIdx = 0
  // Not referenced anywhere else in this class; kept so the member set stays unchanged.
  var fastUopOutIdx = 0
  // Entries of fastUopOut belonging to stations without a fast-wakeup output stay unconnected.
  io.fastUopOut := DontCare
  for (((node, cfg), i) <- rs_all.zip(outer.configs.map(_._1)).zipWithIndex) {
    val rs = node.module

    rs.io.redirect <> io.redirect
    rs.io.flush <> io.flush

    // Each station owns a contiguous slice of the issue (and fast-uop) ports.
    val issueWidth = rs.io.deq.length
    rs.io.deq <> io.issue.slice(issueIdx, issueIdx + issueWidth)
    rs.io_fastWakeup.foreach { fastWakeup =>
      fastWakeup <> io.fastUopOut.slice(issueIdx, issueIdx + issueWidth)
    }
    issueIdx += issueWidth

    rs.io_jump.foreach { jump =>
      jump.jumpPc := io.extra.jumpPc
      jump.jalr_target := io.extra.jalr_target
    }
    rs.io_checkwait.foreach { checkwait =>
      checkwait.stIssuePtr <> io.extra.stIssuePtr
    }
    rs.io_feedback.foreach { rsFeedback =>
      val width = rsFeedback.memfeedback.length
      val feedback = io.extra.feedback.get.slice(feedbackIdx, feedbackIdx + width)
      // The index width seen by the outside must match the station's own index width.
      require(feedback(0).rsIdx.getWidth == rsFeedback.rsIdx(0).getWidth)
      rsFeedback.memfeedback <> feedback.map(_.replay)
      rsFeedback.rsIdx <> feedback.map(_.rsIdx)
      rsFeedback.isFirstIssue <> feedback.map(_.isFirstIssue)
      feedbackIdx += width
    }
    rs.io_store.foreach { store =>
      val width = store.stData.length
      store.stData <> stData.slice(stDataIdx, stDataIdx + width)
      stDataIdx += width
    }

    // Slow wakeup: listen to the write ports of every regfile this unit reads from.
    (cfg.intSrcCnt > 0, cfg.fpSrcCnt > 0) match {
      case (true,  false) => rs.io.slowPorts := io.writeback.take(intRfWritePorts)
      case (false, true) => rs.io.slowPorts := io.writeback.drop(intRfWritePorts)
      case (true,  true) => rs.io.slowPorts := io.writeback
      case _ => throw new RuntimeException("unknown wakeup source")
    }

    // Fast wakeup from stations inside this scheduler: uops come from the source stations'
    // fast-wakeup outputs, data from the corresponding writeback ports.
    val innerIntUop = outer.innerIntFastSources(i).map(_._2).map(rs_all(_).module.io_fastWakeup.get).fold(Seq())(_ ++ _)
    val innerFpUop = outer.innerFpFastSources(i).map(_._2).map(rs_all(_).module.io_fastWakeup.get).fold(Seq())(_ ++ _)
    val innerUop = innerIntUop ++ innerFpUop
    val innerData = outer.innerFastPorts(i).map(io.writeback(_).bits.data)
    node.connectFastWakeup(innerUop, innerData)
    require(innerUop.length == innerData.length)

    // Fast wakeup from outside this scheduler.
    val outerUop = outer.outFastPorts(i).map(io.fastUopIn(_))
    val outerData = outer.outFastPorts(i).map(io.writeback(_).bits.data)
    node.connectFastWakeup(outerUop, outerData)
    require(outerUop.length == outerData.length)
  }
  require(issueIdx == io.issue.length)

  // Running offsets into the regfile read-data vectors, advanced per dispatch port.
  var intReadPort = 0
  var fpReadPort = 0
  for ((dp, i) <- outer.dpPorts.zipWithIndex) {
    // dp connects only one rs: don't use arbiter
    if (dp.length == 1) {
      rs_all(dp.head._1).module.io.fromDispatch(dp.head._2) <> io.allocate(i)
    }
    // dp connects more than one rs: use arbiter to route uop to the correct rs
    else {
      val func = dp.map(rs => (op: MicroOp) => rs_all(rs._1).canAccept(op.ctrl.fuType))
      val arbiterOut = DispatchArbiter(io.allocate(i), func)
      val rsIn = VecInit(dp.map(rs => rs_all(rs._1).module.io.fromDispatch(rs._2)))
      rsIn <> arbiterOut
    }

    // All stations behind this dispatch port share the same integer read ports.
    val numIntRfPorts = dp.map(_._1).map(rs_all(_).intSrcCnt).max
    if (numIntRfPorts > 0) {
      val intRfPorts = VecInit(intRfReadData.slice(intReadPort, intReadPort + numIntRfPorts))
      for ((rs, idx) <- dp) {
        val target = rs_all(rs).module.io.srcRegValue(idx)
        target := intRfPorts.take(target.length)
      }
      intReadPort += numIntRfPorts
    }

    val numFpRfPorts = dp.map(_._1).map(rs_all(_).fpSrcCnt).max
    if (numFpRfPorts > 0) {
      val fpRfPorts = VecInit(fpRfReadData.slice(fpReadPort, fpReadPort + numFpRfPorts))
      for ((rs, idx) <- dp) {
        val mod = rs_all(rs).module
        val target = mod.io.srcRegValue(idx)
        // dirty code for store
        // A port reading both regfiles already got integer data above; operand 1 is
        // overridden with FP data when the uop's second source type, registered for one
        // cycle, is FP.
        if (numIntRfPorts > 0) {
          require(numFpRfPorts == 1)
          require(numIntRfPorts == 2)
          when(RegNext(mod.io.fromDispatch(idx).bits.ctrl.srcType(1) === SrcType.fp)) {
            target(1) := fpRfPorts(0)
          }
        }
        else {
          target := fpRfPorts.take(target.length)
        }
      }
      fpReadPort += numFpRfPorts
    }
  }

  // Outside FPGA builds, address the debug read ports with io.extra.debug_*_rat and hand
  // the read data to the difftest register-state modules.
  if (!env.FPGAPlatform) {
    intRf.foreach { rf =>
      for ((rport, rat) <- rf.io.debug_rports.zip(io.extra.debug_int_rat)) {
        rport.addr := rat
      }
      val difftest = Module(new DifftestArchIntRegState)
      difftest.io.clock := clock
      difftest.io.coreid := hardId.U
      difftest.io.gpr := VecInit(rf.io.debug_rports.map(_.data))
    }
  }
  if (!env.FPGAPlatform) {
    fpRf.foreach { rf =>
      for ((rport, rat) <- rf.io.debug_rports.zip(io.extra.debug_fp_rat)) {
        rport.addr := rat
      }
      val difftest = Module(new DifftestArchFpRegState)
      difftest.io.clock := clock
      difftest.io.coreid := hardId.U
      difftest.io.fpr := VecInit(rf.io.debug_rports.map(_.data))
    }
  }
}