// ReservationStation.scala
package xiangshan.backend.issue

import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.SelImm
import xiangshan.backend.decode.{ImmUnion, Imm_U}
import xiangshan.backend.exu.{Exu, ExuConfig}
import xiangshan.backend.regfile.RfReadPort
import xiangshan.backend.roq.RoqPtr

import scala.math.max

/** Delay line that forwards a [[MicroOp]] from `io.in` to `io.out` after `number` cycles.
  *
  * Behaviour depends on the elaboration-time parameter `number`:
  *  - `number < 0`  : the output is never valid and its payload is `DontCare`.
  *  - `number == 0` : the input is passed straight through with no register stage; redirect and
  *                    flush are ignored on this path.
  *  - `number > 0`  : the uop travels through `number` register stages. At every stage the valid
  *                    bit is cleared when `roqIdx.needFlush(io.redirect, io.flush)` holds for the
  *                    uop entering that stage.
  *
  * @param number number of register stages (see above for the non-positive cases)
  */
class BypassQueue(number: Int) extends XSModule {
  val io = IO(new Bundle {
    val in  = Flipped(ValidIO(new MicroOp))
    val out = ValidIO(new MicroOp)
    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
  })
  if (number < 0) {
    io.out.valid := false.B
    io.out.bits := DontCare
  } else if(number == 0) {
    io.in <> io.out
    io.out.valid := io.in.valid
    // NOTE: the zero-delay bypass does not look at redirect/flush
  } else {
    // One (valid, bits) register per delay stage, reset to all zeros.
    val queue = Seq.fill(number)(RegInit(0.U.asTypeOf(new Bundle{
      val valid = Bool()
      val bits = new MicroOp
    })))
    // Stage 0 samples the input and drops it if it is being flushed this cycle.
    queue(0).valid := io.in.valid && !io.in.bits.roqIdx.needFlush(io.redirect, io.flush)
    queue(0).bits  := io.in.bits
    (0 until (number-1)).foreach{i =>
      queue(i+1) := queue(i)
      // The later connection overrides the valid bit copied by the bulk connect above.
      queue(i+1).valid := queue(i).valid && !queue(i).bits.roqIdx.needFlush(io.redirect, io.flush)
    }
    io.out.valid := queue(number-1).valid
    io.out.bits := queue(number-1).bits
    for (i <- 0 until number) {
      XSDebug(queue(i).valid, p"BPQue(${i.U}): pc:${Hexadecimal(queue(i).bits.cf.pc)} roqIdx:${queue(i).bits.roqIdx}" +
        p" pdest:${queue(i).bits.pdest} rfWen:${queue(i).bits.ctrl.rfWen} fpWen:${queue(i).bits.ctrl.fpWen}\n")
    }
  }
}

/** Content-addressable memory with `readWidth` search ports and a single write port.
  *
  * Each search port presents a data word on `io.r.req(i)`; the response `io.r.resp(i)` is a
  * per-entry match vector with one bit per stored word. It is a "hot code", not a one-hot code:
  * several bits can be set when several entries hold the same value.
  *
  * @param gen       Chisel type of the stored / searched word (compared through `asUInt`)
  * @param set       number of entries
  * @param readWidth number of parallel search ports
  * @param rfZero    when true, `io.zero` reports for every entry whether its stored word is 0;
  *                  when false, `io.zero` is `null` and must not be accessed
  */
class SingleSrcCAM[T <: Data](val gen: T, val set: Int, val readWidth: Int, rfZero: Boolean) extends XSModule {
  val io = IO(new Bundle {
    val r = new Bundle {
      val req = Input(Vec(readWidth, gen))
      val resp = Output(Vec(readWidth, Vec(set, Bool())))
    }
    val w = new Bundle {
      val valid = Input(Bool())
      val bits = new Bundle {
        val addr = Input(UInt(log2Up(set).W))
        val data = Input(gen)
      }
    }
    val zero = if (rfZero) Output(Vec(set, Bool())) else null
  })

  val wordType = UInt(gen.getWidth.W)
  // Storage has no reset value: entries hold arbitrary data until written.
  val value = Reg(Vec(set, wordType))

  // Compare every search request against every stored word.
  io.r.resp.zipWithIndex.foreach{ case (a,i) =>
    a := value.map( src => io.r.req(i).asUInt === src)
  }

  // Note: general reg file don't wakeup zero
  if (rfZero) { io.zero.zip(value).foreach{ case(z, a) => z := a===0.U }}

  when (io.w.valid) {
    value(io.w.bits.addr) := io.w.bits.data
  }
}

/** Top level of one reservation station: wires together the select, control and data parts.
  *
  *  - [[ReservationStationSelect]] owns the entry state / ordering and picks entries to enqueue
  *    into and to issue from.
  *  - [[ReservationStationCtrl]] stores the uops, tracks operand readiness and produces the
  *    wake-up ("listen") write enables.
  *  - [[ReservationStationData]] stores the operand values.
  *
  * @param exuCfg       configuration of the execution unit served by this station; the jump
  *                     unit gets the extra `jumpPc` / `jalr_target` inputs
  * @param fastPortsCnt number of fast wake-up ports (`fastUopsIn` / `fastDatas`)
  * @param slowPortsCnt number of slow wake-up ports (`slowPorts`)
  * @param fixedDelay   forwarded to the sub-modules; `fixedDelay >= 0` is what this file calls
  *                     "non-blocked"
  * @param fastWakeup   must equal `fixedDelay >= 0` (enforced by `require`)
  * @param feedback     when true the `memfeedback` input and `rsIdx` output exist; when false
  *                     both are `null`
  */
class ReservationStation
(
  val exuCfg: ExuConfig,
  fastPortsCnt: Int,
  slowPortsCnt: Int,
  fixedDelay: Int,
  fastWakeup: Boolean,
  feedback: Boolean,
) extends XSModule {
  val iqSize = IssQueSize
  val iqIdxWidth = log2Up(iqSize)
  val nonBlocked = fixedDelay >= 0
  // The jump unit always uses two source slots; other units use the larger of int/fp counts.
  val srcNum = if (exuCfg == Exu.jumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
  require(nonBlocked==fastWakeup)

  val io = IO(new Bundle {
    val numExist = Output(UInt(iqIdxWidth.W))
    val fromDispatch = Flipped(DecoupledIO(new MicroOp))
    val deq = DecoupledIO(new ExuInput)
    val srcRegValue = Input(Vec(srcNum, UInt((XLEN + 1).W)))
    val jumpPc = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
    val jalr_target = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null

    val fastUopOut = ValidIO(new MicroOp)
    val fastUopsIn = Vec(fastPortsCnt, Flipped(ValidIO(new MicroOp)))
    val fastDatas = Vec(fastPortsCnt, Input(UInt((XLEN+1).W)))
    val slowPorts = Vec(slowPortsCnt, Flipped(ValidIO(new ExuOutput)))

    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val memfeedback = if (feedback) Flipped(ValidIO(new RSFeedback)) else null
    val rsIdx = if (feedback) Output(UInt(log2Up(IssQueSize).W)) else null
  })

  val select = Module(new ReservationStationSelect(exuCfg, fastPortsCnt, slowPortsCnt, fixedDelay, fastWakeup, feedback))
  val ctrl   = Module(new ReservationStationCtrl(exuCfg, fastPortsCnt, slowPortsCnt, fixedDelay, fastWakeup, feedback))
  val data   = Module(new ReservationStationData(exuCfg, fastPortsCnt, slowPortsCnt, fixedDelay, fastWakeup, feedback))

  // ---- select part ----
  select.io.redirect := io.redirect
  select.io.flush := io.flush
  io.numExist := select.io.numExist
  select.io.redirectVec := ctrl.io.redirectVec
  select.io.readyVec := ctrl.io.readyVec
  select.io.enq.valid := io.fromDispatch.valid
  io.fromDispatch.ready := select.io.enq.ready
  select.io.deq.ready := io.deq.ready
  if (feedback) {
    select.io.memfeedback := io.memfeedback
  }

  // ---- control part ----
  ctrl.io.in.valid := select.io.enq.fire() && !(io.redirect.valid || io.flush) // NOTE: same as select
  ctrl.io.flush := io.flush
  ctrl.io.in.bits.addr := select.io.enq.bits
  ctrl.io.in.bits.uop := io.fromDispatch.bits
  ctrl.io.validVec := select.io.validVec
  ctrl.io.indexVec := select.io.indexVec
  ctrl.io.redirect := io.redirect
  ctrl.io.sel.valid := select.io.deq.valid
  ctrl.io.sel.bits  := select.io.deq.bits
  io.fastUopOut := ctrl.io.fastUopOut
  ctrl.io.fastUopsIn := io.fastUopsIn
  // The control part only needs the uop of each slow write-back, not its data.
  ctrl.io.slowUops.zip(io.slowPorts).foreach{ case (c, i) =>
    c.valid := i.valid
    c.bits  := i.bits.uop
  }

  // ---- data part ----
  data.io.in.valid := ctrl.io.in.valid
  data.io.in.addr := select.io.enq.bits
  data.io.in.uop := io.fromDispatch.bits // NOTE: use for imm-pc src value mux
  data.io.in.enqSrcReady := ctrl.io.enqSrcReady
  data.io.srcRegValue := io.srcRegValue
  if(exuCfg == Exu.jumpExeUnitCfg) {
    data.io.jumpPc := io.jumpPc
    data.io.jalr_target := io.jalr_target
  }
  data.io.sel := select.io.deq.bits
  data.io.listen.wen := ctrl.io.listen
  // Listen data ports are ordered: all fast ports first, then all slow ports.
  for (i <- 0 until fastPortsCnt) {
    data.io.listen.wdata(i) := io.fastDatas(i)
  }
  for (i <- 0 until slowPortsCnt) {
    data.io.listen.wdata(i + fastPortsCnt) := io.slowPorts(i).bits.data
  }

  if (feedback) {
    io.rsIdx := RegNext(select.io.deq.bits) // NOTE: just for feedback
  }
  // ---- issue output ----
  io.deq.bits := DontCare
  io.deq.bits.uop  := ctrl.io.out.bits
  // The exception vector of an issued uop is always cleared here.
  io.deq.bits.uop.cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
  io.deq.valid     := ctrl.io.out.valid
  io.deq.bits.src1 := data.io.out(0)
  if (srcNum > 1) { io.deq.bits.src2 := data.io.out(1) }
  if (srcNum > 2) { io.deq.bits.src3 := data.io.out(2) }
}

/** Select part of the reservation station: entry allocation, ordering and issue selection.
  *
  * The module keeps
  *  - `stateQueue`, one state per physical entry (`s_idle`, `s_valid`, `s_wait`, `s_replay`),
  *  - `indexQueue`, an ordered list of physical entry indices that is compacted on dequeue,
  *  - `tailPtr`, the number of occupied positions in `indexQueue` (its `flag` means "full").
  *
  * Every cycle it picks the first position of `indexQueue` whose entry is valid and ready
  * (`io.deq`), and independently the first position holding an idle entry below the tail
  * ("bubble"), which is then removed from the ordered list.
  *
  * @param exuCfg       execution unit configuration (only used here to derive `srcNum`)
  * @param fastPortsCnt unused by this module's logic; kept for a uniform constructor
  * @param slowPortsCnt unused by this module's logic; kept for a uniform constructor
  * @param fixedDelay   `fixedDelay >= 0` means issue does not wait for `io.deq.ready`
  * @param fastWakeup   must equal `fixedDelay >= 0` (enforced by `require`)
  * @param feedback     when true, issued entries wait for `io.memfeedback` and may be replayed;
  *                     when false `io.memfeedback` is `null` and entries are freed at issue
  */
class ReservationStationSelect
(
  val exuCfg: ExuConfig,
  fastPortsCnt: Int,
  slowPortsCnt: Int,
  fixedDelay: Int,
  fastWakeup: Boolean,
  feedback: Boolean,
) extends XSModule with HasCircularQueuePtrHelper{

  val iqSize = IssQueSize
  val iqIdxWidth = log2Up(iqSize)
  val nonBlocked = fixedDelay >= 0
  val srcNum = if (exuCfg == Exu.jumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
  require(nonBlocked==fastWakeup)
  // Number of cycles an entry stays in s_replay after a feedback miss.
  val replayDelay = 5

  val io = IO(new Bundle {
    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val numExist = Output(UInt(iqIdxWidth.W))
    val memfeedback = if (feedback) Flipped(ValidIO(new RSFeedback)) else null

    val redirectVec = Input(Vec(IssQueSize, Bool()))
    val readyVec = Input(Vec(IssQueSize, Bool()))
    val validVec = Output(Vec(IssQueSize, Bool()))
    val indexVec = Output(Vec(IssQueSize, UInt(iqIdxWidth.W)))

    // val enq = Flipped(DecoupledIO(UInt(iqIdxWidth.W)))
    // Unlike DecoupledIO, `bits` flows towards the producer: it is the allocated entry index.
    val enq = new Bundle {
      val valid = Input(Bool())
      val bits  = Output(UInt(iqIdxWidth.W))
      val ready = Output(Bool())
      def fire() = valid && ready
    }
    val deq = DecoupledIO(UInt(iqIdxWidth.W))
  })

  /** Builds a `Vec` with one element per entry from an index function. */
  def widthMap[T <: Data](f: Int => T) = VecInit((0 until iqSize).map(f))

  /* queue in ctrl part
   * index queue : index
   * state queue : use for replay
   * count   queue : record replay cycle
   */

  val s_idle :: s_valid :: s_wait :: s_replay :: Nil = Enum(4)
  /* state machine
   * s_idle     : empty slot, init state, set when deq
   * s_valid    : ready to be selected
   * s_wait     : wait for feedback
   * s_replay   : replay after some particular cycle
   */
  val stateQueue    = RegInit(VecInit(Seq.fill(iqSize)(s_idle)))
  val tailPtr       = RegInit(0.U.asTypeOf(new CircularQueuePtr(iqSize)))
  val indexQueue    = RegInit(VecInit((0 until iqSize).map(_.U(iqIdxWidth.W))))
  val validQueue    = VecInit(stateQueue.map(_ === s_valid))
  val emptyQueue    = VecInit(stateQueue.map(_ === s_idle))
  val countQueue    = Reg(Vec(iqSize, UInt(log2Up(replayDelay).W)))
  // The *IdxQueue views are indexed by position in indexQueue rather than by physical entry.
  val validIdxQueue = widthMap(i => validQueue(indexQueue(i)))
  val readyIdxQueue = widthMap(i => validQueue(indexQueue(i)) && io.readyVec(indexQueue(i)))
  val emptyIdxQueue = widthMap(i => emptyQueue(indexQueue(i)))
  val countIdxQueue = widthMap(i => countQueue(indexQueue(i)))

  // select ready
  // for no replay, select just equal to deq (attached)
  // with   replay, select is just two stage with deq.
  val issueFire = Wire(Bool())
  val moveMask = WireInit(0.U(iqSize.W))
  val lastSelMask = Wire(UInt(iqSize.W))
  val selectMask = WireInit(VecInit((0 until iqSize).map(i => readyIdxQueue(i)))).asUInt & lastSelMask
  val selectIndex = ParallelPriorityMux(selectMask.asBools zip indexQueue) // NOTE: the idx in the indexQueue
  val selectPtr = ParallelPriorityMux(selectMask.asBools.zipWithIndex.map{ case (a,i) => (a, i.U)}) // NOTE: the idx of indexQueue
  val haveReady = Cat(selectMask).orR
  val selectIndexReg = RegNext(selectIndex)
  val selectValid = haveReady
  val selectReg = RegNext(selectValid)
  // If the selected position is shifted down by a dequeue this cycle, track its new position.
  val selectPtrReg = RegNext(Mux(moveMask(selectPtr), selectPtr-1.U, selectPtr))
  // Mask out the position selected in the previous cycle so it is not selected twice.
  lastSelMask := ~Mux(selectReg, UIntToOH(selectPtrReg), 0.U)
  assert(RegNext(!(haveReady && selectPtr >= tailPtr.asUInt)), "bubble should not have valid state like s_valid or s_wait")

  // sel bubble
  val lastbubbleMask = Wire(UInt(iqSize.W))
  val bubbleMask = WireInit(VecInit((0 until iqSize).map(i => emptyIdxQueue(i)))).asUInt & lastbubbleMask
  // val bubbleIndex = ParallelMux(bubbleMask zip indexQueue) // NOTE: the idx in the indexQueue
  val bubblePtr= ParallelPriorityMux(bubbleMask.asBools.zipWithIndex.map{ case (a,i) => (a, i.U)}) // NOTE: the idx of the indexQueue
  val findBubble = Cat(bubbleMask).orR
  // Idle entries at or above the tail are free slots, not bubbles.
  val haveBubble = findBubble && (bubblePtr < tailPtr.asUInt)
  val bubbleIndex = indexQueue(bubblePtr)
  val bubbleValid = haveBubble && (if (feedback) true.B else !selectValid)
  val bubbleReg = RegNext(bubbleValid)
  val bubblePtrReg = RegNext(Mux(moveMask(bubblePtr), bubblePtr-1.U, bubblePtr))
  lastbubbleMask := ~Mux(bubbleReg, UIntToOH(bubblePtrReg), 0.U) &
                    (if(feedback) ~(0.U(iqSize.W)) else
                    Mux(RegNext(selectValid && (io.redirect.valid || io.flush)), 0.U, ~(0.U(iqSize.W))))

  // deq
  val dequeue = if (feedback) bubbleReg
                else          bubbleReg || issueFire
  val deqPtr =  if (feedback) bubblePtrReg
                else Mux(selectReg, selectPtrReg, bubblePtrReg)
  // All positions at or above deqPtr shift down by one when a dequeue happens.
  moveMask := {
    (Fill(iqSize, 1.U(1.W)) << deqPtr)(iqSize-1, 0)
  } & Fill(iqSize, dequeue)

  for (i <- 0 until iqSize - 1) {
    when(moveMask(i)){
      indexQueue(i) := indexQueue(i+1)
    }
  }
  // The freed physical index is recycled to the last position of the ordered list.
  when(dequeue){
    indexQueue.last := indexQueue(deqPtr)
  }

  if (feedback) {
    when (io.memfeedback.valid) {
      // A hit frees the entry; a miss schedules a replay after replayDelay cycles.
      stateQueue(io.memfeedback.bits.rsIdx) := Mux(io.memfeedback.bits.hit, s_idle, s_replay)
      when (!io.memfeedback.bits.hit) {
        countQueue(io.memfeedback.bits.rsIdx) := (replayDelay-1).U
      }
    }
  }

  when (issueFire) {
    if (feedback) { when (stateQueue(selectIndexReg) === s_valid) { stateQueue(selectIndexReg) := s_wait } }
    else { stateQueue(selectIndexReg) := s_idle } // NOTE: reset the state for selectMask timing to avoid operation '<'
  }

  // redirect and feedback && wakeup
  for (i <- 0 until iqSize) {
    // replay
    val count = countQueue(i)
    when (stateQueue(i) === s_replay) {
      count := count - 1.U
      when (count === 0.U) { stateQueue(i) := s_valid }
    }

    // redirect
    // Written last, so a redirect overrides every state update above for that entry.
    when (io.redirectVec(i)) {
      stateQueue(i) := s_idle
    }
  }

  // output
  val issueValid = selectReg
  if (nonBlocked) {
    issueFire := issueValid
  } else {
    issueFire := issueValid && io.deq.ready
  }

  // enq
  val isFull = tailPtr.flag
  // agreement with dispatch: don't fire when io.redirect.valid
  val enqueue = io.enq.fire() && !(io.redirect.valid || io.flush)
  val tailInc = tailPtr + 1.U
  val tailDec = tailPtr - 1.U
  // Simultaneous enqueue and dequeue (or neither) leaves the tail unchanged.
  tailPtr := Mux(dequeue === enqueue, tailPtr, Mux(dequeue, tailDec, tailInc))

  // When full, the only way to accept an entry is to reuse the slot being dequeued.
  val enqPtr = Mux(tailPtr.flag, deqPtr, tailPtr.value)
  val enqIdx = indexQueue(enqPtr)
  when (enqueue) {
    stateQueue(enqIdx) := s_valid
  }

  io.validVec := validIdxQueue.zip(lastSelMask.asBools).map{ case (a, b) => a & b }
  io.indexVec := indexQueue

  io.enq.ready := !isFull || (if(feedback || nonBlocked) dequeue else false.B)
  io.enq.bits  := enqIdx
  io.deq.valid := selectValid
  io.deq.bits  := selectIndex

  // numExist is iqIdxWidth bits wide, so a full queue is reported as iqSize-1.
  io.numExist := Mux(tailPtr.flag, (iqSize-1).U, tailPtr.value)

  assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
}

/** Control part of the reservation station: uop storage, operand readiness and wake-up.
  *
  * For every entry the module stores the uop (`uop`), its `roqIdx`, one ready bit per source
  * (`srcQueue`) and the physical source register numbers (one [[SingleSrcCAM]] per source).
  * Write-backs arriving on the fast and slow ports are matched against the stored source
  * registers; a hit sets the ready bit and raises the matching bit of `io.listen`, which the
  * data part uses as write enable for the operand value.
  *
  * Port numbering inside `io.listen` is: fast ports `0 until fastPortsCnt`, then slow ports.
  *
  * @param exuCfg       execution unit configuration (used to derive `srcNum`)
  * @param fastPortsCnt number of fast wake-up ports
  * @param slowPortsCnt number of slow wake-up ports
  * @param fixedDelay   latency handed to [[BypassQueue]] for the outgoing fast wake-up; with
  *                     `fixedDelay == 0` the fast wake-up is sent in the select cycle
  * @param fastWakeup   when true this station sends a fast wake-up on `io.fastUopOut`; must
  *                     equal `fixedDelay >= 0` (enforced by `require`)
  * @param feedback     not referenced by this module's logic; kept for a uniform constructor
  */
class ReservationStationCtrl
(
  val exuCfg: ExuConfig,
  fastPortsCnt: Int,
  slowPortsCnt: Int,
  fixedDelay: Int,
  fastWakeup: Boolean,
  feedback: Boolean,
) extends XSModule {

  val iqSize = IssQueSize
  val iqIdxWidth = log2Up(iqSize)
  val nonBlocked = fixedDelay >= 0
  val srcNum = if (exuCfg == Exu.jumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
  require(nonBlocked==fastWakeup)

  val io = IO(new XSBundle {

    val redirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())

    val in = Flipped(ValidIO(new Bundle {
      val addr = UInt(iqIdxWidth.W)
      val uop  = new MicroOp
    }))
    val sel = Flipped(ValidIO(UInt(iqIdxWidth.W)))
    val out = ValidIO(new MicroOp)

    val redirectVec = Output(Vec(IssQueSize, Bool()))
    val readyVec = Output(Vec(IssQueSize, Bool()))
    val validVec = Input(Vec(IssQueSize, Bool()))
    val indexVec = Input(Vec(IssQueSize, UInt(iqIdxWidth.W)))

    val fastUopOut = ValidIO(new MicroOp)
    val fastUopsIn = Flipped(Vec(fastPortsCnt, ValidIO(new MicroOp)))
    val slowUops   = Flipped(Vec(slowPortsCnt, ValidIO(new MicroOp)))

    val listen = Output(Vec(srcNum, Vec(iqSize, Vec(fastPortsCnt + slowPortsCnt, Bool()))))
    val enqSrcReady = Output(Vec(srcNum, Bool()))
  })

  val selValid = io.sel.valid
  val enqPtr = io.in.bits.addr
  val enqPtrReg = RegNext(enqPtr)
  val enqEn  = io.in.valid
  val enqEnReg = RegNext(enqEn)
  val enqUop = io.in.bits.uop
  val enqUopReg = RegEnable(enqUop, selValid)
  val selPtr = io.sel.bits
  val selPtrReg = RegEnable(selPtr, selValid)
  // `data` aliases the listen output; default every bit to false, hits below override it.
  val data = io.listen
  data.foreach(a => a.foreach(b => b.foreach(_ := false.B)))

  val fastUops = io.fastUopsIn
  val slowUops = io.slowUops
  val lastFastUops = RegNext(fastUops)

  /** True when a source needs no register value: it is neither an int nor an fp register,
    * or it is int register 0.
    */
  def stateCheck(src: UInt, srcType: UInt): Bool = {
    (srcType =/= SrcType.reg && srcType =/= SrcType.fp) ||
    (srcType === SrcType.reg && src === 0.U)
  }
  val enqSrcSeq      = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3)
  val enqSrcTypeSeq  = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
  val enqSrcStateSeq = Seq(enqUop.src1State, enqUop.src2State, enqUop.src3State)
  val enqSrcReady = (0 until srcNum).map(i =>
    stateCheck(enqSrcSeq(i), enqSrcTypeSeq(i)) || (enqSrcStateSeq(i) === SrcState.rdy)
  )
  io.enqSrcReady := enqSrcReady
  // srcUpdate(i)(j): source j of entry i becomes ready this cycle through any listen port.
  val srcUpdate = Wire(Vec(iqSize, Vec(srcNum, Bool())))
  val srcUpdateListen = Wire(Vec(iqSize, Vec(srcNum, Vec(fastPortsCnt + slowPortsCnt, Bool()))))
  srcUpdateListen.foreach(a => a.foreach(b => b.foreach(c => c := false.B )))
  for (i <- 0 until iqSize) {
    for (j <- 0 until srcNum) {
      srcUpdate(i)(j) := Cat(srcUpdateListen(i)(j)).orR
    }
  }

  // Per-entry, per-source ready bits; an entry is ready when all its sources are.
  val srcQueue      = Reg(Vec(iqSize, Vec(srcNum, Bool())))
  io.readyVec := srcQueue.map(Cat(_).andR)
  when (enqEn) {
    srcQueue(enqPtr).zip(enqSrcReady).foreach{ case (s, e) => s := e }
  }
  // Declared after the enqueue write, so a wake-up in the enqueue cycle wins over enqSrcReady.
  for (i <- 0 until iqSize) {
    for (j <- 0 until srcNum) {
      when (srcUpdate(i)(j)) { srcQueue(i)(j) := true.B }
    }
  }

  val redirectHit = io.redirectVec(selPtr)
  val uop = Module(new SyncDataModuleTemplate(new MicroOp, iqSize, 1, 1))

  uop.io.raddr(0) := selPtr
  // A selected entry that is being flushed in the select cycle is not issued.
  io.out.valid    := RegNext(selValid && ~redirectHit)
  io.out.bits     := uop.io.rdata(0)
  uop.io.wen(0)   := enqEn
  uop.io.waddr(0) := enqPtr
  uop.io.wdata(0) := enqUop

  /** The subset of a [[MicroOp]] needed to send a fast wake-up. */
  class fastSendUop extends XSBundle {
    val pdest = UInt(PhyRegIdxWidth.W)
    val rfWen = Bool()
    val fpWen = Bool()
    def apply(uop: MicroOp) = {
      this.pdest := uop.pdest
      this.rfWen := uop.ctrl.rfWen
      this.fpWen := uop.ctrl.fpWen
      this
    }
  }

  // roqIdx is kept in plain registers so every entry can be checked against a redirect at once.
  val roqIdx = Reg(Vec(IssQueSize, new RoqPtr))
  when (enqEn) {
    roqIdx(enqPtr) := enqUop.roqIdx
  }
  io.redirectVec.zip(roqIdx).foreach{ case (red, roq) =>
    red := roq.needFlush(io.redirect, io.flush)
  }
  io.out.bits.roqIdx := roqIdx(selPtrReg)

  io.fastUopOut := DontCare
  if (fastWakeup) {
    val asynUop = Reg(Vec(iqSize, new fastSendUop))
    when (enqEn) { asynUop(enqPtr) := (Wire(new fastSendUop)).apply(enqUop) }
    // Re-order by queue position so the priority mux mirrors the choice made by the select part.
    val asynIdxUop = (0 until iqSize).map(i => asynUop(io.indexVec(i)) )
    val readyIdxVec = (0 until iqSize).map(i => io.validVec(i) && Cat(srcQueue(io.indexVec(i))).andR )
    val fastAsynUop = ParallelPriorityMux(readyIdxVec zip asynIdxUop)
    val fastSentUop = Wire(new MicroOp)
    fastSentUop := DontCare
    fastSentUop.pdest := fastAsynUop.pdest
    fastSentUop.ctrl.rfWen := fastAsynUop.rfWen
    fastSentUop.ctrl.fpWen := fastAsynUop.fpWen

    if (fixedDelay == 0) {
      io.fastUopOut.valid := selValid
      io.fastUopOut.bits  := fastSentUop
    } else {
      val bpQueue = Module(new BypassQueue(fixedDelay))
      bpQueue.io.in.valid := selValid
      bpQueue.io.in.bits  := fastSentUop
      bpQueue.io.redirect := io.redirect
      bpQueue.io.flush    := io.flush
      io.fastUopOut.valid := bpQueue.io.out.valid
      io.fastUopOut.bits  := bpQueue.io.out.bits
    }

    // These fields of the issued uop come from the register copy, overriding the uop memory read.
    val fastSentUopReg = RegNext(fastAsynUop)
    io.out.bits.pdest := fastSentUopReg.pdest
    io.out.bits.ctrl.rfWen := fastSentUopReg.rfWen
    io.out.bits.ctrl.fpWen := fastSentUopReg.fpWen
  }

  // One CAM per source: stores the physical source register of every entry and is searched by
  // the pdest of every fast/slow write-back.
  val psrc = (0 until srcNum).map(i => Module(new SingleSrcCAM(UInt(PhyRegIdxWidth.W), iqSize, fastPortsCnt + slowPortsCnt, true)).io)
  psrc.foreach(_.w.valid := false.B)
  val entryListenHit = psrc.map{src =>
    for (i <- 0 until fastPortsCnt) { src.r.req(i) := io.fastUopsIn(i).bits.pdest }
    for (i <- 0 until slowPortsCnt) { src.r.req(i + fastPortsCnt) := io.slowUops(i).bits.pdest }
    src.r.resp
  }
  val srcIsZero = psrc.map{ src => src.zero }
  psrc.foreach(_.w.bits.addr  := enqPtr)
  psrc.foreach(_.w.valid := enqEn)
  val enqSrcSeqChecked = enqSrcSeq.zip(enqSrcTypeSeq).map{ case (s, t) =>
    Mux(t === SrcType.fp || t === SrcType.reg, s, 0.U)} // NOTE: if pc/imm -> 0.U and reg (means don't hit)
  psrc.zip(enqSrcSeqChecked).foreach{ case (p,s) => p.w.bits.data := s }

  // TODO: later, only store will need psrcType
  val psrcType = Reg(Vec(srcNum, Vec(iqSize, Bool()))) // fp: false | other: true
  (0 until srcNum).foreach{ i =>
    when (enqEn) {
      psrcType(i)(enqPtr) := enqSrcTypeSeq(i) =/= SrcType.fp
    }
  }

  /** Does write-back `uop` produce the register that the enqueuing source is waiting for? */
  def listenHitEnq(uop: MicroOp, src: UInt, srctype: UInt): Bool = {
    (src === uop.pdest) &&
    ((srctype === SrcType.reg && uop.ctrl.rfWen && src=/=0.U) ||
     (srctype === SrcType.fp  && uop.ctrl.fpWen))
  }

  /** Does write-back `uop` on listen port `port` hit source `src` of stored entry `addr`? */
  def listenHitEntry(src: Int, port: Int, addr: Int, uop: MicroOp): Bool = {
    entryListenHit(src)(port)(addr) &&
    ((psrcType(src)(addr) && uop.ctrl.rfWen && !srcIsZero(src)(addr)) ||
     (!psrcType(src)(addr)  && uop.ctrl.fpWen))
  }

  // entry listen: wake-up of entries that are already stored
  for (j <- 0 until srcNum) {
    for (i <- 0 until iqSize) {
      for (k <- 0 until fastPortsCnt) {
        val fastHit = listenHitEntry(j, k, i, fastUops(k).bits) && fastUops(k).valid
        // An entry being overwritten by an enqueue must not be woken by its old contents.
        val fastHitNoConflict = fastHit && !(enqPtr===i.U && enqEn)
        when (fastHitNoConflict) { srcUpdateListen(i)(j)(k) := true.B }
        // Fast ports: the ready bit is set at once, the data write enable follows a cycle later.
        when (RegNext(fastHitNoConflict) && !(enqPtr===i.U && enqEn)) { data(j)(i)(k) := true.B }
      }
      for (k <- 0 until slowPortsCnt) {
        val slowHit = listenHitEntry(j, k + fastPortsCnt, i, slowUops(k).bits) && slowUops(k).valid
        val slowHitNoConflict = slowHit && !(enqPtr===i.U && enqEn)
        // Slow ports: ready bit and data write enable are raised in the same cycle.
        when (slowHitNoConflict) { srcUpdateListen(i)(j)(k+fastPortsCnt) := true.B }
        when (slowHitNoConflict) { data(j)(i)(k + fastPortsCnt) := true.B }
      }
    }
  }

  // enq listen
  for (j <- 0 until srcNum) {
    for (k <- 0 until fastPortsCnt) {
      val fastHit = listenHitEnq(fastUops(k).bits, enqSrcSeq(j), enqSrcTypeSeq(j)) && enqEn && fastUops(k).valid
      // Also compare against the fast uops of the previous cycle.
      val lastFastHit = listenHitEnq(lastFastUops(k).bits, enqSrcSeq(j), enqSrcTypeSeq(j)) && enqEn && lastFastUops(k).valid
      when (fastHit || lastFastHit) { srcUpdateListen(enqPtr)(j)(k) := true.B }
      when (lastFastHit)            { data(j)(enqPtr)(k) := true.B }
      when (RegNext(fastHit))       { data(j)(enqPtrReg)(k) := true.B }
    }
    for (k <- 0 until slowPortsCnt) {
      val slowHit = listenHitEnq(slowUops(k).bits, enqSrcSeq(j), enqSrcTypeSeq(j)) && enqEn && slowUops(k).valid
      when (slowHit) {
        srcUpdateListen(enqPtr)(j)(k+fastPortsCnt) := true.B
        data(j)(enqPtr)(k + fastPortsCnt) := true.B
      }
    }
  }
}

/** Operand storage for one source slot of the reservation station.
  *
  * Holds `numEntries` words of `srcLen` bits in registers. Each entry can be written either
  * by the addressed write port `io.w` or by any of the `numListen` listen ports, selected per
  * entry through `io.listen.wen`. The single read port takes its address one cycle ahead of
  * the data.
  *
  * @param srcLen     width of one stored word in bits
  * @param numEntries number of entries
  * @param numListen  number of listen (wake-up) write ports
  */
class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int) extends XSModule {
  val io = IO(new Bundle {
    val r = new Bundle {
      // val valid = Bool() // NOTE: if read valid is necessary, but now it is not completed
      val addr = Input(UInt(log2Up(numEntries).W))
      val rdata = Output(UInt(srcLen.W))
    }
    val w = Input(new Bundle {
      val wen = Bool()
      val addr = UInt(log2Up(numEntries).W)
      val wdata = Input(UInt(srcLen.W))
    })
    val listen = Input(new Bundle {
      val wdata = Vec(numListen, UInt(srcLen.W))
      val wen = Vec(numEntries, Vec(numListen, Bool()))
    })
  })

  val value = Reg(Vec(numEntries, UInt(srcLen.W)))

  // One-hot decode of the addressed write, all zeros when the write port is idle.
  val wMask = Mux(io.w.wen, UIntToOH(io.w.addr)(numEntries-1, 0), 0.U(numEntries.W))
  // Candidate write data: the listen ports first, the addressed write port last.
  val data = io.listen.wdata :+ io.w.wdata
  // Per entry: the listen enables followed by that entry's bit of the addressed write mask.
  val wen = io.listen.wen.zip(wMask.asBools).map{ case (w, m) => w :+ m }
  value.zip(wen).zipWithIndex.foreach { case ((entry, entryWen), i) =>
    when (Cat(entryWen).orR) {
      entry := ParallelMux(entryWen zip data)
      // The mux above is only meaningful when at most one enable is set.
      assert(RegNext(PopCount(entryWen)===0.U || PopCount(entryWen)===1.U), s"${i}")
    }
  }

  io.r.rdata := value(RegNext(io.r.addr)) // NOTE: the read addr will arrive one cycle before
}

/** Data part of the reservation station: stores the operand values of every entry.
  *
  * One [[RSDataSingleSrc]] is instantiated per source. An operand is written
  *  - one cycle after enqueue through the addressed write port, if the source was already ready
  *    at enqueue (`io.in.enqSrcReady`), using the register-file value or, depending on the
  *    execution unit, a pc / immediate / jalr target;
  *  - through the listen ports when the control part raises the matching `io.listen.wen` bit.
  *
  * For the jump unit an additional memory keeps `io.jumpPc` per entry and is read out on
  * `io.pc`.
  *
  * @param exuCfg       execution unit configuration; selects the source muxing and whether the
  *                     jump-only ports (`jumpPc`, `jalr_target`, `pc`) exist (they are `null`
  *                     otherwise)
  * @param fastPortsCnt number of fast listen ports
  * @param slowPortsCnt number of slow listen ports
  * @param fixedDelay   only used for the `nonBlocked == fastWakeup` consistency check
  * @param fastWakeup   must equal `fixedDelay >= 0` (enforced by `require`)
  * @param feedback     not referenced by this module's logic; kept for a uniform constructor
  */
class ReservationStationData
(
  val exuCfg: ExuConfig,
  fastPortsCnt: Int,
  slowPortsCnt: Int,
  fixedDelay: Int,
  fastWakeup: Boolean,
  feedback: Boolean,
) extends XSModule {
  val iqSize = IssQueSize
  val iqIdxWidth = log2Up(iqSize)
  val nonBlocked = fixedDelay >= 0
  val srcNum = if (exuCfg == Exu.jumpExeUnitCfg) 2 else max(exuCfg.intSrcCnt, exuCfg.fpSrcCnt)
  require(nonBlocked==fastWakeup)

  val io = IO(new XSBundle {
    val srcRegValue = Vec(srcNum, Input(UInt((XLEN + 1).W)))
    val jumpPc = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
    val jalr_target = if(exuCfg == Exu.jumpExeUnitCfg) Input(UInt(VAddrBits.W)) else null
    val in  = Input(new Bundle {
      val valid = Input(Bool())
      val addr = Input(UInt(iqIdxWidth.W))
      val uop = Input(new MicroOp)
      val enqSrcReady = Input(Vec(srcNum, Bool()))
    })

    val listen = new Bundle {
      val wen = Input(Vec(srcNum, Vec(iqSize, Vec(fastPortsCnt + slowPortsCnt, Bool()))))
      val wdata = Input(Vec(fastPortsCnt + slowPortsCnt, UInt((XLEN + 1).W)))
    }

    val sel = Input(UInt(iqIdxWidth.W))
    val out = Output(Vec(srcNum, UInt((XLEN + 1).W)))
    val pc = if(exuCfg == Exu.jumpExeUnitCfg) Output(UInt(VAddrBits.W)) else null
  })

  // The enqueued uop is held so its fields can steer the source muxes in the write cycle.
  val enqUopReg = RegEnable(io.in.uop, io.in.valid)

  // Data : single read, multi write
  // ------------------------
  val data = (0 until srcNum).map{i =>
    val d = Module(new RSDataSingleSrc(XLEN + 1, iqSize, fastPortsCnt + slowPortsCnt))
    d.suggestName(s"${this.name}_data${i}")
    d.io
  }
  (0 until srcNum).foreach{ i =>
    data(i).listen.wen := io.listen.wen(i)
    data(i).listen.wdata := io.listen.wdata
  }

  // The addressed write happens one cycle after enqueue, and only for sources ready at enqueue.
  data.foreach(_.w.addr  := RegEnable(io.in.addr, io.in.valid))
  data.zip(io.in.enqSrcReady).foreach{ case (src, ready) => src.w.wen := RegNext(ready && io.in.valid) }

  val pcMem = if(exuCfg == Exu.jumpExeUnitCfg)
    Some(Module(new SyncDataModuleTemplate(UInt(VAddrBits.W), iqSize, numRead = 1, numWrite = 1))) else None

  if(pcMem.nonEmpty){
    // Written in the same cycle as the operand data, i.e. one cycle after enqueue.
    pcMem.get.io.wen(0) := RegNext(io.in.valid)
    pcMem.get.io.waddr(0) := RegNext(io.in.addr)
    pcMem.get.io.wdata(0) := io.jumpPc
  }

  exuCfg match {
    case Exu.jumpExeUnitCfg =>
      // src1 is the pc when the uop asks for it, otherwise the register value.
      val src1Mux = Mux(enqUopReg.ctrl.src1Type === SrcType.pc,
                        SignExt(io.jumpPc, XLEN),
                        io.srcRegValue(0)
                    )
      // data.io.w.bits.data(0) := src1Mux
      data(0).w.wdata := src1Mux
      // The second source slot of the jump unit always carries the jalr target.
      data(1).w.wdata := io.jalr_target

    case Exu.aluExeUnitCfg =>
      val src1Mux = Mux(enqUopReg.ctrl.src1Type === SrcType.pc,
                      SignExt(enqUopReg.cf.pc, XLEN),
                      io.srcRegValue(0)
                    )
      data(0).w.wdata := src1Mux
      // alu only need U type and I type imm
      val imm32 = Mux(enqUopReg.ctrl.selImm === SelImm.IMM_U,
                    ImmUnion.U.toImm32(enqUopReg.ctrl.imm),
                    ImmUnion.I.toImm32(enqUopReg.ctrl.imm)
                  )
      val imm64 = SignExt(imm32, XLEN)
      val src2Mux = Mux(enqUopReg.ctrl.src2Type === SrcType.imm,
                      imm64, io.srcRegValue(1)
                    )
      data(1).w.wdata := src2Mux
    case _ =>
      // Every other unit takes all operands straight from the register file.
      (0 until srcNum).foreach(i => data(i).w.wdata := io.srcRegValue(i) )
  }
  // deq
  data.foreach(_.r.addr := io.sel)
  io.out := data.map(_.r.rdata)
  if(pcMem.nonEmpty){
    pcMem.get.io.raddr(0) := io.sel
    io.pc := pcMem.get.io.rdata(0)
  }
}