package xiangshan.mem

import chisel3._
import chisel3.util._
import freechips.rocketchip.tile.HasFPUParameters
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqLsqIO
import xiangshan.backend.fu.HasExceptionNO

class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }

object LqPtr extends HasXSParameter {
  def apply(f: Bool, v: UInt): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
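
// LqPtr is a wrapping pointer into the LoadQueue (assuming the usual flag/value
// semantics of utils.CircularQueuePtr): `value` indexes an entry and `flag`
// toggles on each wrap-around, so two pointers with equal values denote a full
// queue when their flags differ and an empty queue when their flags match.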

trait HasLoadHelper { this: XSModule =>
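  // rdataHelper extends the selected load bytes to XLEN according to the op type.
  // For FP loads (fpWen), an lw result is NaN-boxed: the upper 32 bits are filled
  // with ones, as required for a single-precision value held in an FLEN=64 register.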
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, Cat(Fill(32, 1.U(1.W)), rdata(31, 0)), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN)
    ))
  }

  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
  }
}

class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(RenameWidth, Input(Bool()))
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(RenameWidth, Output(new LqPtr))
}

// Load Queue
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasExceptionNO
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
    val needReplayFromRS = Vec(LoadPipelineWidth, Input(Bool()))
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new MaskedLoadForwardQueryIO))
    val roq = Flipped(new RoqLsqIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = Flipped(ValidIO(new Refill))
    val uncache = new DCacheWordIO
    val exceptionAddr = new ExceptionAddrIO
  })

  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
  dataModule.io := DontCare
  val vaddrModule = Module(new SyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
  vaddrModule.io := DontCare
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the head of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(LoadQueueSize, UInt(PAddrBits.W))) // mmio: paddr of this inst, for debug

  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val deqPtrExtNext = Wire(new LqPtr)
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)

  val commitCount = RegNext(io.roq.lcommit)
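  // commitCount is a registered copy of lcommit: deallocation and deqPtr updates
  // lag the ROB commit by one cycle (the exception vaddr read below compensates
  // by adding both commitCount and the un-registered io.roq.lcommit).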

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth (the enqueue width)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !(io.brqRedirect.valid || io.flush)) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      miss(index) := false.B
      // listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked as miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb.wen(i) := false.B
    val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }.otherwise {
        XSInfo(io.loadIn(i).valid, "load hit write to cdb lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }
      datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.loadDataForwarded(i)) &&
        !io.loadIn(i).bits.mmio && // mmio data is not valid until we finished uncache access
        !io.needReplayFromRS(i) // do not writeback if that inst will be resent from rs
      writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

      val loadWbData = Wire(new LQDataEntry)
      loadWbData.paddr := io.loadIn(i).bits.paddr
      loadWbData.mask := io.loadIn(i).bits.mask
      loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data
      loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
      dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
      dataModule.io.wb.wen(i) := true.B

      debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
      debug_paddr(loadWbIndex) := io.loadIn(i).bits.paddr

      val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i) && !io.needReplayFromRS(i)
      pending(loadWbIndex) := io.loadIn(i).bits.mmio
      uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
    }
    // vaddrModule write is delayed, as vaddrModule will not be read right after write
    vaddrModule.io.waddr(i) := RegNext(loadWbIndex)
    vaddrModule.io.wdata(i) := RegNext(io.loadIn(i).bits.vaddr)
    vaddrModule.io.wen(i) := RegNext(io.loadIn(i).fire())
  }

  when(io.dcache.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
  }

  // Refill 64 bits in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.valid := io.dcache.valid
  dataModule.io.refill.paddr := io.dcache.bits.addr
  dataModule.io.refill.data := io.dcache.bits.data

  (0 until LoadQueueSize).map(i => {
    dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
    when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
      datavalid(i) := true.B
      miss(i) := false.B
    }
  })

  // Writeback up to 2 missed load insts to CDB
  //
  // Pick 2 missed loads (data refilled), write them back to CDB.
  // The 2 refilled loads are selected from even/odd entries separately.

  // Stage 0
  // Generate writeback indexes

  def getEvenBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i)})).asUInt
  }
  def getOddBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i+1)})).asUInt
  }
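
  // For example, with LoadQueueSize = 8, getEvenBits packs input bits {0,2,4,6}
  // and getOddBits packs bits {1,3,5,7}. Each writeback port then only arbitrates
  // over half of the queue, halving the width of each select tree.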

  val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W))) // index selected last cycle
  val loadWbSelV = Wire(Vec(LoadPipelineWidth, Bool())) // index selected in last cycle is valid

  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && !writebacked(i) && datavalid(i)
  })).asUInt() // use UInt instead of Vec to reduce Verilog lines
  val evenDeqMask = getEvenBits(deqMask)
  val oddDeqMask = getOddBits(deqMask)
  // generate lastCycleSelect mask
  val evenSelectMask = Mux(io.ldout(0).fire(), getEvenBits(UIntToOH(loadWbSel(0))), 0.U)
  val oddSelectMask = Mux(io.ldout(1).fire(), getOddBits(UIntToOH(loadWbSel(1))), 0.U)
  // generate real select vec
  val loadEvenSelVec = getEvenBits(loadWbSelVec) & ~evenSelectMask
  val loadOddSelVec = getOddBits(loadWbSelVec) & ~oddSelectMask
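  // Masking out last cycle's selections matters here because writebacked(...) is
  // only set when ldout fires and becomes visible one cycle later; without
  // evenSelectMask/oddSelectMask the same entry could be picked twice.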

  def toVec(a: UInt): Vec[Bool] = {
    VecInit(a.asBools)
  }

  val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
  loadWbSelGen(0) := Cat(getFirstOne(toVec(loadEvenSelVec), evenDeqMask), 0.U(1.W))
  loadWbSelVGen(0) := loadEvenSelVec.asUInt.orR
  loadWbSelGen(1) := Cat(getFirstOne(toVec(loadOddSelVec), oddDeqMask), 1.U(1.W))
  loadWbSelVGen(1) := loadOddSelVec.asUInt.orR

  (0 until LoadPipelineWidth).map(i => {
    loadWbSel(i) := RegNext(loadWbSelGen(i))
    loadWbSelV(i) := RegNext(loadWbSelVGen(i), init = false.B)
    when(io.ldout(i).fire()){
      // Mark them as writebacked, so they will not be selected in the next cycle
      writebacked(loadWbSel(i)) := true.B
    }
  })

  // Stage 1
  // Use indexes generated in cycle 0 to read data
  // writeback data to CDB
  (0 until LoadPipelineWidth).map(i => {
    // data select
    dataModule.io.wb.raddr(i) := loadWbSelGen(i)
    val rdata = dataModule.io.wb.rdata(i).data
    val seluop = uop(loadWbSel(i))
    val func = seluop.ctrl.fuOpType
    val raddr = dataModule.io.wb.rdata(i).paddr
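    // Shift the 64-bit dword right by the byte offset paddr(2, 0) so the addressed
    // bytes land at the least-significant end before rdataHelper extends them.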
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    // writeback missed int/fp load
    //
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.debug.isPerfCnt := false.B
    io.ldout(i).bits.debug.paddr := debug_paddr(loadWbSel(i))
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelV(i)

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cdb roqidx %d lqidx %d pc 0x%x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        debug_mmio(loadWbSel(i))
      )
    }
  })

  /**
    * Load commits
    *
    * When a load is committed, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(commitCount > i.U){
      allocated(deqPtr+i.U) := false.B
    }
  })

  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }
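  // getFirstOne returns the queue-oldest set bit: bits not covered by startMask
  // (indices at or above the start index, i.e. the current lap) take priority,
  // and only if none of them is set does the search wrap to the remaining bits.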

  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }
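  // getAfterMask precomputes, for every candidate pair (i, j), whether uop(i) is
  // younger than uop(j) (an invalid candidate i counts as younger), so the oldest
  // of the three rollback candidates can be picked in parallel a stage later.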

  /**
    * Memory violation detection
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There are three possible types of violations, giving up to 6 possible redirect requests.
    *   Choose the oldest load (part 1). (4 + 2) -> (1 + 2)
    * Cycle 2: Redirect Fire
    *   Choose the oldest load (part 2). (3 -> 1)
    *   Prepare redirect request according to the detected violation.
    *   Fire redirect request (if valid)
    */

  // stage 0:        lq l1 wb     l1 wb lq
  //                 |  |  |      |  |  |  (paddr match)
  // stage 1:        lq l1 wb     l1 wb lq
  //                 |  |  |      |  |  |
  //                 |  |------------|  |
  //                 |        |         |
  // stage 2:        lq      l1wb       lq
  //                 |        |         |
  //                 --------------------
  //                          |
  //                      rollback req
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
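    // toEnqPtrMask marks entries in [stIdx, enqPtr), i.e. loads dispatched after
    // this store; the xor with enqMask plus the flag compare handles the two
    // pointers sitting on different wrap-around laps.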

    // check if a load already in lq needs to be rolled back
    dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
    dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
    val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask)
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
    })))
    val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s write back to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    XSDebug(
      l1Violation,
      "need rollback (l1 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    ((lqViolation, lqViolationUop), (wbViolation, wbViolationUop), (l1Violation, l1ViolationUop))
  }

  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }
  val lastCycleRedirect = RegNext(io.brqRedirect)
  val lastlastCycleRedirect = RegNext(lastCycleRedirect)
  val lastCycleFlush = RegNext(io.flush)
  val lastlastCycleFlush = RegNext(lastCycleFlush)

  // S2: select rollback (part1) and generate rollback request
  // rollback check
  // Wb/L1 rollback seq check is done in s2
  val rollbackWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  val rollbackL1 = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  val rollbackL1Wb = Wire(Vec(StorePipelineWidth*2, Valid(new MicroOp)))
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLq = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollbackLq(i).valid := detectedRollback._1._1 && RegNext(io.storeIn(i).valid)
    rollbackLq(i).bits := detectedRollback._1._2
    rollbackWb(i).valid := detectedRollback._2._1 && RegNext(io.storeIn(i).valid)
    rollbackWb(i).bits := detectedRollback._2._2
    rollbackL1(i).valid := detectedRollback._3._1 && RegNext(io.storeIn(i).valid)
    rollbackL1(i).bits := detectedRollback._3._2
    rollbackL1Wb(2*i) := rollbackL1(i)
    rollbackL1Wb(2*i+1) := rollbackWb(i)
  }

  val rollbackL1WbSelected = ParallelOperation(rollbackL1Wb, rollbackSel)
  val rollbackL1WbVReg = RegNext(rollbackL1WbSelected.valid)
  val rollbackL1WbReg = RegEnable(rollbackL1WbSelected.bits, rollbackL1WbSelected.valid)
  val rollbackLq0VReg = RegNext(rollbackLq(0).valid)
  val rollbackLq0Reg = RegEnable(rollbackLq(0).bits, rollbackLq(0).valid)
  val rollbackLq1VReg = RegNext(rollbackLq(1).valid)
  val rollbackLq1Reg = RegEnable(rollbackLq(1).bits, rollbackLq(1).valid)

  // S3: select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals this cycle's roqIdx, it still triggers the redirect.

  // FIXME: this is ugly
  val rollbackValidVec = Seq(rollbackL1WbVReg, rollbackLq0VReg, rollbackLq1VReg)
  val rollbackUopVec = Seq(rollbackL1WbReg, rollbackLq0Reg, rollbackLq1Reg)

  // select uop in parallel
  val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
  val oneAfterZero = mask(1)(0)
  val rollbackUop = Mux(oneAfterZero && mask(2)(0),
    rollbackUopVec(0),
    Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

  // check if rollback request is still valid in parallel
  val rollbackValidVecChecked = Wire(Vec(3, Bool()))
  for(((v, uop), idx) <- rollbackValidVec.zip(rollbackUopVec).zipWithIndex) {
    rollbackValidVecChecked(idx) := v && 
      (!lastCycleRedirect.valid || isBefore(uop.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
      (!lastlastCycleRedirect.valid || isBefore(uop.roqIdx, lastlastCycleRedirect.bits.roqIdx))
  }

  io.rollback.bits.roqIdx := rollbackUop.roqIdx
  io.rollback.bits.ftqIdx := rollbackUop.cf.ftqPtr
  io.rollback.bits.ftqOffset := rollbackUop.cf.ftqOffset
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.cfiUpdate := DontCare
  io.rollback.bits.cfiUpdate.target := rollbackUop.cf.pc
  // io.rollback.bits.pc := DontCare

  io.rollback.valid := rollbackValidVecChecked.asUInt.orR && !lastCycleFlush && !lastlastCycleFlush
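  // A rollback produced while a flush was in flight (in either of the two cycles
  // it took to generate the request) may refer to an instruction that has already
  // been flushed, so the lastCycleFlush/lastlastCycleFlush terms suppress it.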

  when(io.rollback.valid) {
    // XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.cfi, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    * States:
    * (1) writeback from store units: mark as pending
    * (2) when they reach ROB's head, they can be sent to uncache channel
    * (3) response from uncache channel: mark as datavalid
    * (4) writeback to ROB (and other units): mark as writebacked
    * (5) ROB commits the instruction: same as normal instructions
    */
  // (2) when they reach ROB's head, they can be sent to uncache channel
  val lqTailMmioPending = WireInit(pending(deqPtr))
  val lqTailAllocated = WireInit(allocated(deqPtr))
  val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4)
  val uncacheState = RegInit(s_idle)
  switch(uncacheState) {
    is(s_idle) {
      when(io.roq.pendingld && lqTailMmioPending && lqTailAllocated) {
        uncacheState := s_req
      }
    }
    is(s_req) {
      when(io.uncache.req.fire()) {
        uncacheState := s_resp
      }
    }
    is(s_resp) {
      when(io.uncache.resp.fire()) {
        uncacheState := s_wait
      }
    }
    is(s_wait) {
      when(io.roq.commit) {
        uncacheState := s_idle // ready for next mmio
      }
    }
  }
  io.uncache.req.valid := uncacheState === s_req

  dataModule.io.uncache.raddr := deqPtrExtNext.value

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
  io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask

  io.uncache.req.bits.id   := DontCare

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  // (3) response from uncache channel: mark as datavalid
  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.uncache.resp.bits.data)
  }

  // Read vaddr for mem exception
  // Note that both io.roq.lcommit and RegNext(io.roq.lcommit) should be taken into consideration
  vaddrModule.io.raddr(0) := (deqPtrExt + commitCount + io.roq.lcommit).value
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate lq entries using roqIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect, io.flush) && allocated(i)
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !(io.brqRedirect.valid || io.flush), PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid || lastCycleFlush) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  deqPtrExtNext := deqPtrExt + commitCount
  deqPtrExt := deqPtrExtNext

  val validCount = distanceBetween(enqPtrExt(0), deqPtrExt)

  allowEnqueue := validCount + enqNumber <= (LoadQueueSize - RenameWidth).U
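  // RenameWidth entries are kept in reserve so that a whole rename group can
  // still be accepted in the cycle after canAccept was asserted.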

  // perf counter
  XSPerf("lqRollback", io.rollback.valid, acc = true) // rollback redirect generated
  XSPerf("lqFull", !allowEnqueue, acc = true)
  XSPerf("lqMmioCycle", uncacheState =/= s_idle, acc = true) // lq is busy dealing with uncache req
  XSPerf("lqMmioCnt", io.uncache.req.fire(), acc = true)
  XSPerf("lqRefill", io.dcache.valid, acc = true)
  XSPerf("lqWriteback", PopCount(VecInit(io.ldout.map(i => i.fire()))), acc = true)
  XSPerf("lqWbBlocked", PopCount(VecInit(io.ldout.map(i => i.valid && !i.ready))), acc = true)

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && miss(i), "m")
    // PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}