package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.fu.fpu.boxF32ToF64


class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }

object LqPtr extends HasXSParameter {
  def apply(f: Bool, v: UInt): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(RenameWidth, Input(Bool()))
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(RenameWidth, Output(new LqPtr))
}

// Load Queue
class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Input(Valid(new Redirect))
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val commits = Flipped(new RoqCommitIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = new DCacheLineIO
    val uncache = new DCacheWordIO
    val roqDeqPtr = Input(new RoqPtr)
    val exceptionAddr = new ExceptionAddrIO
  })

  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
  dataModule.io := DontCare
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been committed by roq
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst

  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull

  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
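    // offset counts how many earlier rename slots also allocate an entry,
    // so each accepted request gets the next pointer after enqPtrExt(0)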
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time as they are written into the load queue.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to the ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back here.
    * The mmio instruction will be sent to the lower memory level when it reaches the head of the ROB.
    * After the uncache response, it will write back through the arbiter shared with the load units.
    *   (3) For a load that misses in the cache, it is marked as miss and sent to dcache later.
    * After the cache refill, it will write back through the arbiter shared with the load units.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb(i).wen := false.B
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }.otherwise {
          XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }
        val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
        datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
        writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

        val loadWbData = Wire(new LsqEntry)
        loadWbData.paddr := io.loadIn(i).bits.paddr
        loadWbData.vaddr := io.loadIn(i).bits.vaddr
        loadWbData.mask := io.loadIn(i).bits.mask
        loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug
        loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
        loadWbData.fwdData := io.loadIn(i).bits.forwardData
        loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
        dataModule.io.wb(i).wen := true.B

        debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
        
        val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
        miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
        listening(loadWbIndex) := dcacheMissed
        pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
      }
    }

  /**
    * Cache miss request
    *
    * (1) writeback: miss
    * (2) send to dcache: listening
    * (3) dcache response: datavalid
    * (4) writeback to ROB: writeback
    */
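  // inflightReqs records the cache blocks that already have an outstanding miss
  // request, so the same block is not requested from dcache twice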
  val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)

  val missRefillSelVec = VecInit(
    (0 until LoadQueueSize).map{ i =>
      val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
      allocated(i) && miss(i) && !inflight
    })

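  // choose the oldest eligible missed entry (searching from deqPtr) and request its block from dcache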
  val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
  val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
  io.dcache.req.valid := missRefillSelVec.asUInt.orR
  io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
  io.dcache.req.bits.addr := missRefillBlockAddr
  io.dcache.req.bits.data := DontCare
  io.dcache.req.bits.mask := DontCare

  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
  io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
  io.dcache.req.bits.meta.uop      := uop(missRefillSel)
  io.dcache.req.bits.meta.mmio     := false.B // mmio(missRefillSel)
  io.dcache.req.bits.meta.tlb_miss := false.B
  io.dcache.req.bits.meta.mask     := DontCare
  io.dcache.req.bits.meta.replay   := false.B

  io.dcache.resp.ready := true.B

  assert(!(debug_mmio(missRefillSel) && io.dcache.req.valid))

  when(io.dcache.req.fire()) {
    miss(missRefillSel) := false.B
    listening(missRefillSel) := true.B

    // mark this block as inflight
    inflightReqs(reqBlockIndex).valid := true.B
    inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
    assert(!inflightReqs(reqBlockIndex).valid)
  }

  when(io.dcache.resp.fire()) {
    val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
    assert(inflight)
    for (i <- 0 until cfg.nLoadMissEntries) {
      when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
        inflightReqs(i).valid := false.B
      }
    }
  }


  when(io.dcache.req.fire()){
    XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
      io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
      io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
    )
  }

  when(io.dcache.resp.fire()){
    XSDebug("miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n",
      io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx.asUInt, io.dcache.resp.bits.meta.uop.lqIdx.asUInt,
      io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data
    )
  }

  // Refill 64 bit in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.dcache := io.dcache.resp.bits

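  // broadcast the refill: every allocated entry still listening on this block
  // captures the refilled line and becomes datavalid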
  (0 until LoadQueueSize).map(i => {
    val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === io.dcache.resp.bits.meta.paddr
    dataModule.io.refill.wen(i) := false.B
    when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
      dataModule.io.refill.wen(i) := true.B
      datavalid(i) := true.B
      listening(i) := false.B
    }
  })

  // writeback up to 2 missed load insts to CDB
  // pick the first two such loads (by priority) whose data has been refilled, write them back to the CDB
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && datavalid(i) && !writebacked(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelV = Wire(Vec(StorePipelineWidth, Bool()))
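  // the two lowest-indexed candidates are selected; lselvec1 masks out the first pick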
  val lselvec0 = PriorityEncoderOH(loadWbSelVec)
  val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt)
  loadWbSel(0) := OHToUInt(lselvec0)
  loadWbSelV(0) := lselvec0.orR
  loadWbSel(1) := OHToUInt(lselvec1)
  loadWbSelV(1) := lselvec1.orR
  (0 until StorePipelineWidth).map(i => {
    // data select
    val rdata = dataModule.io.rdata(loadWbSel(i)).data
    val func = uop(loadWbSel(i)).ctrl.fuOpType
    val raddr = dataModule.io.rdata(loadWbSel(i)).paddr
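    // select the addressed bytes with paddr(2, 0), then sign/zero-extend
    // (or box the single-precision result for flw) according to the load type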
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = LookupTree(func, List(
        LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
        LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
        LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
        LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
        LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN),
        LSUOpType.flw  -> boxF32ToF64(rdataSel(31, 0))
    ))
    io.ldout(i).bits.uop := uop(loadWbSel(i))
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)
    when(io.ldout(i).fire()) {
      writebacked(loadWbSel(i)) := true.B
      XSInfo("load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        dataModule.io.rdata(loadWbSel(i)).paddr,
        dataModule.io.rdata(loadWbSel(i)).data,
        debug_mmio(loadWbSel(i))
      )
    }
  })

  /**
    * Load commits
    *
    * When a load is committed, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

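  // returns the index of the first set bit in mask at or after the position encoded
  // by startMask, wrapping around the circular queue when necessary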
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

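  // mask(i)(j) is true when uop(i) is younger than uop(j); invalid entries are
  // treated as younger so they never win the "oldest" selection below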
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }

  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There are three possible types of violations. Choose the oldest load.
    *   Set io.redirect according to the detected violation.
    */
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
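    // entries between the store's lqIdx and enqPtr are the loads younger than this store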
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      val addrMatch = allocated(j) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
      val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
      // TODO: update refilled data
      val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
      Cat(violationVec).orR() && entryNeedCheck
    })))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }

  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

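  // reduce the per-pipeline candidates to the single oldest rollback request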
  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  io.rollback.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())

  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.pc := DontCare
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    */
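  // an mmio load is issued to the uncache channel only when it reaches the head of
  // the ROB (roqDeqPtr matches) and the ROB is committing it rather than walking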
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
    io.commits.info(0).commitType === CommitType.LOAD &&
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
    !io.commits.isWalk

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
  io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.rdata(deqPtr).mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.uncache.resp.bits.data)
  }

  // Read vaddr for mem exception
  io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr

  // misprediction recovery / exception redirect
  // invalidate lq entries using roqIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
    when (needCancel(i)) {
        allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  val commitCount = PopCount(loadCommit)
  deqPtrExt := deqPtrExt + commitCount

  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
  validCounter := Mux(lastLastCycleRedirect,
    trueValidCounter,
    validCounter + enqNumber - commitCount
  )

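  // allow enqueue only when at least RenameWidth entries are guaranteed to be free;
  // in the cycles after a redirect, fall back to the counter recomputed from the restored pointers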
  allowEnqueue := Mux(io.brqRedirect.valid,
    false.B,
    Mux(lastLastCycleRedirect,
      trueValidCounter <= (LoadQueueSize - RenameWidth).U,
      validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
    )
  )

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}