LoadQueue.scala 23.7 KB
Newer Older
Y
Yinan Xu 已提交
1
package xiangshan.mem
W
William Wang 已提交
2 3 4

import chisel3._
import chisel3.util._
5
import utils._
Y
Yinan Xu 已提交
6 7
import xiangshan._
import xiangshan.cache._
8
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
9
import xiangshan.backend.LSUOpType
10
import xiangshan.mem._
Y
Yinan Xu 已提交
11
import xiangshan.backend.roq.RoqPtr
L
linjiawei 已提交
12
import xiangshan.backend.fu.fpu.boxF32ToF64
W
William Wang 已提交
13 14


15
/**
  * Pointer into the LoadQueue: a circular-queue pointer sized by `LoadQueueSize`,
  * carrying a `flag` bit alongside the entry index `value`.
  */
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize)

object LqPtr extends HasXSParameter {
  /** Creates an LqPtr wire whose `flag` and `value` fields are driven by `f` and `v`. */
  def apply(f: Bool, v: UInt): LqPtr = {
    val built = Wire(new LqPtr)
    built.value := v
    built.flag := f
    built
  }
}

26

27
// Load Queue
28
class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
W
William Wang 已提交
29
  val io = IO(new Bundle() {
30 31 32 33 34
    val enq = new Bundle() {
      val canAccept = Output(Bool())
      val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
      val resp = Vec(RenameWidth, Output(new LqPtr))
    }
W
William Wang 已提交
35
    val brqRedirect = Input(Valid(new Redirect))
W
William Wang 已提交
36
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
37
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
38
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
Y
Yinan Xu 已提交
39
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
Y
Yinan Xu 已提交
40
    val commits = Flipped(new RoqCommitIO)
41
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
42 43
    val dcache = new DCacheLineIO
    val uncache = new DCacheWordIO
Y
Yinan Xu 已提交
44
    val roqDeqPtr = Input(new RoqPtr)
45
    val exceptionAddr = new ExceptionAddrIO
W
William Wang 已提交
46
  })
47

W
William Wang 已提交
48
  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
W
William Wang 已提交
49
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
W
William Wang 已提交
50
  val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
W
William Wang 已提交
51
  dataModule.io := DontCare
W
William Wang 已提交
52
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
53
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
W
William Wang 已提交
54 55 56 57 58
  // Per-entry status bits. `writebacked` has a reset value of false; the other four have no
  // reset value and are cleared when an entry is enqueued.
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been written back to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // entries with this bit set are skipped by redirect cancellation. NOTE(review): presumably "inst has committed", but nothing visible here ever sets it -- confirm
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq
59

Y
Yinan Xu 已提交
60
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
61
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
Y
Yinan Xu 已提交
62
  val enqPtr = enqPtrExt(0).value
63
  val deqPtr = deqPtrExt.value
Y
Yinan Xu 已提交
64
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
65 66 67
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull
68

Y
Yinan Xu 已提交
69 70
  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).lqIdx.value)
71

72 73
  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)
74

Y
Yinan Xu 已提交
75 76 77 78 79
  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries >= RenameWidth(EnqWidth)
    */
Y
Yinan Xu 已提交
80
  val validEntries = distanceBetween(enqPtrExt(0), deqPtrExt)
81 82
  val firedDispatch = io.enq.req.map(_.valid)
  io.enq.canAccept := validEntries <= (LoadQueueSize - RenameWidth).U
Y
Yinan Xu 已提交
83
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
W
William Wang 已提交
84
  for (i <- 0 until RenameWidth) {
85
    val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
Y
Yinan Xu 已提交
86
    val lqIdx = enqPtrExt(offset)
87
    val index = lqIdx.value
88 89
    when(io.enq.req(i).valid) {
      uop(index) := io.enq.req(i).bits
Y
Yinan Xu 已提交
90
      allocated(index) := true.B
91
      datavalid(index) := false.B
Y
Yinan Xu 已提交
92 93 94 95 96
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      listening(index) := false.B
      pending(index) := false.B
W
William Wang 已提交
97
    }
98 99
    io.enq.resp(i) := lqIdx

Y
Yinan Xu 已提交
100
    XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not valid when not ready\n")
W
William Wang 已提交
101 102
  }

Y
Yinan Xu 已提交
103 104
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  when (Cat(firedDispatch).orR && !io.brqRedirect.valid) {
Y
Yinan Xu 已提交
105 106 107
    val enqNumber = PopCount(firedDispatch)
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
    XSInfo("dispatched %d insts to lq\n", enqNumber)
W
William Wang 已提交
108 109
  }

Y
Yinan Xu 已提交
110 111 112 113 114 115 116 117 118 119 120 121 122
  /**
    * Writeback load from load units
    *
    * Most load instructions writeback to regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
W
William Wang 已提交
123
    dataModule.io.wb(i).wen := false.B
L
LinJiawei 已提交
124 125
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
W
William Wang 已提交
126
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
127
          io.loadIn(i).bits.uop.lqIdx.asUInt,
W
William Wang 已提交
128 129 130 131
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
132 133 134
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
W
William Wang 已提交
135
          io.loadIn(i).bits.mmio,
136 137 138 139
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }.otherwise {
140
          XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
141
          io.loadIn(i).bits.uop.lqIdx.asUInt,
142 143 144 145
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
146 147 148
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
149
          io.loadIn(i).bits.mmio,
150 151 152 153
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }
154
        val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
155
        datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
156
        writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
157

W
William Wang 已提交
158
        val loadWbData = Wire(new LsqEntry)
W
William Wang 已提交
159 160 161 162 163 164 165 166 167 168 169
        loadWbData.paddr := io.loadIn(i).bits.paddr
        loadWbData.vaddr := io.loadIn(i).bits.vaddr
        loadWbData.mask := io.loadIn(i).bits.mask
        loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug
        loadWbData.mmio := io.loadIn(i).bits.mmio
        loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
        loadWbData.fwdData := io.loadIn(i).bits.forwardData
        loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
        dataModule.io.wb(i).wen := true.B

170
        val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
171
        miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
172
        listening(loadWbIndex) := dcacheMissed
173
        pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
174
      }
Y
Yinan Xu 已提交
175
    }
W
William Wang 已提交
176

Y
Yinan Xu 已提交
177 178 179 180 181 182 183 184
  /**
    * Cache miss request
    *
    * (1) writeback: miss
    * (2) send to dcache: listening
    * (3) dcache response: datavalid
    * (4) writeback to ROB: writeback
    */
185 186 187 188
  val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)

W
William Wang 已提交
189
  val missRefillSelVec = VecInit(
W
William Wang 已提交
190
    (0 until LoadQueueSize).map{ i =>
W
William Wang 已提交
191
      val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
192 193 194
      allocated(i) && miss(i) && !inflight
    })

195
  val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
W
William Wang 已提交
196
  val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
197 198
  io.dcache.req.valid := missRefillSelVec.asUInt.orR
  io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
199
  io.dcache.req.bits.addr := missRefillBlockAddr
200
  io.dcache.req.bits.data := DontCare
201
  io.dcache.req.bits.mask := DontCare
202

W
William Wang 已提交
203 204
  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
205
  io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
206
  io.dcache.req.bits.meta.uop      := uop(missRefillSel)
W
William Wang 已提交
207
  io.dcache.req.bits.meta.mmio     := false.B // dataModule.io.rdata(missRefillSel).mmio
208
  io.dcache.req.bits.meta.tlb_miss := false.B
209
  io.dcache.req.bits.meta.mask     := DontCare
210 211 212 213
  io.dcache.req.bits.meta.replay   := false.B

  io.dcache.resp.ready := true.B

W
William Wang 已提交
214
  assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))
215 216

  when(io.dcache.req.fire()) {
W
William Wang 已提交
217 218
    miss(missRefillSel) := false.B
    listening(missRefillSel) := true.B
219 220 221 222 223

    // mark this block as inflight
    inflightReqs(reqBlockIndex).valid := true.B
    inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
    assert(!inflightReqs(reqBlockIndex).valid)
W
William Wang 已提交
224 225
  }

226 227 228 229 230 231 232 233 234 235 236
  when(io.dcache.resp.fire()) {
    val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
    assert(inflight)
    for (i <- 0 until cfg.nLoadMissEntries) {
      when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
        inflightReqs(i).valid := false.B
      }
    }
  }


237
  when(io.dcache.req.fire()){
238
    XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
239
      io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
Y
Yinan Xu 已提交
240 241
      io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
    )
242 243 244
  }

  when(io.dcache.resp.fire()){
245
    XSDebug("miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n",
246
      io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx.asUInt, io.dcache.resp.bits.meta.uop.lqIdx.asUInt,
Y
Yinan Xu 已提交
247
      io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data
248
    )
249 250
  }

251
  // Refill 64 bit in a cycle
252
  // Refill data comes back from io.dcache.resp
W
William Wang 已提交
253
  dataModule.io.refill.dcache := io.dcache.resp.bits
W
William Wang 已提交
254

W
William Wang 已提交
255
  (0 until LoadQueueSize).map(i => {
W
William Wang 已提交
256
    val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === io.dcache.resp.bits.meta.paddr
W
William Wang 已提交
257
    dataModule.io.refill.wen(i) := false.B
258
    when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
W
William Wang 已提交
259
      dataModule.io.refill.wen(i) := true.B
260
      datavalid(i) := true.B
W
William Wang 已提交
261 262 263
      listening(i) := false.B
    }
  })
W
William Wang 已提交
264 265 266

  // Writeback of loads whose data arrived late (dcache refill or uncache response) to CDB.
  // Up to two entries that hold valid data and have not written back yet are selected per
  // cycle by two chained priority encoders, i.e. lowest queue index first (not age order).
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && datavalid(i) && !writebacked(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  // The selectors drive io.ldout (the load writeback port), so they are sized from it rather
  // than from the store pipeline width. The selection logic below is hard-wired for two ports.
  require(io.ldout.length == 2, "load writeback selection supports exactly 2 ldout ports")
  val loadWbSel = Wire(Vec(io.ldout.length, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelV = Wire(Vec(io.ldout.length, Bool()))
  val lselvec0 = PriorityEncoderOH(loadWbSelVec)
  // Second pick: same candidates with the first pick masked out.
  val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt)
  loadWbSel(0) := OHToUInt(lselvec0)
  loadWbSelV(0) := lselvec0.orR
  loadWbSel(1) := OHToUInt(lselvec1)
  loadWbSelV(1) := lselvec1.orR
  io.ldout.indices.foreach(i => {
    // data select
    val rdata = dataModule.io.rdata(loadWbSel(i)).data
    val func = uop(loadWbSel(i)).ctrl.fuOpType
    val raddr = dataModule.io.rdata(loadWbSel(i)).paddr
    // Shift the 64-bit word so that the addressed byte lands at bit 0.
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    // Extend (or NaN-box, for flw) the selected bytes according to the load op type.
    val rdataPartialLoad = LookupTree(func, List(
        LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
        LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
        LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
        LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
        LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN),
        LSUOpType.flw  -> boxF32ToF64(rdataSel(31, 0))
    ))
    io.ldout(i).bits.uop := uop(loadWbSel(i))
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := dataModule.io.rdata(loadWbSel(i)).mmio
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)
    when(io.ldout(i).fire()) {
      // Mark the entry so it is not selected for writeback again.
      writebacked(loadWbSel(i)) := true.B
      XSInfo("load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        dataModule.io.rdata(loadWbSel(i)).paddr,
        dataModule.io.rdata(loadWbSel(i)).data,
        dataModule.io.rdata(loadWbSel(i)).mmio
      )
    }
  })

Y
Yinan Xu 已提交
326 327 328 329 330
  /**
    * Load commits
    *
    * When load commited, mark it as !allocated and move deqPtrExt forward.
    */
331 332 333 334 335 336
  // Free every entry named by a committing load, then advance the dequeue pointer
  // by the number of loads committed this cycle.
  for (slot <- 0 until CommitWidth) {
    val commitIdx = mcommitIdx(slot)
    when(loadCommit(slot)) {
      allocated(commitIdx) := false.B
      XSDebug("load commit %d: idx %d %x\n", slot.U, commitIdx, uop(commitIdx).cf.pc)
    }
  }
  val committedLoads = PopCount(loadCommit)
  deqPtrExt := deqPtrExt + committedLoads
338

339
  /**
    * Priority-encodes `mask`, preferring positions whose bit in `startMask` is clear.
    *
    * @param mask      candidate bits
    * @param startMask positions to skip on the first attempt
    * @return index of the lowest set bit of `mask` among positions where `startMask` is 0;
    *         if there is none, the index of the lowest set bit of `mask` as a whole
    */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val preferred = VecInit(mask.zipWithIndex.map { case (bit, idx) =>
      bit && !startMask(idx)
    }).asUInt
    val anyPreferred = preferred.orR()
    val candidates = Mux(anyPreferred, preferred, mask.asUInt)
    PriorityEncoder(candidates)
  }
W
William Wang 已提交
345

346 347 348 349
  /**
    * Selects one of exactly two uops.
    *
    * When both are valid, the one that is not "after" the other (by `isAfter` on roqIdx)
    * is chosen. When only the first is valid, the first is chosen. Otherwise the second
    * is returned (including when neither is valid).
    */
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    val bothValid = valid(0) && valid(1)
    val onlyFirstValid = valid(0) && !valid(1)
    val firstIsAfterSecond = isAfter(uop(0).roqIdx, uop(1).roqIdx)
    val pickWhenBoth = Mux(firstIsAfterSecond, uop(1), uop(0))
    val pickOtherwise = Mux(onlyFirstValid, uop(0), uop(1))
    Mux(bothValid, pickWhenBoth, pickOtherwise)
  }

  /**
    * Builds a square comparison matrix over the given uops.
    *
    * Element (i)(j) is `isAfter(uop(i).roqIdx, uop(j).roqIdx)` when both i and j are valid;
    * otherwise it is true exactly when i is invalid.
    */
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val count = valid.length
    for (row <- 0 until count) yield {
      for (col <- 0 until count) yield {
        val bothValid = valid(row) && valid(col)
        Mux(bothValid, isAfter(uop(row).roqIdx, uop(col).roqIdx), !valid(row))
      }
    }
  }
W
William Wang 已提交
365

Y
Yinan Xu 已提交
366 367 368 369 370 371 372 373 374 375 376 377 378 379
  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There're three possible types of violations. Choose the oldest load.
    *   Set io.rollback according to the detected violation.
    */
  io.load_s1 := DontCare
380 381 382 383
  /**
    * Builds the violation check for the store arriving on store pipeline `i`.
    *
    * Three groups of loads are compared against the store (same 8-byte-aligned physical
    * address and overlapping byte mask); each result is registered for one cycle:
    *   - entries already in the load queue, selected by the mask between the store's
    *     lqIdx and enqPtr,
    *   - loads presented on io.loadIn in the same cycle,
    *   - loads presented on io.load_s1 in the same cycle.
    *
    * @param i index into io.storeIn
    * @return (valid, uop): `valid` is the registered store valid ANDed with "any check hit";
    *         `uop` is the load chosen among the three checks using `getAfterMask`
    */
  def detectRollback(i: Int) = {
    val store = io.storeIn(i).bits
    val storeWordAddr = store.paddr(PAddrBits - 1, 3)

    // Range of queue entries to inspect: from the store's lqIdx to enqPtr.
    // The xor of the two masks is inverted when the pointers carry different flags.
    val startIndex = store.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val flagsEqual = store.uop.lqIdx.flag === enqPtrExt(0).flag
    val rangeXor = lqIdxMask ^ enqMask
    val toEnqPtrMask = Mux(flagsEqual, rangeXor, ~rangeXor)

    // check if load already in lq needs to be rolledback
    val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map { j =>
      val entry = dataModule.io.rdata(j)
      val sameWord = allocated(j) && storeWordAddr === entry.paddr(PAddrBits - 1, 3)
      val entryActive = datavalid(j) || listening(j) || miss(j)
      val entryNeedCheck = toEnqPtrMask(j) && sameWord && entryActive
      // TODO: update refilled data
      val bytesOverlap = (0 until 8).map(k => entry.mask(k) && store.mask(k)).reduce(_ || _)
      bytesOverlap && entryNeedCheck
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // Shared comparison for loads still in the pipeline: the load is valid, is after the
    // store by roqIdx, hits the same 8-byte word and shares at least one byte with it.
    def hitsLaterLoad(ldValid: Bool, ldUop: MicroOp, ldPaddr: UInt, ldMask: UInt): Bool =
      ldValid &&
        isAfter(ldUop.roqIdx, store.uop.roqIdx) &&
        storeWordAddr === ldPaddr(PAddrBits - 1, 3) &&
        (store.mask & ldMask).orR

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit(io.loadIn.map { ld =>
      hitsLaterLoad(ld.valid, ld.bits.uop, ld.bits.paddr, ld.bits.mask)
    }))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit(io.load_s1.map { ld =>
      hitsLaterLoad(ld.valid, ld.uop, ld.paddr, ld.mask)
    }))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    // Choose among the three candidates using the pairwise "after" matrix.
    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val pickLq = oneAfterZero && mask(2)(0)
    val pickWb = !oneAfterZero && mask(2)(1)
    val wbOrL1Uop = Mux(pickWb, rollbackUopVec(1), rollbackUopVec(2))
    val rollbackUop = Mux(pickLq, rollbackUopVec(0), wbOrL1Uop)

    val storePc = store.uop.cf.pc
    val storeRoqIdx = store.uop.roqIdx.asUInt
    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      storePc, storeRoqIdx, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      storePc, storeRoqIdx, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      storePc, storeRoqIdx, wbViolationUop.roqIdx.asUInt
    )

    val anyViolation = Cat(rollbackValidVec).orR
    (RegNext(io.storeIn(i).valid) && anyViolation, rollbackUop)
  }
W
William Wang 已提交
452

453 454 455 456 457 458 459 460 461
  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  /**
    * Picks one of two rollback candidates.
    *
    * `b` is chosen when `a` is invalid, or when both are valid and `a` is after `b`
    * (by `isAfter` on roqIdx); otherwise `a` is chosen.
    */
  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    val aIsAfterB = isAfter(a.bits.roqIdx, b.bits.roqIdx)
    val chooseB = !a.valid || (b.valid && aIsAfterB)
    Mux(chooseB, b, a)
  }

473 474 475
  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

476 477
  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
Y
Yinan Xu 已提交
478 479
  io.rollback.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx))
480 481 482 483 484
  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U
  io.rollback.bits.isReplay := true.B
  io.rollback.bits.isMisPred := false.B
  io.rollback.bits.isException := false.B
  io.rollback.bits.isFlushPipe := false.B
Y
Yinan Xu 已提交
485
  io.rollback.bits.pc := DontCare
486 487
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag
W
William Wang 已提交
488

Y
Yinan Xu 已提交
489 490 491
  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }
W
William Wang 已提交
492

Y
Yinan Xu 已提交
493 494 495 496
  /**
    * Memory mapped IO / other uncached operations
    *
    */
Y
Yinan Xu 已提交
497
  val commitType = io.commits.uop(0).ctrl.commitType
498
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
499
    commitType === CommitType.LOAD &&
500
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
Y
Yinan Xu 已提交
501
    !io.commits.isWalk
502

503
  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
504 505 506
  io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
  io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
507

Y
Yinan Xu 已提交
508
  io.uncache.req.bits.meta.id       := DontCare
509
  io.uncache.req.bits.meta.vaddr    := DontCare
510 511
  io.uncache.req.bits.meta.paddr    := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
Y
Yinan Xu 已提交
512
  io.uncache.req.bits.meta.mmio     := true.B
513
  io.uncache.req.bits.meta.tlb_miss := false.B
514
  io.uncache.req.bits.meta.mask     := dataModule.io.rdata(deqPtr).mask
515 516 517 518
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

519
  when (io.uncache.req.fire()) {
520
    pending(deqPtr) := false.B
W
William Wang 已提交
521

L
linjiawei 已提交
522
    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
523
      uop(deqPtr).cf.pc,
L
linjiawei 已提交
524 525 526 527 528
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
529 530
  }

Y
Yinan Xu 已提交
531
  dataModule.io.uncache.wen := false.B
532
  // Uncache response: mark the entry at deqPtr as holding valid data and store the
  // returned word in the data module.
  when(io.uncache.resp.fire()) {
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    // Log the data that actually came back on the uncache channel
    // (previously this printed the unrelated dcache response data).
    XSDebug("uncache resp: data %x\n", io.uncache.resp.bits.data)
  }

W
William Wang 已提交
540
  // Read vaddr for mem exception
W
William Wang 已提交
541
  io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr
W
William Wang 已提交
542

W
William Wang 已提交
543
  // misprediction recovery / exception redirect
W
William Wang 已提交
544 545 546
  // invalidate lq term using robIdx
  // An allocated entry whose `commited` bit is clear is cancelled (deallocated) when the
  // incoming redirect flushes its roqIdx.
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  (0 until LoadQueueSize).foreach { idx =>
    val flushed = uop(idx).roqIdx.needFlush(io.brqRedirect)
    needCancel(idx) := flushed && allocated(idx) && !commited(idx)
    when (needCancel(idx)) {
      allocated(idx) := false.B
    }
  }
  // we recover the pointers in the next cycle after redirect
  val needCancelReg = RegNext(needCancel)
  when (lastCycleRedirect.valid) {
    val cancelled = PopCount(needCancelReg)
    enqPtrExt := VecInit(enqPtrExt.map(ptr => ptr - cancelled))
  }
W
William Wang 已提交
558

W
William Wang 已提交
559
  // debug info
Y
Yinan Xu 已提交
560
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)
W
William Wang 已提交
561 562

  /** Debug helper: prints `name` (without log prefix) when `flag` is high, otherwise a single space. */
  def PrintFlag(flag: Bool, name: String): Unit = {
    // The two prints are mutually exclusive, so exactly one fires per call site per cycle.
    XSDebug(false, flag, name)
    XSDebug(false, !flag, " ")
  }

W
William Wang 已提交
570
  for (i <- 0 until LoadQueueSize) {
L
LinJiawei 已提交
571
    if (i % 4 == 0) XSDebug("")
W
William Wang 已提交
572
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
W
William Wang 已提交
573
    PrintFlag(allocated(i), "a")
574
    PrintFlag(allocated(i) && datavalid(i), "v")
W
William Wang 已提交
575
    PrintFlag(allocated(i) && writebacked(i), "w")
576
    PrintFlag(allocated(i) && commited(i), "c")
W
William Wang 已提交
577 578
    PrintFlag(allocated(i) && miss(i), "m")
    PrintFlag(allocated(i) && listening(i), "l")
W
William Wang 已提交
579
    PrintFlag(allocated(i) && pending(i), "p")
W
William Wang 已提交
580
    XSDebug(false, true.B, " ")
581
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
W
William Wang 已提交
582
  }
W
William Wang 已提交
583

W
William Wang 已提交
584
}