LoadQueue.scala 23.6 KB
Newer Older
Y
Yinan Xu 已提交
1
package xiangshan.mem
W
William Wang 已提交
2 3 4

import chisel3._
import chisel3.util._
5
import utils._
Y
Yinan Xu 已提交
6 7
import xiangshan._
import xiangshan.cache._
8
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
9
import xiangshan.backend.LSUOpType
10
import xiangshan.mem._
Y
Yinan Xu 已提交
11
import xiangshan.backend.roq.RoqPtr
L
linjiawei 已提交
12
import xiangshan.backend.fu.fpu.boxF32ToF64
W
William Wang 已提交
13 14


15
/** Circular pointer into the load queue; the queue depth comes from `LqPtr.LoadQueueSize`. */
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize)

/** Companion providing a factory that builds an `LqPtr` wire from its two fields. */
object LqPtr extends HasXSParameter {
  /**
    * Creates a combinational `LqPtr`.
    *
    * @param f drives the pointer's `flag` field
    * @param v drives the pointer's `value` field
    * @return a `Wire` of type `LqPtr` with both fields connected
    */
  def apply(f: Bool, v: UInt): LqPtr = {
    val result = Wire(new LqPtr)
    result.value := v
    result.flag := f
    result
  }
}

26

27
// Load Queue
28
class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
W
William Wang 已提交
29
  val io = IO(new Bundle() {
30 31 32 33 34
    val enq = new Bundle() {
      val canAccept = Output(Bool())
      val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
      val resp = Vec(RenameWidth, Output(new LqPtr))
    }
W
William Wang 已提交
35
    val brqRedirect = Input(Valid(new Redirect))
W
William Wang 已提交
36
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
37
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
38
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
W
William Wang 已提交
39
    val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
Y
Yinan Xu 已提交
40
    val commits = Flipped(new RoqCommitIO)
41
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
42 43
    val dcache = new DCacheLineIO
    val uncache = new DCacheWordIO
Y
Yinan Xu 已提交
44
    val roqDeqPtr = Input(new RoqPtr)
45
    val exceptionAddr = new ExceptionAddrIO
46
    // val refill = Flipped(Valid(new DCacheLineReq ))
W
William Wang 已提交
47
  })
48

W
William Wang 已提交
49
  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
W
William Wang 已提交
50
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
W
William Wang 已提交
51
  val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
W
William Wang 已提交
52
  dataModule.io := DontCare
W
William Wang 已提交
53
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
54
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
W
William Wang 已提交
55 56 57 58 59
  // Per-entry status bits. Only `writebacked` has a reset value here; the other four are plain
  // registers that are cleared when an entry is enqueued.
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been written back to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been committed; cleared at enqueue and checked by the redirect-cancel logic
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq
60

61 62 63 64 65 66 67 68
  // Enqueue / dequeue pointers (flag + value), both reset to zero.
  val enqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  // Index parts of the pointers, used to address the entry arrays.
  val enqPtr = enqPtrExt.value
  val deqPtr = deqPtrExt.value
  // Equal indices mean empty when the flags match and full when they differ.
  val sameFlag = enqPtrExt.flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull
69

Y
Yinan Xu 已提交
70 71
  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).lqIdx.value)
72

73 74 75
  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)
  val enqDeqMask1 = deqMask ^ enqMask
76
  val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
77

W
William Wang 已提交
78
  // Enqueue at dispatch
79
  val validEntries = distanceBetween(enqPtrExt, deqPtrExt)
80 81
  val firedDispatch = io.enq.req.map(_.valid)
  io.enq.canAccept := validEntries <= (LoadQueueSize - RenameWidth).U
Y
Yinan Xu 已提交
82
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
W
William Wang 已提交
83
  for (i <- 0 until RenameWidth) {
84
    val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
85
    val lqIdx = enqPtrExt + offset
86
    val index = lqIdx.value
87 88
    when(io.enq.req(i).valid) {
      uop(index) := io.enq.req(i).bits
Y
Yinan Xu 已提交
89
      allocated(index) := true.B
90
      datavalid(index) := false.B
Y
Yinan Xu 已提交
91 92 93 94 95
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      listening(index) := false.B
      pending(index) := false.B
W
William Wang 已提交
96
    }
97 98
    io.enq.resp(i) := lqIdx

Y
Yinan Xu 已提交
99
    XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not valid when not ready\n")
W
William Wang 已提交
100 101
  }

102
  when(Cat(firedDispatch).orR) {
103
    enqPtrExt := enqPtrExt + PopCount(firedDispatch)
W
William Wang 已提交
104
    XSInfo("dispatched %d insts to lq\n", PopCount(firedDispatch))
W
William Wang 已提交
105 106 107 108
  }

  // writeback load
  (0 until LoadPipelineWidth).map(i => {
W
William Wang 已提交
109
    dataModule.io.wb(i).wen := false.B
L
LinJiawei 已提交
110 111
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
W
William Wang 已提交
112
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
113
          io.loadIn(i).bits.uop.lqIdx.asUInt,
W
William Wang 已提交
114 115 116 117
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
118 119 120
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
W
William Wang 已提交
121
          io.loadIn(i).bits.mmio,
122 123 124 125
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }.otherwise {
126
          XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
127
          io.loadIn(i).bits.uop.lqIdx.asUInt,
128 129 130 131
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
132 133 134
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
135
          io.loadIn(i).bits.mmio,
136 137 138 139
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }
140
        val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
141
        datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
142
        writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
143 144
        allocated(loadWbIndex) := !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR

W
William Wang 已提交
145
        val loadWbData = Wire(new LsqEntry)
W
William Wang 已提交
146 147 148 149 150 151 152 153 154 155 156
        loadWbData.paddr := io.loadIn(i).bits.paddr
        loadWbData.vaddr := io.loadIn(i).bits.vaddr
        loadWbData.mask := io.loadIn(i).bits.mask
        loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug
        loadWbData.mmio := io.loadIn(i).bits.mmio
        loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
        loadWbData.fwdData := io.loadIn(i).bits.forwardData
        loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
        dataModule.io.wb(i).wen := true.B

157
        val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
158 159 160
        miss(loadWbIndex) := dcacheMissed
        listening(loadWbIndex) := dcacheMissed
        pending(loadWbIndex) := io.loadIn(i).bits.mmio
161
      }
162
    })
W
William Wang 已提交
163

W
William Wang 已提交
164
  // cache miss request
165 166 167 168
  val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)

W
William Wang 已提交
169
  val missRefillSelVec = VecInit(
W
William Wang 已提交
170
    (0 until LoadQueueSize).map{ i =>
W
William Wang 已提交
171
      val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
172 173 174
      allocated(i) && miss(i) && !inflight
    })

175
  val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
W
William Wang 已提交
176
  val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
177 178
  io.dcache.req.valid := missRefillSelVec.asUInt.orR
  io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
179
  io.dcache.req.bits.addr := missRefillBlockAddr
180
  io.dcache.req.bits.data := DontCare
181
  io.dcache.req.bits.mask := DontCare
182

W
William Wang 已提交
183 184
  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
185
  io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
186
  io.dcache.req.bits.meta.uop      := uop(missRefillSel)
W
William Wang 已提交
187
  io.dcache.req.bits.meta.mmio     := false.B // dataModule.io.rdata(missRefillSel).mmio
188
  io.dcache.req.bits.meta.tlb_miss := false.B
189
  io.dcache.req.bits.meta.mask     := DontCare
190 191 192 193
  io.dcache.req.bits.meta.replay   := false.B

  io.dcache.resp.ready := true.B

W
William Wang 已提交
194
  assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))
195 196

  when(io.dcache.req.fire()) {
W
William Wang 已提交
197 198
    miss(missRefillSel) := false.B
    listening(missRefillSel) := true.B
199 200 201 202 203

    // mark this block as inflight
    inflightReqs(reqBlockIndex).valid := true.B
    inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
    assert(!inflightReqs(reqBlockIndex).valid)
W
William Wang 已提交
204 205
  }

206 207 208 209 210 211 212 213 214 215 216
  when(io.dcache.resp.fire()) {
    val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
    assert(inflight)
    for (i <- 0 until cfg.nLoadMissEntries) {
      when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
        inflightReqs(i).valid := false.B
      }
    }
  }


217
  when(io.dcache.req.fire()){
218
    XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
219
      io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
Y
Yinan Xu 已提交
220 221
      io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
    )
222 223 224
  }

  when(io.dcache.resp.fire()){
225
    XSDebug("miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n",
226
      io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx.asUInt, io.dcache.resp.bits.meta.uop.lqIdx.asUInt,
Y
Yinan Xu 已提交
227
      io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data
228
    )
229 230
  }

231
  // Refill 64 bit in a cycle
232
  // Refill data comes back from io.dcache.resp
W
William Wang 已提交
233
  dataModule.io.refill.dcache := io.dcache.resp.bits
W
William Wang 已提交
234

W
William Wang 已提交
235
  (0 until LoadQueueSize).map(i => {
W
William Wang 已提交
236
    val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === io.dcache.resp.bits.meta.paddr
W
William Wang 已提交
237
    dataModule.io.refill.wen(i) := false.B
238
    when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
W
William Wang 已提交
239
      dataModule.io.refill.wen(i) := true.B
240
      datavalid(i) := true.B
W
William Wang 已提交
241 242 243
      listening(i) := false.B
    }
  })
W
William Wang 已提交
244 245 246

  // writeback up to 2 missed load insts to CDB
  // just randomly pick 2 missed load (data refilled), write them back to cdb
W
William Wang 已提交
247
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
248
    allocated(i) && datavalid(i) && !writebacked(i)
L
LinJiawei 已提交
249
  })).asUInt() // use uint instead vec to reduce verilog lines
W
William Wang 已提交
250
  val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
251
  val loadWbSelV= Wire(Vec(StorePipelineWidth, Bool()))
L
LinJiawei 已提交
252 253 254
  val lselvec0 = PriorityEncoderOH(loadWbSelVec)
  val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt)
  loadWbSel(0) := OHToUInt(lselvec0)
255
  loadWbSelV(0):= lselvec0.orR
L
LinJiawei 已提交
256
  loadWbSel(1) := OHToUInt(lselvec1)
257
  loadWbSelV(1) := lselvec1.orR
W
William Wang 已提交
258
  (0 until StorePipelineWidth).map(i => {
259
    // data select
W
William Wang 已提交
260
    val rdata = dataModule.io.rdata(loadWbSel(i)).data
261
    val func = uop(loadWbSel(i)).ctrl.fuOpType
W
William Wang 已提交
262
    val raddr = dataModule.io.rdata(loadWbSel(i)).paddr
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = LookupTree(func, List(
        LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
        LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
        LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
        LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
L
LinJiawei 已提交
280 281
        LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN),
        LSUOpType.flw  -> boxF32ToF64(rdataSel(31, 0))
282
    ))
W
William Wang 已提交
283
    io.ldout(i).bits.uop := uop(loadWbSel(i))
W
William Wang 已提交
284
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools
285
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
286
    io.ldout(i).bits.data := rdataPartialLoad
W
William Wang 已提交
287 288 289
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
W
William Wang 已提交
290
    io.ldout(i).bits.debug.isMMIO := dataModule.io.rdata(loadWbSel(i)).mmio
L
LinJiawei 已提交
291
    io.ldout(i).bits.fflags := DontCare
292
    io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)
L
LinJiawei 已提交
293
    when(io.ldout(i).fire()) {
294
      writebacked(loadWbSel(i)) := true.B
295
      XSInfo("load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
296
        io.ldout(i).bits.uop.roqIdx.asUInt,
297
        io.ldout(i).bits.uop.lqIdx.asUInt,
298
        io.ldout(i).bits.uop.cf.pc,
W
William Wang 已提交
299 300 301
        dataModule.io.rdata(loadWbSel(i)).paddr,
        dataModule.io.rdata(loadWbSel(i)).data,
        dataModule.io.rdata(loadWbSel(i)).mmio
302
      )
W
William Wang 已提交
303 304 305
    }
  })

W
William Wang 已提交
306
  // move tailPtr
307
  // allocatedMask: dequeuePtr can go to the next 1-bit
308
  val allocatedMask = VecInit((0 until LoadQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
309
  // find the first one from deqPtr (deqPtr)
310
  val nextTail1 = getFirstOneWithFlag(allocatedMask, deqMask, deqPtrExt.flag)
311 312
  val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, enqPtrExt)
  deqPtrExt := nextTail
W
William Wang 已提交
313

314 315 316 317 318 319 320 321
  // When load commited, mark it as !allocated, this entry will be recycled later
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

322
  /**
    * Returns the index of the first set bit of `mask`, preferring positions that are not
    * covered by `startMask`.
    *
    * Bits of `mask` whose position is set in `startMask` are masked off first. If anything is
    * left, the lowest remaining index is returned; otherwise the lowest set index of the
    * unmasked `mask` is returned.
    *
    * @param mask      candidate bits, one per queue entry
    * @param startMask positions to skip on the first pass (callers pass a `UIntToMask` result)
    * @return index chosen by `PriorityEncoder`
    */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val aboveStart = VecInit(mask.zipWithIndex.map { case (bit, idx) => bit && !startMask(idx) }).asUInt
    val candidates = Mux(aboveStart.orR(), aboveStart, mask.asUInt)
    PriorityEncoder(candidates)
  }
W
William Wang 已提交
328

329
  /**
    * Same search as `getFirstOne`, but returns a full `LqPtr` whose flag records whether the
    * search had to fall back to the unmasked `mask`.
    *
    * @param mask      candidate bits, one per queue entry
    * @param startMask positions to skip on the first pass
    * @param startFlag flag of the pointer the search starts from
    * @return `LqPtr` with the chosen index; its flag is `startFlag` inverted when no candidate
    *         survived the `startMask` filtering
    */
  def getFirstOneWithFlag(mask: Vec[Bool], startMask: UInt, startFlag: Bool) = {
    val aboveStart = VecInit(mask.zipWithIndex.map { case (bit, idx) => bit && !startMask(idx) }).asUInt
    // No candidate outside startMask: take the first bit of the whole mask and flip the flag.
    val wrapped = !aboveStart.orR()
    val firstIdx = PriorityEncoder(Mux(wrapped, mask.asUInt, aboveStart))
    LqPtr(startFlag ^ wrapped, firstIdx)
  }

338 339 340 341
  /**
    * Chooses one of exactly two micro-ops.
    *
    * When both are valid, `uop(1)` is chosen if `uop(0)` is after it in roq order, else `uop(0)`.
    * When only `uop(0)` is valid it is chosen; in every other case `uop(1)` is returned
    * (including when neither is valid, so callers must qualify the result themselves).
    *
    * @param valid two valid bits, parallel to `uop`
    * @param uop   two candidate micro-ops
    */
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    val secondIsOlder = isAfter(uop(0).roqIdx, uop(1).roqIdx)
    val olderOfBoth = Mux(secondIsOlder, uop(1), uop(0))
    val onlyFirst = valid(0) && !valid(1)
    Mux(valid(0) && valid(1), olderOfBoth, Mux(onlyFirst, uop(0), uop(1)))
  }

  /**
    * Builds a pairwise ordering matrix over the candidates.
    *
    * Element `(i)(j)` is `isAfter(uop(i).roqIdx, uop(j).roqIdx)` when both candidates are valid.
    * Otherwise it is simply `!valid(i)`, so an invalid candidate `i` counts as "after" every `j`.
    *
    * @param valid valid bit per candidate
    * @param uop   micro-op per candidate, parallel to `valid`
    * @return matrix indexed as `result(i)(j)`
    */
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val indices = 0 until valid.length
    indices.map { i =>
      indices.map { j =>
        Mux(valid(i) && valid(j), isAfter(uop(i).roqIdx, uop(j).roqIdx), !valid(i))
      }
    }
  }
W
William Wang 已提交
357

358 359 360
  /**
    * Bit mask derived from two queue pointers.
    *
    * For each pointer a "low ones" mask (bits below `ptr.value` set) is built; the two are XORed.
    * The XOR is returned as-is when the pointers' flags agree and inverted when they differ.
    *
    * @param start first pointer
    * @param end   second pointer
    * @return mask of width `LoadQueueSize` (same width after inversion)
    */
  def rangeMask(start: LqPtr, end: LqPtr): UInt = {
    // (1 << value) - 1, computed one bit wider than the queue and then truncated.
    def lowOnes(ptr: LqPtr): UInt = {
      val wide = (1.U((LoadQueueSize + 1).W) << ptr.value).asUInt - 1.U
      wide(LoadQueueSize - 1, 0)
    }
    val between = lowOnes(start) ^ lowOnes(end)
    Mux(start.flag === end.flag, between, ~between)
  }

365 366 367 368 369 370
  // ignore data forward
  (0 until LoadPipelineWidth).foreach(i => {
    io.forward(i).forwardMask := DontCare
    io.forward(i).forwardData := DontCare
  })

W
William Wang 已提交
371
  // store backward query and rollback
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
  /**
    * Store-to-load ordering check for store pipeline `i`.
    *
    * Three sources of possibly-violating loads are compared against the store on
    * `io.storeIn(i)`, all on the 8-byte-aligned physical address (`paddr(PAddrBits - 1, 3)`)
    * plus an overlapping byte mask:
    *   - entries already in the load queue (`lqViolation`),
    *   - loads arriving on `io.loadIn` in the same cycle (`wbViolation`),
    *   - loads seen on the `io.forward` query ports (`l1Violation`).
    * Each comparison vector is registered with `RegNext`, so the result refers to the store
    * seen in the previous cycle.
    *
    * @param i index of the store pipeline to check
    * @return `(valid, uop)`: `valid` is high when the previous cycle's store was valid and at
    *         least one check hit; `uop` is the micro-op selected among the three candidates
    *         using the `getAfterMask` ordering matrix
    */
  def detectRollback(i: Int) = {
    // Mask of queue entries to examine, built from the store's lqIdx and the enqueue pointer.
    // NOTE(review): presumably this covers the entries from the store's lqIdx up to enqPtr,
    // i.e. loads younger than the store - confirm against UIntToMask.
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      val addrMatch = allocated(j) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
      // Only entries that have data, or have a miss/refill in progress, are considered.
      val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
      // TODO: update refilled data
      // Per-byte overlap between the load's mask and the store's mask.
      val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
      Cat(violationVec).orR() && entryNeedCheck
    })))
    val lqViolation = lqViolationVec.asUInt().orR()
    // The start mask is delayed one cycle so it lines up with the registered violation vector.
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.forward(j).valid && // L1 valid
        isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.forward(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.forward.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    // Candidate order: 0 = load queue, 1 = writeback, 2 = l1.
    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    // mask(a)(b) is true when candidate a is after candidate b, or when a is invalid.
    // Candidate 0 wins when both 1 and 2 are after it; otherwise 1 wins when 2 is after it;
    // otherwise candidate 2 is taken.
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    // The store's valid bit is delayed to match the registered violation vectors.
    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }
W
William Wang 已提交
444

445 446 447 448 449 450 451 452 453
  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  /**
    * Reduction step used to merge rollback requests from the store pipelines.
    *
    * @param a first request
    * @param b second request
    * @return the older of the two (by `roqIdx`) when both are valid; `a` when only `a` is
    *         valid; `b` otherwise (which also covers the case where neither is valid)
    */
  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    val bothValid = a.valid && b.valid
    // a,b both valid, sel oldest
    val olderOfBoth = Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a)
    // otherwise fall back to whichever one is valid, preferring a
    val singleValid = Mux(a.valid, a, b)
    Mux(bothValid, olderOfBoth, singleValid)
  }

465 466 467 468 469 470 471 472 473 474 475 476 477
  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  io.rollback := DontCare
  io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || isAfter(lastCycleRedirect.bits.roqIdx, rollbackSelected.bits.roqIdx))

  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U
  io.rollback.bits.isReplay := true.B
  io.rollback.bits.isMisPred := false.B
  io.rollback.bits.isException := false.B
  io.rollback.bits.isFlushPipe := false.B
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag
W
William Wang 已提交
478

W
William Wang 已提交
479 480
  // Memory mapped IO / other uncached operations

481
  // setup misc mem access req
W
William Wang 已提交
482
  // mask / paddr / data can be get from lq.data
Y
Yinan Xu 已提交
483
  val commitType = io.commits.uop(0).ctrl.commitType
484
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
485
    commitType === CommitType.LOAD &&
486
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
Y
Yinan Xu 已提交
487
    !io.commits.isWalk
488

489
  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
490 491 492
  io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
  io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
493

494
  io.uncache.req.bits.meta.id       := DontCare // TODO: // FIXME
495
  io.uncache.req.bits.meta.vaddr    := DontCare
496 497 498
  io.uncache.req.bits.meta.paddr    := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B // dataModule.io.rdata(deqPtr).mmio
499
  io.uncache.req.bits.meta.tlb_miss := false.B
500
  io.uncache.req.bits.meta.mask     := dataModule.io.rdata(deqPtr).mask
501 502 503 504
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

505
  when (io.uncache.req.fire()) {
506
    pending(deqPtr) := false.B
507
  }
W
William Wang 已提交
508

W
William Wang 已提交
509
  dataModule.io.uncache.wen := false.B
510
  when(io.uncache.resp.fire()){
511
    datavalid(deqPtr) := true.B
512
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
W
William Wang 已提交
513
    dataModule.io.uncache.wen := true.B
514 515
    // TODO: write back exception info
  }
W
William Wang 已提交
516

517
  when(io.uncache.req.fire()){
L
linjiawei 已提交
518
    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
519
      uop(deqPtr).cf.pc,
L
linjiawei 已提交
520 521 522 523 524
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
525 526 527
  }

  // Trace uncache (MMIO) responses. The data printed must come from the uncache response
  // channel itself; the dcache refill channel is unrelated to this handshake.
  when(io.uncache.resp.fire()){
    XSDebug("uncache resp: data %x\n", io.uncache.resp.bits.data)
  }

W
William Wang 已提交
531
  // Read vaddr for mem exception
W
William Wang 已提交
532
  io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr
W
William Wang 已提交
533

W
William Wang 已提交
534
  // misprediction recovery / exception redirect
W
William Wang 已提交
535 536 537
  // invalidate lq term using robIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
Y
Yinan Xu 已提交
538
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
539
    when(needCancel(i)) {
Y
Yinan Xu 已提交
540 541 542 543 544 545 546
      // when(io.brqRedirect.bits.isReplay){
      //   valid(i) := false.B
      //   writebacked(i) := false.B
      //   listening(i) := false.B
      //   miss(i) := false.B
      //   pending(i) := false.B
      // }.otherwise{
W
William Wang 已提交
547
        allocated(i) := false.B
Y
Yinan Xu 已提交
548
      // }
W
William Wang 已提交
549
    }
550 551
  }
  when (io.brqRedirect.valid && io.brqRedirect.bits.isMisPred) {
552
    enqPtrExt := enqPtrExt - PopCount(needCancel)
553
  }
W
William Wang 已提交
554

W
William Wang 已提交
555
  // assert(!io.rollback.valid)
L
LinJiawei 已提交
556
  // Trace memory-ordering rollbacks. io.rollback.bits.pc is never driven in this module
  // (io.rollback is set to DontCare and only selected fields are assigned), so the pc of the
  // selected load is printed instead; it is the same value that drives io.rollback.bits.target.
  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", rollbackSelected.bits.cf.pc, io.rollback.bits.roqIdx.asUInt)
  }
W
William Wang 已提交
559 560

  // debug info
561
  XSDebug("head %d:%d tail %d:%d\n", enqPtrExt.flag, enqPtr, deqPtrExt.flag, deqPtr)
W
William Wang 已提交
562 563

  /**
    * Debug helper: prints `name` (without log prefix) when `flag` is high and a single space
    * when it is low, so the per-entry status columns stay aligned.
    *
    * @param flag condition selecting which text is printed
    * @param name text printed when `flag` is high
    */
  def PrintFlag(flag: Bool, name: String): Unit = {
    // The two conditions are mutually exclusive, so exactly one of the prints fires.
    XSDebug(false, flag, name)
    XSDebug(false, !flag, " ")
  }

W
William Wang 已提交
571
  for (i <- 0 until LoadQueueSize) {
L
LinJiawei 已提交
572
    if (i % 4 == 0) XSDebug("")
W
William Wang 已提交
573
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
W
William Wang 已提交
574
    PrintFlag(allocated(i), "a")
575
    PrintFlag(allocated(i) && datavalid(i), "v")
W
William Wang 已提交
576
    PrintFlag(allocated(i) && writebacked(i), "w")
577
    PrintFlag(allocated(i) && commited(i), "c")
W
William Wang 已提交
578 579
    PrintFlag(allocated(i) && miss(i), "m")
    PrintFlag(allocated(i) && listening(i), "l")
W
William Wang 已提交
580
    PrintFlag(allocated(i) && pending(i), "p")
W
William Wang 已提交
581
    XSDebug(false, true.B, " ")
582
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
W
William Wang 已提交
583
  }
W
William Wang 已提交
584

W
William Wang 已提交
585
}