LoadQueue.scala 23.3 KB
Newer Older
Y
Yinan Xu 已提交
1
package xiangshan.mem
W
William Wang 已提交
2 3 4

import chisel3._
import chisel3.util._
5
import utils._
Y
Yinan Xu 已提交
6 7
import xiangshan._
import xiangshan.cache._
8
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
9
import xiangshan.backend.LSUOpType
10
import xiangshan.mem._
Y
Yinan Xu 已提交
11
import xiangshan.backend.roq.RoqPtr
L
linjiawei 已提交
12
import xiangshan.backend.fu.fpu.boxF32ToF64
W
William Wang 已提交
13 14


15
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }
16 17 18 19 20 21 22 23

object LqPtr extends HasXSParameter {
  /** Builds a load queue pointer wire from an explicit flag bit and index value.
    *
    * @param f value driven onto the pointer's `flag` field
    * @param v value driven onto the pointer's `value` field
    * @return a freshly created `LqPtr` wire carrying both fields
    */
  def apply(f: Bool, v: UInt): LqPtr = {
    val result = Wire(new LqPtr)
    result.value := v
    result.flag := f
    result
  }
}

26

27
// Load Queue
28
class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
W
William Wang 已提交
29
  val io = IO(new Bundle() {
30 31 32 33 34
    val enq = new Bundle() {
      val canAccept = Output(Bool())
      val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
      val resp = Vec(RenameWidth, Output(new LqPtr))
    }
W
William Wang 已提交
35
    val brqRedirect = Input(Valid(new Redirect))
W
William Wang 已提交
36
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
37
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
38
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
W
William Wang 已提交
39
    val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
Y
Yinan Xu 已提交
40
    val commits = Flipped(new RoqCommitIO)
41
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
42 43
    val dcache = new DCacheLineIO
    val uncache = new DCacheWordIO
Y
Yinan Xu 已提交
44
    val roqDeqPtr = Input(new RoqPtr)
45
    val exceptionAddr = new ExceptionAddrIO
46
    // val refill = Flipped(Valid(new DCacheLineReq ))
W
William Wang 已提交
47
  })
48

W
William Wang 已提交
49
  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
W
William Wang 已提交
50
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
W
William Wang 已提交
51
  val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
W
William Wang 已提交
52
  dataModule.io := DontCare
W
William Wang 已提交
53
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
54
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
W
William Wang 已提交
55 56 57 58 59
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been committed; NOTE(review): in the visible code this flag is only cleared at enqueue and read by the redirect cancel logic -- confirm where it is set
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq
60

61 62 63 64 65 66 67 68
  val enqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val enqPtr = enqPtrExt.value
  val deqPtr = deqPtrExt.value
  val sameFlag = enqPtrExt.flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull
69

Y
Yinan Xu 已提交
70 71
  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).lqIdx.value)
72

73 74 75
  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)
  val enqDeqMask1 = deqMask ^ enqMask
76
  val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
77

W
William Wang 已提交
78
  // Enqueue at dispatch
79
  val validEntries = distanceBetween(enqPtrExt, deqPtrExt)
80 81
  val firedDispatch = io.enq.req.map(_.valid)
  io.enq.canAccept := validEntries <= (LoadQueueSize - RenameWidth).U
Y
Yinan Xu 已提交
82
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
W
William Wang 已提交
83
  for (i <- 0 until RenameWidth) {
84
    val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
85
    val lqIdx = enqPtrExt + offset
86
    val index = lqIdx.value
87 88
    when(io.enq.req(i).valid) {
      uop(index) := io.enq.req(i).bits
Y
Yinan Xu 已提交
89
      allocated(index) := true.B
90
      datavalid(index) := false.B
Y
Yinan Xu 已提交
91 92 93 94 95
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      listening(index) := false.B
      pending(index) := false.B
W
William Wang 已提交
96
    }
97 98
    io.enq.resp(i) := lqIdx

Y
Yinan Xu 已提交
99
    XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not valid when not ready\n")
W
William Wang 已提交
100 101
  }

102
  when(Cat(firedDispatch).orR) {
103
    enqPtrExt := enqPtrExt + PopCount(firedDispatch)
W
William Wang 已提交
104
    XSInfo("dispatched %d insts to lq\n", PopCount(firedDispatch))
W
William Wang 已提交
105 106 107 108
  }

  // writeback load
  (0 until LoadPipelineWidth).map(i => {
W
William Wang 已提交
109
    dataModule.io.wb(i).wen := false.B
L
LinJiawei 已提交
110 111
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
W
William Wang 已提交
112
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
113
          io.loadIn(i).bits.uop.lqIdx.asUInt,
W
William Wang 已提交
114 115 116 117
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
118 119 120
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
W
William Wang 已提交
121
          io.loadIn(i).bits.mmio,
122 123 124 125
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }.otherwise {
126
          XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
127
          io.loadIn(i).bits.uop.lqIdx.asUInt,
128 129 130 131
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
132 133 134
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
135
          io.loadIn(i).bits.mmio,
136 137 138 139
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }
140
        val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
141
        datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
142
        writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
143

W
William Wang 已提交
144
        val loadWbData = Wire(new LsqEntry)
W
William Wang 已提交
145 146 147 148 149 150 151 152 153 154 155
        loadWbData.paddr := io.loadIn(i).bits.paddr
        loadWbData.vaddr := io.loadIn(i).bits.vaddr
        loadWbData.mask := io.loadIn(i).bits.mask
        loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug
        loadWbData.mmio := io.loadIn(i).bits.mmio
        loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
        loadWbData.fwdData := io.loadIn(i).bits.forwardData
        loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
        dataModule.io.wb(i).wen := true.B

156
        val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
157
        miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
158
        listening(loadWbIndex) := dcacheMissed
159
        pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
160
      }
161
    })
W
William Wang 已提交
162

W
William Wang 已提交
163
  // cache miss request
164 165 166 167
  val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)

W
William Wang 已提交
168
  val missRefillSelVec = VecInit(
W
William Wang 已提交
169
    (0 until LoadQueueSize).map{ i =>
W
William Wang 已提交
170
      val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
171 172 173
      allocated(i) && miss(i) && !inflight
    })

174
  val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
W
William Wang 已提交
175
  val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
176 177
  io.dcache.req.valid := missRefillSelVec.asUInt.orR
  io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
178
  io.dcache.req.bits.addr := missRefillBlockAddr
179
  io.dcache.req.bits.data := DontCare
180
  io.dcache.req.bits.mask := DontCare
181

W
William Wang 已提交
182 183
  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
184
  io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
185
  io.dcache.req.bits.meta.uop      := uop(missRefillSel)
W
William Wang 已提交
186
  io.dcache.req.bits.meta.mmio     := false.B // dataModule.io.rdata(missRefillSel).mmio
187
  io.dcache.req.bits.meta.tlb_miss := false.B
188
  io.dcache.req.bits.meta.mask     := DontCare
189 190 191 192
  io.dcache.req.bits.meta.replay   := false.B

  io.dcache.resp.ready := true.B

W
William Wang 已提交
193
  assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))
194 195

  when(io.dcache.req.fire()) {
W
William Wang 已提交
196 197
    miss(missRefillSel) := false.B
    listening(missRefillSel) := true.B
198 199 200 201 202

    // mark this block as inflight
    inflightReqs(reqBlockIndex).valid := true.B
    inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
    assert(!inflightReqs(reqBlockIndex).valid)
W
William Wang 已提交
203 204
  }

205 206 207 208 209 210 211 212 213 214 215
  when(io.dcache.resp.fire()) {
    val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
    assert(inflight)
    for (i <- 0 until cfg.nLoadMissEntries) {
      when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
        inflightReqs(i).valid := false.B
      }
    }
  }


216
  when(io.dcache.req.fire()){
217
    XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
218
      io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
Y
Yinan Xu 已提交
219 220
      io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
    )
221 222 223
  }

  when(io.dcache.resp.fire()){
224
    XSDebug("miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n",
225
      io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx.asUInt, io.dcache.resp.bits.meta.uop.lqIdx.asUInt,
Y
Yinan Xu 已提交
226
      io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data
227
    )
228 229
  }

230
  // Refill 64 bit in a cycle
231
  // Refill data comes back from io.dcache.resp
W
William Wang 已提交
232
  dataModule.io.refill.dcache := io.dcache.resp.bits
W
William Wang 已提交
233

W
William Wang 已提交
234
  // Wake up every allocated entry that is listening for the block carried by the dcache response.
  // Both sides of the comparison go through get_block_addr, matching the way the in-flight
  // request table compares against io.dcache.resp.bits.meta.paddr, so the match does not rely
  // on the response echoing an already block-aligned address.
  (0 until LoadQueueSize).foreach(i => {
    val entryBlockAddr = get_block_addr(dataModule.io.rdata(i).paddr)
    val respBlockAddr = get_block_addr(io.dcache.resp.bits.meta.paddr)
    val blockMatch = entryBlockAddr === respBlockAddr
    // Default: no refill write for this entry in this cycle.
    dataModule.io.refill.wen(i) := false.B
    when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
      dataModule.io.refill.wen(i) := true.B
      datavalid(i) := true.B
      listening(i) := false.B
    }
  })
W
William Wang 已提交
243 244 245

  // writeback up to 2 missed load insts to CDB
  // just randomly pick 2 missed load (data refilled), write them back to cdb
W
William Wang 已提交
246
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
247
    allocated(i) && datavalid(i) && !writebacked(i)
L
LinJiawei 已提交
248
  })).asUInt() // use uint instead vec to reduce verilog lines
W
William Wang 已提交
249
  val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
250
  val loadWbSelV= Wire(Vec(StorePipelineWidth, Bool()))
L
LinJiawei 已提交
251 252 253
  val lselvec0 = PriorityEncoderOH(loadWbSelVec)
  val lselvec1 = PriorityEncoderOH(loadWbSelVec & (~lselvec0).asUInt)
  loadWbSel(0) := OHToUInt(lselvec0)
254
  loadWbSelV(0):= lselvec0.orR
L
LinJiawei 已提交
255
  loadWbSel(1) := OHToUInt(lselvec1)
256
  loadWbSelV(1) := lselvec1.orR
W
William Wang 已提交
257
  (0 until StorePipelineWidth).map(i => {
258
    // data select
W
William Wang 已提交
259
    val rdata = dataModule.io.rdata(loadWbSel(i)).data
260
    val func = uop(loadWbSel(i)).ctrl.fuOpType
W
William Wang 已提交
261
    val raddr = dataModule.io.rdata(loadWbSel(i)).paddr
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = LookupTree(func, List(
        LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
        LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
        LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
        LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
        LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
L
LinJiawei 已提交
279 280
        LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN),
        LSUOpType.flw  -> boxF32ToF64(rdataSel(31, 0))
281
    ))
W
William Wang 已提交
282
    io.ldout(i).bits.uop := uop(loadWbSel(i))
W
William Wang 已提交
283
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools
284
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
285
    io.ldout(i).bits.data := rdataPartialLoad
W
William Wang 已提交
286 287 288
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
W
William Wang 已提交
289
    io.ldout(i).bits.debug.isMMIO := dataModule.io.rdata(loadWbSel(i)).mmio
L
LinJiawei 已提交
290
    io.ldout(i).bits.fflags := DontCare
291
    io.ldout(i).valid := loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)
L
LinJiawei 已提交
292
    when(io.ldout(i).fire()) {
293
      writebacked(loadWbSel(i)) := true.B
294
      XSInfo("load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
295
        io.ldout(i).bits.uop.roqIdx.asUInt,
296
        io.ldout(i).bits.uop.lqIdx.asUInt,
297
        io.ldout(i).bits.uop.cf.pc,
W
William Wang 已提交
298 299 300
        dataModule.io.rdata(loadWbSel(i)).paddr,
        dataModule.io.rdata(loadWbSel(i)).data,
        dataModule.io.rdata(loadWbSel(i)).mmio
301
      )
W
William Wang 已提交
302 303 304
    }
  })

305 306 307 308 309 310 311
  // Once a load commits, clear its allocated bit so the entry can be reused later.
  for (i <- 0 until CommitWidth) {
    val commitIdx = mcommitIdx(i)
    when(loadCommit(i)) {
      allocated(commitIdx) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, commitIdx, uop(commitIdx).cf.pc)
    }
  }
312
  deqPtrExt := deqPtrExt + PopCount(loadCommit)
313

314
  /** Picks the index of one set bit in `mask`, preferring positions whose `startMask` bit is clear.
    *
    * Bits of `mask` whose corresponding `startMask` bit is 0 are searched first (lowest index
    * wins). If none of those is set, the search falls back to the whole `mask`.
    *
    * @param mask      candidate bits, one per queue entry
    * @param startMask bits that mark the positions to be skipped on the first pass
    * @return index chosen by the priority encoder
    */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val preferred = VecInit(mask.zipWithIndex.map { case (bit, idx) => bit && !startMask(idx) }).asUInt
    val candidates = Mux(preferred.orR(), preferred, mask.asUInt)
    PriorityEncoder(candidates)
  }
W
William Wang 已提交
320

321
  /** Same search as `getFirstOne`, but returns a full `LqPtr` including the flag bit.
    *
    * When no candidate is found among the positions whose `startMask` bit is clear, the search
    * falls back to the whole `mask` and the returned flag is `startFlag` inverted.
    *
    * @param mask      candidate bits, one per queue entry
    * @param startMask bits that mark the positions to be skipped on the first pass
    * @param startFlag flag bit to use when the result comes from the first pass
    * @return pointer holding the selected index and the matching flag
    */
  def getFirstOneWithFlag(mask: Vec[Bool], startMask: UInt, startFlag: Bool) = {
    val preferred = VecInit(mask.zipWithIndex.map { case (bit, idx) => bit && !startMask(idx) }).asUInt
    val fellBack = !preferred.orR()
    val selected = PriorityEncoder(Mux(fellBack, mask.asUInt, preferred))
    LqPtr(startFlag ^ fellBack, selected)
  }

330 331 332 333
  /** Selects the older of exactly two micro-ops, taking their valid bits into account.
    *
    * - both valid: the one that is not "after" the other by `roqIdx`
    * - only the first valid: `uop(0)`
    * - otherwise (only the second valid, or neither): `uop(1)`
    *
    * @param valid valid bits, must have length 2
    * @param uop   micro-ops paired with `valid`, must have length 2
    */
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    val bothValid = valid(0) && valid(1)
    val onlyFirstValid = valid(0) && !valid(1)
    val firstIsYounger = isAfter(uop(0).roqIdx, uop(1).roqIdx)
    val olderOfBoth = Mux(firstIsYounger, uop(1), uop(0))
    val singleOrNone = Mux(onlyFirstValid, uop(0), uop(1))
    Mux(bothValid, olderOfBoth, singleOrNone)
  }

  /** Builds a pairwise ordering matrix over the given micro-ops.
    *
    * Element `(i)(j)` is:
    * - `isAfter(uop(i).roqIdx, uop(j).roqIdx)` when both `i` and `j` are valid
    * - `!valid(i)` otherwise (an invalid `i` counts as "after" everything)
    *
    * @param valid valid bits, same length as `uop`
    * @param uop   micro-ops to compare by `roqIdx`
    * @return matrix indexed as `result(i)(j)`
    */
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    valid.indices.map { row =>
      valid.indices.map { col =>
        val bothValid = valid(row) && valid(col)
        Mux(bothValid, isAfter(uop(row).roqIdx, uop(col).roqIdx), !valid(row))
      }
    }
  }
W
William Wang 已提交
349

350 351 352
  /** Computes a `LoadQueueSize`-bit mask of the entries lying between two queue pointers.
    *
    * Each pointer is turned into a mask with all bits below its `value` set. The two masks are
    * XORed, and the result is inverted when the pointers' flags differ.
    *
    * @param start first pointer
    * @param end   second pointer
    * @return bit mask over the load queue entries
    */
  def rangeMask(start: LqPtr, end: LqPtr): UInt = {
    // All bits strictly below ptr.value set, truncated to the queue size.
    def lowOnes(ptr: LqPtr): UInt = {
      val oneHot = (1.U((LoadQueueSize + 1).W) << ptr.value).asUInt
      (oneHot - 1.U)(LoadQueueSize - 1, 0)
    }
    val between = lowOnes(start) ^ lowOnes(end)
    val flagsAgree = start.flag === end.flag
    Mux(flagsAgree, between, ~between)
  }

357 358 359 360 361 362
  // ignore data forward
  (0 until LoadPipelineWidth).foreach(i => {
    io.forward(i).forwardMask := DontCare
    io.forward(i).forwardData := DontCare
  })

W
William Wang 已提交
363
  // store backward query and rollback
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
  /** Detects loads that must be replayed because of the store arriving on `io.storeIn(i)`.
    *
    * Three sources of candidate loads are checked against the store; each check result is
    * registered with `RegNext`, so the outcome is produced one cycle after the store is seen:
    *  1. entries already held in the load queue,
    *  2. loads arriving on `io.loadIn` in the same cycle as the store,
    *  3. loads presenting a query on `io.forward` in the same cycle as the store.
    *
    * Addresses are compared on bits `PAddrBits - 1` down to 3, and a violation additionally
    * requires the byte masks of store and load to overlap.
    *
    * NOTE(review): sources 2 and 3 go through `getOldestInTwo`, which asserts a length of 2,
    * so this presumably requires `LoadPipelineWidth == 2` -- confirm.
    *
    * @param i index of the store pipeline port to check
    * @return a pair `(valid, uop)`: `valid` is high when the store was valid in the previous
    *         cycle and at least one violation was found; `uop` is the selected violating load
    */
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    // NOTE: this local sameFlag shadows the class-level sameFlag (enq vs. deq pointer flags).
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
    // Presumably selects the entries from the store's lqIdx up to the enqueue pointer,
    // inverted when the two pointers carry different flags -- depends on UIntToMask; confirm.
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      val addrMatch = allocated(j) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
      // Only entries that have data, or are waiting on a miss/refill, are checked.
      val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
      // TODO: update refilled data
      // Byte-wise overlap between the entry's mask and the store's mask.
      val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
      Cat(violationVec).orR() && entryNeedCheck
    })))
    val lqViolation = lqViolationVec.asUInt().orR()
    // The start mask is delayed by one cycle to line up with the registered violation vector.
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    // The uops are registered as well so they match the registered violation bits.
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.forward(j).valid && // L1 valid
        isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.forward(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.forward.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    // Candidate order: 0 = load queue entry, 1 = same-cycle writeback, 2 = forward query.
    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    // mask(a)(b) is true when candidate a is after candidate b, or when a is not valid.
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    // Choose candidate 0 if both 1 and 2 are after it (or invalid); else candidate 1 if it is
    // valid, not after 0, and 2 is after it (or invalid); else candidate 2.
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    // The store's valid bit is delayed by one cycle to match the registered violation results.
    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }
W
William Wang 已提交
436

437 438 439 440 441 442 443 444 445
  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  /** Reduction step that merges two rollback candidates into one.
    *
    * - only one of them valid: that one is returned
    * - both valid: the older one by `roqIdx` is returned
    * - neither valid: `b` is returned (its valid bit is low)
    *
    * @param a first candidate
    * @param b second candidate
    * @return the selected candidate, including its valid bit
    */
  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    val aIsYounger = isAfter(a.bits.roqIdx, b.bits.roqIdx)
    // b wins when a is not valid, or when both are valid and a is the younger one.
    val takeB = !a.valid || (b.valid && aIsYounger)
    Mux(takeB, b, a)
  }

457 458 459 460
  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  io.rollback := DontCare
461 462 463
  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx))
464 465 466 467 468 469 470 471

  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U
  io.rollback.bits.isReplay := true.B
  io.rollback.bits.isMisPred := false.B
  io.rollback.bits.isException := false.B
  io.rollback.bits.isFlushPipe := false.B
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag
W
William Wang 已提交
472

W
William Wang 已提交
473 474
  // Memory mapped IO / other uncached operations

475
  // setup misc mem access req
W
William Wang 已提交
476
  // mask / paddr / data can be get from lq.data
Y
Yinan Xu 已提交
477
  val commitType = io.commits.uop(0).ctrl.commitType
478
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
479
    commitType === CommitType.LOAD &&
480
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
Y
Yinan Xu 已提交
481
    !io.commits.isWalk
482

483
  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
484 485 486
  io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
  io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
487

488
  io.uncache.req.bits.meta.id       := DontCare // TODO: // FIXME
489
  io.uncache.req.bits.meta.vaddr    := DontCare
490 491 492
  io.uncache.req.bits.meta.paddr    := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B // dataModule.io.rdata(deqPtr).mmio
493
  io.uncache.req.bits.meta.tlb_miss := false.B
494
  io.uncache.req.bits.meta.mask     := dataModule.io.rdata(deqPtr).mask
495 496 497 498
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

499
  when (io.uncache.req.fire()) {
500
    pending(deqPtr) := false.B
501
  }
W
William Wang 已提交
502

W
William Wang 已提交
503
  dataModule.io.uncache.wen := false.B
504
  when(io.uncache.resp.fire()){
505
    datavalid(deqPtr) := true.B
506
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
W
William Wang 已提交
507
    dataModule.io.uncache.wen := true.B
508 509
    // TODO: write back exception info
  }
W
William Wang 已提交
510

511
  when(io.uncache.req.fire()){
L
linjiawei 已提交
512
    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
513
      uop(deqPtr).cf.pc,
L
linjiawei 已提交
514 515 516 517 518
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
519 520 521
  }

  // Trace the data returned on the uncache response port. This must read
  // io.uncache.resp, not io.dcache.resp, because the guard is the uncache response handshake.
  when(io.uncache.resp.fire()){
    XSDebug("uncache resp: data %x\n", io.uncache.resp.bits.data)
  }

W
William Wang 已提交
525
  // Read vaddr for mem exception
W
William Wang 已提交
526
  io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr
W
William Wang 已提交
527

W
William Wang 已提交
528
  // misprediction recovery / exception redirect
W
William Wang 已提交
529 530 531
  // Invalidate load queue entries that are flushed by the incoming redirect (matched by roqIdx).
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  needCancel.zipWithIndex.foreach { case (cancel, idx) =>
    cancel := allocated(idx) && !commited(idx) && uop(idx).roqIdx.needFlush(io.brqRedirect)
    when(cancel) {
      allocated(idx) := false.B
    }
  }
  // The cancel vector is registered so the pointer fix-up below happens one cycle later.
  val needCancelReg = RegNext(needCancel)
  // Cycle of the redirect: hold the enqueue pointer (this later connection takes priority
  // over the dispatch-time update of enqPtrExt).
  when (io.brqRedirect.valid) {
    enqPtrExt := enqPtrExt
  }
  // Cycle after the redirect: move the enqueue pointer back by the number of cancelled entries.
  when (lastCycleRedirect.valid) {
    enqPtrExt := enqPtrExt - PopCount(needCancelReg)
  }
W
William Wang 已提交
544

W
William Wang 已提交
545
  // assert(!io.rollback.valid)
L
LinJiawei 已提交
546
  when(io.rollback.valid) {
Y
Yinan Xu 已提交
547
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
W
William Wang 已提交
548
  }
W
William Wang 已提交
549 550

  // debug info
551
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt.flag, enqPtr, deqPtrExt.flag, deqPtr)
W
William Wang 已提交
552 553

  /** Debug helper: prints `name` when `flag` is high and a single space otherwise, without
    * the log prefix, so the per-entry flag columns stay aligned.
    *
    * @param flag condition selecting which text is printed
    * @param name text printed when `flag` is high
    */
  def PrintFlag(flag: Bool, name: String): Unit = {
    XSDebug(false, flag, name)
    XSDebug(false, !flag, " ")
  }

W
William Wang 已提交
561
  for (i <- 0 until LoadQueueSize) {
L
LinJiawei 已提交
562
    if (i % 4 == 0) XSDebug("")
W
William Wang 已提交
563
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
W
William Wang 已提交
564
    PrintFlag(allocated(i), "a")
565
    PrintFlag(allocated(i) && datavalid(i), "v")
W
William Wang 已提交
566
    PrintFlag(allocated(i) && writebacked(i), "w")
567
    PrintFlag(allocated(i) && commited(i), "c")
W
William Wang 已提交
568 569
    PrintFlag(allocated(i) && miss(i), "m")
    PrintFlag(allocated(i) && listening(i), "l")
W
William Wang 已提交
570
    PrintFlag(allocated(i) && pending(i), "p")
W
William Wang 已提交
571
    XSDebug(false, true.B, " ")
572
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
W
William Wang 已提交
573
  }
W
William Wang 已提交
574

W
William Wang 已提交
575
}