package xiangshan.mem

import chisel3._
import chisel3.util._

import freechips.rocketchip.tile.HasFPUParameters

import utils._
import xiangshan._
import xiangshan.backend.LSUOpType
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.backend.roq.RoqPtr
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.mem._

/** Circular pointer into the LoadQueue: an index (`value`) plus a wrap
  * flag (`flag`) so full/empty can be distinguished when the indexes match.
  */
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }

object LqPtr extends HasXSParameter {
  /** Build an LqPtr from an explicit wrap flag and index value. */
  def apply(f: Bool, v: UInt): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

trait HasLoadHelper { this: XSModule =>
  /** Sign/zero-extend raw load data to XLEN according to the uop's fuOpType.
    * For `lw`/`ld` with `fpWen` set, the raw data is passed through unchanged
    * (fp recoding is done separately by `fpRdataHelper`).
    */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN)
    ))
  }

  /** Recode IEEE-754 load data (fp `lw`/`ld`) into the internal fp format
    * via `recode` from HasFPUParameters.
    */
  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
  }
}

/** Dispatch-stage enqueue interface of the LoadQueue. */
class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())                             // LoadQueue has room for this cycle's dispatch group
  val sqCanAccept = Input(Bool())                            // StoreQueue can also accept; enqueue fires only when both can
  val needAlloc = Vec(RenameWidth, Input(Bool()))            // which dispatch slots need a load-queue entry
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))  // uops to enqueue
  val resp = Vec(RenameWidth, Output(new LqPtr))             // allocated lq pointer for each slot
}
// Load Queue
//
// Tracks all in-flight load instructions between dispatch and commit:
// allocation at rename/dispatch, writeback from the load pipelines,
// dcache refill wakeup, store-load memory violation (rollback) detection,
// MMIO/uncached load handling at ROB head, and redirect recovery.
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasExceptionNO
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val commits = Flipped(new RoqCommitIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = Flipped(ValidIO(new Refill))
    val uncache = new DCacheWordIO
    val roqDeqPtr = Input(new RoqPtr)
    val exceptionAddr = new ExceptionAddrIO
  })

  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
  dataModule.io := DontCare
  val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
  vaddrModule.io := DontCare

  // Per-entry status flags
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been committed
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst

  // Queue pointers: one enq pointer per rename slot, single deq pointer
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val deqPtrExtNext = Wire(new LqPtr)
  val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull

  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
    // offset = number of earlier slots that also need an lq entry
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !(io.brqRedirect.valid || io.flush)) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      // listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions writeback to regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb.wen(i) := false.B
    vaddrModule.io.wen(i) := false.B
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }.otherwise {
        XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }
      val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
      // A hit, non-mmio load has its data and can be written back directly
      datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

      val loadWbData = Wire(new LQDataEntry)
      loadWbData.paddr := io.loadIn(i).bits.paddr
      loadWbData.mask := io.loadIn(i).bits.mask
      loadWbData.data := io.loadIn(i).bits.data // fwd data
      loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
      dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
      dataModule.io.wb.wen(i) := true.B

      vaddrModule.io.waddr(i) := loadWbIndex
      vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
      vaddrModule.io.wen(i) := true.B

      debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio

      val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      miss(loadWbIndex) := dcacheMissed
      pending(loadWbIndex) := io.loadIn(i).bits.mmio
      uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
    }
  }

  when(io.dcache.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
  }

  // Refill 64 bit in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.valid := io.dcache.valid
  dataModule.io.refill.paddr := io.dcache.bits.addr
  dataModule.io.refill.data := io.dcache.bits.data

  // Wake up missed entries whose cacheline matches the refill
  (0 until LoadQueueSize).map(i => {
    dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
    when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
      datavalid(i) := true.B
      miss(i) := false.B
    }
  })

  // Writeback up to 2 missed load insts to CDB
  //
  // Pick 2 missed load (data refilled), write them back to cdb
  // 2 refilled load will be selected from even/odd entry, separately

  // Stage 0
  // Generate writeback indexes

  def getEvenBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i)})).asUInt
  }
  def getOddBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i+1)})).asUInt
  }

  val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W))) // index selected last cycle
  val loadWbSelV = Wire(Vec(LoadPipelineWidth, Bool())) // index selected in last cycle is valid

  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && !writebacked(i) && datavalid(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  val evenDeqMask = getEvenBits(deqMask)
  val oddDeqMask = getOddBits(deqMask)
  // generate lastCycleSelect mask
  val evenSelectMask = Mux(io.ldout(0).fire(), getEvenBits(UIntToOH(loadWbSel(0))), 0.U)
  val oddSelectMask = Mux(io.ldout(1).fire(), getOddBits(UIntToOH(loadWbSel(1))), 0.U)
  // generate real select vec: exclude entries already picked last cycle
  val loadEvenSelVec = getEvenBits(loadWbSelVec) & ~evenSelectMask
  val loadOddSelVec = getOddBits(loadWbSelVec) & ~oddSelectMask

  def toVec(a: UInt): Vec[Bool] = {
    VecInit(a.asBools)
  }

  val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
  // port 0 picks from even entries, port 1 from odd entries (LSB re-appended)
  loadWbSelGen(0) := Cat(getFirstOne(toVec(loadEvenSelVec), evenDeqMask), 0.U(1.W))
  loadWbSelVGen(0) := loadEvenSelVec.asUInt.orR
  loadWbSelGen(1) := Cat(getFirstOne(toVec(loadOddSelVec), oddDeqMask), 1.U(1.W))
  loadWbSelVGen(1) := loadOddSelVec.asUInt.orR

  (0 until LoadPipelineWidth).map(i => {
    loadWbSel(i) := RegNext(loadWbSelGen(i))
    loadWbSelV(i) := RegNext(loadWbSelVGen(i), init = false.B)
    when(io.ldout(i).fire()){
      // Mark them as writebacked, so they will not be selected in the next cycle
      writebacked(loadWbSel(i)) := true.B
    }
  })

  // Stage 1
  // Use indexes generated in cycle 0 to read data
  // writeback data to cdb
  (0 until LoadPipelineWidth).map(i => {
    // data select
    dataModule.io.wb.raddr(i) := loadWbSelGen(i)
    val rdata = dataModule.io.wb.rdata(i).data
    val seluop = uop(loadWbSel(i))
    val func = seluop.ctrl.fuOpType
    val raddr = dataModule.io.wb.rdata(i).paddr
    // shift the 64-bit beat so the addressed byte is at bit 0
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    // writeback missed int/fp load
    //
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.debug.isPerfCnt := false.B
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelV(i)

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        debug_mmio(loadWbSel(i))
      )
    }

  })

  /**
    * Load commits
    *
    * When load commited, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

  /** Priority-pick the first set bit at or after startMask's boundary (circular search). */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  /** Of two (valid, uop) pairs, return the uop with the older roqIdx. */
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

  /** Pairwise "i is after j" matrix over valid uops (invalid i counts as after). */
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }

  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There're three possible types of violations. Choose the oldest load.
    *   Prepare redirect request according to the detected violation.
    * Cycle 2: Redirect Fire
    *   Fire redirect request (if valid)
    */
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
    dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
    val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask)
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
    })))
    val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    // choose the oldest of the three violation candidates
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }

  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)
  val lastCycleFlush = RegNext(io.flush)

  // S2: select rollback and generate rollback request
  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  val rollbackGen = Wire(Valid(new Redirect))
  val rollbackReg = Reg(Valid(new Redirect))
  rollbackGen.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !lastCycleFlush

  rollbackGen.bits.roqIdx := rollbackSelected.bits.roqIdx
  rollbackGen.bits.level := RedirectLevel.flush
  rollbackGen.bits.interrupt := DontCare
  rollbackGen.bits.pc := DontCare
  rollbackGen.bits.target := rollbackSelected.bits.cf.pc
  rollbackGen.bits.brTag := rollbackSelected.bits.brTag

  rollbackReg := rollbackGen

  // S3: fire rollback request
  io.rollback := rollbackReg
  io.rollback.valid := rollbackReg.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackReg.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !lastCycleFlush

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    * The uncached request is issued only when the mmio load reaches ROB head.
    */
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
    io.commits.info(0).commitType === CommitType.LOAD &&
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
    !io.commits.isWalk

  dataModule.io.uncache.raddr := deqPtrExtNext.value

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
  io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.uncache.rdata.mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.dcache.bits.data)
  }

  // Read vaddr for mem exception
  vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate lq term using robIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect, io.flush) && allocated(i) && !commited(i)
    when (needCancel(i)) {
        allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !(io.brqRedirect.valid || io.flush), PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid || lastCycleFlush) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  val commitCount = PopCount(loadCommit)
  deqPtrExtNext := deqPtrExt + commitCount
  deqPtrExt := deqPtrExtNext

  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
  validCounter := Mux(lastLastCycleRedirect,
    trueValidCounter,
    validCounter + enqNumber - commitCount
  )

  allowEnqueue := Mux(io.brqRedirect.valid || io.flush,
    false.B,
    Mux(lastLastCycleRedirect,
      trueValidCounter <= (LoadQueueSize - RenameWidth).U,
      validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
    )
  )

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    // PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}