package xiangshan.mem

import chisel3._
import chisel3.util._
import freechips.rocketchip.tile.HasFPUParameters
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.fu.HasExceptionNO

/** Circular pointer into the load queue: an index `value` plus a wrap `flag`. */
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize)
/** Companion constructors for [[LqPtr]]. */
object LqPtr extends HasXSParameter {
  /** Build a load-queue pointer from an explicit wrap flag and entry index. */
  def apply(f: Bool, v: UInt): LqPtr = {
    val result = Wire(new LqPtr)
    result.flag := f
    result.value := v
    result
  }
}

/** Helpers to shape raw load data (64 bits read from the queue / dcache)
  * into the value written back to the register file.
  */
trait HasLoadHelper { this: XSModule =>
  /** Sign/zero-extend `rdata` to XLEN according to `uop.ctrl.fuOpType`.
    * For lw/ld with `fpWen` set, the raw bits pass through unmodified.
    */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }

  /** Recode fp load data for lw/ld.
    * NOTE(review): `recode`/`S`/`D` presumably come from rocket-chip's
    * HasFPUParameters (hardfloat recoded format) — confirm against XSModule's mixins.
    */
  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
  }
}

/** Dispatch-side enqueue interface of the load queue. */
class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())                            // LQ can accept a rename group this cycle
  val sqCanAccept = Input(Bool())                           // store queue can also accept; enqueue requires both
  val needAlloc = Vec(RenameWidth, Input(Bool()))           // which rename slots need an LQ entry
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp))) // uops to enqueue
  val resp = Vec(RenameWidth, Output(new LqPtr))            // LQ pointer allocated to each slot
}
// Load Queue
// Tracks all in-flight loads from dispatch to commit: enqueue, writeback,
// dcache refill, store-load violation rollback and uncached (MMIO) access.
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasExceptionNO
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO                                                       // enqueue from dispatch
    val brqRedirect = Input(Valid(new Redirect))                                // redirect (mispredict/exception) in flight
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))   // writeback from load pipeline
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // store writeback, used for violation check
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))       // loads still in pipeline stage 1, also checked for violation
    val commits = Flipped(new RoqCommitIO)                                      // commit/walk info from ROB
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = Flipped(ValidIO(new Refill))                                   // refill data from dcache
    val uncache = new DCacheWordIO                                              // uncached (MMIO) request/response channel
    val roqDeqPtr = Input(new RoqPtr)                                           // ROB dequeue pointer, gates MMIO issue
    val exceptionAddr = new ExceptionAddrIO                                     // vaddr lookup for exception reporting
  })
  // One MicroOp per entry; written at enqueue, read at writeback/commit/rollback.
  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  // Payload storage (paddr / mask / data / forward mask) with per-pipeline ports.
  val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
  dataModule.io := DontCare
  // Virtual addresses kept separately; single read port serves exception queries.
  val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
  vaddrModule.io := DontCare
  // Per-entry state flags:
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been committed by ROB
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst (debug only)

  // Queue pointers: one enqueue pointer per rename slot, one dequeue pointer.
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val deqPtrExtNext = Wire(new LqPtr)
  val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value
  // Same wrap flag + equal indices => empty; different flags + equal indices => full.
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull

  // Commit slots that are committing (not walking) a LOAD this cycle, and their LQ indices.
  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)

  // Masks with all bits below the respective pointer set (used for age ordering).
  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)
  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
    // This slot's position among the slots that actually allocate decides
    // which pre-computed enqueue pointer it takes.
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    // Allocate only when both LQ and SQ accept and no redirect is in flight.
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      // listening(index) := false.B
      pending(index) := false.B
    }
    // Pointer is returned unconditionally; dispatch must qualify it with canAccept.
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions writeback to regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    // Default: no data/vaddr write this cycle (overridden below on fire).
    dataModule.io.wb.wen(i) := false.B
    vaddrModule.io.wen(i) := false.B
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }.otherwise {
        XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }
      val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
      // Hit, non-mmio loads become data-valid and writebacked immediately;
      // misses and mmio stay pending in the queue.
      datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

      // Store the load's payload (paddr/mask/data/forward mask) into the data module.
      val loadWbData = Wire(new LQDataEntry)
      loadWbData.paddr := io.loadIn(i).bits.paddr
      loadWbData.mask := io.loadIn(i).bits.mask
      loadWbData.data := io.loadIn(i).bits.data // fwd data
      loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
      dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
      dataModule.io.wb.wen(i) := true.B

      vaddrModule.io.waddr(i) := loadWbIndex
      vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
      vaddrModule.io.wen(i) := true.B

      debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio

      val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      miss(loadWbIndex) := dcacheMissed
      pending(loadWbIndex) := io.loadIn(i).bits.mmio
      uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
    }
  }
  when(io.dcache.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
  }

  // Refill 64 bit in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.valid := io.dcache.valid
  dataModule.io.refill.paddr := io.dcache.bits.addr
  dataModule.io.refill.data := io.dcache.bits.data

  // Entries that are allocated and missed are refill candidates; the data module's
  // matchMask (presumably a paddr match computed inside LoadQueueData — confirm there)
  // picks which candidates this refill satisfies.
  (0 until LoadQueueSize).map(i => {
    dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
    when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
      datavalid(i) := true.B
      miss(i) := false.B
    }
  })

  // Writeback up to 2 missed load insts to CDB
  //
  // Pick 2 missed load (data refilled), write them back to cdb
  // 2 refilled load will be selected from even/odd entry, separately

  // Stage 0
  // Generate writeback indexes

  // Extract the bits at even queue indices (entry 0, 2, 4, ...).
  def getEvenBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i)})).asUInt
  }
  // Extract the bits at odd queue indices (entry 1, 3, 5, ...).
  def getOddBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i+1)})).asUInt
  }

  val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W))) // index selected last cycle
  val loadWbSelV = Wire(Vec(LoadPipelineWidth, Bool())) // index selected in last cycle is valid

  // Candidates: allocated, data present, not yet written back.
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && !writebacked(i) && datavalid(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  val evenDeqMask = getEvenBits(deqMask)
  val oddDeqMask = getOddBits(deqMask)
  // generate lastCycleSelect mask: entries fired last cycle must not be re-selected
  // (their writebacked flag is only set this cycle).
  val evenSelectMask = Mux(io.ldout(0).fire(), getEvenBits(UIntToOH(loadWbSel(0))), 0.U)
  val oddSelectMask = Mux(io.ldout(1).fire(), getOddBits(UIntToOH(loadWbSel(1))), 0.U)
  // generate real select vec
  val loadEvenSelVec = getEvenBits(loadWbSelVec) & ~evenSelectMask
  val loadOddSelVec = getOddBits(loadWbSelVec) & ~oddSelectMask

  def toVec(a: UInt): Vec[Bool] = {
    VecInit(a.asBools)
  }

  // Port 0 serves even entries, port 1 odd entries; the LSB is re-appended
  // after the half-width priority selection.
  val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
  loadWbSelGen(0) := Cat(getFirstOne(toVec(loadEvenSelVec), evenDeqMask), 0.U(1.W))
  loadWbSelVGen(0):= loadEvenSelVec.asUInt.orR
  loadWbSelGen(1) := Cat(getFirstOne(toVec(loadOddSelVec), oddDeqMask), 1.U(1.W))
  loadWbSelVGen(1) := loadOddSelVec.asUInt.orR

  (0 until LoadPipelineWidth).map(i => {
    loadWbSel(i) := RegNext(loadWbSelGen(i))
    loadWbSelV(i) := RegNext(loadWbSelVGen(i), init = false.B)
    when(io.ldout(i).fire()){
      // Mark them as writebacked, so they will not be selected in the next cycle
      writebacked(loadWbSel(i)) := true.B
    }
  })

  // Stage 1
  // Use indexes generated in cycle 0 to read data
  // writeback data to cdb
  (0 until LoadPipelineWidth).map(i => {
    // data select: read address is set a cycle early (loadWbSelGen), the
    // registered index (loadWbSel) tags the result this cycle.
    dataModule.io.wb.raddr(i) := loadWbSelGen(i)
    val rdata = dataModule.io.wb.rdata(i).data
    val seluop = uop(loadWbSel(i))
    val func = seluop.ctrl.fuOpType
    val raddr = dataModule.io.wb.rdata(i).paddr
    // Shift the 64-bit line word right by the byte offset in paddr(2,0).
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    // writeback missed int/fp load
    //
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.debug.isPerfCnt := false.B
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelV(i)

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        debug_mmio(loadWbSel(i))
      )
    }

  })

  /**
    * Load commits
    *
    * When load commited, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

  /** Priority-encode the first set bit at or above the position encoded by
    * `startMask` (bits below the start are masked off); falls back to the
    * whole mask when nothing is set above — i.e. oldest-first in a circular queue.
    */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  /** Of two candidate uops, return the older one (by roqIdx); if only one is
    * valid, return it (uop(1) when neither is valid).
    */
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

  /** mask(i)(j) == uop(i) is younger than uop(j) (invalid i counts as younger). */
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }
  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There're three possible types of violations. Choose the oldest load.
    *   Prepare redirect request according to the detected violation.
    * Cycle 2: Redirect Fire
    *   Fire redirect request (if valid)
    */
  io.load_s1 := DontCare
  // Returns (rollback needed, oldest violating uop) for store pipeline `i`.
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    // Entries between the store's lqIdx and the enqueue pointer, i.e. loads
    // younger than the store.
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
    dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
    val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask)
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
    })))
    val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    // Choose the oldest among the three violation sources.
    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }
  // rollback check: one detector per store pipeline.
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  // Reduce two candidates to the older one (used by ParallelOperation below).
  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  // S2: select rollback and generate rollback request
  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  val rollbackGen = Wire(Valid(new Redirect))
  val rollbackReg = Reg(Valid(new Redirect))
  // Suppress the rollback if an older redirect was already issued last cycle.
  rollbackGen.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())

  rollbackGen.bits.roqIdx := rollbackSelected.bits.roqIdx
  rollbackGen.bits.level := RedirectLevel.flush
  rollbackGen.bits.interrupt := DontCare
  rollbackGen.bits.pc := DontCare
  rollbackGen.bits.target := rollbackSelected.bits.cf.pc
  rollbackGen.bits.brTag := rollbackSelected.bits.brTag

  rollbackReg := rollbackGen

  // S3: fire rollback request (re-checked against the latest redirect).
  io.rollback := rollbackReg
  io.rollback.valid := rollbackReg.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackReg.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }
  /**
    * Memory mapped IO / other uncached operations
    *
    * An mmio load is only issued when it reaches the head of the ROB
    * (roqDeqPtr matches) and commit is not walking.
    */
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
    io.commits.info(0).commitType === CommitType.LOAD &&
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
    !io.commits.isWalk

  dataModule.io.uncache.raddr := deqPtrExtNext.value

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
  io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.uncache.rdata.mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    // Request accepted: clear pending so it is sent only once.
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    // Response data becomes the entry's load result; normal writeback follows.
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.dcache.bits.data)
  }

  // Read vaddr for mem exception
  vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate lq term using robIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  val commitCount = PopCount(loadCommit)
  deqPtrExtNext := deqPtrExt + commitCount
  deqPtrExt := deqPtrExtNext

  // validCounter tracks occupancy incrementally; after a redirect it is
  // resynchronized from the (slower) pointer distance.
  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
  validCounter := Mux(lastLastCycleRedirect,
    trueValidCounter,
    validCounter + enqNumber - commitCount
  )

  // Accept a new rename group only when a full RenameWidth worth of entries fits.
  allowEnqueue := Mux(io.brqRedirect.valid,
    false.B,
    Mux(lastLastCycleRedirect,
      trueValidCounter <= (LoadQueueSize - RenameWidth).U,
      validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
    )
  )

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  // Print `name` when the flag is set, a space otherwise (fixed-width trace columns).
  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  // Dump every entry's pc/paddr and state flags, four entries per line.
  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    // PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}