LoadQueue.scala 23.8 KB
Newer Older
Y
Yinan Xu 已提交
1
package xiangshan.mem
W
William Wang 已提交
2 3 4

import chisel3._
import chisel3.util._
L
LinJiawei 已提交
5
import freechips.rocketchip.tile.HasFPUParameters
6
import utils._
Y
Yinan Xu 已提交
7 8
import xiangshan._
import xiangshan.cache._
L
LinJiawei 已提交
9
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
10
import xiangshan.backend.LSUOpType
11
import xiangshan.mem._
12
import xiangshan.backend.roq.RoqLsqIO
13
import xiangshan.backend.fu.HasExceptionNO
W
William Wang 已提交
14 15


16
// Circular pointer into the LoadQueue: carries an index `value` plus a wrap
// `flag` (both inherited from CircularQueuePtr) so full/empty can be told apart.
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }
17 18 19 20 21 22 23 24

/** Companion of [[LqPtr]]; mixes in HasXSParameter to expose LoadQueueSize. */
object LqPtr extends HasXSParameter {
  /**
    * Construct an LqPtr wire from an explicit wrap flag and index.
    *
    * @param f wrap flag of the pointer
    * @param v index value within the load queue
    */
  def apply(f: Bool, v: UInt): LqPtr = {
    val result = Wire(new LqPtr)
    result.flag := f
    result.value := v
    result
  }
}

L
LinJiawei 已提交
27 28 29 30 31 32
/** Helpers shared by modules that must align/extend raw 64-bit load data. */
trait HasLoadHelper { this: XSModule =>
  /**
    * Sign/zero-extend already-aligned load data to XLEN according to the
    * uop's load opcode. For lw/ld with fpWen set, the raw data is passed
    * through untouched (FP loads are recoded elsewhere).
    */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    // opcode -> extended-result table, consumed by the LookupTree mux below
    val extendTable = List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN)
    )
    LookupTree(uop.ctrl.fuOpType, extendTable)
  }

  /**
    * Recode raw FP load data (flw/fld) into the internal recoded FP format.
    */
  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt =
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
}

Y
Yinan Xu 已提交
49 50
// Enqueue interface between dispatch and the load queue.
// Note: an entry is actually allocated only when BOTH canAccept and
// sqCanAccept hold (LQ/SQ must accept a dispatch group together).
class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())    // LQ has room for a full RenameWidth group
  val sqCanAccept = Input(Bool())   // store queue's accept, paired with canAccept
  val needAlloc = Vec(RenameWidth, Input(Bool()))  // which slots need an LQ entry
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(RenameWidth, Output(new LqPtr))   // allocated pointer per slot
}
56

57
// Load Queue
L
LinJiawei 已提交
58 59 60 61
// Load Queue: tracks all in-flight loads from dispatch to commit.
// Responsibilities: entry allocation, writeback bookkeeping, dcache-refill
// wakeup, store-load memory-violation (rollback) detection, and MMIO loads.
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasExceptionNO
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Input(Valid(new Redirect))
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val loadDataForwarded = Vec(LoadPipelineWidth, Input(Bool()))
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val roq = Flipped(new RoqLsqIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = Flipped(ValidIO(new Refill))
    val uncache = new DCacheWordIO
    val exceptionAddr = new ExceptionAddrIO
  })

  // Per-entry microcode and payload storage.
  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
  dataModule.io := DontCare
  val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
  vaddrModule.io := DontCare
  // Per-entry status flags.
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst

  // Circular queue pointers. enqPtrExt keeps RenameWidth consecutive pointers
  // so each dispatch slot can pick its own slot in one cycle.
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val deqPtrExtNext = Wire(new LqPtr)
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)

  val commitCount = RegNext(io.roq.lcommit)

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
    // offset = number of earlier slots in this group that also allocate
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      miss(index) := false.B
      // listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions writeback to regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb.wen(i) := false.B
    vaddrModule.io.wen(i) := false.B
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }.otherwise {
        XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
        io.loadIn(i).bits.uop.lqIdx.asUInt,
        io.loadIn(i).bits.uop.cf.pc,
        io.loadIn(i).bits.vaddr,
        io.loadIn(i).bits.paddr,
        io.loadIn(i).bits.data,
        io.loadIn(i).bits.mask,
        io.loadIn(i).bits.forwardData.asUInt,
        io.loadIn(i).bits.forwardMask.asUInt,
        io.loadIn(i).bits.mmio
      )}
      val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
      // Data counts as valid if the load hit, or if it missed but was fully
      // forwarded from the store queue. MMIO loads are never datavalid here.
      datavalid(loadWbIndex) := (!io.loadIn(i).bits.miss || io.loadDataForwarded(i)) && !io.loadIn(i).bits.mmio
      writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

      val loadWbData = Wire(new LQDataEntry)
      loadWbData.paddr := io.loadIn(i).bits.paddr
      loadWbData.mask := io.loadIn(i).bits.mask
      loadWbData.data := io.loadIn(i).bits.forwardData.asUInt // fwd data
      loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
      dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
      dataModule.io.wb.wen(i) := true.B

      vaddrModule.io.waddr(i) := loadWbIndex
      vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
      vaddrModule.io.wen(i) := true.B

      debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio

      val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      miss(loadWbIndex) := dcacheMissed && !io.loadDataForwarded(i)
      pending(loadWbIndex) := io.loadIn(i).bits.mmio
      uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
    }
  }

  when(io.dcache.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
  }

  // Refill 64 bit in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.valid := io.dcache.valid
  dataModule.io.refill.paddr := io.dcache.bits.addr
  dataModule.io.refill.data := io.dcache.bits.data

  // Wake up every missed entry whose paddr matches the refilled line.
  (0 until LoadQueueSize).map(i => {
    dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
    when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
      datavalid(i) := true.B
      miss(i) := false.B
    }
  })

  // Writeback up to 2 missed load insts to CDB
  //
  // Pick 2 missed load (data refilled), write them back to cdb
  // 2 refilled load will be selected from even/odd entry, separately

  // Stage 0
  // Generate writeback indexes

  def getEvenBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i)})).asUInt
  }
  def getOddBits(input: UInt): UInt = {
    require(input.getWidth == LoadQueueSize)
    VecInit((0 until LoadQueueSize/2).map(i => {input(2*i+1)})).asUInt
  }

  val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W))) // index selected last cycle
  val loadWbSelV = Wire(Vec(LoadPipelineWidth, Bool())) // index selected in last cycle is valid

  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && !writebacked(i) && datavalid(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  val evenDeqMask = getEvenBits(deqMask)
  val oddDeqMask = getOddBits(deqMask)
  // generate lastCycleSelect mask
  val evenSelectMask = Mux(io.ldout(0).fire(), getEvenBits(UIntToOH(loadWbSel(0))), 0.U)
  val oddSelectMask = Mux(io.ldout(1).fire(), getOddBits(UIntToOH(loadWbSel(1))), 0.U)
  // generate real select vec: mask off the entry chosen last cycle so the
  // same entry is not selected twice before `writebacked` is updated
  val loadEvenSelVec = getEvenBits(loadWbSelVec) & ~evenSelectMask
  val loadOddSelVec = getOddBits(loadWbSelVec) & ~oddSelectMask

  def toVec(a: UInt): Vec[Bool] = {
    VecInit(a.asBools)
  }

  val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
  // port 0 drains even entries, port 1 drains odd entries (oldest-first from deqPtr)
  loadWbSelGen(0) := Cat(getFirstOne(toVec(loadEvenSelVec), evenDeqMask), 0.U(1.W))
  loadWbSelVGen(0):= loadEvenSelVec.asUInt.orR
  loadWbSelGen(1) := Cat(getFirstOne(toVec(loadOddSelVec), oddDeqMask), 1.U(1.W))
  loadWbSelVGen(1) := loadOddSelVec.asUInt.orR

  (0 until LoadPipelineWidth).map(i => {
    loadWbSel(i) := RegNext(loadWbSelGen(i))
    loadWbSelV(i) := RegNext(loadWbSelVGen(i), init = false.B)
    when(io.ldout(i).fire()){
      // Mark them as writebacked, so they will not be selected in the next cycle
      writebacked(loadWbSel(i)) := true.B
    }
  })

  // Stage 1
  // Use indexes generated in cycle 0 to read data
  // writeback data to cdb
  (0 until LoadPipelineWidth).map(i => {
    // data select
    dataModule.io.wb.raddr(i) := loadWbSelGen(i)
    val rdata = dataModule.io.wb.rdata(i).data
    val seluop = uop(loadWbSel(i))
    val func = seluop.ctrl.fuOpType
    val raddr = dataModule.io.wb.rdata(i).paddr
    // Shift the 64-bit word right by the byte offset within the doubleword.
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    // writeback missed int/fp load
    //
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.debug.isPerfCnt := false.B
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelV(i)

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        debug_mmio(loadWbSel(i))
      )
    }

  })

  /**
    * Load commits
    *
    * When load commited, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(commitCount > i.U){
      allocated(deqPtr+i.U) := false.B
    }
  })

  // Find the first set bit at or after startMask's boundary (circular search).
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  // Of two (valid, uop) pairs, return the uop with the older roqIdx.
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

  // mask(i)(j): uop(i) is program-order after uop(j) (invalid i counts as after).
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }

  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There're three possible types of violations. Choose the oldest load.
    *   Prepare redirect request according to the detected violation.
    * Cycle 2: Redirect Fire
    *   Fire redirect request (if valid)
    */
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
    dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
    val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask)
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
    })))
    val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    // Pick the oldest of the (up to three) violating loads.
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }

  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)
  val lastlastCycleRedirect = RegNext(lastCycleRedirect)

  // S2: select rollback and generate rollback request
  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  val rollbackGen = Wire(Valid(new Redirect))
  val rollbackReg = Reg(Valid(new Redirect))
  rollbackGen.valid := rollbackSelected.valid

  rollbackGen.bits.roqIdx := rollbackSelected.bits.roqIdx
  rollbackGen.bits.level := RedirectLevel.flush
  rollbackGen.bits.interrupt := DontCare
  rollbackGen.bits.pc := DontCare
  rollbackGen.bits.target := rollbackSelected.bits.cf.pc
  rollbackGen.bits.brTag := rollbackSelected.bits.brTag

  rollbackReg := rollbackGen

  // S3: fire rollback request
  // Suppress the rollback if a redirect from the last two cycles already
  // flushes an instruction at or before the rollback target.
  io.rollback := rollbackReg
  io.rollback.valid := rollbackReg.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackReg.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional()) &&
    (!lastlastCycleRedirect.valid || !isAfter(rollbackReg.bits.roqIdx, lastlastCycleRedirect.bits.roqIdx)) &&
    !(lastlastCycleRedirect.valid && lastlastCycleRedirect.bits.isUnconditional())

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    * States:
    * (1) writeback from store units: mark as pending
    * (2) when they reach ROB's head, they can be sent to uncache channel
    * (3) response from uncache channel: mark as datavalid
    * (4) writeback to ROB (and other units): mark as writebacked
    * (5) ROB commits the instruction: same as normal instructions
    */
  //(2) when they reach ROB's head, they can be sent to uncache channel
  val s_idle :: s_req :: s_resp :: s_wait :: Nil = Enum(4)
  val uncacheState = RegInit(s_idle)
  switch(uncacheState) {
    is(s_idle) {
      when(io.roq.pendingld && pending(deqPtr) && allocated(deqPtr)) {
        uncacheState := s_req
      }
    }
    is(s_req) {
      when(io.uncache.req.fire()) {
        uncacheState := s_resp
      }
    }
    is(s_resp) {
      when(io.uncache.resp.fire()) {
        uncacheState := s_wait
      }
    }
    is(s_wait) {
      when(io.roq.commit) {
        uncacheState := s_idle // ready for next mmio
      }
    }
  }
  io.uncache.req.valid := uncacheState === s_req

  dataModule.io.uncache.raddr := deqPtrExtNext.value

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
  io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.uncache.rdata.mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  // (3) response from uncache channel: mark as datavalid
  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.dcache.bits.data)
  }

  // Read vaddr for mem exception
  vaddrModule.io.raddr(0) := deqPtr + commitCount
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate lq term using robIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i)
    when (needCancel(i)) {
        allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow eneuque even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  deqPtrExtNext := deqPtrExt + commitCount
  deqPtrExt := deqPtrExtNext

  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val validCount = distanceBetween(enqPtrExt(0), deqPtrExt)

  allowEnqueue := validCount + enqNumber <= (LoadQueueSize - RenameWidth).U

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  // Dump every entry's pc/paddr and status flags, 4 entries per line.
  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && miss(i), "m")
    // PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}