// LoadQueue.scala

package xiangshan.mem

import chisel3._
import chisel3.util._

import freechips.rocketchip.tile.HasFPUParameters

import utils._
import xiangshan._
import xiangshan.backend.LSUOpType
import xiangshan.backend.roq.RoqPtr
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.mem._
/** Pointer into the LoadQueue: a circular-queue pointer (flag + value) of depth LoadQueueSize. */
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize)

object LqPtr extends HasXSParameter {
  /** Construct an LqPtr wire from an explicit wrap flag `f` and index `v`. */
  def apply(f: Bool, v: UInt): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

trait HasLoadHelper { this: XSModule =>
  /** Sign/zero-extend raw load data to XLEN according to the load opcode.
    * For lw/ld with `fpWen` set, the raw data is passed through unchanged
    * (FP recoding is handled by [[fpRdataHelper]]).
    */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN)
    ))
  }

  /** Recode raw FP load data (flw/fld) into the internal recoded FP format.
    * `recode`/`S`/`D` come from HasFPUParameters — assumed mixed in by the
    * concrete module (NOTE(review): not visible in this file — confirm).
    */
  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
  }
}

/** Dispatch-stage enqueue interface of the LoadQueue.
  * Enqueue only proceeds when both this queue (`canAccept`) and the store
  * queue (`sqCanAccept`) can accept the whole dispatch group.
  */
class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())                            // LQ has room for RenameWidth entries
  val sqCanAccept = Input(Bool())                           // store queue's accept signal (enqueue is all-or-nothing)
  val needAlloc = Vec(RenameWidth, Input(Bool()))           // which dispatch slots need an LQ entry
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp))) // uops to enqueue
  val resp = Vec(RenameWidth, Output(new LqPtr))            // allocated LQ index for each slot
}

// Load Queue

/**
  * Load Queue (LQ)
  *
  * Tracks all in-flight load instructions. Entries are allocated at dispatch,
  * filled by the load pipelines at writeback, refilled on dcache miss, checked
  * for store-load memory violations, and released at commit. MMIO loads are
  * held pending and issued over `io.uncache` when they reach the roq head.
  */
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Input(Valid(new Redirect))
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val commits = Flipped(new RoqCommitIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = Flipped(ValidIO(new Refill))
    val uncache = new DCacheWordIO
    val roqDeqPtr = Input(new RoqPtr)
    val exceptionAddr = new ExceptionAddrIO
  })

  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
  dataModule.io := DontCare
  val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
  vaddrModule.io := DontCare

  // Per-entry status bits
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // NOTE(review): cleared at enqueue and read by needCancel/debug, but never set true in this file — confirm against the roq commit path
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst

  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull

  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
    // offset of this slot within the dispatch group = number of earlier slots needing an entry
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      // listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions writeback to regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb.wen(i) := false.B
    vaddrModule.io.wen(i) := false.B
    when (io.loadIn(i).fire()) {
      when (io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        )
      }.otherwise {
        XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        )
      }
      val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
      // a hit, non-mmio load is immediately data-valid and writebacked
      datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

      val loadWbData = Wire(new LQDataEntry)
      loadWbData.paddr := io.loadIn(i).bits.paddr
      loadWbData.mask := io.loadIn(i).bits.mask
      loadWbData.data := io.loadIn(i).bits.data // fwd data
      loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
      loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
      dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
      dataModule.io.wb.wen(i) := true.B

      vaddrModule.io.waddr(i) := loadWbIndex
      vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
      vaddrModule.io.wen(i) := true.B

      debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio

      // a missed, non-mmio, exception-free load waits for refill; an
      // exception-free mmio load waits at roq head instead
      val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
      // listening(loadWbIndex) := dcacheMissed
      pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
    }
  }

  /**
    * Cache miss request
    *
    * (1) writeback: miss
    * (2) send to dcache: listening
    * (3) dcache response: datavalid
    * (4) writeback to ROB: writeback
    */
  // Legacy miss-request logic, kept for reference:
  // val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  // val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  // val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)

  // val missRefillSelVec = VecInit(
  //   (0 until LoadQueueSize).map{ i =>
  //     val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
  //     allocated(i) && miss(i) && !inflight
  //   })

  // val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
  // val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
  // io.dcache.req.valid := missRefillSelVec.asUInt.orR
  // io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
  // io.dcache.req.bits.addr := missRefillBlockAddr
  // io.dcache.req.bits.data := DontCare
  // io.dcache.req.bits.mask := DontCare

  // io.dcache.req.bits.meta.id       := DontCare
  // io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
  // io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
  // io.dcache.req.bits.meta.uop      := uop(missRefillSel)
  // io.dcache.req.bits.meta.mmio     := false.B // dataModule.io.rdata(missRefillSel).mmio
  // io.dcache.req.bits.meta.tlb_miss := false.B
  // io.dcache.req.bits.meta.mask     := DontCare
  // io.dcache.req.bits.meta.replay   := false.B

  // assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))

  // when(io.dcache.req.fire()) {
  //   miss(missRefillSel) := false.B
  //   // listening(missRefillSel) := true.B

  //   // mark this block as inflight
  //   inflightReqs(reqBlockIndex).valid := true.B
  //   inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
  //   assert(!inflightReqs(reqBlockIndex).valid)
  // }

  // when(io.dcache.resp.fire()) {
  //   val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
  //   assert(inflight)
  //   for (i <- 0 until cfg.nLoadMissEntries) {
  //     when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
  //       inflightReqs(i).valid := false.B
  //     }
  //   }
  // }

  // when(io.dcache.req.fire()){
  //   XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
  //     io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
  //     io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
  //   )
  // }

  when(io.dcache.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
  }

  // Refill 64 bit in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.valid := io.dcache.valid
  dataModule.io.refill.paddr := io.dcache.bits.addr
  dataModule.io.refill.data := io.dcache.bits.data

  (0 until LoadQueueSize).map(i => {
    dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
    when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
      datavalid(i) := true.B
      miss(i) := false.B
    }
  })

  // writeback up to 2 missed load insts to CDB
  // just randomly pick 2 missed load (data refilled), write them back to cdb
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && datavalid(i) && !writebacked(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelV = Wire(Vec(LoadPipelineWidth, Bool()))
  // split entries into even/odd banks so the two writeback ports never conflict
  val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)}))
  val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)}))
  val evenDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i)})).asUInt
  val oddDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i+1)})).asUInt
  loadWbSel(0) := Cat(getFirstOne(loadEvenSelVec, evenDeqMask), 0.U(1.W))
  loadWbSelV(0) := loadEvenSelVec.asUInt.orR
  loadWbSel(1) := Cat(getFirstOne(loadOddSelVec, oddDeqMask), 1.U(1.W))
  loadWbSelV(1) := loadOddSelVec.asUInt.orR

  (0 until LoadPipelineWidth).map(i => {
    // data select
    dataModule.io.wb.raddr(i) := loadWbSel(i)
    val rdata = dataModule.io.wb.rdata(i).data
    val seluop = uop(loadWbSel(i))
    val raddr = dataModule.io.wb.rdata(i).paddr
    // shift the 64-bit line word so the addressed byte lands at bit 0
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    val validWb = loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)

    // writeback missed int/fp load
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.wb.rdata(i).exception.asBools
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := validWb

    when(io.ldout(i).fire()) {
      writebacked(loadWbSel(i)) := true.B
    }

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        dataModule.io.debug(loadWbSel(i)).paddr,
        dataModule.io.debug(loadWbSel(i)).data,
        debug_mmio(loadWbSel(i))
      )
    }
  })

  /**
    * Load commits
    *
    * When load commited, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

  /** Index of the first set bit at or beyond the startMask boundary, wrapping
    * around to the overall first set bit if none is found past the boundary.
    */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  /** Of two (valid, uop) pairs, select the uop with the older roqIdx. */
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

  /** Pairwise age matrix: mask(i)(j) is true when uop(i) is younger than uop(j)
    * (or uop(i) is invalid).
    */
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }

  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There're three possible types of violations. Choose the oldest load.
    *   Set io.redirect according to the detected violation.
    */
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
    dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
    val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      val entryNeedCheck = allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
      entryNeedCheck && dataModule.io.violation(i).violationMask(j)
    })))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    // pick the oldest of the (up to) three violating loads
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }

  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  /** Select the valid rollback request with the older roqIdx. */
  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  io.rollback.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())

  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.pc := DontCare
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    * An mmio load fires on io.uncache only when it is pending at the roq head
    * and commit is not walking.
    */
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
    io.commits.info(0).commitType === CommitType.LOAD &&
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
    !io.commits.isWalk

  dataModule.io.uncache.raddr := deqPtr

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
  io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.uncache.rdata.mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.dcache.bits.data)
  }

  // Read vaddr for mem exception
  vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate lq term using robIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  val commitCount = PopCount(loadCommit)
  deqPtrExt := deqPtrExt + commitCount

  // after a redirect the incremental validCounter may be stale; resync from the pointers
  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
  validCounter := Mux(lastLastCycleRedirect,
    trueValidCounter,
    validCounter + enqNumber - commitCount
  )

  allowEnqueue := Mux(io.brqRedirect.valid,
    false.B,
    Mux(lastLastCycleRedirect,
      trueValidCounter <= (LoadQueueSize - RenameWidth).U,
      validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
    )
  )

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  /** Print `name` when `flag` is set, a space otherwise (debug dump helper). */
  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    // PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}