package xiangshan.mem

import chisel3._
import chisel3.util._
import freechips.rocketchip.tile.HasFPUParameters
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr

// Load queue pointer: a circular-queue pointer (index value + wrap flag)
// sized for the load queue.
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }

object LqPtr extends HasXSParameter {
  // Construct an LqPtr from an explicit wrap flag and index value.
  def apply(f: Bool, v: UInt): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

// Helpers for shaping raw 64-bit load data according to the load opcode.
trait HasLoadHelper { this: XSModule =>
  // Sign/zero-extend the low bits of `rdata` to XLEN based on `fuOpType`.
  // For lw/ld with fpWen set, the raw data is passed through unchanged
  // (fp recoding is handled separately by fpRdataHelper).
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }

  // Recode raw fp load data (flw/fld) into the internal hardfloat
  // representation. Only lw/ld opcodes are legal here.
  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
  }
}

// Enqueue interface between dispatch/rename and the load queue.
class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())                             // lq has enough free entries for a full rename group
  val sqCanAccept = Input(Bool())                            // store queue's accept; enq only fires when both queues accept
  val needAlloc = Vec(RenameWidth, Input(Bool()))            // which dispatch slots need an lq entry (used to compute offsets)
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))  // uops to enqueue
  val resp = Vec(RenameWidth, Output(new LqPtr))             // allocated lq index for each slot
}

// Load Queue
//
// Tracks every in-flight load between dispatch and commit:
//   - allocates entries at dispatch (io.enq)
//   - records results / miss state written back by the load units (io.loadIn)
//   - collects dcache refill data for missed loads (io.dcache)
//   - writes refilled loads back to the CDB (io.ldout)
//   - detects store->load memory-order violations and raises rollback
//     (io.storeIn / io.load_s1 -> io.rollback)
//   - issues mmio loads on the uncache port once they reach the roq head
//     (io.uncache)
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Input(Valid(new Redirect))
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val commits = Flipped(new RoqCommitIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = Flipped(ValidIO(new Refill))
    val uncache = new DCacheWordIO
    val roqDeqPtr = Input(new RoqPtr)
    val exceptionAddr = new ExceptionAddrIO
  })

  // Per-entry micro-op storage and the separate data/vaddr memories.
  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
  dataModule.io := DontCare
  val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
  vaddrModule.io := DontCare

  // Per-entry state bits.
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been commited by roq
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  // val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst

  // Circular-queue pointers and flow-control state.
  // enqPtrExt holds RenameWidth consecutive pointers so each rename slot can
  // pick its own entry without an adder chain at the consumer side.
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull

  // Commit signals from the roq: which commit slots are committing loads this cycle.
  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

  for (i <- 0 until RenameWidth) {
    // Each slot's pointer is offset by the number of earlier slots that need an entry.
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      // listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions writeback to regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to lower level when it reaches ROB's head.
    * After uncache response, it will write back through arbiter with loadUnit.
    *   (3) For cache misses, it is marked miss and sent to dcache later.
    * After cache refills, it will write back through arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb.wen(i) := false.B
    vaddrModule.io.wen(i) := false.B
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
      }.otherwise {
        XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
      }
      val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
      // A hit, non-mmio load is immediately valid and (conceptually) writebacked;
      // misses and mmio loads complete later via refill / uncache paths.
      datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

      val loadWbData = Wire(new LQDataEntry)
      loadWbData.paddr := io.loadIn(i).bits.paddr
      loadWbData.mask := io.loadIn(i).bits.mask
      loadWbData.data := io.loadIn(i).bits.data // fwd data
      loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
      loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
      dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
      dataModule.io.wb.wen(i) := true.B

      vaddrModule.io.waddr(i) := loadWbIndex
      vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
      vaddrModule.io.wen(i) := true.B

      debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio

      // Entries with exceptions never request refill or uncache access.
      val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
      miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
      // listening(loadWbIndex) := dcacheMissed
      pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
    }
  }

  /**
    * Cache miss request
    *
    * (1) writeback: miss
    * (2) send to dcache: listing
    * (3) dcache response: datavalid
    * (4) writeback to ROB: writeback
    */
  // Legacy per-lq miss-request logic, kept for reference; misses are now
  // handled by the miss queue, and the lq only consumes refill broadcasts.
  // val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  // val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  // val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)

  // val missRefillSelVec = VecInit(
  //   (0 until LoadQueueSize).map{ i =>
  //     val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
  //     allocated(i) && miss(i) && !inflight
  //   })

  // val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
  // val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
  // io.dcache.req.valid := missRefillSelVec.asUInt.orR
  // io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
  // io.dcache.req.bits.addr := missRefillBlockAddr
  // io.dcache.req.bits.data := DontCare
  // io.dcache.req.bits.mask := DontCare

  // io.dcache.req.bits.meta.id       := DontCare
  // io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
  // io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
  // io.dcache.req.bits.meta.uop      := uop(missRefillSel)
  // io.dcache.req.bits.meta.mmio     := false.B // dataModule.io.rdata(missRefillSel).mmio
  // io.dcache.req.bits.meta.tlb_miss := false.B
  // io.dcache.req.bits.meta.mask     := DontCare
  // io.dcache.req.bits.meta.replay   := false.B

  // assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))

  // when(io.dcache.req.fire()) {
  //   miss(missRefillSel) := false.B
  //   listening(missRefillSel) := true.B

  //   // mark this block as inflight
  //   inflightReqs(reqBlockIndex).valid := true.B
  //   inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
  //   assert(!inflightReqs(reqBlockIndex).valid)
  // }

  // when(io.dcache.resp.fire()) {
  //   val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
  //   assert(inflight)
  //   for (i <- 0 until cfg.nLoadMissEntries) {
  //     when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
  //       inflightReqs(i).valid := false.B
  //     }
  //   }
  // }

  // when(io.dcache.req.fire()){
  //   XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
  //     io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
  //     io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
  //   )
  // }

  when(io.dcache.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
  }

  // Refill 64 bit in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.valid := io.dcache.valid
  dataModule.io.refill.paddr := io.dcache.bits.addr
  dataModule.io.refill.data := io.dcache.bits.data

  // Any allocated, missed entry whose paddr matches the refilled block
  // becomes data-valid and stops missing.
  (0 until LoadQueueSize).map(i => {
    dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
    when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
      datavalid(i) := true.B
      miss(i) := false.B
    }
  })

  // Writeback up to 2 missed load insts to CDB
  //
  // Pick 2 missed load (data refilled), write them back to cdb
  // 2 refilled load will be selected from even/odd entry, separately

  // Stage 0
  // Generate writeback indexes
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && !writebacked(i) && datavalid(i)
  })).asUInt() // use uint instead vec to reduce verilog lines
  // Split candidates by entry parity so each ldout port searches half the queue.
  val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)}))
  val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)}))
  val evenDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i)})).asUInt
  val oddDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i+1)})).asUInt

  val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
  loadWbSelGen(0) := Cat(getFirstOne(loadEvenSelVec, evenDeqMask), 0.U(1.W))
  loadWbSelVGen(0) := loadEvenSelVec.asUInt.orR
  loadWbSelGen(1) := Cat(getFirstOne(loadOddSelVec, oddDeqMask), 1.U(1.W))
  loadWbSelVGen(1) := loadOddSelVec.asUInt.orR

  val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelV = RegInit(VecInit(List.fill(LoadPipelineWidth)(false.B)))
  (0 until LoadPipelineWidth).map(i => {
    // A new selection may advance when the port fired or holds no valid selection.
    val canGo = io.ldout(i).fire() || !loadWbSelV(i)
    val valid = loadWbSelVGen(i)
    // store selected index in pipeline reg
    loadWbSel(i) := RegEnable(loadWbSelGen(i), valid && canGo)
    // Mark them as writebacked, so they will not be selected in the next cycle
    when(valid && canGo){
      writebacked(loadWbSelGen(i)) := true.B
    }
    // update loadWbSelValidReg
    when(io.ldout(i).fire()){
      loadWbSelV(i) := false.B
    }
    when(valid && canGo){
      loadWbSelV(i) := true.B
    }
  })

  // Stage 1
  // Use indexes generated in cycle 0 to read data
  // writeback data to cdb
  (0 until LoadPipelineWidth).map(i => {
    // data select
    dataModule.io.wb.raddr(i) := loadWbSel(i)
    val rdata = dataModule.io.wb.rdata(i).data
    val seluop = uop(loadWbSel(i))
    val raddr = dataModule.io.wb.rdata(i).paddr
    // Shift the 64-bit word right by the byte offset within the doubleword.
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    // writeback missed int/fp load
    //
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.wb.rdata(i).exception.asBools
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := loadWbSelV(i)

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        dataModule.io.debug(loadWbSel(i)).paddr,
        dataModule.io.debug(loadWbSel(i)).data,
        debug_mmio(loadWbSel(i))
      )
    }
  })

  /**
    * Load commits
    *
    * When load commited, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

  // Return the index of the first set bit at or after startMask's boundary,
  // wrapping around the circular queue (bits below the boundary are only
  // considered when none at/above it are set).
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  // Of two (valid, uop) candidates, pick the one with the older roqIdx.
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

  // mask(i)(j) == true when candidate i is program-order-after candidate j
  // (invalid candidates count as "after" everything so they lose selection).
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }

  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There're three possible types of violations. Choose the oldest load.
    *   Set io.redirect according to the detected violation.
    */
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    // Entries strictly younger than the store (between its lqIdx and enqPtr).
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)

    // check if load already in lq needs to be rolledback
    dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
    dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
    val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask)
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
    })))
    val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

    // Choose the oldest violating load among the three detection sources.
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }

  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  // Of two rollback candidates, keep the older one (smaller roqIdx).
  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
  io.rollback.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())

  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.pc := DontCare
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    */
  // An mmio load is only issued when it is the oldest un-commited instruction
  // (at the roq head) and the machine is not walking.
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
    io.commits.info(0).commitType === CommitType.LOAD &&
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
    !io.commits.isWalk

  dataModule.io.uncache.raddr := deqPtr

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
  io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.uncache.rdata.paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.uncache.rdata.mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    // Uncache response data re-enters the writeback path like a refill.
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.dcache.bits.data)
  }

  // Read vaddr for mem exception
  vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate lq term using robIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  val commitCount = PopCount(loadCommit)
  deqPtrExt := deqPtrExt + commitCount

  // After a redirect, validCounter is stale for one cycle; fall back to the
  // pointer distance until it is re-synchronized.
  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
  validCounter := Mux(lastLastCycleRedirect,
    trueValidCounter,
    validCounter + enqNumber - commitCount
  )

  allowEnqueue := Mux(io.brqRedirect.valid,
    false.B,
    Mux(lastLastCycleRedirect,
      trueValidCounter <= (LoadQueueSize - RenameWidth).U,
      validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
    )
  )

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  // Print a single-character flag, or a space when the flag is clear.
  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    // PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}