package xiangshan.mem

import chisel3._
import chisel3.util._
import freechips.rocketchip.tile.HasFPUParameters
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbRequestIO}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr


class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }

object LqPtr extends HasXSParameter {
  def apply(f: Bool, v: UInt): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
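
// LqPtr is a circular queue pointer: `value` indexes into the LoadQueueSize entries
// and `flag` flips on every wrap-around. Two pointers with the same value and the
// same flag denote an empty queue; the same value with different flags denotes a
// full queue (see isEmpty / isFull in LoadQueue below).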

trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.lw   -> Mux(fpWen, rdata, SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, rdata, SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
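
  // Worked example: for LSUOpType.lb with rdata = 0x80, rdataHelper sign-extends
  // bits (7, 0) to XLEN and returns 0xffff_ffff_ffff_ff80, while LSUOpType.lbu
  // zero-extends the same byte and returns 0x0000_0000_0000_0080.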

  def fpRdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lw   -> recode(rdata(31, 0), S),
      LSUOpType.ld   -> recode(rdata(63, 0), D)
    ))
  }
}

class LqEnqIO extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(RenameWidth, Input(Bool()))
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(RenameWidth, Output(new LqPtr))
}

// Load Queue
class LoadQueue extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  val io = IO(new Bundle() {
    val enq = new LqEnqIO
    val brqRedirect = Input(Valid(new Redirect))
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback int load
    val load_s1 = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val commits = Flipped(new RoqCommitIO)
    val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
    val dcache = new DCacheLineIO
    val uncache = new DCacheWordIO
    val roqDeqPtr = Input(new RoqPtr)
    val exceptionAddr = new ExceptionAddrIO
  })

  val uop = Reg(Vec(LoadQueueSize, new MicroOp))
  // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
  val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
  dataModule.io := DontCare
  val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been written back to CDB
  val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been committed by roq
  val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
  val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
  val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq

  val debug_mmio = Reg(Vec(LoadQueueSize, Bool())) // mmio: inst is an mmio inst

  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
  val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt.value
  val sameFlag = enqPtrExt(0).flag === deqPtrExt.flag
  val isEmpty = enqPtr === deqPtr && sameFlag
  val isFull = enqPtr === deqPtr && !sameFlag
  val allowIn = !isFull

  val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.info(i).commitType === CommitType.LOAD)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits.info(i).lqIdx.value)

  val deqMask = UIntToMask(deqPtr, LoadQueueSize)
  val enqMask = UIntToMask(enqPtr, LoadQueueSize)
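  // deqMask / enqMask mark the entries below the corresponding pointer
  // (UIntToMask(p, LoadQueueSize) is assumed to set bits [p-1, 0]); for illustration,
  // deqPtr = 5 gives deqMask = 0b11111. getFirstOne uses such masks to prefer
  // entries at or after the deq pointer, i.e. the older entries in circular order.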

  /**
    * Enqueue at dispatch
    *
    * Currently, LoadQueue only allows enqueue when #emptyEntries > RenameWidth(EnqWidth)
    */
  io.enq.canAccept := allowEnqueue

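  // Each accepted request claims the next free LQ slot: for request i, `offset` is the
  // number of earlier requests that need an LQ entry. For illustration, if requests
  // 0, 1 and 3 are loads (needAlloc = 1, 1, 0, 1), the offsets are 0, 1, 2, 2, so
  // requests 0, 1 and 3 receive enqPtrExt(0), enqPtrExt(1) and enqPtrExt(2).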
  for (i <- 0 until RenameWidth) {
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = lqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      listening(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := lqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) For an mmio instruction with exceptions, it writes back to ROB immediately.
    *   (2) For an mmio instruction without exceptions, it does not write back.
    * The mmio instruction will be sent to the lower level when it reaches the ROB's head.
    * After the uncache response, it will write back through the arbiter with loadUnit.
    *   (3) For a load that misses in the cache, it is marked as miss and sent to dcache later.
    * After the cache refill, it will write back through the arbiter with loadUnit.
    */
  for (i <- 0 until LoadPipelineWidth) {
    dataModule.io.wb(i).wen := false.B
    when(io.loadIn(i).fire()) {
      when(io.loadIn(i).bits.miss) {
        XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }.otherwise {
          XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.cf.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.data,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio,
          io.loadIn(i).bits.rollback,
          io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
          )
        }
        val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
        datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
        writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio

        val loadWbData = Wire(new LsqEntry)
        loadWbData.paddr := io.loadIn(i).bits.paddr
        loadWbData.vaddr := io.loadIn(i).bits.vaddr
        loadWbData.mask := io.loadIn(i).bits.mask
        loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug
        loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
        loadWbData.fwdData := io.loadIn(i).bits.forwardData
        loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
        dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
        dataModule.io.wb(i).wen := true.B

        debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio

        val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
        miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
        listening(loadWbIndex) := dcacheMissed
        pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
      }
    }

  /**
    * Cache miss request
    *
    * (1) writeback: miss
    * (2) send to dcache: listening
    * (3) dcache response: datavalid
    * (4) writeback to ROB: writeback
    */
  val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
  val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
  val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)
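  // Outstanding miss requests are deduplicated per cache block: each of the
  // cfg.nLoadMissEntries inflightReqs slots records one block address in flight,
  // and missRefillSelVec below skips entries whose block is already being fetched,
  // so at most one dcache request is issued per missed block.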

  val missRefillSelVec = VecInit(
    (0 until LoadQueueSize).map{ i =>
      val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
      allocated(i) && miss(i) && !inflight
    })

  val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
  val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
  io.dcache.req.valid := missRefillSelVec.asUInt.orR
  io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
  io.dcache.req.bits.addr := missRefillBlockAddr
  io.dcache.req.bits.data := DontCare
  io.dcache.req.bits.mask := DontCare

  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := DontCare // dataModule.io.rdata(missRefillSel).vaddr
  io.dcache.req.bits.meta.paddr    := missRefillBlockAddr
  io.dcache.req.bits.meta.uop      := uop(missRefillSel)
  io.dcache.req.bits.meta.mmio     := false.B // mmio(missRefillSel)
  io.dcache.req.bits.meta.tlb_miss := false.B
  io.dcache.req.bits.meta.mask     := DontCare
  io.dcache.req.bits.meta.replay   := false.B

  io.dcache.resp.ready := true.B

  assert(!(debug_mmio(missRefillSel) && io.dcache.req.valid))

  when(io.dcache.req.fire()) {
    miss(missRefillSel) := false.B
    listening(missRefillSel) := true.B

    // mark this block as inflight
    inflightReqs(reqBlockIndex).valid := true.B
    inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
    assert(!inflightReqs(reqBlockIndex).valid)
  }

  when(io.dcache.resp.fire()) {
    val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
    assert(inflight)
    for (i <- 0 until cfg.nLoadMissEntries) {
      when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
        inflightReqs(i).valid := false.B
      }
    }
  }


  when(io.dcache.req.fire()){
    XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
      io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
      io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
    )
  }

  when(io.dcache.resp.fire()){
    XSDebug("miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n",
      io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx.asUInt, io.dcache.resp.bits.meta.uop.lqIdx.asUInt,
      io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data
    )
  }

  // Refill 64 bits in a cycle
  // Refill data comes back from io.dcache.resp
  dataModule.io.refill.dcache := io.dcache.resp.bits

  (0 until LoadQueueSize).map(i => {
    val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === io.dcache.resp.bits.meta.paddr
    dataModule.io.refill.wen(i) := false.B
    when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
      dataModule.io.refill.wen(i) := true.B
      datavalid(i) := true.B
      listening(i) := false.B
    }
  })

  // writeback up to 2 missed load insts to CDB
  // pick 2 missed loads whose data has been refilled and write them back to the CDB
  val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
    allocated(i) && datavalid(i) && !writebacked(i)
  })).asUInt() // use UInt instead of Vec to reduce Verilog lines
  val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
  val loadWbSelV = Wire(Vec(StorePipelineWidth, Bool()))
  val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)}))
  val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)}))
  val evenDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i)})).asUInt
  val oddDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i+1)})).asUInt
  loadWbSel(0) := Cat(getFirstOne(loadEvenSelVec, evenDeqMask), 0.U(1.W))
  loadWbSelV(0) := loadEvenSelVec.asUInt.orR
  loadWbSel(1) := Cat(getFirstOne(loadOddSelVec, oddDeqMask), 1.U(1.W))
  loadWbSelV(1) := loadOddSelVec.asUInt.orR
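  // The two writeback ports are statically bound to even and odd LQ indices, so they
  // can never select the same entry; within each half, getFirstOne picks the ready
  // entry closest to the deq pointer, i.e. the oldest ready load in that half.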
  (0 until StorePipelineWidth).map(i => {
    // data select
    val rdata = dataModule.io.rdata(loadWbSel(i)).data
    val seluop = uop(loadWbSel(i))
    val func = seluop.ctrl.fuOpType
    val raddr = dataModule.io.rdata(loadWbSel(i)).paddr
    val rdataSel = LookupTree(raddr(2, 0), List(
      "b000".U -> rdata(63, 0),
      "b001".U -> rdata(63, 8),
      "b010".U -> rdata(63, 16),
      "b011".U -> rdata(63, 24),
      "b100".U -> rdata(63, 32),
      "b101".U -> rdata(63, 40),
      "b110".U -> rdata(63, 48),
      "b111".U -> rdata(63, 56)
    ))
    val rdataPartialLoad = rdataHelper(seluop, rdataSel)

    val validWb = loadWbSelVec(loadWbSel(i)) && loadWbSelV(i)

    // writeback missed int/fp load
    //
    // Int load writeback will finish (if not blocked) in one cycle
    io.ldout(i).bits.uop := seluop
    io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools
    io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
    io.ldout(i).bits.data := rdataPartialLoad
    io.ldout(i).bits.redirectValid := false.B
    io.ldout(i).bits.redirect := DontCare
    io.ldout(i).bits.brUpdate := DontCare
    io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
    io.ldout(i).bits.fflags := DontCare
    io.ldout(i).valid := validWb

    when(io.ldout(i).fire()){
      writebacked(loadWbSel(i)) := true.B
    }

    when(io.ldout(i).fire()) {
      XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
        io.ldout(i).bits.uop.roqIdx.asUInt,
        io.ldout(i).bits.uop.lqIdx.asUInt,
        io.ldout(i).bits.uop.cf.pc,
        dataModule.io.rdata(loadWbSel(i)).paddr,
        dataModule.io.rdata(loadWbSel(i)).data,
        debug_mmio(loadWbSel(i))
      )
    }

  })

  /**
    * Load commits
    *
    * When a load is committed, mark it as !allocated and move deqPtrExt forward.
    */
  (0 until CommitWidth).map(i => {
    when(loadCommit(i)) {
      allocated(mcommitIdx(i)) := false.B
      XSDebug("load commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

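  // getFirstOne returns the index of the first set bit at or after the position encoded
  // by startMask (bits below the start index are masked off); if no such bit exists, it
  // wraps around and returns the first set bit from index 0. With startMask derived from
  // deqPtr, this selects the entry closest to the deq pointer in circular order.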
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

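  // getOldestInTwo returns, of the (at most two) valid candidates, the uop with the
  // older roqIdx; if only one candidate is valid, that one is returned.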
  def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    assert(valid.length == 2)
    Mux(valid(0) && valid(1),
      Mux(isAfter(uop(0).roqIdx, uop(1).roqIdx), uop(1), uop(0)),
      Mux(valid(0) && !valid(1), uop(0), uop(1)))
  }

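  // getAfterMask builds a pairwise matrix: entry (i)(j) is true when candidate i is
  // program-order after candidate j (an invalid candidate counts as "after" everything),
  // which detectRollback uses to pick the oldest violating load.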
  def getAfterMask(valid: Seq[Bool], uop: Seq[MicroOp]) = {
    assert(valid.length == uop.length)
    val length = valid.length
    (0 until length).map(i => {
      (0 until length).map(j => {
        Mux(valid(i) && valid(j),
          isAfter(uop(i).roqIdx, uop(j).roqIdx),
          Mux(!valid(i), true.B, false.B))
      })
    })
  }

  /**
    * Memory violation detection
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    *   Besides, load instructions in LoadUnit_S1 and S2 are also checked.
    * Cycle 1: Redirect Generation
    *   There are three possible types of violations. Choose the oldest load.
    *   Set io.redirect according to the detected violation.
    */
  io.load_s1 := DontCare
  def detectRollback(i: Int) = {
    val startIndex = io.storeIn(i).bits.uop.lqIdx.value
    val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
    val xorMask = lqIdxMask ^ enqMask
    val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt(0).flag
    val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
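    // toEnqPtrMask marks the loads that are program-order after this store, i.e. the
    // entries between the store's lqIdx and the enqueue pointer. For illustration, with
    // startIndex = 3 and enqPtr = 6 under the same flag, the xor of the two masks selects
    // entries 3, 4 and 5; when the flags differ, the complement is selected instead.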

    // check if a load already in lq needs to be rolled back
    val addrMatch = RegNext(VecInit((0 until LoadQueueSize).map(j => {
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
    })))
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      allocated(j) && toEnqPtrMask(j) && (datavalid(j) || listening(j) || miss(j))
    })))
    val overlap = RegNext(VecInit((0 until LoadQueueSize).map(j => {
      val overlapVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
      Cat(overlapVec).orR()
    })))
    val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
      addrMatch(j) && entryNeedCheck(j) && overlap(j)
    }))
    val lqViolation = lqViolationVec.asUInt().orR()
    val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
    val lqViolationUop = uop(lqViolationIndex)
    // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
    // lqViolationUop.lqIdx.value := lqViolationIndex
    XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")

    // when l/s writeback to roq together, check if rollback is needed
    val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.loadIn(j).valid &&
        isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
    })))
    val wbViolation = wbViolationVec.asUInt().orR()
    val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
    XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")

    // check if rollback is needed for load in l1
    val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
      io.load_s1(j).valid && // L1 valid
        isAfter(io.load_s1(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
        io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.load_s1(j).paddr(PAddrBits - 1, 3) &&
        (io.storeIn(i).bits.mask & io.load_s1(j).mask).orR
    })))
    val l1Violation = l1ViolationVec.asUInt().orR()
    val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.load_s1.map(_.uop))))
    XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")

    val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
    val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)

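    // Select the oldest of the three violation candidates (lq entry, concurrent load
    // writeback, load in LoadUnit S1): candidate 0 wins when both others are after it,
    // otherwise candidate 1 wins when candidate 2 is after it, otherwise candidate 2.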
    val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
    val oneAfterZero = mask(1)(0)
    val rollbackUop = Mux(oneAfterZero && mask(2)(0),
      rollbackUopVec(0),
      Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))

    XSDebug(
      l1Violation,
      "need rollback (l4 load) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
    )
    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
    )
    XSDebug(
      wbViolation,
      "need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
    )

    (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
  }

  // rollback check
  val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  for (i <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(i)
    rollback(i).valid := detectedRollback._1
    rollback(i).bits := detectedRollback._2
  }

  def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
    Mux(
      a.valid,
      Mux(
        b.valid,
        Mux(isAfter(a.bits.roqIdx, b.bits.roqIdx), b, a), // a,b both valid, sel oldest
        a // sel a
      ),
      b // sel b
    )
  }

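  // Reduce the per-store-pipeline rollback requests to a single one: rollbackSel keeps
  // the request with the older roqIdx, and ParallelOperation applies it as a reduction
  // tree over all store pipelines.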
  val rollbackSelected = ParallelOperation(rollback, rollbackSel)
  val lastCycleRedirect = RegNext(io.brqRedirect)

  // Note that we use roqIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's roqIdx equals this cycle's roqIdx, it still triggers the redirect.
  io.rollback.valid := rollbackSelected.valid &&
    (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
    !(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())

  io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx
  io.rollback.bits.level := RedirectLevel.flush
  io.rollback.bits.interrupt := DontCare
  io.rollback.bits.pc := DontCare
  io.rollback.bits.target := rollbackSelected.bits.cf.pc
  io.rollback.bits.brTag := rollbackSelected.bits.brTag

  when(io.rollback.valid) {
    XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    * An mmio load stays pending until it reaches the head of the ROQ, is then sent to
    * the uncache channel, and once the response returns its data becomes valid and it
    * writes back through io.ldout like a refilled load.
    */
  io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
    io.commits.info(0).commitType === CommitType.LOAD &&
    io.roqDeqPtr === uop(deqPtr).roqIdx &&
    !io.commits.isWalk

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XRD
  io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
  io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask

  io.uncache.req.bits.meta.id       := DontCare
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := dataModule.io.rdata(deqPtr).paddr
  io.uncache.req.bits.meta.uop      := uop(deqPtr)
  io.uncache.req.bits.meta.mmio     := true.B
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := dataModule.io.rdata(deqPtr).mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  when (io.uncache.req.fire()) {
    pending(deqPtr) := false.B

    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(deqPtr).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  dataModule.io.uncache.wen := false.B
  when(io.uncache.resp.fire()){
    datavalid(deqPtr) := true.B
    dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
    dataModule.io.uncache.wen := true.B

    XSDebug("uncache resp: data %x\n", io.dcache.resp.bits.data)
  }

  // Read vaddr for mem exception
  io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr

  // misprediction recovery / exception redirect
  // invalidate lq entries using roqIdx
  val needCancel = Wire(Vec(LoadQueueSize, Bool()))
  for (i <- 0 until LoadQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
    when (needCancel(i)) {
        allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept && !io.brqRedirect.valid, PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  val commitCount = PopCount(loadCommit)
  deqPtrExt := deqPtrExt + commitCount

  val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
  val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
  validCounter := Mux(lastLastCycleRedirect,
    trueValidCounter,
    validCounter + enqNumber - commitCount
  )

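  // allowEnqueue is registered, so it is a conservative, one-cycle-delayed bound: it
  // guarantees at least RenameWidth free entries in the next cycle, matching the
  // enqueue condition documented at the top of this module.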
  allowEnqueue := Mux(io.brqRedirect.valid,
    false.B,
    Mux(lastLastCycleRedirect,
      trueValidCounter <= (LoadQueueSize - RenameWidth).U,
      validCounter + enqNumber <= (LoadQueueSize - RenameWidth).U
    )
  )

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt.flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until LoadQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == LoadQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}