// LSQWrapper.scala
package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr

/**
  * Address lookup port for a load/store queue entry.
  *
  * The requester drives `lsIdx` and `isStore`; the queue side answers with `vaddr`.
  * Presumably used to report the faulting address of an excepting memory op -- TODO confirm with the caller.
  */
class ExceptionAddrIO extends XSBundle {
  val lsIdx = Input(new LSIdx) // queue index of the entry being looked up
  val isStore = Input(Bool()) // in LsqWrappper this selects the store queue's answer over the load queue's
  val vaddr = Output(UInt(VAddrBits.W)) // virtual address returned for the selected entry
}


Y
Yinan Xu 已提交
20
/**
  * Payload of one entry in [[LSQueueData]].
  *
  * The field order defines the bit layout of the bundle and must not be changed casually.
  */
class LsqEntry extends XSBundle {
  val vaddr = UInt(VAddrBits.W) // TODO: need opt
  val paddr = UInt(PAddrBits.W) // bits above [2:0] are compared against forward queries
  val mask = UInt(8.W) // per-byte enable: bit k marks byte k of `data` as valid for forwarding
  val data = UInt(XLEN.W)
  val exception = UInt(16.W) // TODO: opt size
  val mmio = Bool()
  val fwdMask = Vec(8, Bool()) // bytes that were forwarded; they override refill data on a refill write
  val fwdData = Vec(8, UInt(8.W)) // forwarded byte values paired with fwdMask
}

W
William Wang 已提交
31 32 33 34 35
/**
  * Byte-granular forwarding result for one 8-byte word.
  *
  * Field order is significant: LSQueueData reinterprets values with `asTypeOf(new FwdEntry)`.
  */
class FwdEntry extends XSBundle {
  val mask = Vec(8, Bool()) // mask(k) is set when byte k carries forwarded data
  val data = Vec(8, UInt(8.W)) // data(k) is the forwarded value of byte k
}

36 37 38 39 40 41

/**
  * Register-based data array for a load/store queue, holding `size` entries of [[LsqEntry]].
  *
  * Write ports:
  *  - `wb`: `nchannel` writeback ports, each overwriting a whole entry;
  *  - `uncache`: overwrites only the `data` field of one entry;
  *  - `refill`: per-entry write enables; the matching word of the dcache line is merged with
  *    the bytes already forwarded to that entry (`fwdMask` / `fwdData`).
  *
  * Forwarding: for each of the `nchannel` query ports, every entry whose `paddr` matches the
  * query above bit 2 and is selected by one of the two `needForward` bit vectors contributes
  * its masked bytes. The result is available one cycle after the query.
  *
  * @param size     number of entries in the array
  * @param nchannel number of writeback ports and of forward query ports
  */
class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    // writeback ports: when wen is high, entry `index` is replaced by `wdata`
    val wb = Vec(nchannel, new Bundle() {
      val wen = Input(Bool())
      val index = Input(UInt(log2Up(size).W))
      val wdata = Input(new LsqEntry)
    })
    // uncache port: when wen is high, only the `data` field of entry `index` is replaced
    val uncache = new Bundle() {
      val wen = Input(Bool())
      val index = Input(UInt(log2Up(size).W))
      val wdata = Input(UInt(XLEN.W))
    }
    // refill port: one write enable per entry, all sharing the same dcache line response
    val refill = new Bundle() {
      val wen = Input(Vec(size, Bool()))
      val dcache = Input(new DCacheLineResp)
    }
    // needForward(channel)(0 / 1): one bit per entry selecting the candidates of range 1 / range 2
    val needForward = Input(Vec(nchannel, Vec(2, UInt(size.W))))
    val forward = Vec(nchannel, Flipped(new LoadForwardQueryIO))
    val rdata = Output(Vec(size, new LsqEntry))

    // val debug = new Bundle() {
    //   val debug_data = Vec(LoadQueueSize, new LsqEntry)
    // }

    /** Drives index and data of writeback port `channel`; the caller must still assert `wen`. */
    def wbWrite(channel: Int, index: UInt, wdata: LsqEntry): Unit = {
      require(channel < nchannel && channel >= 0)
      // need extra "this.wb(channel).wen := true.B"
      this.wb(channel).index := index
      this.wb(channel).wdata := wdata
    }

    /** Drives index and data of the uncache port; the caller must still assert `wen`. */
    def uncacheWrite(index: UInt, wdata: UInt): Unit = {
      // need extra "this.uncache.wen := true.B"
      this.uncache.index := index
      this.uncache.wdata := wdata
    }

    /** Drives the query address and both candidate bit vectors of forward port `channel`. */
    def forwardQuery(channel: Int, paddr: UInt, needForward1: Data, needForward2: Data): Unit = {
      this.needForward(channel)(0) := needForward1
      this.needForward(channel)(1) := needForward2
      this.forward(channel).paddr := paddr
    }

    // def refillWrite(ldIdx: Int): Unit = {
    // }
    // use "this.refill.wen(ldIdx) := true.B" instead
  })

  // default every port to DontCare; the outputs driven below override this
  io := DontCare

  val data = Reg(Vec(size, new LsqEntry))

  // writeback to lq/sq
  // Iterate over every declared channel (this used to be hard-coded to 2). With several
  // channels writing the same index in one cycle, the highest channel wins (last connect).
  (0 until nchannel).foreach(i => {
    when(io.wb(i).wen){
      data(io.wb(i).index) := io.wb(i).wdata
    }
  })

  when(io.uncache.wen){
    data(io.uncache.index).data := io.uncache.wdata
  }

  // refill missed load
  // For each of the 8 bytes, take the forwarded byte where fwdMask is set, else the refill byte.
  def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
    val res = Wire(Vec(8, UInt(8.W)))
    (0 until 8).foreach(i => {
      res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i))
    })
    res.asUInt
  }

  // split dcache result into words
  val words = VecInit((0 until blockWords) map { i =>
    io.refill.dcache.data(DataBits * (i + 1) - 1, DataBits * i)
  })

  (0 until size).foreach(i => {
    when(io.refill.wen(i) ){
      // pick the word of the refilled line addressed by this entry's paddr
      val refillData = words(get_word(data(i).paddr))
      data(i).data := mergeRefillData(refillData, data(i).fwdData.asUInt, data(i).fwdMask.asUInt)
      XSDebug("miss resp: pos %d addr %x data %x + %x(%b)\n", i.U, data(i).paddr, refillData, data(i).fwdData.asUInt, data(i).fwdMask.asUInt)
    }
  })

  // forwarding
  // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
  // (1) if they have the same flag, we need to check range(tail, sqIdx)
  // (2) if they have different flags, we need to check range(tail, LoadQueueSize) and range(0, sqIdx)
  // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, LoadQueueSize))
  // Forward2: Mux(same_flag, 0.U,                   range(0, sqIdx)    )
  // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise

  // entry with larger index should have higher priority since its data is younger

  // FIXME: old fwd logic for assertion, remove when rtl freeze
  (0 until nchannel).foreach(i => {

    val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
    val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
    val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B)))
    val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))

    // Sequential scan: a later j overrides an earlier one for the same byte (last connect),
    // which gives the entry with the larger index priority.
    for (j <- 0 until size) {
      val needCheck = io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
      (0 until XLEN / 8).foreach(k => {
        when (needCheck && data(j).mask(k)) {
          when (io.needForward(i)(0)(j)) {
            forwardMask1(k) := true.B
            forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
          }
          when (io.needForward(i)(1)(j)) {
            forwardMask2(k) := true.B
            forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
          }
          XSDebug(io.needForward(i)(0)(j) || io.needForward(i)(1)(j),
            p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " +
            p"from ptr $j\n")
        }
      })
    }

    // merge forward lookup results
    // forward2 is younger than forward1 and should have higher priority
    // The reference result is registered as a whole (mask and data sampled in the query cycle).
    val oldFwdResult = Wire(new FwdEntry)
    (0 until XLEN / 8).foreach(k => {
      oldFwdResult.mask(k) := RegNext(forwardMask1(k) || forwardMask2(k))
      oldFwdResult.data(k) := RegNext(Mux(forwardMask2(k), forwardData2(k), forwardData1(k)))
    })

    // parallel fwd logic
    val paddrMatch = Wire(Vec(size, Bool()))
    // slots [0, size) hold the forward1 candidates, slots [size, 2 * size) the forward2 candidates
    val matchResultVec = Wire(Vec(size * 2, new FwdEntry))

    // Reduce a list of FwdEntry values; for a byte valid on both sides the right operand wins.
    // NOTE(review): relies on ParallelOperation keeping operand order so that higher slots take
    // priority -- confirm against its definition in utils.
    def parallelFwd(xs: Seq[Data]): Data = {
      ParallelOperation(xs, (a: Data, b: Data) => {
        val l = a.asTypeOf(new FwdEntry)
        val r = b.asTypeOf(new FwdEntry)
        val res = Wire(new FwdEntry)
        (0 until 8).foreach(p => {
          res.mask(p) := l.mask(p) || r.mask(p)
          res.data(p) := Mux(r.mask(p), r.data(p), l.data(p))
        })
        res
      })
    }

    for (j <- 0 until size) {
      paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
    }

    for (j <- 0 until size) {
      // Only the address-match / candidate bits are registered here; the entry's mask and data
      // are read in the following cycle, unlike the reference logic above.
      val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j))
      val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j))
      (0 until XLEN / 8).foreach(k => {
        matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k)
        matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
        matchResultVec(size + j).mask(k) := needCheck1 && data(j).mask(k)
        matchResultVec(size + j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
      })
    }

    val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)

    io.forward(i).forwardMask := parallelFwdResult.mask
    io.forward(i).forwardData := parallelFwdResult.data

    // cross-check the parallel result against the reference logic and report any mismatch
    when(
      oldFwdResult.mask.asUInt =/= parallelFwdResult.mask.asUInt
    ){
      printf("%d: mask error: right: %b false %b\n", GTimer(), oldFwdResult.mask.asUInt, parallelFwdResult.mask.asUInt)
    }

    for (p <- 0 until 8) {
      when(
        oldFwdResult.data(p) =/= parallelFwdResult.data(p) && oldFwdResult.mask(p)
      ){
        printf("%d: data "+p+" error: right: %x false %x\n", GTimer(), oldFwdResult.data(p), parallelFwdResult.data(p))
      }
    }

  })

  // data read
  io.rdata := data
  // io.debug.debug_data := data
}

Y
Yinan Xu 已提交
226 227 228 229 230 231 232 233 234
/**
  * Record for one in-flight miss block request: a block address plus a valid flag.
  */
class InflightBlockInfo extends XSBundle {
  val block_addr = UInt(PAddrBits.W) // PAddrBits wide; presumably the address of the missing block -- TODO confirm
  val valid = Bool() // presumably set while the request is outstanding -- TODO confirm
}

// Load / Store Queue Wrapper for XiangShan Out of Order LSU
/**
  * Load / Store Queue Wrapper for XiangShan Out of Order LSU.
  *
  * Instantiates one [[LoadQueue]] and one [[StoreQueue]] and presents them behind a single IO:
  *  - enqueue requests are steered to one of the two queues by commit type;
  *  - shared inputs (redirect, store pipeline, commits, roq pointer) fan out to both queues;
  *  - the forward query port is connected to both queues, the store queue connected last;
  *  - the single uncache port is shared through a small state machine that remembers which
  *    queue issued the outstanding request and routes the response back to it.
  */
class LsqWrappper extends XSModule with HasDCacheParameters {
  val io = IO(new Bundle() {
    val enq = new Bundle() {
      val canAccept = Output(Bool())
      val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
      val resp = Vec(RenameWidth, Output(new LSIdx))
    }
    val brqRedirect = Input(Valid(new Redirect))
    val loadIn = Vec(LoadPipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
    val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
    val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
    val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val commits = Flipped(new RoqCommitIO)
    val rollback = Output(Valid(new Redirect))
    val dcache = new DCacheLineIO
    val uncache = new DCacheWordIO
    val roqDeqPtr = Input(new RoqPtr)
    val exceptionAddr = new ExceptionAddrIO
  })

  val loadQueue = Module(new LoadQueue)
  val storeQueue = Module(new StoreQueue)

  // io.enq logic
  // LSQ: send out canAccept when both load queue and store queue are ready
  // Dispatch: send instructions to LSQ only when they are ready
  io.enq.canAccept := loadQueue.io.enq.canAccept && storeQueue.io.enq.canAccept
  for (i <- 0 until RenameWidth) {
    // each request goes to exactly one queue; both queues see the same payload
    val isStore = CommitType.lsInstIsStore(io.enq.req(i).bits.ctrl.commitType)
    loadQueue.io.enq.req(i).valid  := !isStore && io.enq.req(i).valid
    storeQueue.io.enq.req(i).valid :=  isStore && io.enq.req(i).valid
    loadQueue.io.enq.req(i).bits  := io.enq.req(i).bits
    storeQueue.io.enq.req(i).bits := io.enq.req(i).bits
    io.enq.resp(i).lqIdx := loadQueue.io.enq.resp(i)
    io.enq.resp(i).sqIdx := storeQueue.io.enq.resp(i)

    XSError(!io.enq.canAccept && io.enq.req(i).valid, "should not enqueue LSQ when not accepting")
  }

  // load queue wiring
  loadQueue.io.brqRedirect <> io.brqRedirect
  loadQueue.io.loadIn <> io.loadIn
  loadQueue.io.storeIn <> io.storeIn
  loadQueue.io.ldout <> io.ldout
  loadQueue.io.commits <> io.commits
  loadQueue.io.rollback <> io.rollback
  loadQueue.io.dcache <> io.dcache
  loadQueue.io.roqDeqPtr <> io.roqDeqPtr
  loadQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx
  loadQueue.io.exceptionAddr.isStore := DontCare

  // store queue wiring
  // storeQueue.io <> DontCare
  storeQueue.io.brqRedirect <> io.brqRedirect
  storeQueue.io.storeIn <> io.storeIn
  storeQueue.io.sbuffer <> io.sbuffer
  storeQueue.io.mmioStout <> io.mmioStout
  storeQueue.io.commits <> io.commits
  storeQueue.io.roqDeqPtr <> io.roqDeqPtr
  storeQueue.io.exceptionAddr.lsIdx := io.exceptionAddr.lsIdx
  storeQueue.io.exceptionAddr.isStore := DontCare

  // Both queues receive the query; the store queue is connected last, so its forwardMask /
  // forwardData drive io.forward (last connect wins).
  loadQueue.io.forward <> io.forward
  storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE

  // the requester says which queue's address it wants
  io.exceptionAddr.vaddr := Mux(io.exceptionAddr.isStore, storeQueue.io.exceptionAddr.vaddr, loadQueue.io.exceptionAddr.vaddr)

  // naive uncache arbiter
  // s_idle: no request outstanding; s_load / s_store: waiting for the response to a request
  // issued by the load / store queue.
  val s_idle :: s_load :: s_store :: Nil = Enum(3)
  val uncacheState = RegInit(s_idle)

  switch(uncacheState){
    is(s_idle){
      when(io.uncache.req.fire()){
        // the load queue has priority on the request path, so a valid load request owns the fire
        uncacheState := Mux(loadQueue.io.uncache.req.valid, s_load, s_store)
      }
    }
    is(s_load){
      when(io.uncache.resp.fire()){
        uncacheState := s_idle
      }
    }
    is(s_store){
      when(io.uncache.resp.fire()){
        uncacheState := s_idle
      }
    }
  }

  // defaults; the conditional connections below override them for the selected queue
  loadQueue.io.uncache := DontCare
  storeQueue.io.uncache := DontCare
  loadQueue.io.uncache.resp.valid := false.B
  storeQueue.io.uncache.resp.valid := false.B
  when(loadQueue.io.uncache.req.valid){
    io.uncache.req <> loadQueue.io.uncache.req
  }.otherwise{
    io.uncache.req <> storeQueue.io.uncache.req
  }
  when(uncacheState === s_load){
    io.uncache.resp <> loadQueue.io.uncache.resp
  }.otherwise{
    io.uncache.resp <> storeQueue.io.uncache.resp
  }

  // the two queues must never request at the same time, never both see a response,
  // and no response may arrive while nothing is outstanding
  assert(!(loadQueue.io.uncache.req.valid && storeQueue.io.uncache.req.valid))
  assert(!(loadQueue.io.uncache.resp.valid && storeQueue.io.uncache.resp.valid))
  assert(!((loadQueue.io.uncache.resp.valid || storeQueue.io.uncache.resp.valid) && uncacheState === s_idle))

}