LoadUnit.scala 11.0 KB
Newer Older
Y
Yinan Xu 已提交
1 2 3 4 5 6
package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
7
import xiangshan.cache.{DCacheLoadIO, DtlbToLsuIO, MemoryOpConstants}
Y
Yinan Xu 已提交
8 9 10 11 12 13 14 15 16 17 18 19 20

/**
  * Port bundle connecting a load unit to the load/store reorder queue (lsroq).
  *
  *  - `loadIn`  : valid-qualified [[LsPipelineBundle]] (no ready signal).
  *  - `ldout`   : flipped ready/valid channel carrying an [[ExuOutput]].
  *  - `forward` : forwarding query port ([[LoadForwardQueryIO]]).
  */
class LoadToLsroqIO extends XSBundle {
  val loadIn  = Valid(new LsPipelineBundle)
  val ldout   = Flipped(Decoupled(new ExuOutput))
  val forward = new LoadForwardQueryIO
}

class LoadUnit extends XSModule {
  // Module ports:
  //   ldin        - load request entering the pipeline (ready/valid, flipped)
  //   ldout       - writeback channel (ready/valid), driven by the CDB arbiter
  //   redirect    - redirect used to flush in-flight loads
  //   tlbFeedback - TLB hit/miss report produced in stage 3
  //   dcache      - DCache load port (req / resp / s1_kill are used by this unit)
  //   dtlb        - DTLB request/response port
  //   sbuffer     - forwarding query towards the store buffer
  //   lsroq       - writeback and forwarding interface towards the lsroq
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheLoadIO
    val dtlb = Flipped(new DtlbToLsuIO)
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadToLsroqIO
  })
26 27 28 29
  
  // Trace each load presented on io.ldin: pc plus the rfWen / fpWen control flags.
  XSDebug(io.ldin.valid, "load enpipe %x iw %x fw %x\n",
    io.ldin.bits.uop.cf.pc,
    io.ldin.bits.uop.ctrl.rfWen,
    io.ldin.bits.uop.ctrl.fpWen)
Y
Yinan Xu 已提交
30 31 32 33 34 35 36 37 38 39 40 41 42 43

  //-------------------------------------------------------
  // Load Pipeline
  //-------------------------------------------------------

  // Handshake wires between stages: stage-2 output, stage-4 output, stage-5 input.
  val l2_out = Wire(Decoupled(new LsPipelineBundle))
  val l4_out = Wire(Decoupled(new LsPipelineBundle))
  val l5_in  = Wire(Flipped(Decoupled(new LsPipelineBundle)))

  //-------------------------------------------------------
  // LD Pipeline Stage 2
  // Generate addr, use addr to query DCache Tag and DTLB
  //-------------------------------------------------------

  // Classification of the current request; all are derived from the DTLB response below.
  val l2_dtlb_hit  = Wire(Bool())
  val l2_dtlb_miss = Wire(Bool())
  val l2_dcache = Wire(Bool())
  val l2_mmio = Wire(Bool())
  val isMMIOReq = Wire(Bool())

  // l2_out is used to generate dcache req
  // Fields not assigned explicitly below stay DontCare.
  l2_out.bits := DontCare
  l2_out.bits.vaddr := io.ldin.bits.src1 + io.ldin.bits.uop.ctrl.imm
  l2_out.bits.paddr := io.dtlb.resp.bits.paddr
  l2_out.bits.uop := io.ldin.bits.uop
  l2_out.bits.mask := genWmask(l2_out.bits.vaddr, io.ldin.bits.uop.ctrl.fuOpType(1,0))
  // A load that is being flushed by the redirect never becomes valid in stage 2.
  l2_out.valid := io.ldin.valid && !io.ldin.bits.uop.needFlush(io.redirect)
  // when we are sure it's a MMIO req, we do not need to wait for cache ready
  // (the same holds for a DTLB miss: neither kind of request is sent to the dcache)
  l2_out.ready := (l2_dcache && io.dcache.req.ready) || l2_mmio || l2_dtlb_miss
  io.ldin.ready := l2_out.ready

  // send req to dtlb
  io.dtlb.req.valid := l2_out.valid
  io.dtlb.req.bits.vaddr := l2_out.bits.vaddr

  l2_dtlb_hit  := io.dtlb.resp.valid && !io.dtlb.resp.bits.miss
  l2_dtlb_miss := io.dtlb.resp.valid && io.dtlb.resp.bits.miss
  isMMIOReq := AddressSpace.isMMIO(io.dtlb.resp.bits.paddr)
  l2_dcache := l2_dtlb_hit && !isMMIOReq
  l2_mmio   := l2_dtlb_hit && isMMIOReq

  // send result to dcache
  // never send tlb missed or MMIO reqs to dcache
  io.dcache.req.valid     := l2_dcache

  io.dcache.req.bits.cmd  := MemoryOpConstants.M_XRD
  // TODO: vaddr
  io.dcache.req.bits.addr := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.data := DontCare
  io.dcache.req.bits.mask := l2_out.bits.mask

  // Metadata travelling with the request; stage 4 reads it back from the dcache response.
  io.dcache.req.bits.meta.id       := DontCare
  io.dcache.req.bits.meta.vaddr    := l2_out.bits.vaddr
  io.dcache.req.bits.meta.paddr    := io.dtlb.resp.bits.paddr
  io.dcache.req.bits.meta.uop      := l2_out.bits.uop
  io.dcache.req.bits.meta.mmio     := isMMIOReq
  io.dcache.req.bits.meta.tlb_miss := io.dtlb.resp.bits.miss
  io.dcache.req.bits.meta.mask     := l2_out.bits.mask
  io.dcache.req.bits.meta.replay   := false.B

  // TLB feedback payload; it is registered and reported from stage 3.
  val l2_tlbFeedback = Wire(new TlbFeedback)
  l2_tlbFeedback.hit := !io.dtlb.resp.bits.miss
  l2_tlbFeedback.roqIdx := l2_out.bits.uop.roqIdx

  // dump l2
  XSDebug(l2_out.valid, "L2: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr,
    l2_out.bits.uop.ctrl.fuOpType, l2_out.bits.data, l2_out.bits.mask,
    l2_dtlb_miss, l2_dcache, l2_mmio)

  XSDebug(l2_out.fire(), "load req: pc 0x%x addr 0x%x -> 0x%x op %b\n",
    l2_out.bits.uop.cf.pc, l2_out.bits.vaddr, l2_out.bits.paddr, l2_out.bits.uop.ctrl.fuOpType)

Y
Yinan Xu 已提交
104 105 106 107
  //-------------------------------------------------------
  // LD Pipeline Stage 3
  // Compare tag, use addr to query DCache Data
  //-------------------------------------------------------

  // Stage-3 state: captured from stage 2 whenever l2_out fires.
  val l3_valid = RegNext(l2_out.fire(), false.B)
  val l3_dtlb_miss = RegEnable(next = l2_dtlb_miss, enable = l2_out.fire(), init = false.B)
  val l3_dcache = RegEnable(next = l2_dcache, enable = l2_out.fire(), init = false.B)
  val l3_mmio = RegEnable(next = l2_mmio, enable = l2_out.fire(), init = false.B)
  val l3_tlbFeedback = RegEnable(next = l2_tlbFeedback, enable = l2_out.fire())
  val l3_uop = RegEnable(l2_out.bits.uop, l2_out.fire())
  val l3_bundle = RegEnable(next = l2_out.bits, enable = l2_out.fire())
  // DTLB-miss requests end here: only valid, TLB-hit, non-flushed loads go on to stage 4
  val l3_passdown = l3_valid && !l3_dtlb_miss && !l3_uop.needFlush(io.redirect)

  // TLB feedback is reported for every request that reaches stage 3.
  io.tlbFeedback.valid := l3_valid
  io.tlbFeedback.bits := l3_tlbFeedback
  // Kill the in-flight dcache access when the load in stage 3 is flushed by the redirect.
  io.dcache.s1_kill := l3_valid && l3_dcache && l3_uop.needFlush(io.redirect)

  // dump l3
  XSDebug(l3_valid, "l3: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x dltb_miss %b dcache %b mmio %b\n",
    l3_bundle.uop.cf.pc, l3_bundle.vaddr, l3_bundle.paddr,
    l3_bundle.uop.ctrl.fuOpType, l3_bundle.data, l3_bundle.mask,
    l3_dtlb_miss, l3_dcache, l3_mmio)

  XSDebug(io.tlbFeedback.valid, "tlbFeedback: hit %b roqIdx %d\n",
    io.tlbFeedback.bits.hit, io.tlbFeedback.bits.roqIdx)

  XSDebug(io.dcache.s1_kill, "l3: dcache s1_kill\n")

  // Done in Dcache
Y
Yinan Xu 已提交
135

Y
Yinan Xu 已提交
136 137 138 139
  //-------------------------------------------------------
  // LD Pipeline Stage 4
  // Dcache return result, do tag ecc check and forward check
  //-------------------------------------------------------

  // Stage-4 state: unconditional one-cycle delay of the stage-3 signals.
  val l4_valid = RegNext(l3_passdown, false.B)
  val l4_dcache = RegNext(l3_dcache, false.B)
  val l4_mmio = RegNext(l3_mmio, false.B)
  val l4_bundle = RegNext(l3_bundle)

  // A dcache response is consumed only for a valid stage-4 load that was sent to the dcache;
  // whenever this unit is ready, the response must be present.
  assert(!(io.dcache.resp.ready && !io.dcache.resp.valid), "DCache response got lost")
  io.dcache.resp.ready := l4_valid && l4_dcache
  when (io.dcache.resp.fire()) {
    // Dcache path: take data and metadata from the response.
    l4_out.bits := DontCare
    l4_out.bits.data  := io.dcache.resp.bits.data
    l4_out.bits.paddr := io.dcache.resp.bits.meta.paddr
    l4_out.bits.uop   := io.dcache.resp.bits.meta.uop
    l4_out.bits.mmio  := io.dcache.resp.bits.meta.mmio
    l4_out.bits.mask  := io.dcache.resp.bits.meta.mask
    l4_out.bits.miss  := io.dcache.resp.bits.miss
  }.otherwise {
    // No dcache response this cycle: carry the bundle registered from stage 3.
    l4_out.bits := l4_bundle
  }
  l4_out.valid := l4_valid && !l4_out.bits.uop.needFlush(io.redirect)

  // Store addr forward match
  // If match, get data / fmask from store queue / store buffer

  io.lsroq.forward.paddr := l4_out.bits.paddr
  io.lsroq.forward.mask := io.dcache.resp.bits.meta.mask
  io.lsroq.forward.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.lsroq.forward.uop := l4_out.bits.uop
  io.lsroq.forward.pc := l4_out.bits.uop.cf.pc
  io.lsroq.forward.valid := io.dcache.resp.valid //TODO: opt timing

  io.sbuffer.paddr := l4_out.bits.paddr
  io.sbuffer.mask := io.dcache.resp.bits.meta.mask
  io.sbuffer.lsroqIdx := l4_out.bits.uop.lsroqIdx
  io.sbuffer.uop := DontCare
  io.sbuffer.pc := l4_out.bits.uop.cf.pc
  io.sbuffer.valid := l4_out.valid

  // Merge both forwarding results byte by byte: start from the store-buffer answer and let
  // the lsroq answer override every byte whose lsroq forward-mask bit is set.
  val forwardVec = WireInit(io.sbuffer.forwardData)
  val forwardMask = WireInit(io.sbuffer.forwardMask)
  // generate XLEN/8 Muxs
  (0 until XLEN/8).foreach { j =>
    when(io.lsroq.forward.forwardMask(j)) {
      forwardMask(j) := true.B
      forwardVec(j) := io.lsroq.forward.forwardData(j)
    }
  }
  // These come after the when/otherwise above so they win under last-connect.
  l4_out.bits.forwardMask := forwardMask
  l4_out.bits.forwardData := forwardVec

  // Stage 4 -> stage 5 pipeline register. It is released when io.ldout fires or when the
  // load held in stage 5 is a dcache miss.
  // NOTE(review): io.ldout is the arbiter output, so io.ldout.fire() may also be caused by the
  // lsroq writeback port rather than by the load held in stage 5 - confirm this is intended.
  PipelineConnect(l4_out, l5_in, io.ldout.fire() || l5_in.bits.miss && l5_in.valid, false.B)

  XSDebug(l4_valid, "l4: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x dcache %b mmio %b\n",
    l4_out.bits.uop.cf.pc, l4_out.bits.vaddr, l4_out.bits.paddr,
    l4_out.bits.uop.ctrl.fuOpType, l4_out.bits.data, l4_out.bits.mask,
    l4_out.bits.forwardData.asUInt, l4_out.bits.forwardMask.asUInt, l4_dcache, l4_mmio)

  XSDebug(l5_in.valid, "L5: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x mask %x forwardData: 0x%x forwardMask: %x\n",
    l5_in.bits.uop.cf.pc,  l5_in.bits.vaddr, l5_in.bits.paddr,
    l5_in.bits.uop.ctrl.fuOpType , l5_in.bits.data,  l5_in.bits.mask,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: sbuffer forwardData: 0x%x forwardMask: %x\n",
    io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt)

  XSDebug(l4_valid, "l4: lsroq forwardData: 0x%x forwardMask: %x\n",
    io.lsroq.forward.forwardData.asUInt, io.lsroq.forward.forwardMask.asUInt)
207 208


Y
Yinan Xu 已提交
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
  //-------------------------------------------------------
  // LD Pipeline Stage 5
  // Do data ecc check, merge result and write back to LS ROQ
  // If cache hit, return writeback result to CDB
  //-------------------------------------------------------

  val loadWriteBack = l5_in.fire()

  // data merge
  // Per byte: forwarded store data wins over the data carried in the stage-5 bundle.
  val rdata = VecInit((0 until 8).map(j => {
    Mux(l5_in.bits.forwardMask(j),
      l5_in.bits.forwardData(j),
      l5_in.bits.data(8*(j+1)-1, 8*j)
    )
  })).asUInt
  val func = l5_in.bits.uop.ctrl.fuOpType
  val raddr = l5_in.bits.paddr
  // Move the addressed byte down to bit 0, using the low three bits of the physical address.
  val rdataSel = LookupTree(raddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  // Truncate to the access width and sign- or zero-extend to XLEN according to the load op.
  val rdataPartialLoad = LookupTree(func, List(
      LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
      LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
      LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
      LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN),
      LSUOpType.ldu  -> ZeroExt(rdataSel(63, 0), XLEN)
  ))

  // ecc check
  // TODO

  // if hit, writeback result to CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb
  val hitLoadOut = Wire(Decoupled(new ExuOutput))
  hitLoadOut.bits.uop := l5_in.bits.uop
  hitLoadOut.bits.data := rdataPartialLoad
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := l5_in.bits.mmio
  // Only loads that are neither MMIO nor dcache misses are written back from here.
  hitLoadOut.valid := l5_in.valid && !l5_in.bits.mmio && !l5_in.bits.miss // MMIO will be done in lsroq
  XSDebug(hitLoadOut.fire(), "load writeback: pc %x data %x (%x + %x(%b))\n",
    hitLoadOut.bits.uop.cf.pc, rdataPartialLoad, l5_in.bits.data,
    l5_in.bits.forwardData.asUInt, l5_in.bits.forwardMask.asUInt
  )

  // writeback to LSROQ
  // Current dcache use MSHR

  // Every load leaving stage 5 is reported to the lsroq, with the data field replaced by the
  // merged and extended result.
  io.lsroq.loadIn.bits := l5_in.bits
  io.lsroq.loadIn.bits.data := rdataPartialLoad // for debug
  io.lsroq.loadIn.valid := loadWriteBack

  // pipeline control
  l5_in.ready := io.ldout.ready

  // NOTE(review): this assignment appears to be overridden by the bulk connect of
  // io.lsroq.ldout to cdbArb.io.in(1) below (last-connect) - confirm.
  io.lsroq.ldout.ready := false.B // TODO
  // TODO: writeback missed loads

  // Two-input arbiter in front of io.ldout: input 0 is the hit path of this pipeline,
  // input 1 is the writeback channel coming from the lsroq.
  val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  io.ldout <> cdbArb.io.out
  hitLoadOut <> cdbArb.io.in(0)
  io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut

  when(l5_in.valid){
    XSDebug("load depipe %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
Y
Yinan Xu 已提交
287
}