LoadUnit.scala 8.4 KB
Newer Older
Y
Yinan Xu 已提交
1 2 3 4 5 6
package xiangshan.mem

import chisel3._
import chisel3.util._
import utils._
import xiangshan._
7
import xiangshan.cache.{DCacheWordIO, TlbRequestIO, TlbCmd, MemoryOpConstants}
8
import xiangshan.backend.LSUOpType
Y
Yinan Xu 已提交
9 10 11 12 13 14 15

// Interface between the load unit and the load/store reorder queue (lsroq)
class LoadToLsroqIO extends XSBundle {
  // stage-2 result reported to the lsroq (valid-only, no back-pressure)
  val loadIn = Valid(new LsPipelineBundle)
  // writeback channel coming back from the lsroq (flipped: the load unit is the consumer)
  val ldout = Flipped(Decoupled(new ExuOutput))
  // forwarding query issued by the load pipeline
  val forward = new LoadForwardQueryIO
}

Y
Yinan Xu 已提交
16 17 18
// Load Pipeline Stage 0
// Computes the virtual address and sends it to both the DTLB and the DCache
class LoadUnit_S0 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new ExuInput))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val dtlb = Valid(new TlbReq)
    val dcache = DecoupledIO(new DCacheLoadReq)
  })

  val s0_uop = io.in.bits.uop
  // fuOpType(1, 0) selects the access size: 00 = byte, 01 = half, 10 = word, 11 = double
  val s0_size = s0_uop.ctrl.fuOpType(1, 0)
  val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
  val s0_mask = genWmask(s0_vaddr, s0_size)
  // the request is live unless it is being flushed by the incoming redirect
  val s0_valid = io.in.valid && !s0_uop.needFlush(io.redirect)

  // an access is aligned when the address bits below its size are all zero
  val addrAligned = LookupTree(s0_size, List(
    "b00".U -> true.B,
    "b01".U -> !s0_vaddr(0),
    "b10".U -> !s0_vaddr(1, 0).orR,
    "b11".U -> !s0_vaddr(2, 0).orR
  ))

  // hand over to stage 1; fields not listed below stay DontCare
  io.out.valid := s0_valid
  io.out.bits := DontCare
  io.out.bits.vaddr := s0_vaddr
  io.out.bits.mask := s0_mask
  io.out.bits.uop := s0_uop
  io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned

  io.in.ready := io.out.ready

  // DTLB query, issued in the same cycle as the stage output
  io.dtlb.valid := s0_valid
  io.dtlb.bits.vaddr := s0_vaddr
  io.dtlb.bits.cmd := TlbCmd.read
  io.dtlb.bits.roqIdx := s0_uop.roqIdx
  io.dtlb.bits.debug.pc := s0_uop.cf.pc
  io.dtlb.bits.debug.lsroqIdx := s0_uop.lsroqIdx

  // DCache query, addressed with the virtual address
  // NOTE(review): io.dcache.ready is never read in this stage - confirm the
  // dcache always accepts a load request in the cycle it is presented
  io.dcache.valid := s0_valid
  io.dcache.bits.cmd := MemoryOpConstants.M_XRD
  io.dcache.bits.addr := s0_vaddr
  io.dcache.bits.mask := s0_mask
}


// Load Pipeline Stage 1
// TLB resp (send paddr to dcache)
class LoadUnit_S1 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    // TLB response is consumed by this stage, so the port is an input (Flipped)
    val dtlb = Flipped(Valid(new TlbResp))
    val forward = new LoadForwardQueryIO
    val s1_kill = Output(Bool())
    // NOTE(review): PAddBits is not defined in this file - possibly a typo for
    // PAddrBits; confirm against the parameter trait
    val s1_paddr = Output(UInt(PAddBits.W))
  })

  val s1_uop = io.in.bits.uop
  // io.dtlb is a Valid bundle: the payload lives under .bits and there is no ready signal
  val s1_tlb_miss = io.dtlb.bits.miss
  val s1_paddr = io.dtlb.bits.paddr
  // an address only counts as MMIO once the translation has actually hit
  val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr)

  // report TLB hit/miss for this uop
  io.tlbFeedback.valid := io.out.valid
  io.tlbFeedback.bits.hit := !s1_tlb_miss
  io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx

  // if tlb misses or mmio, kill previous cycle's dcache request
  // TODO: kill dcache request when flushed
  io.s1_kill := s1_tlb_miss || s1_mmio
  io.s1_paddr := s1_paddr

  // forwarding query, keyed by the translated physical address
  io.forward.valid := io.out.valid
  io.forward.paddr := s1_paddr
  io.forward.mask := io.in.bits.mask
  io.forward.lsroqIdx := s1_uop.lsroqIdx
  io.forward.sqIdx := s1_uop.sqIdx
  io.forward.uop := s1_uop
  io.forward.pc := s1_uop.cf.pc

  // pass the request on, annotated with paddr / mmio / page-fault information
  io.out.valid := io.in.valid && !s1_uop.needFlush(io.redirect)
  io.out.bits := io.in.bits
  io.out.bits.paddr := s1_paddr
  io.out.bits.mmio := s1_mmio
  io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlb.bits.excp.pf.ld

  // accept a new request when the slot is empty or the current one can leave
  io.in.ready := io.out.ready || !io.in.valid

}
110 111


Y
Yinan Xu 已提交
112 113 114 115 116 117 118 119 120 121 122
// Load Pipeline Stage 2
// DCache resp: merge forwarded bytes with dcache data and format the load result
class LoadUnit_S2 extends XSModule {
  val io = IO(new Bundle() {
    val in = Flipped(Decoupled(new LsPipelineBundle))
    val out = Decoupled(new LsPipelineBundle)
    val redirect = Flipped(ValidIO(new Redirect))
    val dcache = Flipped(DecoupledIO(new DCacheWordResp))
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadForwardQueryIO
  })

  val s2_uop = io.in.bits.uop
  val s2_mask = io.in.bits.mask
  val s2_paddr = io.in.bits.paddr
  // io.dcache is a Decoupled bundle: the payload lives under .bits
  val s2_cache_miss = io.dcache.bits.miss

  // the response is always accepted; it must be present whenever this stage holds a request
  io.dcache.ready := true.B
  assert(!(io.in.valid && !io.dcache.valid), "DCache response got lost")

  // This stage only reads forwardMask / forwardData from the two forwarding
  // bundles; their remaining fields are not driven here, so tie them off explicitly.
  io.sbuffer <> DontCare
  io.lsroq <> DontCare

  // start from the sbuffer result and let lsroq override it byte by byte
  val forwardMask = WireInit(io.sbuffer.forwardMask)
  val forwardData = WireInit(io.sbuffer.forwardData)
  // generate XLEN/8 Muxs
  for (i <- 0 until XLEN / 8) {
    when(io.lsroq.forwardMask(i)) {
      forwardMask(i) := true.B
      forwardData(i) := io.lsroq.forwardData(i)
    }
  }
  // every byte requested by the load mask was supplied by forwarding
  val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U

  // data merge: forwarded bytes take priority over the dcache response
  val rdata = VecInit((0 until XLEN / 8).map(j =>
    Mux(forwardMask(j), forwardData(j), io.dcache.bits.data(8*(j+1)-1, 8*j)))).asUInt
  // shift the addressed byte down to bit 0
  val rdataSel = LookupTree(s2_paddr(2, 0), List(
    "b000".U -> rdata(63, 0),
    "b001".U -> rdata(63, 8),
    "b010".U -> rdata(63, 16),
    "b011".U -> rdata(63, 24),
    "b100".U -> rdata(63, 32),
    "b101".U -> rdata(63, 40),
    "b110".U -> rdata(63, 48),
    "b111".U -> rdata(63, 56)
  ))
  // sign- or zero-extend according to the load opcode
  val rdataPartialLoad = LookupTree(s2_uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdataSel(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdataSel(15, 0), XLEN),
      LSUOpType.lw   -> SignExt(rdataSel(31, 0), XLEN),
      LSUOpType.ld   -> SignExt(rdataSel(63, 0), XLEN),
      LSUOpType.lbu  -> ZeroExt(rdataSel(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdataSel(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdataSel(31, 0), XLEN)
  ))

  // TODO: ECC check

  io.out.valid := io.in.valid && !s2_uop.needFlush(io.redirect)
  io.out.bits := io.in.bits
  io.out.bits.data := rdataPartialLoad
  // a cache miss is harmless when forwarding already covered every requested byte
  io.out.bits.miss := s2_cache_miss && !fullForward

  // accept a new request when the slot is empty or the current one can leave
  io.in.ready := io.out.ready || !io.in.valid

}
Y
Yinan Xu 已提交
176

Y
Yinan Xu 已提交
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

// Load unit top level: chains the three load pipeline stages (S0 -> S1 -> S2),
// reports every S2 result to the lsroq, and multiplexes the writeback port
// between S2 hits and loads written back by the lsroq.
class LoadUnit extends XSModule {
  val io = IO(new Bundle() {
    val ldin = Flipped(Decoupled(new ExuInput))
    val ldout = Decoupled(new ExuOutput)
    val redirect = Flipped(ValidIO(new Redirect))
    val tlbFeedback = ValidIO(new TlbFeedback)
    val dcache = new DCacheWordIO
    val dtlb = new TlbRequestIO()
    val sbuffer = new LoadForwardQueryIO
    val lsroq = new LoadToLsroqIO
  })

  val load_s0 = Module(new LoadUnit_S0)
  val load_s1 = Module(new LoadUnit_S1)
  val load_s2 = Module(new LoadUnit_S2)

  // stage 0: address generation, DTLB and DCache requests
  load_s0.io.in <> io.ldin
  load_s0.io.redirect <> io.redirect
  load_s0.io.dtlb <> io.dtlb.req
  load_s0.io.dcache <> io.dcache.req

  PipelineConnect(load_s0.io.out, load_s1.io.in, load_s1.io.out.fire(), false.B)

  // stage 1: TLB response, physical address to dcache, forwarding queries
  // NOTE(review): load_s1.io.s1_kill and load_s1.io.s1_paddr are not consumed
  // anywhere in this module - confirm whether the dcache should receive them
  io.dcache.req.bits.paddr := load_s1.io.out.bits.paddr
  load_s1.io.redirect <> io.redirect
  load_s1.io.tlbFeedback <> io.tlbFeedback
  load_s1.io.dtlb <> io.dtlb.resp
  io.sbuffer <> load_s1.io.forward
  io.lsroq.forward <> load_s1.io.forward

  PipelineConnect(load_s1.io.out, load_s2.io.in, load_s2.io.out.fire(), false.B)

  // stage 2: dcache response plus forwarding results from sbuffer and lsroq
  load_s2.io.redirect <> io.redirect
  load_s2.io.dcache <> io.dcache.resp
  load_s2.io.sbuffer.forwardMask := io.sbuffer.forwardMask
  load_s2.io.sbuffer.forwardData := io.sbuffer.forwardData
  load_s2.io.lsroq.forwardMask := io.lsroq.forward.forwardMask
  load_s2.io.lsroq.forwardData := io.lsroq.forward.forwardData

  XSDebug(load_s0.io.out.valid,
    p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, " +
    p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
  // terminated with a newline so it does not run into the next log line
  XSDebug(load_s1.io.out.valid,
    p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, tlb_miss ${io.dtlb.resp.bits.miss}, " +
    p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n")

  // writeback to LSROQ
  // Current dcache use MSHR
  io.lsroq.loadIn.valid := load_s2.io.out.valid
  io.lsroq.loadIn.bits := load_s2.io.out.bits

  // a load that did not miss in stage 2 can be written back directly
  val hitLoadOut = Wire(Valid(new ExuOutput))
  hitLoadOut.valid := load_s2.io.out.valid && !load_s2.io.out.bits.miss
  hitLoadOut.bits.uop := load_s2.io.out.bits.uop
  hitLoadOut.bits.data := load_s2.io.out.bits.data
  hitLoadOut.bits.redirectValid := false.B
  hitLoadOut.bits.redirect := DontCare
  hitLoadOut.bits.brUpdate := DontCare
  hitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio

  // TODO: arbiter
  // if hit, writeback result to CDB
  // val ldout = Vec(2, Decoupled(new ExuOutput))
  // when io.loadIn(i).fire() && !io.io.loadIn(i).miss, commit load to cdb
  // val cdbArb = Module(new Arbiter(new ExuOutput, 2))
  // io.ldout <> cdbArb.io.out
  // hitLoadOut <> cdbArb.io.in(0)
  // io.lsroq.ldout <> cdbArb.io.in(1) // missLoadOut
  load_s2.io.out.ready := true.B
  // S2 hits have priority; the lsroq may only write back when there is no hit
  io.lsroq.ldout.ready := !hitLoadOut.valid
  // Select on hitLoadOut.valid: load_s2.io.out.ready is constant true, so using
  // it as the select would never let the lsroq data through.
  io.ldout.bits := Mux(hitLoadOut.valid, hitLoadOut.bits, io.lsroq.ldout.bits)
  io.ldout.valid := hitLoadOut.valid || io.lsroq.ldout.valid
  // NOTE(review): io.ldout.ready is not used for back-pressure here - confirm
  // the writeback port is always ready

  when(io.ldout.fire()){
    XSDebug("ldout %x iw %x fw %x\n", io.ldout.bits.uop.cf.pc, io.ldout.bits.uop.ctrl.rfWen, io.ldout.bits.uop.ctrl.fpWen)
  }
}