/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tile.HasFPUParameters
import utils._
import xiangshan._
import xiangshan.backend.exu.StdExeUnit
import xiangshan.backend.fu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu.{BTlbPtwIO, TLB, TlbReplace}
import xiangshan.mem._

/**
  * Pass-through function unit (presumably the "store data" unit, judging by the
  * `StdExeUnit` import -- confirm against its users).
  *
  * The unit is purely combinational: the output mirrors the input in the same cycle
  * and the input side is never stalled. `io.out.ready` is not consulted here.
  */
class Std(implicit p: Parameters) extends FunctionUnit {
  // Payload: source operand 0 is forwarded as the result, the micro-op is carried along unchanged.
  io.out.bits.data := io.in.bits.src(0)
  io.out.bits.uop := io.in.bits.uop

  // Handshake: output is valid exactly when the input is; the input is always accepted.
  io.out.valid := io.in.valid
  io.in.ready := true.B
}

/**
  * Diplomatic (LazyModule) wrapper of the memory block.
  *
  * It owns the two diplomatic children created here -- the data cache wrapper and the
  * uncache unit -- and defers all hardware construction to [[MemBlockImp]].
  * It also registers the block as a writeback source covering every load and store
  * execution-unit configuration.
  */
class MemBlock()(implicit p: Parameters) extends LazyModule
  with HasXSParameter with HasWritebackSource {

  val dcache = LazyModule(new DCacheWrapper())
  val uncache = LazyModule(new Uncache())

  lazy val module = new MemBlockImp(this)

  // One writeback-source parameter set; each load/store exu config forms its own
  // single-element group.
  override val writebackSourceParams: Seq[WritebackSourceParams] = {
    val params = new WritebackSourceParams
    params.exuConfigs = (loadExuConfigs ++ storeExuConfigs).map(cfg => Seq(cfg))
    Seq(params)
  }
  // The module implementation is what actually provides the writeback ports.
  override lazy val writebackSourceImp: HasWritebackSourceImp = module
}

/**
  * Hardware implementation of [[MemBlock]].
  *
  * This module instantiates and wires together:
  *  - `exuParameters.LduCnt` load units, `exuParameters.StuCnt` store (address) units and
  *    the same number of store-data execution units,
  *  - the atomics unit, which borrows load writeback port 0 for its result, store
  *    writeback port 0 for its exception info, and load unit 0's TLB port,
  *  - the load/store queue wrapper and the store buffer,
  *  - one DTLB and one PMP checker per load/store unit, plus the PTW request/response glue,
  *  - the memory-side debug trigger registers (6 slots; see `lTriggerMapping` /
  *    `sTriggerMapping` for which slots loads and stores use),
  *  - performance counters.
  *
  * NOTE: the body relies on Chisel last-connect semantics (bulk connections that are
  * selectively overridden afterwards), so statement order matters.
  *
  * @param outer the diplomatic wrapper providing the dcache and uncache modules
  */
class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasFPUParameters
  with HasWritebackSourceImp
  with HasPerfEvents
{

  val io = IO(new Bundle {
    val hartId = Input(UInt(8.W))
    val redirect = Flipped(ValidIO(new Redirect))
    // in
    val issue = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, Flipped(DecoupledIO(new ExuInput)))
    val loadFastMatch = Vec(exuParameters.LduCnt, Input(UInt(exuParameters.LduCnt.W)))
    val rsfeedback = Vec(exuParameters.LsExuCnt, new MemRSFeedbackIO)
    val stIssuePtr = Output(new SqPtr())
    // out
    val writeback = Vec(exuParameters.LsExuCnt + exuParameters.StuCnt, DecoupledIO(new ExuOutput))
    val otherFastWakeup = Vec(exuParameters.LduCnt + 2 * exuParameters.StuCnt, ValidIO(new MicroOp))
    // misc
    val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
    val memoryViolation = ValidIO(new Redirect)
    val ptw = new BTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt)
    val sfence = Input(new SfenceBundle)
    val tlbCsr = Input(new TlbCsrBundle)
    val fenceToSbuffer = Flipped(new FenceToSbuffer)
    val enqLsq = new LsqEnqIO
    val memPredUpdate = Vec(exuParameters.StuCnt, Input(new MemPredUpdateReq))
    val lsqio = new Bundle {
      val exceptionAddr = new ExceptionAddrIO // to csr
      val rob = Flipped(new RobLsqIO) // rob to lsq
    }
    val csrCtrl = Flipped(new CustomCSRCtrlIO)
    val csrUpdate = new DistributedCSRUpdateReq
    val error = new L1CacheErrorInfo
    val memInfo = new Bundle {
      val sqFull = Output(Bool())
      val lqFull = Output(Bool())
      val dcacheMSHRFull = Output(Bool())
    }
    val perfEventsPTW = Input(Vec(19, new PerfEvent))
    val lqCancelCnt = Output(UInt(log2Up(LoadQueueSize + 1).W))
    val sqCancelCnt = Output(UInt(log2Up(StoreQueueSize + 1).W))
    val sqDeq = Output(UInt(2.W))
  })

  // All writeback ports of this block form a single writeback group.
  override def writebackSource1: Option[Seq[Seq[DecoupledIO[ExuOutput]]]] = Some(Seq(io.writeback))

  val dcache = outer.dcache.module
  val uncache = outer.uncache.module

  // CSR control is delayed by two cycles before it is distributed inside the block.
  val csrCtrl = DelayN(io.csrCtrl, 2)
  dcache.io.csr.distribute_csr <> csrCtrl.distribute_csr
  io.csrUpdate := RegNext(dcache.io.csr.update)
  io.error <> RegNext(RegNext(dcache.io.error))
  // Cache error reporting is masked unless enabled through the CSR.
  when(!csrCtrl.cache_error_enable){
    io.error.report_to_beu := false.B
    io.error.valid := false.B
  }

  val loadUnits = Seq.fill(exuParameters.LduCnt)(Module(new LoadUnit))
  val storeUnits = Seq.fill(exuParameters.StuCnt)(Module(new StoreUnit))
  val stdExeUnits = Seq.fill(exuParameters.StuCnt)(Module(new StdExeUnit))
  val stData = stdExeUnits.map(_.io.out)
  val exeUnits = loadUnits ++ storeUnits

  loadUnits.zipWithIndex.foreach { case (unit, idx) => unit.suggestName(s"LoadUnit_$idx") }
  storeUnits.zipWithIndex.foreach { case (unit, idx) => unit.suggestName(s"StoreUnit_$idx") }

  val atomicsUnit = Module(new AtomicsUnit)

  // Atom inst comes from sta / std, then its result
  // will be written back using the load writeback port
  //
  // However, atom exception will be written back to rob
  // using the store writeback port

  val loadWritebackOverride  = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out.bits, loadUnits.head.io.ldout.bits)
  val ldOut0 = Wire(Decoupled(new ExuOutput))
  ldOut0.valid := atomicsUnit.io.out.valid || loadUnits.head.io.ldout.valid
  ldOut0.bits  := loadWritebackOverride
  atomicsUnit.io.out.ready := ldOut0.ready
  loadUnits.head.io.ldout.ready := ldOut0.ready
  when(atomicsUnit.io.out.valid){
    ldOut0.bits.uop.cf.exceptionVec := 0.U(16.W).asBools // exception will be written back via store wb port
  }

  // Writeback port layout: loads (port 0 shared with atomics), then store units, then store-data units.
  val ldExeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
  io.writeback <> ldExeWbReqs ++ VecInit(storeUnits.map(_.io.stout)) ++ VecInit(stdExeUnits.map(_.io.out))
  io.otherFastWakeup := DontCare
  // Only the load units drive fast wakeup; one entry per load unit (was a hard-coded 2).
  io.otherFastWakeup.take(exuParameters.LduCnt).zip(loadUnits.map(_.io.fastUop)).foreach{case(a,b)=> a := b}
  val stOut = io.writeback.drop(exuParameters.LduCnt).dropRight(exuParameters.StuCnt)

  // TODO: fast load wakeup
  val lsq     = Module(new LsqWrappper)
  val sbuffer = Module(new Sbuffer)
  // if you want to stress test dcache store, use FakeSbuffer
  // val sbuffer = Module(new FakeSbuffer)
  io.stIssuePtr := lsq.io.issuePtrExt

  dcache.io.hartId := io.hartId
  lsq.io.hartId := io.hartId
  sbuffer.io.hartId := io.hartId
  atomicsUnit.io.hartId := io.hartId

  // dtlb
  val sfence = RegNext(io.sfence)
  val tlbcsr = RegNext(io.tlbCsr)
  val dtlb_ld = VecInit(Seq.fill(exuParameters.LduCnt){
    val tlb_ld = Module(new TLB(1, ldtlbParams))
    tlb_ld.io // let the module have name in waveform
  })
  val dtlb_st = VecInit(Seq.fill(exuParameters.StuCnt){
    val tlb_st = Module(new TLB(1 , sttlbParams))
    tlb_st.io // let the module have name in waveform
  })
  dtlb_ld.foreach(_.sfence := sfence)
  dtlb_st.foreach(_.sfence := sfence)
  dtlb_ld.foreach(_.csr := tlbcsr)
  dtlb_st.foreach(_.csr := tlbcsr)
  if (refillBothTlb) {
    // A shared replacer only makes sense when both TLB kinds use out-of-TLB replacement.
    require(ldtlbParams.outReplace == sttlbParams.outReplace)
    require(ldtlbParams.outReplace)

    val replace = Module(new TlbReplace(exuParameters.LduCnt + exuParameters.StuCnt, ldtlbParams))
    replace.io.apply_sep(dtlb_ld.map(_.replace) ++ dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
  } else {
    if (ldtlbParams.outReplace) {
      val replace_ld = Module(new TlbReplace(exuParameters.LduCnt, ldtlbParams))
      replace_ld.io.apply_sep(dtlb_ld.map(_.replace), io.ptw.resp.bits.data.entry.tag)
    }
    if (sttlbParams.outReplace) {
      val replace_st = Module(new TlbReplace(exuParameters.StuCnt, sttlbParams))
      replace_st.io.apply_sep(dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
    }
  }
  val dtlb = dtlb_ld ++ dtlb_st

  // PTW response is registered for one cycle; it is dropped when an sfence with a
  // changed satp is seen in the same cycle.
  val ptw_resp_next = RegEnable(io.ptw.resp.bits, io.ptw.resp.valid)
  val ptw_resp_v = RegNext(io.ptw.resp.valid && !(sfence.valid && tlbcsr.satp.changed), init = false.B)
  io.ptw.resp.ready := true.B

  (dtlb_ld.map(_.ptw.req) ++ dtlb_st.map(_.ptw.req)).zipWithIndex.foreach{ case (tlb, i) =>
    tlb(0) <> io.ptw.req(i)
    val vector_hit = if (refillBothTlb) Cat(ptw_resp_next.vector).orR
      else if (i < exuParameters.LduCnt) Cat(ptw_resp_next.vector.take(exuParameters.LduCnt)).orR
      else Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt)).orR
    // Suppress a PTW request whose page is being refilled by the registered response.
    io.ptw.req(i).valid := tlb(0).valid && !(ptw_resp_v && vector_hit &&
      ptw_resp_next.data.entry.hit(tlb(0).bits.vpn, tlbcsr.satp.asid, allType = true, ignoreAsid = true))
  }
  dtlb_ld.foreach(_.ptw.resp.bits := ptw_resp_next.data)
  dtlb_st.foreach(_.ptw.resp.bits := ptw_resp_next.data)
  if (refillBothTlb) {
    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector).orR)
  } else {
    dtlb_ld.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.take(exuParameters.LduCnt)).orR)
    dtlb_st.foreach(_.ptw.resp.valid := ptw_resp_v && Cat(ptw_resp_next.vector.drop(exuParameters.LduCnt)).orR)
  }


  // pmp
  val pmp = Module(new PMP())
  pmp.io.distribute_csr <> csrCtrl.distribute_csr

  // One PMP checker per DTLB, in the same order as `dtlb` (loads first, then stores).
  val pmp_check = VecInit(Seq.fill(exuParameters.LduCnt + exuParameters.StuCnt)(Module(new PMPChecker(3)).io))
  for ((p,d) <- pmp_check zip dtlb.map(_.pmp(0))) {
    p.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, d)
    require(p.req.bits.size.getWidth == d.bits.size.getWidth)
  }
  // PMP check of the physical page returned by the PTW, fed back to the TLBs on refill.
  val pmp_check_ptw = Module(new PMPCheckerv2(lgMaxSize = 3, sameCycle = false, leaveHitMux = true))
  pmp_check_ptw.io.apply(tlbcsr.priv.dmode, pmp.io.pmp, pmp.io.pma, io.ptw.resp.valid,
    Cat(io.ptw.resp.bits.data.entry.ppn, 0.U(12.W)).asUInt)
  dtlb_ld.foreach(_.ptw_replenish := pmp_check_ptw.io.resp)
  dtlb_st.foreach(_.ptw_replenish := pmp_check_ptw.io.resp)

  // Memory trigger state: 6 local slots, written through csrCtrl.mem_trigger.
  val tdata = RegInit(VecInit(Seq.fill(6)(0.U.asTypeOf(new MatchTriggerIO))))
  val tEnable = RegInit(VecInit(Seq.fill(6)(false.B)))
  val en = csrCtrl.trigger_enable
  tEnable := VecInit(en(2), en (3), en(7), en(4), en(5), en(9))
  when(csrCtrl.mem_trigger.t.valid) {
    tdata(csrCtrl.mem_trigger.t.bits.addr) := csrCtrl.mem_trigger.t.bits.tdata
  }
  // Per-unit trigger index (0..2) -> local slot in tdata / tEnable / backendHit.
  val lTriggerMapping = Map(0 -> 2, 1 -> 3, 2 -> 5)
  val sTriggerMapping = Map(0 -> 0, 1 -> 1, 2 -> 4)
  val lChainMapping = Map(0 -> 2)
  val sChainMapping = Map(0 -> 1)
  XSDebug(tEnable.asUInt.orR, "Debug Mode: At least one store trigger is enabled\n")
  for(j <- 0 until 3)
    PrintTriggerInfo(tEnable(j), tdata(j))

  // LoadUnit
  for (i <- 0 until exuParameters.LduCnt) {
    loadUnits(i).io.redirect <> io.redirect
    loadUnits(i).io.feedbackSlow <> io.rsfeedback(i).feedbackSlow
    loadUnits(i).io.feedbackFast <> io.rsfeedback(i).feedbackFast
    loadUnits(i).io.rsIdx := io.rsfeedback(i).rsIdx
    loadUnits(i).io.isFirstIssue := io.rsfeedback(i).isFirstIssue // NOTE: just for dtlb's perf cnt
    loadUnits(i).io.loadFastMatch <> io.loadFastMatch(i)
    // get input from dispatch
    loadUnits(i).io.ldin <> io.issue(i)
    // dcache access
    loadUnits(i).io.dcache <> dcache.io.lsu.load(i)
    // forward
    loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
    loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
    // ld-ld violation check
    loadUnits(i).io.lsq.loadViolationQuery <> lsq.io.loadViolationQuery(i)
    loadUnits(i).io.csrCtrl       <> csrCtrl
    // dtlb
    loadUnits(i).io.tlb <> dtlb_ld(i).requestor(0)
    // pmp
    loadUnits(i).io.pmp <> pmp_check(i).resp

    // load to load fast forward
    for (j <- 0 until exuParameters.LduCnt) {
      loadUnits(i).io.fastpathIn(j) <> loadUnits(j).io.fastpathOut
    }

    // Lsq to load unit's rs

    // passdown to lsq
    lsq.io.loadIn(i) <> loadUnits(i).io.lsq.loadIn
    lsq.io.ldout(i) <> loadUnits(i).io.lsq.ldout
    lsq.io.loadDataForwarded(i) <> loadUnits(i).io.lsq.loadDataForwarded
    lsq.io.trigger(i) <> loadUnits(i).io.lsq.trigger

    // update mem dependency predictor
    // NOTE(review): io.memPredUpdate is an Input Vec of size StuCnt but is indexed here
    // with a load index and assigned DontCare -- confirm this is intended.
    io.memPredUpdate(i) := DontCare
    lsq.io.dcacheRequireReplay(i)    <> loadUnits(i).io.lsq.dcacheRequireReplay

    // Trigger Regs
    // Load triggers live in the slots given by lTriggerMapping (2, 3, 5);
    // store triggers live in the slots given by sTriggerMapping (0, 1, 4).
//    for (j <- 0 until 10) {
//      io.writeback(i).bits.uop.cf.trigger.triggerHitVec(j) := false.B
//      io.writeback(i).bits.uop.cf.trigger.triggerTiming(j) := false.B
//      if (lChainMapping.contains(j)) io.writeback(i).bits.uop.cf.trigger.triggerChainVec(j) := false.B
//    }

    // --------------------------------
    // Load Triggers
    // --------------------------------
    val hit = Wire(Vec(3, Bool()))
    for (j <- 0 until 3) {
      val slot = lTriggerMapping(j)
      // Compare value, match type and enable all come from the same mapped slot
      // (tdata2 used to be read from tdata(j + 3), disagreeing with the other two).
      loadUnits(i).io.trigger(j).tdata2 := tdata(slot).tdata2
      loadUnits(i).io.trigger(j).matchType := tdata(slot).matchType
      loadUnits(i).io.trigger(j).tEnable := tEnable(slot)
      // Just let load triggers that match data unavailable: an address hit only counts
      // when the mapped trigger is NOT configured for data matching (select low),
      // which is the same polarity the store path uses.
      // Possible future form:
      //   Mux(tdata(slot).select, loadUnits(i).io.trigger(j).lastDataHit, loadUnits(i).io.trigger(j).addrHit)
      hit(j) := loadUnits(i).io.trigger(j).addrHit && !tdata(slot).select
      io.writeback(i).bits.uop.cf.trigger.backendHit(slot) := hit(j)
//      io.writeback(i).bits.uop.cf.trigger.backendTiming(lTriggerMapping(j)) := tdata(lTriggerMapping(j)).timing
      //      if (lChainMapping.contains(j)) io.writeback(i).bits.uop.cf.trigger.triggerChainVec(lChainMapping(j)) := hit && tdata(j+3).chain
    }
    // When slot 2 is chained, load triggers 0 and 1 (slots 2 and 3) only hit together.
    when(tdata(2).chain) {
      io.writeback(i).bits.uop.cf.trigger.backendHit(2) := hit(0) && hit(1)
      io.writeback(i).bits.uop.cf.trigger.backendHit(3) := hit(0) && hit(1)
    }
    when(!io.writeback(i).bits.uop.cf.trigger.backendEn(1)) {
      io.writeback(i).bits.uop.cf.trigger.backendHit(5) := false.B
    }

    XSDebug(io.writeback(i).bits.uop.cf.trigger.getHitBackend && io.writeback(i).valid, p"Debug Mode: Load Inst No.${i}" +
    p"has trigger hit vec ${io.writeback(i).bits.uop.cf.trigger.backendHit}\n")

  }

  // StoreUnit
  for (i <- 0 until exuParameters.StuCnt) {
    val stu = storeUnits(i)

    // Store-data units take the issue ports after all load and store-address ports.
    stdExeUnits(i).io.redirect <> io.redirect
    stdExeUnits(i).io.fromInt <> io.issue(i + exuParameters.LduCnt + exuParameters.StuCnt)
    stdExeUnits(i).io.fromFp := DontCare
    stdExeUnits(i).io.out := DontCare

    stu.io.redirect     <> io.redirect
    stu.io.feedbackSlow <> io.rsfeedback(exuParameters.LduCnt + i).feedbackSlow
    stu.io.rsIdx        <> io.rsfeedback(exuParameters.LduCnt + i).rsIdx
    // NOTE: just for dtlb's perf cnt
    stu.io.isFirstIssue <> io.rsfeedback(exuParameters.LduCnt + i).isFirstIssue
    stu.io.stin         <> io.issue(exuParameters.LduCnt + i)
    stu.io.lsq          <> lsq.io.storeIn(i)
    stu.io.lsq_replenish <> lsq.io.storeInRe(i)
    // dtlb
    stu.io.tlb          <> dtlb_st(i).requestor(0)
    stu.io.pmp          <> pmp_check(i+exuParameters.LduCnt).resp

    // store unit does not need fast feedback
    io.rsfeedback(exuParameters.LduCnt + i).feedbackFast := DontCare

    // Store data into the Lsq
    lsq.io.storeDataIn(i) := stData(i)

    // 1. sync issue info to store set LFST
    // 2. when store issue, broadcast issued sqPtr to wake up the following insts
    io.stIn(i).valid := io.issue(exuParameters.LduCnt + i).valid
    io.stIn(i).bits := io.issue(exuParameters.LduCnt + i).bits

    stu.io.stout.ready := true.B

    // store vaddr
    when(stOut(i).fire()){
      val hit = Wire(Vec(3, Bool()))
      for (j <- 0 until 3) {
        val slot = sTriggerMapping(j)
        // Address matching only applies when the trigger is not in data-select mode.
        // Each element is driven independently: previously the `.otherwise` branch
        // reset the whole vector, so one data-select trigger cleared the hits of
        // the other triggers as well.
        hit(j) := !tdata(slot).select &&
          TriggerCmp(stOut(i).bits.data, tdata(slot).tdata2, tdata(slot).matchType, tEnable(slot))
        when(!tdata(slot).select) {
          stOut(i).bits.uop.cf.trigger.backendHit(slot) := hit(j)
//         stOut(i).bits.uop.cf.trigger.backendTiming(sTriggerMapping(j)) := tdata(sTriggerMapping(j)).timing
//          if (sChainMapping.contains(j)) stOut(i).bits.uop.cf.trigger.triggerChainVec(sChainMapping(j)) := hit && tdata(j + 3).chain
        }
      }

      // Loop-invariant guard, applied once after all per-trigger assignments.
      when(!stOut(i).bits.uop.cf.trigger.backendEn(0)) {
        stOut(i).bits.uop.cf.trigger.backendHit(4) := false.B
      }
    }
    // store data
//    when(lsq.io.storeDataIn(i).fire()){
//
//      val hit = Wire(Vec(3, Bool()))
//      for (j <- 0 until 3) {
//        when(tdata(sTriggerMapping(j)).select) {
//          hit(j) := TriggerCmp(lsq.io.storeDataIn(i).bits.data, tdata(sTriggerMapping(j)).tdata2, tdata(sTriggerMapping(j)).matchType, tEnable(sTriggerMapping(j)))
//          lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendHit(sTriggerMapping(j)) := hit(j)
//          lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendTiming(sTriggerMapping(j)) := tdata(sTriggerMapping(j)).timing
////          if (sChainMapping.contains(j)) lsq.io.storeDataIn(i).bits.uop.cf.trigger.triggerChainVec(sChainMapping(j)) := hit && tdata(j + 3).chain
//        }
//      }
//
//      when(tdata(0).chain) {
//        lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendHit(0) := hit(0) && hit(1)
//        lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendHit(1) := hit(0) && hit(1)
//      }
//      when(lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendEn(1)) {
//        lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendHit(4) := Mux(io.writeback(i).bits.uop.cf.trigger.backendConsiderTiming(1),
//          tdata(4).timing === lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendChainTiming(1), true.B) && hit(2)
//      } .otherwise {
//        lsq.io.storeDataIn(i).bits.uop.cf.trigger.backendHit(4) := false.B
//      }
//    }
  }

  // mmio store writeback will use store writeback port 0
  lsq.io.mmioStout.ready := false.B
  when (lsq.io.mmioStout.valid && !storeUnits(0).io.stout.valid) {
    stOut(0).valid := true.B
    stOut(0).bits  := lsq.io.mmioStout.bits
    lsq.io.mmioStout.ready := true.B
  }

  // atom inst will use store writeback port 0 to writeback exception info
  when (atomicsUnit.io.out.valid) {
    stOut(0).valid := true.B
    stOut(0).bits  := atomicsUnit.io.out.bits
    assert(!lsq.io.mmioStout.valid && !storeUnits(0).io.stout.valid)
  }

  // Lsq
  lsq.io.rob            <> io.lsqio.rob
  lsq.io.enq            <> io.enqLsq
  lsq.io.brqRedirect    <> io.redirect
  io.memoryViolation    <> lsq.io.rollback
  lsq.io.uncache        <> uncache.io.lsq
  // delay dcache refill for 1 cycle for better timing
  // TODO: remove RegNext after fixing refill paddr timing
  // lsq.io.dcache         <> dcache.io.lsu.lsq
  lsq.io.dcache         := RegNext(dcache.io.lsu.lsq)
  lsq.io.release        := dcache.io.lsu.release
  lsq.io.lqCancelCnt <> io.lqCancelCnt
  lsq.io.sqCancelCnt <> io.sqCancelCnt
  lsq.io.sqDeq <> io.sqDeq

  // LSQ to store buffer
  lsq.io.sbuffer        <> sbuffer.io.in
  lsq.io.sqempty        <> sbuffer.io.sqempty

  // Sbuffer
  sbuffer.io.csrCtrl    <> csrCtrl
  sbuffer.io.dcache     <> dcache.io.lsu.store
  // TODO: if dcache sbuffer resp needs to be delayed
  // sbuffer.io.dcache.pipe_resp.valid := RegNext(dcache.io.lsu.store.pipe_resp.valid)
  // sbuffer.io.dcache.pipe_resp.bits := RegNext(dcache.io.lsu.store.pipe_resp.bits)

  // flush sbuffer
  val fenceFlush = io.fenceToSbuffer.flushSb
  val atomicsFlush = atomicsUnit.io.flush_sbuffer.valid
  io.fenceToSbuffer.sbIsEmpty := RegNext(sbuffer.io.flush.empty)
  // if both of them try to flush sbuffer at the same time
  // something must have gone wrong
  assert(!(fenceFlush && atomicsFlush))
  sbuffer.io.flush.valid := RegNext(fenceFlush || atomicsFlush)

  // AtomicsUnit: AtomicsUnit will override other control signals,
  // as atomics insts (LR/SC/AMO) will block the pipeline
  val s_normal :: s_atomics_0 :: s_atomics_1 :: Nil = Enum(3)
  val state = RegInit(s_normal)

  // Issue / feedback port indices of store unit 0 and store unit 1.
  val atomic_rs0  = exuParameters.LduCnt + 0
  val atomic_rs1  = exuParameters.LduCnt + 1
  val st0_atomics = io.issue(atomic_rs0).valid && FuType.storeIsAMO(io.issue(atomic_rs0).bits.uop.ctrl.fuType)
  val st1_atomics = io.issue(atomic_rs1).valid && FuType.storeIsAMO(io.issue(atomic_rs1).bits.uop.ctrl.fuType)

  val st0_data_atomics = stData(0).valid && FuType.storeIsAMO(stData(0).bits.uop.ctrl.fuType)
  val st1_data_atomics = stData(1).valid && FuType.storeIsAMO(stData(1).bits.uop.ctrl.fuType)

  // An atomic issued on a store port is redirected to the atomics unit instead of
  // the store unit; `state` remembers which port it came from.
  when (st0_atomics) {
    io.issue(atomic_rs0).ready := atomicsUnit.io.in.ready
    storeUnits(0).io.stin.valid := false.B

    state := s_atomics_0
    assert(!st1_atomics)
  }
  when (st1_atomics) {
    io.issue(atomic_rs1).ready := atomicsUnit.io.in.ready
    storeUnits(1).io.stin.valid := false.B

    state := s_atomics_1
    assert(!st0_atomics)
  }
  when (atomicsUnit.io.out.valid) {
    assert(state === s_atomics_0 || state === s_atomics_1)
    state := s_normal
  }

  atomicsUnit.io.in.valid := st0_atomics || st1_atomics
  atomicsUnit.io.in.bits  := Mux(st0_atomics, io.issue(atomic_rs0).bits, io.issue(atomic_rs1).bits)
  atomicsUnit.io.storeDataIn.valid := st0_data_atomics || st1_data_atomics
  atomicsUnit.io.storeDataIn.bits  := Mux(st0_data_atomics, stData(0).bits, stData(1).bits)
  atomicsUnit.io.rsIdx    := Mux(st0_atomics, io.rsfeedback(atomic_rs0).rsIdx, io.rsfeedback(atomic_rs1).rsIdx)
  atomicsUnit.io.redirect <> io.redirect

  // TODO: complete amo's pmp support
  val amoTlb = dtlb_ld(0).requestor(0)
  atomicsUnit.io.dtlb.resp.valid := false.B
  atomicsUnit.io.dtlb.resp.bits  := DontCare
  atomicsUnit.io.dtlb.req.ready  := amoTlb.req.ready
  atomicsUnit.io.pmpResp := pmp_check(0).resp

  atomicsUnit.io.dcache <> dcache.io.lsu.atomics
  atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty

  atomicsUnit.io.csrCtrl := csrCtrl

  // for atomicsUnit, it uses loadUnit(0)'s TLB port

  when (state === s_atomics_0 || state === s_atomics_1) {
    loadUnits(0).io.ldout.ready := false.B
    atomicsUnit.io.dtlb <> amoTlb

    // make sure there's no in-flight uops in load unit
    assert(!loadUnits(0).io.ldout.valid)
  }

  // Reservation-station feedback goes back on the port the atomic was issued from.
  when (state === s_atomics_0) {
    atomicsUnit.io.feedbackSlow <> io.rsfeedback(atomic_rs0).feedbackSlow

    assert(!storeUnits(0).io.feedbackSlow.valid)
  }
  when (state === s_atomics_1) {
    atomicsUnit.io.feedbackSlow <> io.rsfeedback(atomic_rs1).feedbackSlow

    assert(!storeUnits(1).io.feedbackSlow.valid)
  }

  lsq.io.exceptionAddr.isStore := io.lsqio.exceptionAddr.isStore
  // Exception address is used several cycles after flush.
  // We delay it by 10 cycles to ensure its flush safety.
  val atomicsException = RegInit(false.B)
  when (DelayN(io.redirect.valid, 10) && atomicsException) {
    atomicsException := false.B
  }.elsewhen (atomicsUnit.io.exceptionAddr.valid) {
    atomicsException := true.B
  }
  val atomicsExceptionAddress = RegEnable(atomicsUnit.io.exceptionAddr.bits, atomicsUnit.io.exceptionAddr.valid)
  io.lsqio.exceptionAddr.vaddr := Mux(atomicsException, atomicsExceptionAddress, lsq.io.exceptionAddr.vaddr)
  XSError(atomicsException && atomicsUnit.io.in.valid, "new instruction before exception triggers\n")

  io.memInfo.sqFull := RegNext(lsq.io.sqFull)
  io.memInfo.lqFull := RegNext(lsq.io.lqFull)
  io.memInfo.dcacheMSHRFull := RegNext(dcache.io.mshrFull)

  // Issue ports [0, LduCnt) are loads; everything after is store address / store data
  // (the split used to be a hard-coded 2).
  val ldDeqCount = PopCount(io.issue.take(exuParameters.LduCnt).map(_.valid))
  val stDeqCount = PopCount(io.issue.drop(exuParameters.LduCnt).map(_.valid))
  val rsDeqCount = ldDeqCount + stDeqCount
  XSPerfAccumulate("load_rs_deq_count", ldDeqCount)
  XSPerfHistogram("load_rs_deq_count", ldDeqCount, true.B, 1, 2, 1)
  XSPerfAccumulate("store_rs_deq_count", stDeqCount)
  XSPerfHistogram("store_rs_deq_count", stDeqCount, true.B, 1, 2, 1)
  XSPerfAccumulate("ls_rs_deq_count", rsDeqCount)

  val pfevent = Module(new PFEvent)
  pfevent.io.distribute_csr := csrCtrl.distribute_csr
  val csrevents = pfevent.io.hpmevent.slice(16,24)

  // Local events first, then the events of the load units, sbuffer, lsq and dcache,
  // followed by the PTW events passed in from outside.
  val memBlockPerfEvents = Seq(
    ("ldDeqCount", ldDeqCount),
    ("stDeqCount", stDeqCount),
  )
  val allPerfEvents = memBlockPerfEvents ++ (loadUnits ++ Seq(sbuffer, lsq, dcache)).flatMap(_.getPerfEvents)
  val hpmEvents = allPerfEvents.map(_._2.asTypeOf(new PerfEvent)) ++ io.perfEventsPTW
  val perfEvents = HPerfMonitor(csrevents, hpmEvents).getPerfEvents
  generatePerfEvent()
}