/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan

import chisel3._
import chisel3.util._
import xiangshan.backend._
22
import xiangshan.backend.fu.HasExceptionNO
23
import xiangshan.backend.exu.{ExuConfig, WbArbiter}
G
GouLingrui 已提交
24
import xiangshan.frontend._
25
import xiangshan.cache.mmu._
L
linjiawei 已提交
26
import chipsalliance.rocketchip.config
27
import chipsalliance.rocketchip.config.Parameters
28
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
L
LinJiawei 已提交
29
import freechips.rocketchip.tile.HasFPUParameters
30
import system.{HasSoCParameter, L1CacheErrorInfo, SoCParamsKey}
L
LinJiawei 已提交
31
import utils._
L
LinJiawei 已提交
32

33
/** Common base class for XiangShan hardware modules.
  *
  * Extends `MultiIOModule` and mixes in the core parameter accessors
  * (`HasXSParameter`), the exception-number definitions (`HasExceptionNO`)
  * and rocket-chip's `HasFPUParameters`.
  *
  * @param p implicit configuration the mixed-in parameter traits read from
  */
abstract class XSModule(implicit val p: Parameters) extends MultiIOModule
  with HasXSParameter
  with HasExceptionNO
  with HasFPUParameters {
  /** The module's I/O record; every concrete subclass has to provide it. */
  def io: Record
}
L
LinJiawei 已提交
39

40
/** Placeholder mixin for modules whose logic has not been written yet.
  *
  * It overrides `IO` so that every port created through it is tied to
  * `DontCare`, and prints a warning naming the module at elaboration time.
  * Remove this trait from a module once its logic is implemented.
  */
trait NeedImpl {
  this: RawModule =>
  /** Creates the port as usual, then drives all of it with `DontCare`.
    *
    * @param iodef the port definition to instantiate
    * @return the created port
    */
  override protected def IO[T <: Data](iodef: T): T = {
    println(s"[Warn]: (${this.name}) please remove 'NeedImpl' after implementing this module")
    val io = chisel3.experimental.IO(iodef)
    io <> DontCare
    io
  }
}

51
/** Common base class for XiangShan bundles; gives them access to the core
  * parameters through `HasXSParameter`.
  *
  * @param p implicit configuration the parameter trait reads from
  */
abstract class XSBundle(implicit val p: Parameters) extends Bundle
  with HasXSParameter

L
LinJiawei 已提交
54
/** Build-environment switches.
  *
  * NOTE(review): the class name is misspelt ("Enviroment"); it is kept as-is
  * because renaming it would break every user of the type.
  *
  * @param FPGAPlatform    defaults to `true`
  * @param EnableDebug     defaults to `false`
  * @param EnablePerfDebug defaults to `true`
  * @param DualCore        defaults to `false`
  */
case class EnviromentParameters
(
  FPGAPlatform: Boolean = true,
  EnableDebug: Boolean = false,
  EnablePerfDebug: Boolean = true,
  DualCore: Boolean = false
)

62
/** Diplomacy-level (LazyModule) description of one core.
  *
  * Instantiates the frontend, the PTW wrapper, the integer and floating-point
  * write-back arbiters, one `ExuBlock` per scheduler group except the last,
  * and a separate `Scheduler` plus `MemBlock` for the last (memory) group.
  * It also computes the dispatch-port and fast-wakeup-port tables that are
  * handed to those blocks.
  */
abstract class XSCoreBase()(implicit p: config.Parameters) extends LazyModule
  with HasXSParameter with HasExuWbMappingHelper
{
  // outer facing nodes
  val frontend = LazyModule(new Frontend())
  val ptw = LazyModule(new PTWWrapper())

  // Integer write-back: the execution units that write the int regfile, their
  // arbiter, and for every arbiter connection the configs sharing that port.
  val intConfigs = exuConfigs.filter(_.writeIntRf)
  val intArbiter = LazyModule(new WbArbiter(intConfigs, NRIntWritePorts, isFp = false))
  val intWbPorts = intArbiter.allConnections.map(c => c.map(intConfigs(_)))
  val numIntWbPorts = intWbPorts.length

  // Floating-point write-back, built the same way as the integer side.
  val fpConfigs = exuConfigs.filter(_.writeFpRf)
  val fpArbiter = LazyModule(new WbArbiter(fpConfigs, NRFpWritePorts, isFp = true))
  val fpWbPorts = fpArbiter.allConnections.map(c => c.map(fpConfigs(_)))
  val numFpWbPorts = fpWbPorts.length

  // TODO: better RS organization
  // generate rs according to number of function units
  require(exuParameters.JmpCnt == 1)
  require(exuParameters.MduCnt <= exuParameters.AluCnt && exuParameters.MduCnt > 0)
  require(exuParameters.FmiscCnt <= exuParameters.FmacCnt && exuParameters.FmiscCnt > 0)
  require(exuParameters.LduCnt == 2 && exuParameters.StuCnt == 2)

  // one RS every 2 MDUs
  // Outer Seq: one entry per scheduler group; the last entry is the memory group.
  val schedulePorts = Seq(
    // exuCfg, numDeq, intFastWakeupTarget, fpFastWakeupTarget
    Seq(
      (AluExeUnitCfg, exuParameters.AluCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg, JumpCSRExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq())
    ),
    Seq(
      (MulDivExeUnitCfg, exuParameters.MduCnt, Seq(AluExeUnitCfg, MulDivExeUnitCfg, JumpCSRExeUnitCfg, LdExeUnitCfg, StaExeUnitCfg), Seq()),
      (JumpCSRExeUnitCfg, 1, Seq(), Seq()),
      (StdExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    ),
    Seq(
      (FmacExeUnitCfg, exuParameters.FmacCnt, Seq(), Seq(FmacExeUnitCfg, FmiscExeUnitCfg)),
      (FmiscExeUnitCfg, exuParameters.FmiscCnt, Seq(), Seq())
    ),
    Seq(
      (LdExeUnitCfg, exuParameters.LduCnt, Seq(AluExeUnitCfg, LdExeUnitCfg), Seq()),
      (StaExeUnitCfg, exuParameters.StuCnt, Seq(), Seq())
    )
  )

  // should do outer fast wakeup ports here
  // For every scheduler group and every config in it, collect the sorted
  // fast-wakeup indices of the sources that list this config as a target.
  val otherFastPorts = schedulePorts.zipWithIndex.map { case (sche, i) =>
    // all configs that belong to the other scheduler groups
    val otherCfg = schedulePorts.zipWithIndex.filter(_._2 != i).map(_._1).reduce(_ ++ _)
    val outerPorts = sche.map(cfg => {
      // exe units from this scheduler need fastUops from exeunits
      val outerWakeupInSche = sche.filter(_._1.wakeupFromExu)
      val intraIntScheOuter = outerWakeupInSche.filter(_._3.contains(cfg._1)).map(_._1)
      val intraFpScheOuter = outerWakeupInSche.filter(_._4.contains(cfg._1)).map(_._1)
      // exe units from other schedulers need fastUop from outside
      val otherIntSource = otherCfg.filter(_._3.contains(cfg._1)).map(_._1)
      val otherFpSource = otherCfg.filter(_._4.contains(cfg._1)).map(_._1)
      val intSource = findInWbPorts(intWbPorts, intraIntScheOuter ++ otherIntSource)
      val fpSource = findInWbPorts(fpWbPorts, intraFpScheOuter ++ otherFpSource)
      getFastWakeupIndex(cfg._1, intSource, fpSource, numIntWbPorts).sorted
    })
    println(s"inter-scheduler wakeup sources for $i: $outerPorts")
    outerPorts
  }

  // allow mdu and fmisc to have 2*numDeq enqueue ports
  // Each dispatch port is a list of (index, port) pairs.
  // NOTE(review): the first element looks like an index into the matching
  // schedulePorts group and the second a port of that config - confirm against ExuBlock.
  val intDpPorts = (0 until exuParameters.AluCnt).map(i => Seq((0, i)))
  val int1DpPorts = (0 until exuParameters.MduCnt).map(i => {
    if (i < exuParameters.JmpCnt) Seq((0, i), (1, i))
    else Seq((0, i))
  }) ++ (0 until exuParameters.StuCnt).map(i => Seq((2, i)))
  val fpDpPorts = (0 until exuParameters.FmacCnt).map(i => {
    if (i < 2*exuParameters.FmiscCnt) Seq((0, i), (1, i))
    // NOTE(review): by analogy with int1DpPorts this branch looks like it
    // should be Seq((0, i)); left unchanged - confirm before touching.
    else Seq((1, i))
  })
  val lsDpPorts = Seq(
    Seq((0, 0)),
    Seq((0, 1)),
    Seq((1, 0)),
    Seq((1, 1))
  )
  val dispatchPorts = Seq(intDpPorts, int1DpPorts, fpDpPorts, lsDpPorts)

  // Per-group value passed as the last argument of ExuBlock / Scheduler.
  val outFpRfReadPorts = Seq(0, 0, 2, 0)
  // One ExuBlock per group except the last one, which is handled by
  // memScheduler below. `init.map` visits the same elements in the same order
  // as the previous `reverse.drop(1).reverseMap`.
  val exuBlocks = schedulePorts.zip(dispatchPorts).zip(otherFastPorts).zip(outFpRfReadPorts).init.map { case (((sche, disp), other), ofp) =>
    LazyModule(new ExuBlock(sche, disp, intWbPorts, fpWbPorts, other, ofp))
  }

  val memScheduler = LazyModule(new Scheduler(schedulePorts.last, dispatchPorts.last, intWbPorts, fpWbPorts, otherFastPorts.last, outFpRfReadPorts.last))
  // MemBlock sees a copy of the core parameters whose IssQueSize is the
  // largest entry of memScheduler.memRsEntries.
  val memBlock = LazyModule(new MemBlock()(p.alter((site, here, up) => {
    case XSCoreParamsKey => up(XSCoreParamsKey).copy(
      IssQueSize = memScheduler.memRsEntries.max
    )
  })))
}

/** Concrete core: `XSCoreBase` plus `HasXSDts`, implemented by [[XSCoreImp]]. */
class XSCore()(implicit p: config.Parameters) extends XSCoreBase
  with HasXSDts
{
  lazy val module = new XSCoreImp(this)
}

163
/** Module implementation of the core described by [[XSCoreBase]].
  *
  * Instantiates the control block, fetches the module implementations of the
  * blocks created in `outer`, and wires them together: write-back
  * arbitration, fast wakeup, dispatch into the schedulers, CSR/fence signals,
  * TLB/PTW plumbing and the per-block reset generators.
  *
  * Connections rely on Chisel's last-connect semantics in several places
  * (defaults followed by overrides), so statement order must be preserved.
  *
  * @param outer the LazyModule this implementation belongs to
  */
class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
  with HasXSParameter
  with HasSoCParameter
  with HasExeBlockHelper {
  val io = IO(new Bundle {
    val hartId = Input(UInt(64.W))
    val externalInterrupt = new ExternalInterruptIO
    val l2_pf_enable = Output(Bool())
    val l1plus_error, icache_error, dcache_error = Output(new L1CacheErrorInfo)
  })

  println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}")
  AddressSpace.checkMemmap()
  AddressSpace.printMemmap()

  val ctrlBlock = Module(new CtrlBlock)

  val frontend = outer.frontend.module
  val memBlock = outer.memBlock.module
  val ptw = outer.ptw.module
  val exuBlocks = outer.exuBlocks.map(_.module)
  val memScheduler = outer.memScheduler.module

  // Write-back ports of all execution blocks followed by those of the memory block.
  val allWriteback = exuBlocks.map(_.io.fuWriteback).fold(Seq())(_ ++ _) ++ memBlock.io.writeback

  // zip silently truncates to the shorter side, so check the lengths before pairing.
  require(exuConfigs.length == allWriteback.length)
  val intWriteback = allWriteback.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  // set default value for ready
  exuBlocks.foreach(_.io.fuWriteback.foreach(_.ready := true.B))
  memBlock.io.writeback.foreach(_.ready := true.B)

  // Integer arbiter: only results that do not write the fp regfile are offered;
  // the default ready above is overridden while the arbiter input is valid.
  val intArbiter = outer.intArbiter.module
  intArbiter.io.in.zip(intWriteback).foreach { case (arb, wb) =>
    arb.valid := wb.valid && !wb.bits.uop.ctrl.fpWen
    arb.bits := wb.bits
    when (arb.valid) {
      wb.ready := arb.ready
    }
  }

  // Floating-point arbiter: same scheme, for results with fpWen set.
  val fpArbiter = outer.fpArbiter.module
  val fpWriteback = allWriteback.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  fpArbiter.io.in.zip(fpWriteback).foreach{ case (arb, wb) =>
    arb.valid := wb.valid && wb.bits.uop.ctrl.fpWen
    arb.bits := wb.bits
    when (arb.valid) {
      wb.ready := arb.ready
    }
  }

  // Arbitrated write-back bus: integer outputs first, then floating-point.
  val rfWriteback = VecInit(intArbiter.io.out ++ fpArbiter.io.out)

  io.l1plus_error <> DontCare
  io.icache_error <> frontend.io.error
  io.dcache_error <> memBlock.io.error

  // Exactly one execution block hosts the Jump/CSR unit; it provides the
  // CSR and fence interfaces used below.
  require(exuBlocks.count(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)) == 1)
  val csrFenceMod = exuBlocks.filter(_.fuConfigs.map(_._1).contains(JumpCSRExeUnitCfg)).head
  val csrioIn = csrFenceMod.io.fuExtra.csrio.get
  val fenceio = csrFenceMod.io.fuExtra.fenceio.get

  frontend.io.backend <> ctrlBlock.io.frontend
  frontend.io.sfence <> fenceio.sfence
  frontend.io.tlbCsr <> csrioIn.tlb
  frontend.io.csrCtrl <> csrioIn.customCtrl
  frontend.io.fencei := fenceio.fencei

  ctrlBlock.io.csrCtrl <> csrioIn.customCtrl
  // Blocks containing at least one unit that can redirect, in reverse block order.
  val redirectBlocks = exuBlocks.reverse.filter(_.fuConfigs.exists(_._1.hasRedirect))
  ctrlBlock.io.exuRedirect <> redirectBlocks.map(_.io.fuExtra.exuRedirect).fold(Seq())(_ ++ _)
  ctrlBlock.io.stIn <> memBlock.io.stIn
  ctrlBlock.io.stOut <> memBlock.io.stOut
  ctrlBlock.io.memoryViolation <> memBlock.io.memoryViolation
  ctrlBlock.io.enqLsq <> memBlock.io.enqLsq
  ctrlBlock.io.writeback <> rfWriteback

  // Fast wakeup: pick, for every arbiter connection, the fast uop of the first
  // source on that connection; integer connections first, then floating-point.
  val allFastUop = exuBlocks.map(_.io.fastUopOut).fold(Seq())(_ ++ _) ++ memBlock.io.otherFastWakeup
  val intFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeIntRf).map(_._1)
  val fpFastUop = allFastUop.zip(exuConfigs).filter(_._2.writeFpRf).map(_._1)
  val intFastUop1 = outer.intArbiter.allConnections.map(c => intFastUop(c.head))
  val fpFastUop1 = outer.fpArbiter.allConnections.map(c => fpFastUop(c.head))
  val allFastUop1 = intFastUop1 ++ fpFastUop1

  // Default dispatch wiring; the loops below override the ports that are
  // shared between blocks or need an acceptance check.
  ctrlBlock.io.enqIQ <> exuBlocks(0).io.allocate ++ exuBlocks(2).io.allocate ++ memScheduler.io.allocate
  // The first MduCnt dispatch ports feed both block 0 and block 1; the
  // DispatchArbiter is given one canAccept predicate per block.
  for (i <- 0 until exuParameters.MduCnt) {
    val rsIn = VecInit(Seq(exuBlocks(0).io.allocate(i), exuBlocks(1).io.allocate(i)))
    val func1 = (op: MicroOp) => outer.exuBlocks(0).scheduler.canAccept(op.ctrl.fuType)
    val func2 = (op: MicroOp) => outer.exuBlocks(1).scheduler.canAccept(op.ctrl.fuType)
    val arbiterOut = DispatchArbiter(ctrlBlock.io.enqIQ(i), Seq(func1, func2))
    rsIn <> arbiterOut
  }
  // The remaining ALU ports only feed block 0, gated by its canAccept check.
  for (i <- exuParameters.MduCnt until exuParameters.AluCnt) {
    val rsIn = exuBlocks(0).io.allocate(i)
    val dpOut = ctrlBlock.io.enqIQ(i)
    rsIn.valid := dpOut.valid && outer.exuBlocks(0).scheduler.canAccept(dpOut.bits.ctrl.fuType)
    dpOut.ready := rsIn.ready && outer.exuBlocks(0).scheduler.canAccept(dpOut.bits.ctrl.fuType)
  }

  // The last two dispatch ports are sent to both the last two allocate ports of
  // block 1 (std) and of the memory scheduler (sta). Each side is only valid
  // when the other is ready, so both enqueue together. For the std copy,
  // source 0 is replaced by the uop's source 1.
  val stdAllocate = exuBlocks(1).io.allocate.takeRight(2)
  val staAllocate = memScheduler.io.allocate.takeRight(2)
  stdAllocate.zip(staAllocate).zip(ctrlBlock.io.enqIQ.takeRight(2)).zipWithIndex.foreach{ case (((std, sta), enq), i) =>
    std.valid := enq.valid && sta.ready
    sta.valid := enq.valid && std.ready
    std.bits := enq.bits
    sta.bits := enq.bits
    std.bits.ctrl.lsrc(0) := enq.bits.ctrl.lsrc(1)
    std.bits.psrc(0) := enq.bits.psrc(1)
    std.bits.srcState(0) := enq.bits.srcState(1)
    std.bits.ctrl.srcType(0) := enq.bits.ctrl.srcType(1)
    enq.ready := sta.ready && std.ready
    XSPerfAccumulate(s"st_rs_not_ready_$i", enq.valid && !enq.ready)
    XSPerfAccumulate(s"sta_rs_not_ready_$i", sta.valid && !sta.ready)
    XSPerfAccumulate(s"std_rs_not_ready_$i", std.valid && !std.ready)
  }
  exuBlocks(1).io.scheExtra.fpRfReadIn.get <> exuBlocks(2).io.scheExtra.fpRfReadOut.get

  memScheduler.io.redirect <> ctrlBlock.io.redirect
  memScheduler.io.flush <> ctrlBlock.io.flush
  memBlock.io.issue <> memScheduler.io.issue
  // By default, instructions do not have exceptions when they enter the function units.
  memBlock.io.issue.foreach(_.bits.uop.clearExceptions())
  memScheduler.io.writeback <> rfWriteback
  memScheduler.io.fastUopIn <> allFastUop1
  memScheduler.io.extra.jumpPc <> ctrlBlock.io.jumpPc
  memScheduler.io.extra.jalr_target <> ctrlBlock.io.jalr_target
  memScheduler.io.extra.stIssuePtr <> memBlock.io.stIssuePtr
  memScheduler.io.extra.loadFastMatch.get <> memBlock.io.loadFastMatch
  memScheduler.io.extra.debug_int_rat <> ctrlBlock.io.debug_int_rat
  memScheduler.io.extra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat

  // Signals shared by every execution block.
  exuBlocks.map(_.io).foreach { exu =>
    exu.redirect <> ctrlBlock.io.redirect
    exu.flush <> ctrlBlock.io.flush
    exu.rfWriteback <> rfWriteback
    exu.fastUopIn <> allFastUop1
    exu.scheExtra.jumpPc <> ctrlBlock.io.jumpPc
    exu.scheExtra.jalr_target <> ctrlBlock.io.jalr_target
    exu.scheExtra.stIssuePtr <> memBlock.io.stIssuePtr
    exu.scheExtra.debug_fp_rat <> ctrlBlock.io.debug_fp_rat
    exu.scheExtra.debug_int_rat <> ctrlBlock.io.debug_int_rat
  }
  XSPerfHistogram("fastIn_count", PopCount(allFastUop1.map(_.valid)), true.B, 0, allFastUop1.length, 1)
  XSPerfHistogram("wakeup_count", PopCount(rfWriteback.map(_.valid)), true.B, 0, rfWriteback.length, 1)

  // CSR inputs. `perf` is tied to DontCare first; the fields connected
  // afterwards override that default.
  csrioIn.hartId <> io.hartId
  csrioIn.perf <> DontCare
  csrioIn.perf.retiredInstr <> ctrlBlock.io.robio.toCSR.perfinfo.retiredInstr
  csrioIn.perf.ctrlInfo <> ctrlBlock.io.perfInfo.ctrlInfo
  csrioIn.perf.memInfo <> memBlock.io.memInfo
  csrioIn.perf.frontendInfo <> frontend.io.frontendInfo

  csrioIn.fpu.fflags <> ctrlBlock.io.robio.toCSR.fflags
  csrioIn.fpu.isIllegal := false.B
  csrioIn.fpu.dirty_fs <> ctrlBlock.io.robio.toCSR.dirty_fs
  csrioIn.fpu.frm <> exuBlocks(2).io.fuExtra.frm.get
  csrioIn.exception <> ctrlBlock.io.robio.exception
  csrioIn.isXRet <> ctrlBlock.io.robio.toCSR.isXRet
  csrioIn.trapTarget <> ctrlBlock.io.robio.toCSR.trapTarget
  csrioIn.interrupt <> ctrlBlock.io.robio.toCSR.intrBitSet
  csrioIn.memExceptionVAddr <> memBlock.io.lsqio.exceptionAddr.vaddr
  csrioIn.externalInterrupt <> io.externalInterrupt

  fenceio.sfence <> memBlock.io.sfence
  fenceio.sbuffer <> memBlock.io.fenceToSbuffer

  // Memory block control, replay feedback and store data.
  memBlock.io.redirect <> ctrlBlock.io.redirect
  memBlock.io.flush <> ctrlBlock.io.flush
  memBlock.io.replay <> memScheduler.io.extra.feedback.get.map(_.replay)
  memBlock.io.rsIdx <> memScheduler.io.extra.feedback.get.map(_.rsIdx)
  memBlock.io.isFirstIssue <> memScheduler.io.extra.feedback.get.map(_.isFirstIssue)
  // Blocks without a stData port contribute an empty Seq.
  val stData = exuBlocks.map(_.io.fuExtra.stData.getOrElse(Seq())).reduce(_ ++ _)
  memBlock.io.stData := stData
  memBlock.io.csrCtrl <> csrioIn.customCtrl
  memBlock.io.tlbCsr <> csrioIn.tlb
  memBlock.io.lsqio.rob <> ctrlBlock.io.robio.lsq
  memBlock.io.lsqio.exceptionAddr.lsIdx.lqIdx := ctrlBlock.io.robio.exception.bits.uop.lqIdx
  memBlock.io.lsqio.exceptionAddr.lsIdx.sqIdx := ctrlBlock.io.robio.exception.bits.uop.sqIdx
  memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.robio.exception.bits.uop.ctrl.commitType)

  // TLB requests from the frontend and the memory block reach the PTW
  // through a repeater and a filter respectively.
  val itlbRepeater = Module(new PTWRepeater(2))
  val dtlbRepeater = Module(new PTWFilter(LoadPipelineWidth + StorePipelineWidth, l2tlbParams.missQueueSize-1))
  itlbRepeater.io.tlb <> frontend.io.ptw
  dtlbRepeater.io.tlb <> memBlock.io.ptw
  itlbRepeater.io.sfence <> fenceio.sfence
  dtlbRepeater.io.sfence <> fenceio.sfence
  ptw.io.tlb(0) <> itlbRepeater.io.ptw
  ptw.io.tlb(1) <> dtlbRepeater.io.ptw
  ptw.io.sfence <> fenceio.sfence
  ptw.io.csr <> csrioIn.tlb

  // if l2 prefetcher use stream prefetch, it should be placed in XSCore
  io.l2_pf_enable := csrioIn.customCtrl.l2_pf_enable

  // Each group of blocks gets its own ResetGen.
  // NOTE(review): the first argument differs per group (2, 3, 4, 6, 7) and
  // presumably staggers reset release - confirm against ResetGen.
  val ptw_reset_gen = Module(new ResetGen(2, !debugOpts.FPGAPlatform))
  ptw.reset := ptw_reset_gen.io.out
  itlbRepeater.reset := ptw_reset_gen.io.out
  dtlbRepeater.reset := ptw_reset_gen.io.out

  val memBlock_reset_gen = Module(new ResetGen(3, !debugOpts.FPGAPlatform))
  memBlock.reset := memBlock_reset_gen.io.out
  memScheduler.reset := memBlock_reset_gen.io.out

  val exuBlock_reset_gen = Module(new ResetGen(4, !debugOpts.FPGAPlatform))
  exuBlocks.foreach(_.reset := exuBlock_reset_gen.io.out)

  val ctrlBlock_reset_gen = Module(new ResetGen(6, !debugOpts.FPGAPlatform))
  ctrlBlock.reset := ctrlBlock_reset_gen.io.out

  val frontend_reset_gen = Module(new ResetGen(7, !debugOpts.FPGAPlatform))
  frontend.reset := frontend_reset_gen.io.out
}