Unverified commit a0301c0d, authored by Lemover and committed by GitHub

l0tlb: add a new level tlb, a load tlb and a store tlb (#961)

* Revert "Revert "l0tlb: add a new level tlb to each mem pipeline (#936)" (#945)"

This reverts commit b052b972.

* fu: remove unused import

* mmu.tlb: the 2 load/store pipelines share 1 dtlb

* mmu: remove btlb, the l1-tlb

* mmu: set split-tlb to 32 to check perf effect

* mmu: wrap tlb's param with TLBParameters

* mmu: add params 'useBTlb'

dtlb size is small: normal 8, super 2

* mmu.tlb: add Bundle TlbEntry, simplify tlb hit logic(coding)

* mmu.tlb: separate tlb's storage and the related hit/sfence logic

tlb now supports fully-associative, set-associative and direct-mapped storage.
more: change tlb's parameter usage and change util.Random to support
the case where mod is 1.

* mmu.tlb: support normalAsVictim, super(fa) -> normal(sa/da)

be careful when using tlb's parameters; only a subset of the parameter
combinations is supported

* mmu.tlb: fix bug of hit method and victim write

* mmu.tlb: add tlb storage's perf counter

* mmu.tlb: rewrite replace part, support set or non-set

* mmu.tlb: add param outReplace to receive out replace index

* mmu.tlb: change param superSize to superNWays

add param superNSets, which should always be 1

* mmu.tlb: change some perf counter's name and change some params

* mmu.tlb: fix bug of replace io bundle

* mmu.tlb: remove unused signal wayIdx in tlbstorageio

* mmu.tlb: separate tlb_ld/st into two identical TLBs

* mmu.tlb: when nWays is 1, replace returns 0.U

previously, replace would return 1.U; this has no influence on refill but is bad
for the perf counters

* mmu.tlb: give tlb_ld and tlb_st a name (in waveform)
Parent a260c31a
......@@ -32,7 +32,7 @@ import xiangshan.backend.exu.ExuParameters
import xiangshan.backend.dispatch.DispatchParameters
import xiangshan.cache.{DCacheParameters, L1plusCacheParameters}
import xiangshan.cache.prefetch.{BOPParameters, L1plusPrefetcherParameters, L2PrefetcherParameters, StreamPrefetchParameters}
import xiangshan.cache.mmu.{L2TLBParameters}
import xiangshan.cache.mmu.{L2TLBParameters, TLBParameters}
import device.{XSDebugModuleParams, EnableJtag}
class DefaultConfig(n: Int) extends Config((site, here, up) => {
......@@ -109,8 +109,44 @@ class MinimalConfig(n: Int = 1) extends Config(
),
EnableBPD = false, // disable TAGE
EnableLoop = false,
TlbEntrySize = 32,
TlbSPEntrySize = 4,
itlbParameters = TLBParameters(
name = "itlb",
fetchi = true,
useDmode = false,
sameCycle = true,
normalReplacer = Some("plru"),
superReplacer = Some("plru"),
normalNWays = 4,
normalNSets = 1,
superNWays = 2,
shouldBlock = true
),
ldtlbParameters = TLBParameters(
name = "ldtlb",
normalNSets = 4, // when da or sa
normalNWays = 1, // when fa or sa
normalAssociative = "sa",
normalReplacer = Some("setplru"),
superNWays = 4,
normalAsVictim = true,
outReplace = true
),
sttlbParameters = TLBParameters(
name = "sttlb",
normalNSets = 4, // when da or sa
normalNWays = 1, // when fa or sa
normalAssociative = "sa",
normalReplacer = Some("setplru"),
normalAsVictim = true,
superNWays = 4,
outReplace = true
),
btlbParameters = TLBParameters(
name = "btlb",
normalNSets = 1,
normalNWays = 8,
superNWays = 2
),
l2tlbParameters = L2TLBParameters(
l1Size = 4,
l2nSets = 4,
......
......@@ -56,12 +56,14 @@ object MaskGen {
object Random
{
def apply(mod: Int, random: UInt): UInt = {
if (isPow2(mod)) random(log2Ceil(mod)-1,0)
if (mod == 1) 0.U
else if (isPow2(mod)) random(log2Ceil(mod)-1,0)
else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod))
}
def apply(mod: Int): UInt = apply(mod, randomizer)
def oneHot(mod: Int, random: UInt): UInt = {
if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
if (mod == 1) 1.U(1.W)
else if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
else VecInit(PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod))).asUInt
}
def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
......
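For context, a minimal usage sketch of the patched helper (assuming it lives in the same utils package as MaskGen above; the nWays value is illustrative):

// Illustrative only: picking a random victim way with the patched Random helper.
// With mod == 1 the result is now a constant 0.U (oneHot gives 1.U(1.W)),
// instead of the degenerate bit-slice the old code would elaborate.
val nWays    = 1
val victim   = Random(nWays)          // 0.U when nWays == 1
val victimOH = Random.oneHot(nWays)   // 1.U(1.W) when nWays == 1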
......@@ -25,8 +25,8 @@ import xiangshan.backend.fu.fpu._
import xiangshan.backend.dispatch.DispatchParameters
import xiangshan.cache.{DCacheParameters, L1plusCacheParameters}
import xiangshan.cache.prefetch.{BOPParameters, L1plusPrefetcherParameters, L2PrefetcherParameters, StreamPrefetchParameters}
import xiangshan.cache.mmu.{TLBParameters, L2TLBParameters}
import xiangshan.frontend.{BIM, BasePredictor, BranchPredictionResp, FTB, FakePredictor, ICacheParameters, MicroBTB, RAS, Tage, Tage_SC}
import xiangshan.cache.mmu.{L2TLBParameters}
import freechips.rocketchip.diplomacy.AddressSet
case object XSCoreParamsKey extends Field[XSCoreParameters]
......@@ -140,8 +140,42 @@ case class XSCoreParameters
StoreBufferSize: Int = 16,
StoreBufferThreshold: Int = 7,
RefillSize: Int = 512,
TlbEntrySize: Int = 32,
TlbSPEntrySize: Int = 4,
itlbParameters: TLBParameters = TLBParameters(
name = "itlb",
fetchi = true,
useDmode = false,
sameCycle = true,
normalReplacer = Some("plru"),
superReplacer = Some("plru"),
shouldBlock = true
),
ldtlbParameters: TLBParameters = TLBParameters(
name = "ldtlb",
normalNSets = 128,
normalNWays = 1,
normalAssociative = "sa",
normalReplacer = Some("setplru"),
superNWays = 8,
normalAsVictim = true,
outReplace = true
),
sttlbParameters: TLBParameters = TLBParameters(
name = "sttlb",
normalNSets = 128,
normalNWays = 1,
normalAssociative = "sa",
normalReplacer = Some("setplru"),
superNWays = 8,
normalAsVictim = true,
outReplace = true
),
btlbParameters: TLBParameters = TLBParameters(
name = "btlb",
normalNSets = 1,
normalNWays = 64,
superNWays = 4,
),
useBTlb: Boolean = false,
l2tlbParameters: L2TLBParameters = L2TLBParameters(),
NumPerfCounters: Int = 16,
icacheParameters: ICacheParameters = ICacheParameters(
......@@ -167,7 +201,6 @@ case class XSCoreParameters
),
L2Size: Int = 512 * 1024, // 512KB
L2NWays: Int = 8,
usePTWRepeater: Boolean = false,
useFakePTW: Boolean = false,
useFakeDCache: Boolean = false,
useFakeL1plusCache: Boolean = false,
......@@ -281,9 +314,12 @@ trait HasXSParameter {
val StoreBufferSize = coreParams.StoreBufferSize
val StoreBufferThreshold = coreParams.StoreBufferThreshold
val RefillSize = coreParams.RefillSize
val DTLBWidth = coreParams.LoadPipelineWidth + coreParams.StorePipelineWidth
val TlbEntrySize = coreParams.TlbEntrySize
val TlbSPEntrySize = coreParams.TlbSPEntrySize
val BTLBWidth = coreParams.LoadPipelineWidth + coreParams.StorePipelineWidth
val useBTlb = coreParams.useBTlb
val itlbParams = coreParams.itlbParameters
val ldtlbParams = coreParams.ldtlbParameters
val sttlbParams = coreParams.sttlbParameters
val btlbParams = coreParams.btlbParameters
val l2tlbParams = coreParams.l2tlbParameters
val NumPerfCounters = coreParams.NumPerfCounters
......@@ -300,7 +336,6 @@ trait HasXSParameter {
// cache hierarchy configurations
val l1BusDataWidth = 256
val usePTWRepeater = coreParams.usePTWRepeater
val useFakeDCache = coreParams.useFakeDCache
val useFakePTW = coreParams.useFakePTW
val useFakeL1plusCache = coreParams.useFakeL1plusCache
......
......@@ -338,11 +338,7 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
memBlock.io.lsqio.exceptionAddr.isStore := CommitType.lsInstIsStore(ctrlBlock.io.roqio.exception.bits.uop.ctrl.commitType)
val itlbRepeater = Module(new PTWRepeater(2))
val dtlbRepeater = if (usePTWRepeater) {
Module(new PTWRepeater(LoadPipelineWidth + StorePipelineWidth))
} else {
Module(new PTWFilter(LoadPipelineWidth + StorePipelineWidth, l2tlbParams.missQueueSize))
}
val dtlbRepeater = Module(new PTWFilter(LoadPipelineWidth + StorePipelineWidth, l2tlbParams.missQueueSize))
itlbRepeater.io.tlb <> frontend.io.ptw
dtlbRepeater.io.tlb <> memBlock.io.ptw
itlbRepeater.io.sfence <> fenceio.sfence
......
......@@ -48,12 +48,12 @@ trait HasXSDts {
)
val dtlb = Map(
"d-tlb-size" -> TlbEntrySize.asProperty,
"d-tlb-size" -> (ldtlbParams.normalNSets * ldtlbParams.normalNWays).asProperty,
"d-tlb-sets" -> 1.asProperty
)
val itlb = Map(
"i-tlb-size" -> TlbEntrySize.asProperty,
"i-tlb-size" -> (itlbParams.normalNSets * itlbParams.normalNWays).asProperty,
"i-tlb-sets" -> 1.asProperty
)
......
......@@ -20,7 +20,6 @@ import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import freechips.rocketchip.tile.FType
import xiangshan._
import xiangshan.backend.exu._
import xiangshan.backend.fu.CSRFileIO
......
......@@ -25,7 +25,7 @@ import system.L1CacheErrorInfo
import xiangshan._
import xiangshan.backend.roq.RoqLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu.{TLB, TlbPtwIO}
import xiangshan.cache.mmu.{BTlbPtwIO, BridgeTLB, PtwResp, TLB, TlbReplace}
import xiangshan.mem._
import xiangshan.backend.fu.{FenceToSbuffer, FunctionUnit, HasExceptionNO}
import utils._
......@@ -70,12 +70,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val stIn = Vec(exuParameters.StuCnt, ValidIO(new ExuInput))
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput))
val memoryViolation = ValidIO(new Redirect)
val ptw = new TlbPtwIO(LoadPipelineWidth + StorePipelineWidth)
val ptw = new BTlbPtwIO(exuParameters.LduCnt + exuParameters.StuCnt)
val sfence = Input(new SfenceBundle)
val tlbCsr = Input(new TlbCsrBundle)
val fenceToSbuffer = Flipped(new FenceToSbuffer)
val enqLsq = new LsqEnqIO
val memPredUpdate = Vec(StorePipelineWidth, Input(new MemPredUpdateReq))
val memPredUpdate = Vec(exuParameters.StuCnt, Input(new MemPredUpdateReq))
val lsqio = new Bundle {
val exceptionAddr = new ExceptionAddrIO // to csr
val roq = Flipped(new RoqLsqIO) // roq to lsq
......@@ -117,7 +117,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// TODO: fast load wakeup
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsq = Module(new LsqWrappper)
val sbuffer = Module(new NewSbuffer)
// if you wants to stress test dcache store, use FakeSbuffer
......@@ -125,9 +125,55 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io.stIssuePtr := lsq.io.issuePtrExt
// dtlb
io.ptw <> dtlb.io.ptw
dtlb.io.sfence <> RegNext(io.sfence)
dtlb.io.csr <> RegNext(io.tlbCsr)
val sfence = RegNext(io.sfence)
val tlbcsr = RegNext(io.tlbCsr)
val dtlb_ld = VecInit(Seq.fill(exuParameters.LduCnt){
val tlb_ld = Module(new TLB(1, ldtlbParams))
tlb_ld.io // let the module have name in waveform
})
val dtlb_st = VecInit(Seq.fill(exuParameters.StuCnt){
val tlb_st = Module(new TLB(1, sttlbParams))
tlb_st.io // let the module have name in waveform
})
dtlb_ld.map(_.sfence := sfence)
dtlb_st.map(_.sfence := sfence)
dtlb_ld.map(_.csr := tlbcsr)
dtlb_st.map(_.csr := tlbcsr)
if (ldtlbParams.outReplace) {
val replace_ld = Module(new TlbReplace(exuParameters.LduCnt, ldtlbParams))
replace_ld.io.apply_sep(dtlb_ld.map(_.replace), io.ptw.resp.bits.data.entry.tag)
}
if (sttlbParams.outReplace) {
val replace_st = Module(new TlbReplace(exuParameters.StuCnt, sttlbParams))
replace_st.io.apply_sep(dtlb_st.map(_.replace), io.ptw.resp.bits.data.entry.tag)
}
if (!useBTlb) {
(dtlb_ld.map(_.ptw.req) ++ dtlb_st.map(_.ptw.req)).zipWithIndex.map{ case (tlb, i) =>
tlb(0) <> io.ptw.req(i)
}
dtlb_ld.map(_.ptw.resp.bits := io.ptw.resp.bits.data)
dtlb_st.map(_.ptw.resp.bits := io.ptw.resp.bits.data)
dtlb_ld.map(_.ptw.resp.valid := io.ptw.resp.valid && Cat(io.ptw.resp.bits.vector.take(exuParameters.LduCnt)).orR)
dtlb_st.map(_.ptw.resp.valid := io.ptw.resp.valid && Cat(io.ptw.resp.bits.vector.drop(exuParameters.LduCnt)).orR)
} else {
val btlb = Module(new BridgeTLB(BTLBWidth, btlbParams))
btlb.suggestName("btlb")
io.ptw <> btlb.io.ptw
btlb.io.sfence <> sfence
btlb.io.csr <> tlbcsr
btlb.io.requestor.take(exuParameters.LduCnt).map(_.req(0)).zip(dtlb_ld.map(_.ptw.req)).map{case (a,b) => a <> b}
btlb.io.requestor.drop(exuParameters.LduCnt).map(_.req(0)).zip(dtlb_st.map(_.ptw.req)).map{case (a,b) => a <> b}
val arb_ld = Module(new Arbiter(new PtwResp, exuParameters.LduCnt))
val arb_st = Module(new Arbiter(new PtwResp, exuParameters.StuCnt))
arb_ld.io.in <> btlb.io.requestor.take(exuParameters.LduCnt).map(_.resp)
arb_st.io.in <> btlb.io.requestor.drop(exuParameters.LduCnt).map(_.resp)
VecInit(dtlb_ld.map(_.ptw.resp)) <> arb_ld.io.out
VecInit(dtlb_st.map(_.ptw.resp)) <> arb_st.io.out
}
io.ptw.resp.ready := true.B
// LoadUnit
for (i <- 0 until exuParameters.LduCnt) {
......@@ -136,7 +182,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits(i).io.rsFeedback <> io.replay(i)
loadUnits(i).io.rsIdx := io.rsIdx(i) // TODO: beautify it
loadUnits(i).io.isFirstIssue := io.isFirstIssue(i) // NOTE: just for dtlb's perf cnt
loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
loadUnits(i).io.loadFastMatch <> io.loadFastMatch(i)
// get input form dispatch
loadUnits(i).io.ldin <> io.issue(i)
......@@ -145,6 +190,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// forward
loadUnits(i).io.lsq.forward <> lsq.io.forward(i)
loadUnits(i).io.sbuffer <> sbuffer.io.forward(i)
// l0tlb
loadUnits(i).io.tlb <> dtlb_ld(i).requestor(0)
// laod to load fast forward
for (j <- 0 until exuParameters.LduCnt) {
......@@ -167,7 +214,6 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// StoreUnit
for (i <- 0 until exuParameters.StuCnt) {
val stu = storeUnits(i)
val dtlbReq = dtlb.io.requestor(exuParameters.LduCnt + i)
stu.io.redirect <> io.redirect
stu.io.flush <> io.flush
......@@ -175,9 +221,10 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
stu.io.rsIdx <> io.rsIdx(exuParameters.LduCnt + i)
// NOTE: just for dtlb's perf cnt
stu.io.isFirstIssue <> io.isFirstIssue(exuParameters.LduCnt + i)
stu.io.dtlb <> dtlbReq
stu.io.stin <> io.issue(exuParameters.LduCnt + i)
stu.io.lsq <> lsq.io.storeIn(i)
// l0tlb
stu.io.tlb <> dtlb_st(i).requestor(0)
// Lsq to load unit's rs
// rs.io.storeData <> lsq.io.storeDataIn(i)
......@@ -272,22 +319,21 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
atomicsUnit.io.redirect <> io.redirect
atomicsUnit.io.flush <> io.flush
val amoTlb = dtlb_ld(0).requestor(0)
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
atomicsUnit.io.dtlb.req.ready := amoTlb.req.ready
atomicsUnit.io.dcache <> dcache.io.lsu.atomics
atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
// for atomicsUnit, it uses loadUnit(0)'s TLB port
when (state === s_atomics_0 || state === s_atomics_1) {
atomicsUnit.io.dtlb <> dtlb.io.requestor(0)
loadUnits(0).io.dtlb.resp.valid := false.B
when (state === s_atomics_0 || state === s_atomics_1) {
loadUnits(0).io.ldout.ready := false.B
atomicsUnit.io.dtlb <> amoTlb
// make sure there's no in-flight uops in load unit
assert(!loadUnits(0).io.dtlb.req.valid)
assert(!loadUnits(0).io.ldout.valid)
}
......
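To summarize the topology introduced above, a condensed and hedged sketch of the per-pipeline l0tlb wiring (module and port names are taken from the diff; the PTW-side wiring, direct or through BridgeTLB, is omitted, so this is not a drop-in replacement):

// Each load/store pipeline gets its own 1-port TLB; sfence/csr are registered once
// and fanned out to every TLB instance.
val sfence  = RegNext(io.sfence)
val tlbcsr  = RegNext(io.tlbCsr)
val dtlb_ld = Seq.fill(exuParameters.LduCnt)(Module(new TLB(1, ldtlbParams)).io)
val dtlb_st = Seq.fill(exuParameters.StuCnt)(Module(new TLB(1, sttlbParams)).io)
(dtlb_ld ++ dtlb_st).foreach { tlb => tlb.sfence := sfence; tlb.csr := tlbcsr }
loadUnits.zip(dtlb_ld).foreach  { case (ld, tlb) => ld.io.tlb <> tlb.requestor(0) }
storeUnits.zip(dtlb_st).foreach { case (st, tlb) => st.io.tlb <> tlb.requestor(0) }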
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.cache.mmu
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.fu.util.HasCSRConst
/** Bridge TLB is the bridge between the L0 TLB (the old, normal TLB) and the L2 TLB
 * 1. store the entries from the L2 TLB and send them to the L0 TLB
 * 2. do not translate and do not do the pma/pmp work; it is just a small fully-associative cache of L2 TLB entries
 */
class BridgeTLB(Width: Int, q: TLBParameters)(implicit p: Parameters) extends TlbModule with HasCSRConst {
val io = IO(new BridgeTLBIO(Width))
val req = io.requestor.map(_.req)
val resp = io.requestor.map(_.resp)
val ptw = io.ptw
val sfence = io.sfence
// val csr = io.csr
// val satp = csr.satp
// val priv = csr.priv
// val mode = priv.dmode
// val vmEnable = satp.mode === 8.U // && (mode < ModeM) // FIXME: fix me when boot xv6/linux...
// val vmEnable = if(EnbaleTlbDebug) (satp.mode === 8.U)
// else (satp.mode === 8.U && (mode < ModeM))
// TODO: the code above is the same as in TLB; may need an abstract TLB module class
val entries = Reg(Vec(q.normalNWays, new PtwResp))
val entries_v = RegInit(VecInit(Seq.fill(q.normalNWays)(false.B)))
val replace = ReplacementPolicy.fromString(Some("random"), q.normalNWays)
val refillIdx = replaceWrapper(entries_v, replace.way)
// val WaitingSetSize = 4
// val waiting_set = Reg(Vec(Width, Vec(WaitingSetSize, UInt(vpnLen.W))))
// val waiting_set_v = RegInit(Vec(Width, Vec(WaitingSetSize, false.B)))
val refillMask = Mux(io.ptw.resp.valid, UIntToOH(refillIdx)(q.normalNWays-1, 0), 0.U).asBools
for (i <- req.indices) {
val vpn = req(i)(0).bits.vpn
val hitVec = VecInit(entries.zipWithIndex.map{ case (e, i) =>
e.entry.hit(vpn, allType = true) && entries_v(i) && ~refillMask(i)
})
hitVec.suggestName("hitVec")
/* ============ next cycle =============== */
val hitVecReg = RegNext(hitVec)
val hitEntry = Mux1H(hitVecReg, entries)
val hit = Cat(hitVecReg).orR
hitEntry.suggestName("hitEntry")
hit.suggestName("hit")
resp(i).valid := (RegNext(req(i)(0).valid) && hit) ||
(io.ptw.resp.valid && io.ptw.resp.bits.vector(i))
resp(i).bits := Mux(RegNext(req(i)(0).valid) && hit, hitEntry, io.ptw.resp.bits.data)
req(i)(0).ready := true.B // TODO: handle the ready
io.ptw.req(i).valid := RegNext(req(i)(0).valid) && !hit
io.ptw.req(i).bits.vpn := RegNext(vpn)
XSPerfAccumulate("access" + Integer.toString(i, 10), req(i)(0).valid)
XSPerfAccumulate("hit" + Integer.toString(i, 10), RegNext(req(i)(0).valid) && hit)
XSPerfAccumulate("hit_resp_conflit" + Integer.toString(i, 10), (RegNext(req(i)(0).valid) && hit) && (io.ptw.resp.valid && io.ptw.resp.bits.vector(i)))
XSPerfAccumulate("hit_out" + Integer.toString(i, 10), (RegNext(req(i)(0).valid) && hit) && !(io.ptw.resp.valid && io.ptw.resp.bits.vector(i)))
XSPerfAccumulate("resp_out" + Integer.toString(i, 10), !(RegNext(req(i)(0).valid) && hit) && (io.ptw.resp.valid && io.ptw.resp.bits.vector(i)))
}
when (Cat(io.ptw.req.map(_.valid)).orR) {
replace.miss
}
when (io.ptw.resp.valid) {
entries_v(refillIdx) := true.B
entries(refillIdx) := io.ptw.resp.bits.data
}
io.ptw.resp.ready := true.B
val sfence_vpn = sfence.bits.addr.asTypeOf(new VaBundle().cloneType).vpn
val sfence_hit = entries.map(_.entry.hit(sfence_vpn))
when (sfence.valid) {
// entries_v := 0.U.asTypeOf(entries_v.cloneType)
when (sfence.bits.rs1) {
when (sfence.bits.rs2) {
entries_v := 0.U.asTypeOf(entries_v.cloneType)
}.otherwise {
entries_v.zipWithIndex.map{a => a._1 := a._1 & entries(a._2).entry.perm.get.g}
}
}.otherwise {
when (sfence.bits.rs2) {
entries_v := (entries_v.zip(sfence_hit).map(a => a._1 & !a._2))
}.otherwise {
entries_v := (entries_v.zipWithIndex.map(a => a._1 & !(sfence_hit(a._2) && !entries(a._2).entry.perm.get.g)))
}
}
}
XSPerfAccumulate("ptw_resp_count", ptw.resp.fire())
XSPerfAccumulate("ptw_resp_vector_count", Mux(ptw.resp.fire(), PopCount(ptw.resp.bits.vector), 0.U))
XSPerfAccumulate("ptw_resp_pf_count", ptw.resp.fire() && ptw.resp.bits.data.pf)
for (i <- 0 until q.normalNWays) {
XSPerfAccumulate(s"RefillIndex${i}", ptw.resp.valid && i.U === refillIdx)
}
XSPerfAccumulate(s"Refill4KBPage", ptw.resp.valid && ptw.resp.bits.data.entry.level.get === 2.U)
XSPerfAccumulate(s"Refill2MBPage", ptw.resp.valid && ptw.resp.bits.data.entry.level.get === 1.U)
XSPerfAccumulate(s"Refill1GBPage", ptw.resp.valid && ptw.resp.bits.data.entry.level.get === 0.U)
println(s"${q.name}: normal page: ${q.normalNWays} ${q.normalAssociative} ${q.normalReplacer.get} super page: ${q.superNWays} ${q.superAssociative} ${q.superReplacer.get}")
}
......@@ -29,6 +29,17 @@ import freechips.rocketchip.tilelink._
abstract class TlbBundle(implicit p: Parameters) extends XSBundle with HasTlbConst
abstract class TlbModule(implicit p: Parameters) extends XSModule with HasTlbConst
// case class ITLBKey
// case class LDTLBKey
// case class STTLBKey
class VaBundle(implicit p: Parameters) extends TlbBundle {
val vpn = UInt(vpnLen.W)
val off = UInt(offLen.W)
}
class PtePermBundle(implicit p: Parameters) extends TlbBundle {
val d = Bool()
val a = Bool()
......@@ -165,6 +176,82 @@ class TlbData(superpage: Boolean = false)(implicit p: Parameters) extends TlbBun
override def cloneType: this.type = (new TlbData(superpage)).asInstanceOf[this.type]
}
class TlbEntry(pageNormal: Boolean, pageSuper: Boolean)(implicit p: Parameters) extends TlbBundle {
require(pageNormal || pageSuper)
val tag = if (!pageNormal) UInt((vpnLen - vpnnLen).W)
else UInt(vpnLen.W)
val level = if (!pageNormal) Some(UInt(1.W))
else if (!pageSuper) None
else Some(UInt(2.W))
val ppn = if (!pageNormal) UInt((ppnLen - vpnnLen).W)
else UInt(ppnLen.W)
val perm = new TlbPermBundle
def hit(vpn: UInt): Bool = {
if (!pageSuper) vpn === tag
else if (!pageNormal) MuxLookup(level.get, false.B, Seq(
0.U -> (tag(vpnnLen*2-1, vpnnLen) === vpn(vpnLen-1, vpnnLen*2)),
1.U -> (tag === vpn(vpnLen-1, vpnnLen)),
))
else MuxLookup(level.get, false.B, Seq(
0.U -> (tag(vpnLen-1, vpnnLen*2) === vpn(vpnLen-1, vpnnLen*2)),
1.U -> (tag(vpnLen-1, vpnnLen) === vpn(vpnLen-1, vpnnLen)),
2.U -> (tag === vpn) // if pageNormal is false, this will always be false
))
}
def apply(item: PtwResp): TlbEntry = {
this.tag := {if (pageNormal) item.entry.tag else item.entry.tag(vpnLen-1, vpnnLen)}
val inner_level = item.entry.level.getOrElse(0.U)
this.level.map(_ := { if (pageNormal && pageSuper) inner_level
else if (pageSuper) inner_level(0)
else 0.U})
this.ppn := { if (!pageNormal) item.entry.ppn(ppnLen-1, vpnnLen)
else item.entry.ppn }
val ptePerm = item.entry.perm.get.asTypeOf(new PtePermBundle().cloneType)
this.perm.pf := item.pf
this.perm.d := ptePerm.d
this.perm.a := ptePerm.a
this.perm.g := ptePerm.g
this.perm.u := ptePerm.u
this.perm.x := ptePerm.x
this.perm.w := ptePerm.w
this.perm.r := ptePerm.r
// get pma perm
val (pmaMode, accessWidth) = AddressSpace.memmapAddrMatch(Cat(item.entry.ppn, 0.U(12.W)))
this.perm.pr := PMAMode.read(pmaMode)
this.perm.pw := PMAMode.write(pmaMode)
this.perm.pe := PMAMode.execute(pmaMode)
this.perm.pa := PMAMode.atomic(pmaMode)
this.perm.pi := PMAMode.icache(pmaMode)
this.perm.pd := PMAMode.dcache(pmaMode)
this
}
def genPPN(vpn: UInt) : UInt = {
if (!pageSuper) ppn
else if (!pageNormal) MuxLookup(level.get, 0.U, Seq(
0.U -> Cat(ppn(ppn.getWidth-1, vpnnLen), vpn(vpnnLen*2-1, 0)),
1.U -> Cat(ppn, vpn(vpnnLen-1, 0))
))
else MuxLookup(level.get, 0.U, Seq(
0.U -> Cat(ppn(ppn.getWidth-1, vpnnLen*2), vpn(vpnnLen*2-1, 0)),
1.U -> Cat(ppn(ppn.getWidth-1, vpnnLen), vpn(vpnnLen-1, 0)),
2.U -> ppn
))
}
override def toPrintable: Printable = {
val inner_level = level.getOrElse(2.U)
p"level:${inner_level} vpn:${Hexadecimal(tag)} ppn:${Hexadecimal(ppn)} perm:${perm}"
}
override def cloneType: this.type = (new TlbEntry(pageNormal, pageSuper)).asInstanceOf[this.type]
}
object TlbCmd {
def read = "b00".U
def write = "b01".U
......@@ -181,6 +268,78 @@ object TlbCmd {
def isAtom(a: UInt) = a(2)
}
class TlbStorageIO(nSets: Int, nWays: Int, ports: Int)(implicit p: Parameters) extends TlbBundle {
val r = new Bundle {
val req = Vec(ports, Flipped(DecoupledIO(new Bundle {
val vpn = Output(UInt(vpnLen.W))
})))
val resp = Vec(ports, ValidIO(new Bundle{
val hit = Output(Bool())
val ppn = Output(UInt(ppnLen.W))
val perm = Output(new TlbPermBundle())
val hitVec = Output(UInt(nWays.W))
}))
}
val w = Flipped(ValidIO(new Bundle {
val wayIdx = Output(UInt(log2Up(nWays).W))
val data = Output(new PtwResp)
}))
val victim = new Bundle {
val out = ValidIO(Output(new TlbEntry(pageNormal = true, pageSuper = false)))
val in = Flipped(ValidIO(Output(new TlbEntry(pageNormal = true, pageSuper = false))))
}
val sfence = Input(new SfenceBundle())
def r_req_apply(valid: Bool, vpn: UInt, i: Int): Unit = {
this.r.req(i).valid := valid
this.r.req(i).bits.vpn := vpn
}
def r_resp_apply(i: Int) = {
(this.r.resp(i).bits.hit, this.r.resp(i).bits.ppn, this.r.resp(i).bits.perm, this.r.resp(i).bits.hitVec)
}
def w_apply(valid: Bool, wayIdx: UInt, data: PtwResp): Unit = {
this.w.valid := valid
this.w.bits.wayIdx := wayIdx
this.w.bits.data := data
}
override def cloneType: this.type = new TlbStorageIO(nSets, nWays, ports).asInstanceOf[this.type]
}
class ReplaceIO(Width: Int, nSets: Int, nWays: Int)(implicit p: Parameters) extends TlbBundle {
val access = Flipped(new Bundle {
val sets = Output(Vec(Width, UInt(log2Up(nSets).W)))
val touch_ways = Vec(Width, ValidIO(Output(UInt(log2Up(nWays).W))))
})
val refillIdx = Output(UInt(log2Up(nWays).W))
val chosen_set = Flipped(Output(UInt(log2Up(nSets).W)))
def apply_sep(in: Seq[ReplaceIO], vpn: UInt): Unit = {
for (i <- 0 until Width) {
this.access.sets(i) := in(i).access.sets(0)
this.access.touch_ways(i) := in(i).access.touch_ways(0)
this.chosen_set := get_idx(vpn, nSets)
in(i).refillIdx := this.refillIdx
}
}
}
class TlbReplaceIO(Width: Int, q: TLBParameters)(implicit p: Parameters) extends
TlbBundle {
val normalPage = new ReplaceIO(Width, q.normalNSets, q.normalNWays)
val superPage = new ReplaceIO(Width, q.superNSets, q.superNWays)
def apply_sep(in: Seq[TlbReplaceIO], vpn: UInt) = {
this.normalPage.apply_sep(in.map(_.normalPage), vpn)
this.superPage.apply_sep(in.map(_.superPage), vpn)
}
override def cloneType = (new TlbReplaceIO(Width, q)).asInstanceOf[this.type]
}
class TlbReq(implicit p: Parameters) extends TlbBundle {
val vaddr = UInt(VAddrBits.W)
val cmd = TlbCmd()
......@@ -239,13 +398,36 @@ class TlbPtwIO(Width: Int = 1)(implicit p: Parameters) extends TlbBundle {
}
}
class TlbIO(Width: Int)(implicit p: Parameters) extends TlbBundle {
val requestor = Vec(Width, Flipped(new TlbRequestIO))
val ptw = new TlbPtwIO(Width)
class TlbBaseBundle(implicit p: Parameters) extends TlbBundle {
val sfence = Input(new SfenceBundle)
val csr = Input(new TlbCsrBundle)
}
class TlbIO(Width: Int, q: TLBParameters)(implicit p: Parameters) extends
TlbBaseBundle {
val requestor = Vec(Width, Flipped(new TlbRequestIO))
val ptw = new TlbPtwIO(Width)
val replace = if (q.outReplace) Flipped(new TlbReplaceIO(Width, q)) else null
override def cloneType: this.type = (new TlbIO(Width, q)).asInstanceOf[this.type]
}
class BTlbPtwIO(Width: Int)(implicit p: Parameters) extends TlbBundle {
val req = Vec(Width, DecoupledIO(new PtwReq))
val resp = Flipped(DecoupledIO(new Bundle {
val data = new PtwResp
val vector = Output(Vec(Width, Bool()))
}))
override def cloneType: this.type = (new TlbIO(Width)).asInstanceOf[this.type]
override def cloneType: this.type = (new BTlbPtwIO(Width)).asInstanceOf[this.type]
}
/**************************** Bridge TLB *******************************/
class BridgeTLBIO(Width: Int)(implicit p: Parameters) extends TlbBaseBundle {
val requestor = Vec(Width, Flipped(new TlbPtwIO()))
val ptw = new BTlbPtwIO(Width)
override def cloneType: this.type = (new BridgeTLBIO(Width)).asInstanceOf[this.type]
}
......@@ -325,8 +507,8 @@ class PtwEntry(tagLen: Int, hasPerm: Boolean = false, hasLevel: Boolean = false)
def refill(vpn: UInt, pte: UInt, level: UInt = 0.U) {
tag := vpn(vpnLen - 1, vpnLen - tagLen)
ppn := pte.asTypeOf(pteBundle).ppn
perm.map(_ := pte.asTypeOf(pteBundle).perm)
ppn := pte.asTypeOf(new PteBundle().cloneType).ppn
perm.map(_ := pte.asTypeOf(new PteBundle().cloneType).perm)
this.level.map(_ := level)
}
......@@ -442,4 +624,4 @@ object OneCycleValid {
when (flush) { valid := false.B }
valid
}
}
\ No newline at end of file
}
......@@ -25,6 +25,25 @@ import utils._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink._
case class TLBParameters
(
name: String = "none",
fetchi: Boolean = false, // TODO: remove it
useDmode: Boolean = true,
sameCycle: Boolean = false,
normalNSets: Int = 1, // when da or sa
normalNWays: Int = 8, // when fa or sa
superNSets: Int = 1,
superNWays: Int = 2,
normalReplacer: Option[String] = Some("random"),
superReplacer: Option[String] = Some("plru"),
normalAssociative: String = "fa", // "fa", "sa", "da", "sa" is not supported
superAssociative: String = "fa", // must be fa
normalAsVictim: Boolean = false, // when get replace from fa, store it into sram
outReplace: Boolean = false,
shouldBlock: Boolean = false // only for perf, not support for io
)
case class L2TLBParameters
(
name: String = "l2tlb",
......@@ -45,8 +64,6 @@ case class L2TLBParameters
spReplacer: Option[String] = Some("plru"),
// miss queue
missQueueSize: Int = 8,
// sram
sramSinglePort: Boolean = true,
// way size
blockBytes: Int = 64
)
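As a hedged usage sketch of the new parameter bundle (field values are illustrative and mirror the ldtlb setting in MinimalConfig; as the commit message notes, only a subset of combinations is supported):

// Illustrative configuration: a small set-associative normal-page array plus a
// fully-associative super-page array, with replacement state kept outside the TLB.
val demoTlbParams = TLBParameters(
  name = "demo_tlb",
  normalNSets = 4,                  // used when normalAssociative is "sa" or "da"
  normalNWays = 1,                  // used when normalAssociative is "fa" or "sa"
  normalAssociative = "sa",
  normalReplacer = Some("setplru"),
  superNWays = 4,
  normalAsVictim = true,            // entries evicted from the fa super array are written into the sa array
  outReplace = true                 // replacement index comes from an external TlbReplace module
)
val demoTlb = Module(new TLB(1, demoTlbParams))   // 1 requestor port, as in the MemBlock diff above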
......@@ -62,24 +79,10 @@ trait HasTlbConst extends HasXSParameter {
val pteResLen = XLEN - ppnLen - 2 - flagLen
val asidLen = 16
def vaBundle = new Bundle {
val vpn = UInt(vpnLen.W)
val off = UInt(offLen.W)
}
def pteBundle = new Bundle {
val reserved = UInt(pteResLen.W)
val ppn = UInt(ppnLen.W)
val rsw = UInt(2.W)
val perm = new Bundle {
val d = Bool()
val a = Bool()
val g = Bool()
val u = Bool()
val x = Bool()
val w = Bool()
val r = Bool()
val v = Bool()
}
val sramSinglePort = true
def get_idx(vpn: UInt, nSets: Int): UInt = {
vpn(log2Up(nSets)-1, 0)
}
def replaceWrapper(v: UInt, lruIdx: UInt): UInt = {
......@@ -92,11 +95,11 @@ trait HasTlbConst extends HasXSParameter {
def replaceWrapper(v: Seq[Bool], lruIdx: UInt): UInt = {
replaceWrapper(VecInit(v).asUInt, lruIdx)
}
}
trait HasPtwConst extends HasTlbConst with MemoryOpConstants{
val PtwWidth = 2
val sramSinglePort = true // NOTE: ptwl2, ptwl3 sram single port or not
val blockBits = l2tlbParams.blockBytes * 8
val bPtwWidth = log2Up(PtwWidth)
......
......@@ -68,14 +68,15 @@ class PTWRepeater(Width: Int = 1)(implicit p: Parameters) extends XSModule with
*/
class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
val io = IO(new Bundle {
val tlb = Flipped(new TlbPtwIO(Width))
val ptw = new TlbPtwIO
val tlb = Flipped(new BTlbPtwIO(Width))
val ptw = new TlbPtwIO()
val sfence = Input(new SfenceBundle)
})
require(Size >= Width)
val v = RegInit(VecInit(Seq.fill(Size)(false.B)))
val ports = Reg(Vec(Size, Vec(Width, Bool()))) // record which port(s) the entry come from, may not able to cover all the ports
val vpn = Reg(Vec(Size, UInt(vpnLen.W)))
val enqPtr = RegInit(0.U(log2Up(Size).W)) // Enq
val issPtr = RegInit(0.U(log2Up(Size).W)) // Iss to Ptw
......@@ -87,8 +88,37 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
val sfence = RegNext(io.sfence)
val ptwResp = RegEnable(io.ptw.resp.bits, io.ptw.resp.fire())
val ptwResp_valid = RegNext(io.ptw.resp.valid, init = false.B)
val reqs = filter(io.tlb.req)
val tlb_req = io.tlb.req
val oldMatchVec = tlb_req.map(a => vpn.zip(v).map{case (pi, vi) => vi && a.valid && pi === a.bits.vpn })
val newMatchVec = tlb_req.map(a => tlb_req.map(b => b.valid && a.valid && b.bits.vpn === a.bits.vpn ))
val ptwResp_newMatchVec = tlb_req.map(a => ptwResp_valid && ptwResp.entry.hit(a.bits.vpn, allType = true) && a.valid) // TODO: may have long latency
val ptwResp_oldMatchVec = vpn.zip(v).map{ case (pi, vi) => vi && ptwResp.entry.hit(pi, allType = true) }
val update_ports = v.indices.map(i => oldMatchVec.map(j => j(i)))
val ports_init = (0 until Width).map(i => (1 << i).U(Width.W))
val filter_ports = (0 until Width).map(i => ParallelMux(newMatchVec(i).zip(ports_init).drop(i)))
val resp_vector = ParallelMux(ptwResp_oldMatchVec zip ports)
val resp_still_valid = ParallelOR(ptwResp_oldMatchVec).asBool
def canMerge(index: Int) : Bool = {
ptwResp_newMatchVec(index) ||
Cat(oldMatchVec(index)).orR ||
Cat(newMatchVec(index).take(index)).orR
}
def filter_req() = {
val reqs = tlb_req.indices.map{ i =>
val req = Wire(ValidIO(new PtwReq()))
val merge = canMerge(i)
req.bits := tlb_req(i).bits
req.valid := !merge && tlb_req(i).valid
req
}
reqs
}
val reqs = filter_req()
val req_ports = filter_ports
var enqPtr_next = WireInit(deqPtr)
val isFull = enqPtr === deqPtr && mayFullDeq
val isEmptyDeq = enqPtr === deqPtr && !mayFullDeq
......@@ -99,8 +129,9 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
val canEnqueue = counter +& enqNum <= Size.U
io.tlb.req.map(_.ready := true.B) // NOTE: just drop un-fire reqs
io.tlb.resp.valid := ptwResp_valid
io.tlb.resp.bits := ptwResp
io.tlb.resp.valid := ptwResp_valid && resp_still_valid
io.tlb.resp.bits.data := ptwResp
io.tlb.resp.bits.vector := resp_vector
io.ptw.req(0).valid := v(issPtr) && !isEmptyIss && !(ptwResp_valid && ptwResp.entry.hit(io.ptw.req(0).bits.vpn))
io.ptw.req(0).bits.vpn := vpn(issPtr)
io.ptw.resp.ready := true.B
......@@ -110,8 +141,14 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
when (req.valid && canEnqueue) {
v(enqPtrVec(i)) := true.B
vpn(enqPtrVec(i)) := req.bits.vpn
ports(enqPtrVec(i)) := req_ports(i).asBools
}
}
for (i <- ports.indices) {
when (v(i)) {
ports(i) := ports(i).zip(update_ports(i)).map(a => a._1 || a._2)
}
}
val do_enq = canEnqueue && Cat(reqs.map(_.valid)).orR
val do_deq = (!v(deqPtr) && !isEmptyDeq)
......@@ -133,9 +170,7 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
}
when (ptwResp_valid) {
vpn.zip(v).map{case (pi, vi) =>
when (vi && ptwResp.entry.hit(pi, allType = true)) { vi := false.B }
}
v.zip(ptwResp_oldMatchVec).map{ case (vi, mi) => when (mi) { vi := false.B }}
}
counter := counter - do_deq + Mux(do_enq, enqNum, 0.U)
......@@ -159,22 +194,6 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
counter := 0.U
}
def canMerge(vpnReq: UInt, reqs: Seq[DecoupledIO[PtwReq]], index: Int) : Bool = {
Cat((vpn ++ reqs.take(index).map(_.bits.vpn))
.zip(v ++ reqs.take(index).map(_.valid))
.map{case (pi, vi) => vi && pi === vpnReq}
).orR || (ptwResp_valid && ptwResp.entry.hit(vpnReq))
}
def filter(tlbReq: Vec[DecoupledIO[PtwReq]]) = {
val reqs = tlbReq.indices.map{ i =>
val req = Wire(ValidIO(new PtwReq()))
req.bits := tlbReq(i).bits
req.valid := !canMerge(tlbReq(i).bits.vpn, tlbReq, i) && tlbReq(i).valid
req
}
reqs
}
// perf
val inflight_counter = RegInit(0.U(log2Up(Size + 1).W))
......@@ -194,4 +213,4 @@ class PTWFilter(Width: Int, Size: Int)(implicit p: Parameters) extends XSModule
for (i <- 0 until Size + 1) {
XSPerfAccumulate(s"counter${i}", counter === i.U)
}
}
\ No newline at end of file
}
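Because the filter's tlb side is now a BTlbPtwIO, a single response is broadcast together with a per-port vector, and each consumer gates its own resp.valid with its slice of that vector, as MemBlock does above. A minimal sketch (nLd mirrors exuParameters.LduCnt; dtlb_ld is the per-pipeline TLB IO vector from the MemBlock diff):

// Fan one BTlbPtwIO response out to the load-pipeline TLBs.
val nLd = exuParameters.LduCnt
dtlb_ld.foreach { tlb =>
  tlb.ptw.resp.bits  := io.ptw.resp.bits.data
  tlb.ptw.resp.valid := io.ptw.resp.valid &&
    Cat(io.ptw.resp.bits.vector.take(nLd)).orR   // response belongs to one of the load TLBs
}
io.ptw.resp.ready := true.B                      // responses are never back-pressured here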
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.cache.mmu
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import chisel3.experimental.chiselName
import freechips.rocketchip.util.SRAMAnnotation
import xiangshan._
import utils._
@chiselName
class TLBFA(
sameCycle: Boolean,
ports: Int,
nSets: Int,
nWays: Int,
sramSinglePort: Boolean,
normalPage: Boolean,
superPage: Boolean
)(implicit p: Parameters) extends TlbModule{
val io = IO(new TlbStorageIO(nSets, nWays, ports))
io.r.req.map(_.ready := true.B)
val v = RegInit(VecInit(Seq.fill(nWays)(false.B)))
val entries = Reg(Vec(nWays, new TlbEntry(normalPage, superPage)))
val g = entries.map(_.perm.g)
for (i <- 0 until ports) {
val req = io.r.req(i)
val resp = io.r.resp(i)
val vpn = req.bits.vpn
val vpn_reg = if (sameCycle) vpn else RegEnable(vpn, req.fire())
val refill_mask = if (sameCycle) 0.U(nWays.W) else Mux(io.w.valid, UIntToOH(io.w.bits.wayIdx), 0.U(nWays.W))
val hitVec = VecInit(entries.zip(v zip refill_mask.asBools).map{case (e, m) => e.hit(vpn) && m._1 && !m._2})
hitVec.suggestName("hitVec")
val hitVecReg = if (sameCycle) hitVec else RegEnable(hitVec, req.fire())
resp.valid := { if (sameCycle) req.valid else RegNext(req.valid) }
resp.bits.hit := Cat(hitVecReg).orR
resp.bits.ppn := ParallelMux(hitVecReg zip entries.map(_.genPPN(vpn_reg)))
resp.bits.perm := ParallelMux(hitVecReg zip entries.map(_.perm))
resp.bits.hitVec := hitVecReg.asUInt
resp.bits.hit.suggestName("hit")
resp.bits.ppn.suggestName("ppn")
resp.bits.perm.suggestName("perm")
resp.bits.hitVec.suggestName("hitVec")
}
when (io.w.valid) {
v(io.w.bits.wayIdx) := true.B
entries(io.w.bits.wayIdx).apply(io.w.bits.data)
}
val sfence = io.sfence
val sfence_vpn = sfence.bits.addr.asTypeOf(new VaBundle().cloneType).vpn
val sfenceHit = entries.map(_.hit(sfence_vpn))
when (io.sfence.valid) {
when (sfence.bits.rs1) { // virtual address *.rs1 <- (rs1===0.U)
when (sfence.bits.rs2) { // asid, but i do not want to support asid, *.rs2 <- (rs2===0.U)
// all addr and all asid
v.map(_ := false.B)
}.otherwise {
// all addr but specific asid
v.zipWithIndex.map{ case (a,i) => a := a & g(i) }
}
}.otherwise {
when (sfence.bits.rs2) {
// specific addr but all asid
v.zipWithIndex.map{ case (a,i) => a := a & !sfenceHit(i) }
}.otherwise {
// specific addr and specific asid
v.zipWithIndex.map{ case (a,i) => a := a & !(sfenceHit(i) && !g(i)) }
}
}
}
val victim_idx = io.w.bits.wayIdx
io.victim.out.valid := v(victim_idx) && io.w.valid && entries(victim_idx).level.getOrElse(3.U) === 2.U
io.victim.out.bits := ns_to_n(entries(victim_idx))
def ns_to_n(ns: TlbEntry): TlbEntry = {
val n = Wire(new TlbEntry(pageNormal = true, pageSuper = false))
n.perm := ns.perm
n.ppn := ns.ppn
n.tag := ns.tag
n
}
XSPerfAccumulate(s"access", io.r.resp.map(_.valid.asUInt()).fold(0.U)(_ + _))
XSPerfAccumulate(s"hit", io.r.resp.map(a => a.valid && a.bits.hit).fold(0.U)(_.asUInt() + _.asUInt()))
for (i <- 0 until nWays) {
XSPerfAccumulate(s"access${i}", io.r.resp.map(a => a.valid && a.bits.hit && a.bits.hitVec(i)).fold(0.U)(_.asUInt
() + _.asUInt()))
}
for (i <- 0 until nWays) {
XSPerfAccumulate(s"refill${i}", io.w.valid && io.w.bits.wayIdx === i.U)
}
println(s"tlb_fa: nSets${nSets} nWays:${nWays}")
}
@chiselName
class TLBSA(
sameCycle: Boolean,
ports: Int,
nSets: Int,
nWays: Int,
sramSinglePort: Boolean,
normalPage: Boolean,
superPage: Boolean
)(implicit p: Parameters) extends TlbModule {
require(!superPage, "super page should use reg/fa")
require(!sameCycle, "sram needs next cycle")
val io = IO(new TlbStorageIO(nSets, nWays, ports))
io.r.req.map(_.ready := { if (sramSinglePort) !io.w.valid else true.B })
val v = RegInit(VecInit(Seq.fill(nSets)(VecInit(Seq.fill(nWays)(false.B)))))
for (i <- 0 until ports) { // duplicate sram
val entries = Module(new SRAMTemplate(
new TlbEntry(normalPage, superPage),
set = nSets,
way = nWays,
singlePort = sramSinglePort
))
val req = io.r.req(i)
val resp = io.r.resp(i)
val vpn = req.bits.vpn
val vpn_reg = RegEnable(vpn, req.fire())
val ridx = get_idx(vpn, nSets)
val vidx = RegNext(Mux(req.fire(), v(ridx), VecInit(Seq.fill(nWays)(false.B))))
entries.io.r.req.valid := req.valid
entries.io.r.req.bits.apply(setIdx = ridx)
val data = entries.io.r.resp.data
val hitVec = VecInit(data.zip(vidx).map{ case (e, vi) => e.hit(vpn_reg) && vi})
resp.bits.hit := Cat(hitVec).orR
resp.bits.ppn := ParallelMux(hitVec zip data.map(_.genPPN(vpn_reg)))
resp.bits.perm := ParallelMux(hitVec zip data.map(_.perm))
resp.bits.hitVec := hitVec.asUInt
resp.valid := { if (sramSinglePort) RegNext(req.fire()) else RegNext(req.valid) }
resp.bits.hit.suggestName("hit")
resp.bits.ppn.suggestName("ppn")
resp.bits.perm.suggestName("perm")
resp.bits.hitVec.suggestName("hitVec")
entries.io.w.apply(
valid = io.w.valid || io.victim.in.valid,
setIdx = Mux(io.w.valid, get_idx(io.w.bits.data.entry.tag, nSets), get_idx(io.victim.in.bits.tag, nSets)),
data = Mux(io.w.valid, (Wire(new TlbEntry(normalPage, superPage)).apply(io.w.bits.data)), io.victim.in.bits),
waymask = UIntToOH(io.w.bits.wayIdx)
)
}
when (io.w.valid) {
v(get_idx(io.w.bits.data.entry.tag, nSets))(io.w.bits.wayIdx) := true.B
}
when (io.victim.in.valid) {
v(get_idx(io.victim.in.bits.tag, nSets))(io.w.bits.wayIdx) := true.B
}
val sfence = io.sfence
val sfence_vpn = sfence.bits.addr.asTypeOf(new VaBundle().cloneType).vpn
when (io.sfence.valid) {
when (sfence.bits.rs1) { // virtual address *.rs1 <- (rs1===0.U)
when (sfence.bits.rs2) { // asid, but i do not want to support asid, *.rs2 <- (rs2===0.U)
// all addr and all asid
v.map(a => a.map(b => b := false.B))
}.otherwise {
// all addr but specific asid
// v.zipWithIndex.map{ case (a,i) => a := a & g(i) }
v.map(a => a.map(b => b := false.B)) // TODO: handle g
}
}.otherwise {
when (sfence.bits.rs2) {
// specific addr but all asid
v(get_idx(sfence_vpn, nSets)).map(_ := false.B)
}.otherwise {
// specific addr and specific asid
v(get_idx(sfence_vpn, nSets)).map(_ := false.B)
}
}
}
io.victim.out := DontCare
XSPerfAccumulate(s"access", io.r.req.map(_.valid.asUInt()).fold(0.U)(_ + _))
XSPerfAccumulate(s"hit", io.r.resp.map(a => a.valid && a.bits.hit).fold(0.U)(_.asUInt() + _.asUInt()))
for (i <- 0 until nSets) {
for (j <- 0 until nWays) {
XSPerfAccumulate(s"refill${i}_${j}", (io.w.valid || io.victim.in.valid) &&
(Mux(io.w.valid, get_idx(io.w.bits.data.entry.tag, nSets), get_idx(io.victim.in.bits.tag, nSets)) === i.U) &&
(j.U === io.w.bits.wayIdx)
)
}
}
for (i <- 0 until nSets) {
for (j <- 0 until nWays) {
XSPerfAccumulate(s"hit${i}_${j}", io.r.resp.map(_.valid)
.zip(io.r.resp.map(_.bits.hitVec(j)))
.map{case(vi, hi) => vi && hi }
.zip(io.r.req.map(a => RegNext(get_idx(a.bits.vpn, nSets)) === i.U))
.map{a => (a._1 && a._2).asUInt()}
.fold(0.U)(_ + _)
)
}
}
for (i <- 0 until nSets) {
XSPerfAccumulate(s"access${i}", io.r.resp.map(_.valid)
.zip(io.r.req.map(a => RegNext(get_idx(a.bits.vpn, nSets)) === i.U))
.map{a => (a._1 && a._2).asUInt()}
.fold(0.U)(_ + _)
)
}
println(s"tlb_sa: nSets:${nSets} nWays:${nWays}")
}
object TlbStorage {
def apply
(
name: String,
associative: String,
sameCycle: Boolean,
ports: Int,
nSets: Int,
nWays: Int,
sramSinglePort: Boolean,
normalPage: Boolean,
superPage: Boolean
)(implicit p: Parameters) = {
if (associative == "fa") {
val storage = Module(new TLBFA(sameCycle, ports, nSets, nWays, sramSinglePort, normalPage, superPage))
storage.suggestName(s"tlb_${name}_fa")
storage.io
} else {
val storage = Module(new TLBSA(sameCycle, ports, nSets, nWays, sramSinglePort, normalPage, superPage))
storage.suggestName(s"tlb_${name}_sa")
storage.io
}
}
}
\ No newline at end of file
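The factory above returns a TlbStorageIO; a hedged sketch of how a TLB front-end might drive it through the helper methods defined in the bundle earlier in this commit (reqValid, reqVpn, refill, refillIdx and ptwRespData are placeholder signals, not names from the diff):

// Instantiate the normal-page storage and drive it through the TlbStorageIO helpers.
val normalPage = TlbStorage(
  name = "normal",
  associative = q.normalAssociative,
  sameCycle = q.sameCycle,
  ports = Width,
  nSets = q.normalNSets,
  nWays = q.normalNWays,
  sramSinglePort = sramSinglePort,
  normalPage = true,
  superPage = false
)
normalPage.sfence := io.sfence
normalPage.r_req_apply(valid = reqValid, vpn = reqVpn, i = 0)               // read port 0
val (hit, ppn, perm, hitVec) = normalPage.r_resp_apply(0)                   // one cycle later (unless sameCycle)
normalPage.w_apply(valid = refill, wayIdx = refillIdx, data = ptwRespData)  // refill from the PTW response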
......@@ -71,8 +71,8 @@ class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
sfence = io.sfence,
csr = io.tlbCsr,
width = 2,
isDtlb = false,
shouldBlock = true
shouldBlock = true,
itlbParams
)
//TODO: modules need to be removed
val instrUncache = outer.instrUncache.module
......
......@@ -23,7 +23,7 @@ import utils._
import xiangshan._
import xiangshan.backend.decode.ImmUnion
import xiangshan.cache._
import xiangshan.cache.mmu.{TlbRequestIO, TlbReq, TlbResp, TlbCmd}
import xiangshan.cache.mmu.{TlbPtwIO, TlbReq, TlbResp, TlbCmd, TlbRequestIO, TLB}
class LoadToLsqIO(implicit p: Parameters) extends XSBundle {
val loadIn = ValidIO(new LsPipelineBundle)
......@@ -319,10 +319,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
val rsIdx = Input(UInt(log2Up(IssQueSize).W))
val isFirstIssue = Input(Bool())
val dcache = new DCacheLoadIO
val dtlb = new TlbRequestIO()
val sbuffer = new LoadForwardQueryIO
val lsq = new LoadToLsqIO
val fastUop = ValidIO(new MicroOp) // early wakeup signal generated in load_s1
val tlb = new TlbRequestIO
val fastpathOut = Output(new LoadToLoadIO)
val fastpathIn = Input(Vec(LoadPipelineWidth, new LoadToLoadIO))
val loadFastMatch = Input(UInt(exuParameters.LduCnt.W))
......@@ -333,7 +334,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
val load_s2 = Module(new LoadUnit_S2)
load_s0.io.in <> io.ldin
load_s0.io.dtlbReq <> io.dtlb.req
load_s0.io.dtlbReq <> io.tlb.req
load_s0.io.dcacheReq <> io.dcache.req
load_s0.io.rsIdx := io.rsIdx
load_s0.io.isFirstIssue := io.isFirstIssue
......@@ -342,7 +343,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush))
load_s1.io.dtlbResp <> io.dtlb.resp
load_s1.io.dtlbResp <> io.tlb.resp
io.dcache.s1_paddr <> load_s1.io.dcachePAddr
io.dcache.s1_kill <> load_s1.io.dcacheKill
load_s1.io.sbuffer <> io.sbuffer
......@@ -379,7 +380,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule with HasLoadHelper {
p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
XSDebug(load_s1.io.out.valid,
p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s1.io.out.bits.uop.lqIdx.asUInt)}, tlb_miss ${io.dtlb.resp.bits.miss}, " +
p"S1: pc ${Hexadecimal(load_s1.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s1.io.out.bits.uop.lqIdx.asUInt)}, tlb_miss ${io.tlb.resp.bits.miss}, " +
p"paddr ${Hexadecimal(load_s1.io.out.bits.paddr)}, mmio ${load_s1.io.out.bits.mmio}\n")
// writeback to LSQ
......
......@@ -23,7 +23,7 @@ import utils._
import xiangshan._
import xiangshan.backend.decode.ImmUnion
import xiangshan.cache._
import xiangshan.cache.mmu.{TlbRequestIO, TlbReq, TlbResp, TlbCmd}
import xiangshan.cache.mmu.{TlbPtwIO, TlbRequestIO, TlbReq, TlbResp, TlbCmd, TLB}
// Store Pipeline Stage 0
// Generate addr, use addr to query DCache and DTLB
......@@ -167,7 +167,7 @@ class StoreUnit(implicit p: Parameters) extends XSModule {
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
val rsFeedback = ValidIO(new RSFeedback)
val dtlb = new TlbRequestIO()
val tlb = new TlbRequestIO()
val rsIdx = Input(UInt(log2Up(IssQueSize).W))
val isFirstIssue = Input(Bool())
val lsq = ValidIO(new LsPipelineBundle)
......@@ -180,14 +180,14 @@ class StoreUnit(implicit p: Parameters) extends XSModule {
val store_s3 = Module(new StoreUnit_S3)
store_s0.io.in <> io.stin
store_s0.io.dtlbReq <> io.dtlb.req
store_s0.io.dtlbReq <> io.tlb.req
store_s0.io.rsIdx := io.rsIdx
store_s0.io.isFirstIssue := io.isFirstIssue
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, store_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush))
store_s1.io.lsq <> io.lsq // send result to sq
store_s1.io.dtlbResp <> io.dtlb.resp
store_s1.io.dtlbResp <> io.tlb.resp
store_s1.io.rsFeedback <> io.rsFeedback
PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect, io.flush))
......