diff --git a/src/main/scala/xiangshan/backend/fu/CSR.scala b/src/main/scala/xiangshan/backend/fu/CSR.scala index f2b536cecadb9433879877ee50707c3967ce2c74..1f4f1788d5ac3b1b91113c80f8b51731121f55bc 100644 --- a/src/main/scala/xiangshan/backend/fu/CSR.scala +++ b/src/main/scala/xiangshan/backend/fu/CSR.scala @@ -358,8 +358,8 @@ class CSR extends FunctionUnit with HasCSRConst // val sie = RegInit(0.U(XLEN.W)) val sieMask = "h222".U & mideleg val sipMask = "h222".U & mideleg - val satp = RegInit(0.U(XLEN.W)) - // val satp = RegInit(UInt(XLEN.W), "h8000000000087fbe".U) // only use for tlb naive debug + // val satp = RegInit(0.U(XLEN.W)) + val satp = RegInit(UInt(XLEN.W), "h8000000000087fbe".U) // only use for tlb naive debug val satpMask = "h80000fffffffffff".U // disable asid, mode can only be 8 / 0 // val satp = RegInit(UInt(XLEN.W), 0.U) val sepc = RegInit(UInt(XLEN.W), 0.U) @@ -851,7 +851,7 @@ class CSR extends FunctionUnit with HasCSRConst // "Custom8" -> (0xb22, "Custom8" ), // "Ml2cacheHit" -> (0xb23, "perfCntCondMl2cacheHit") ) ++ ( - (0 until dcacheParameters.nMissEntries).map(i => + (0 until dcacheParameters.nMissEntries).map(i => ("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0xb2d + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( diff --git a/src/main/scala/xiangshan/cache/dtlb.scala b/src/main/scala/xiangshan/cache/dtlb.scala index a749e210f3676e110c378b9c60f9c5aa57693ba1..497236a4a8df5c1a18be5c9f5af75a0878c8ad49 100644 --- a/src/main/scala/xiangshan/cache/dtlb.scala +++ b/src/main/scala/xiangshan/cache/dtlb.scala @@ -59,6 +59,24 @@ class PermBundle(val hasV: Boolean = true) extends TlbBundle { } } +class TlbPermBundle extends TlbBundle { + val pf = Bool() // NOTE: if this is true, just raise pf + val d = Bool() + val a = Bool() + val g = Bool() + val u = Bool() + val x = Bool() + val w = Bool() + val r = Bool() + + // pma perm check + // val at = Bool() // Access Type + // val as = Bool() // Atomic Swap + // val al 
= Bool() // Atomic Logical + // val aa = Bool() // Atomic Arithmetic + // TODO: add pma check +} + class comBundle extends TlbBundle with HasCircularQueuePtrHelper{ val roqIdx = new RoqPtr val valid = Bool() @@ -101,7 +119,7 @@ class CAMTemplate[T <: Data](val gen: T, val set: Int, val readWidth: Int) exten class TlbEntryData extends TlbBundle { val ppn = UInt(ppnLen.W) - val perm = new PermBundle(hasV = false) + val perm = new TlbPermBundle // TODO: change perm to every kinds of pf check override def toPrintable: Printable = { @@ -114,7 +132,7 @@ class TlbEntry(superpage: Boolean = false, superpageOnly: Boolean = false) exten val level = UInt(log2Up(Level).W) // 2 for 4KB, 1 for 2MB, 0 for 1GB val data = new TlbEntryData - def vpnHit(vpn: UInt):Bool = { + def hit(vpn: UInt):Bool = { if (superpage) { val fullMask = VecInit((Seq.fill(vpnLen)(true.B))).asUInt val maskLevel = VecInit((Level-1 to 0 by -1).map{i => // NOTE: level 2 for 4KB, 1 for 2MB, 0 for 1GB @@ -124,20 +142,25 @@ class TlbEntry(superpage: Boolean = false, superpageOnly: Boolean = false) exten } else { tag === vpn } - } - def hit(vpn: UInt):Bool = { - vpnHit(vpn) - } + def apply(vpn: UInt, ppn: UInt, level: UInt, perm: UInt, pf: Bool) = { + this.tag := vpn + this.level := level + this.data.ppn := ppn + val ptePerm = perm.asTypeOf(new PermBundle) + this.data.perm.pf:= pf + this.data.perm.d := ptePerm.d + this.data.perm.a := ptePerm.a + this.data.perm.g := ptePerm.g + this.data.perm.u := ptePerm.u + this.data.perm.x := ptePerm.x + this.data.perm.w := ptePerm.w + this.data.perm.r := ptePerm.r - def genTlbEntry(superPage: Boolean = false, superpageOnly: Boolean = false, pte: UInt, level: UInt, vpn: UInt) = { - val e = Wire(new TlbEntry(superPage, superpageOnly)) - e.tag := vpn - e.level := level - e.data.ppn := pte.asTypeOf(pteBundle).ppn - e.data.perm := pte.asTypeOf(pteBundle).perm - e + XSDebug(p"refill: vpn:${Hexadecimal(vpn)} level:${level} ppn:${Hexadecimal(ppn)} pf:${pf} d:${ptePerm.d} 
a:${ptePerm.a} g:${ptePerm.g} u:${ptePerm.u} x:${ptePerm.x} w:${ptePerm.w} r:${ptePerm.r}\n")("Tlbrefill") + + this } override def toPrintable: Printable = { @@ -147,67 +170,6 @@ class TlbEntry(superpage: Boolean = false, superpageOnly: Boolean = false) exten override def cloneType: this.type = (new TlbEntry(superpage, superpageOnly)).asInstanceOf[this.type] } -class TlbEntries(num: Int, tagLen: Int) extends TlbBundle { - require(log2Up(num)==log2Down(num)) - /* vpn can be divide into three part */ - // vpn: tagPart(17bit) + addrPart(8bit) + cutLenPart(2bit) - val cutLen = log2Up(num) - - val tag = UInt(tagLen.W) // NOTE: high part of vpn - val ppns = Vec(num, UInt(ppnLen.W)) - val perms = Vec(num, new PermBundle(hasV = false)) - val vs = Vec(num, Bool()) - - def tagClip(vpn: UInt) = { // full vpn => tagLen - vpn(vpn.getWidth-1, tagLen) - } - - // NOTE: get insize idx - def idxClip(vpn: UInt) = { - vpn(cutLen-1, 0) - } - - def hit(vpn: UInt) = { - (tag === tagClip(vpn)) && vs(idxClip(vpn)) - } - - def genEntries(data: UInt, level: UInt, vpn: UInt): TlbEntries = { - require((data.getWidth / XLEN) == num, - "input data length must be multiple of pte length") - assert(level===2.U, "tlb entries only support 4K pages") - - val ts = Wire(new TlbEntries(num, tagLen)) - ts.tag := tagClip(vpn) - for (i <- 0 until num) { - val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle) - ts.ppns(i) := pte.ppn - ts.perms(i):= pte.perm // this.perms has no v - ts.vs(i) := !pte.isPf(level) && pte.isLeaf() // legal and leaf, store to l2Tlb - } - - ts - } - - def get(vpn: UInt): TlbEntry = { - val t = Wire(new TlbEntry(false, false)) - val idx = idxClip(vpn) - t.tag := vpn // Note: Use input vpn, not vpn in TlbL2 - t.level := 2.U - t.data.ppn := ppns(idx) - t.data.perm := perms(idx) - t - } - - override def cloneType: this.type = (new TlbEntries(num, tagLen)).asInstanceOf[this.type] - override def toPrintable: Printable = { - require(num == 4, "if num is not 4, please comment this 
toPrintable") - // NOTE: if num is not 4, please comment this toPrintable - p"tag:${Hexadecimal(tag)} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))}" + - p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} " + - p"perms(0):${perms(0)} perms(1):${perms(1)} perms(2):${perms(2)} perms(3):${perms(3)} vs:${Binary(vs.asUInt)}" - } -} - object TlbCmd { def read = "b00".U def write = "b01".U @@ -285,8 +247,8 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ val priv = csr.priv val ifecth = if (isDtlb) false.B else true.B val mode = if (isDtlb) priv.dmode else priv.imode - // val vmEnable = satp.mode === 8.U // && (mode < ModeM) // FIXME: fix me when boot xv6/linux... - val vmEnable = satp.mode === 8.U && (mode < ModeM) + val vmEnable = satp.mode === 8.U // && (mode < ModeM) // FIXME: fix me when boot xv6/linux... + // val vmEnable = satp.mode === 8.U && (mode < ModeM) val reqAddr = req.map(_.bits.vaddr.asTypeOf(vaBundle)) val cmd = req.map(_.bits.cmd) @@ -297,8 +259,9 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ // normal page: 4k val v = RegInit(0.U(TlbEntrySize.W)) - val pf = RegInit(0.U(TlbEntrySize.W)) + // val pf = RegInit(0.U(TlbEntrySize.W)) val entry = Reg(Vec(TlbEntrySize, new TlbEntry)) + val pf = VecInit(entry.map(_.data.perm.pf)).asUInt & v val g = VecInit(entry.map(_.data.perm.g)).asUInt /** @@ -306,14 +269,21 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ */ val refill = ptw.resp.fire() val randIdx = LFSR64()(log2Up(TlbEntrySize)-1,0) - val priorIdx = PriorityEncoder(~(v|pf)) - val tlbfull = ParallelAND((v|pf).asBools) + val priorIdx = PriorityEncoder(~(v)) + val tlbfull = ParallelAND((v).asBools) val refillIdx = Mux(tlbfull, randIdx, priorIdx) val refillIdxOH = UIntToOH(refillIdx) when (refill) { - v := Mux(ptw.resp.bits.pf, v & ~refillIdxOH, v | refillIdxOH) - entry(refillIdx) := ptw.resp.bits.entry - XSDebug(p"Refill: idx:${refillIdx} 
entry:${ptw.resp.bits.entry}\n") + val resp = ptw.resp.bits + v := v | refillIdxOH + entry(refillIdx).apply( + vpn = resp.entry.tag, + ppn = resp.entry.ppn, + level = resp.entry.level, + perm = Cat(VecInit(resp.entry.perm).asUInt, 0.U(1.W)).asUInt, + pf = resp.pf + ) + XSDebug(p"Refill: idx:${refillIdx} entry:${resp.entry} pf:${resp.pf}\n") } /** @@ -343,8 +313,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ val hitLevel= ParallelMux(hitVec zip entry.map(_.level)) val multiHit = { val hitSum = PopCount(hitVec) - val pfHitSum = PopCount(pfHitVec) - !(hitSum===0.U || hitSum===1.U) || !(pfHitSum===0.U || pfHitSum===1.U) + !(hitSum===0.U || hitSum===1.U) } // resp // TODO: A/D has not being concerned @@ -394,24 +363,9 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ } ptw.req.bits := Compare(ptwReqSeq).bits - // reset pf when pf hit - val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVecVec(i)).asUInt, 0.U) }) - val pfHitRefill = false.B//ParallelOR(pfHitReset.asBools) - - // pf update - when (refill) { - when (pfHitRefill) { - pf := Mux(ptw.resp.bits.pf, pf | refillIdxOH, pf & ~refillIdxOH) & ~pfHitReset - } .otherwise { - pf := Mux(ptw.resp.bits.pf, pf | refillIdxOH, pf & ~refillIdxOH) - } - } .otherwise { - when (pfHitRefill) { - pf := pf & ~pfHitReset - } - } when (PopCount(pf) > 10.U) { // when too much pf, just clear - pf := Mux(refill && ptw.resp.bits.pf, refillIdxOH, 0.U) + // pf := Mux(refill && ptw.resp.bits.pf, refillIdxOH, 0.U) + v := v & ~pf } // sfence (flush) @@ -421,22 +375,18 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ when (sfence.bits.rs2) { // asid, but i do not want to support asid, *.rs2 <- (rs2===0.U) // all addr and all asid v := 0.U - pf := 0.U }.otherwise { // all addr but specific asid v := v & g // TODO: need check if reverse is needed - pf := pf & g } }.otherwise { val sfenceVpn = sfence.bits.addr.asTypeOf(vaBundle).vpn 
when (sfence.bits.rs2) { // specific addr but all asid v := v & ~VecInit(entry.map(_.hit(sfenceVpn))).asUInt - pf := pf & ~VecInit(entry.map(_.hit(sfenceVpn))).asUInt }.otherwise { // specific addr and specific asid - v := v & ~VecInit(entry.map(e => e.hit(sfenceVpn) && (/*e.asid === sfence.bits.asid && */!e.data.perm.g))).asUInt - pf := pf & ~VecInit(entry.map(e => e.hit(sfenceVpn) && (/*e.asid === sfence.bits.asid && */!e.data.perm.g))).asUInt + v := v & ~VecInit(entry.map(e => e.hit(sfenceVpn) && !e.data.perm.g)).asUInt } } } @@ -468,6 +418,17 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{ XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)}\n") XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n") XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n") + + // NOTE: identity-mapping self-check (paddr must equal vaddr on every valid, non-miss resp while vmEnable), just for simple tlb debug; each XSDebug fires exactly when the assert below it fails; comment it out after tlb's debug + for (i <- 0 until Width) { + if(isDtlb) { + XSDebug(!(!vmEnable || RegNext(req(i).bits.vaddr)===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss), p"Dtlb: vaddr:${Hexadecimal(RegNext(req(i).bits.vaddr))} paddr:${Hexadecimal(resp(i).bits.paddr)} should be equal\n") + assert(!vmEnable || RegNext(req(i).bits.vaddr)===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss) + } else { + XSDebug(!(!vmEnable || req(i).bits.vaddr===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss), p"Itlb: vaddr:${Hexadecimal(req(i).bits.vaddr)} paddr:${Hexadecimal(resp(i).bits.paddr)} should be equal\n") + assert(!vmEnable || req(i).bits.vaddr===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss) + } + } } object TLB { diff --git a/src/main/scala/xiangshan/cache/ptw.scala b/src/main/scala/xiangshan/cache/ptw.scala index 018a141aa1879386cf5714407e442be2b3e9e06b..f584490cdd34b48ee9aa239eb0170be93daccbb5 100644 --- 
a/src/main/scala/xiangshan/cache/ptw.scala +++ b/src/main/scala/xiangshan/cache/ptw.scala @@ -156,6 +156,94 @@ class PtwEntries(num: Int, tagLen: Int) extends PtwBundle { } } +class L2TlbEntry extends TlbBundle { + val tag = UInt(vpnLen.W) // tag is vpn + val level = UInt(log2Up(Level).W) // 2 for 4KB, 1 for 2MB, 0 for 1GB + val ppn = UInt(ppnLen.W) + val perm = new PermBundle(hasV = false) + + def hit(vpn: UInt):Bool = { + val fullMask = VecInit((Seq.fill(vpnLen)(true.B))).asUInt + val maskLevel = VecInit((Level-1 to 0 by -1).map{i => // NOTE: level 2 for 4KB, 1 for 2MB, 0 for 1GB + Reverse(VecInit(Seq.fill(vpnLen-i*vpnnLen)(true.B) ++ Seq.fill(i*vpnnLen)(false.B)).asUInt)}) + val mask = maskLevel(level) + (mask&this.tag) === (mask&vpn) + } + + def apply(pte: UInt, level: UInt, vpn: UInt) = { + this.tag := vpn + this.level := level + this.ppn := pte.asTypeOf(pteBundle).ppn + this.perm := pte.asTypeOf(pteBundle).perm + this + } + + override def toPrintable: Printable = { + p"vpn:0x${Hexadecimal(tag)} level:${level} ppn:${Hexadecimal(ppn)} perm:${perm}" + } +} + +class L2TlbEntires(num: Int, tagLen: Int) extends TlbBundle { + require(log2Up(num)==log2Down(num)) + /* vpn can be divide into three part */ + // vpn: tagPart(17bit) + addrPart(8bit) + cutLenPart(2bit) + val cutLen = log2Up(num) + + val tag = UInt(tagLen.W) // NOTE: high part of vpn + val ppns = Vec(num, UInt(ppnLen.W)) + val perms = Vec(num, new PermBundle(hasV = false)) + val vs = Vec(num, Bool()) + + def tagClip(vpn: UInt) = { // full vpn => tagLen + vpn(vpn.getWidth-1, tagLen) + } + + // NOTE: get insize idx + def idxClip(vpn: UInt) = { + vpn(cutLen-1, 0) + } + + def hit(vpn: UInt) = { + (tag === tagClip(vpn)) && vs(idxClip(vpn)) + } + + def genEntries(data: UInt, level: UInt, vpn: UInt): L2TlbEntires = { + require((data.getWidth / XLEN) == num, + "input data length must be multiple of pte length") + assert(level===2.U, "tlb entries only support 4K pages") + + val ts = Wire(new L2TlbEntires(num, 
tagLen)) + ts.tag := tagClip(vpn) + for (i <- 0 until num) { + val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle) + ts.ppns(i) := pte.ppn + ts.perms(i):= pte.perm // this.perms has no v + ts.vs(i) := !pte.isPf(level) && pte.isLeaf() // legal and leaf, store to l2Tlb + } + + ts + } + + def get(vpn: UInt): L2TlbEntry = { + val t = Wire(new L2TlbEntry) + val idx = idxClip(vpn) + t.tag := vpn // Note: Use input vpn, not vpn in TlbL2 + t.level := 2.U // L2TlbEntries only support 4k page + t.ppn := ppns(idx) + t.perm := perms(idx) + t + } + + override def cloneType: this.type = (new L2TlbEntires(num, tagLen)).asInstanceOf[this.type] + override def toPrintable: Printable = { + require(num == 4, "if num is not 4, please comment this toPrintable") + // NOTE: if num is not 4, please comment this toPrintable + p"tag:${Hexadecimal(tag)} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))}" + + p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} " + + p"perms(0):${perms(0)} perms(1):${perms(1)} perms(2):${perms(2)} perms(3):${perms(3)} vs:${Binary(vs.asUInt)}" + } +} + class PtwReq extends PtwBundle { val vpn = UInt(vpnLen.W) @@ -165,8 +253,8 @@ class PtwReq extends PtwBundle { } class PtwResp extends PtwBundle { - val entry = new TlbEntry - val pf = Bool() // simple pf no matter cmd + val entry = new L2TlbEntry + val pf = Bool() override def toPrintable: Printable = { p"entry:${entry} pf:${pf}" @@ -235,11 +323,11 @@ class PTWImp(outer: PTW) extends PtwModule(outer){ // two level: l2-tlb-cache && pde/pte-cache // l2-tlb-cache is ram-larger-edition tlb // pde/pte-cache is cache of page-table, speeding up ptw - val tlbl2 = Module(new SRAMTemplate(new TlbEntries(num = TlbL2LineSize, tagLen = TlbL2TagLen), set = TlbL2LineNum)) // (total 256, one line is 4 => 64 lines) + val tlbl2 = Module(new SRAMTemplate(new L2TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen), set = TlbL2LineNum)) // (total 256, one line is 4 => 64 lines) val tlbv = 
RegInit(0.U(TlbL2LineNum.W)) // valid val tlbg = Reg(UInt(TlbL2LineNum.W)) // global - val sp = Reg(Vec(TlbL2SPEntrySize, new TlbEntry(true, true))) // (total 16, one is 4M or 1G) + val sp = Reg(Vec(TlbL2SPEntrySize, new L2TlbEntry)) // (total 16, one is 4M or 1G) val spv = RegInit(0.U(TlbL2SPEntrySize.W)) val spg = Reg(UInt(TlbL2SPEntrySize.W)) @@ -378,7 +466,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){ state := state_idle }.otherwise { state := state_wait_ready - latch.entry := new TlbEntry().genTlbEntry(false, false, memRdata, level, req.vpn) + latch.entry := Wire(new L2TlbEntry()).apply(memRdata, level, req.vpn) latch.pf := memPte.isPf(level) } }.otherwise { @@ -435,7 +523,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){ for(i <- 0 until PtwWidth) { resp(i).valid := valid && arbChosen===i.U && ptwFinish // TODO: add resp valid logic resp(i).bits.entry := Mux(tlbHit, tlbHitData, - Mux(state===state_wait_ready, latch.entry, new TlbEntry().genTlbEntry(false, false, memSelData, Mux(level===3.U, 2.U, level), req.vpn))) + Mux(state===state_wait_ready, latch.entry, Wire(new L2TlbEntry()).apply(memSelData, Mux(level===3.U, 2.U, level), req.vpn))) resp(i).bits.pf := Mux(level===3.U || notFound, true.B, Mux(tlbHit, false.B, Mux(state===state_wait_ready, latch.pf, memPte.isPf(level)))) // TODO: the pf must not be correct, check it } @@ -480,7 +568,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){ //TODO: check why the old refillIdx is right assert(tlbl2.io.w.req.ready) - val ts = new TlbEntries(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn) + val ts = new L2TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn) tlbl2.io.w.apply( valid = true.B, setIdx = refillIdx, @@ -494,7 +582,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){ when (memPte.isLeaf() && (level===1.U || level===0.U)) { val refillIdx = LFSR64()(log2Up(TlbL2SPEntrySize)-1,0) // TODO: may be LRU val rfOH = 
UIntToOH(refillIdx) - sp(refillIdx) := new TlbEntry().genTlbEntry(false, false, memSelData, Mux(level===3.U, 2.U, level), req.vpn) + sp(refillIdx) := Wire(new L2TlbEntry()).apply(memSelData, Mux(level===3.U, 2.U, level), req.vpn) spv := spv | rfOH spg := (spg & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U) } diff --git a/src/test/csrc/ram.cpp b/src/test/csrc/ram.cpp index d7192983ad7be7fadb21c1605d53469a436b91cc..899e94b8e6c7c956f77a3d90eee37aa8bc94d580 100644 --- a/src/test/csrc/ram.cpp +++ b/src/test/csrc/ram.cpp @@ -4,7 +4,7 @@ #include "ram.h" #include "compress.h" -// #define TLB_UNITTEST +#define TLB_UNITTEST #ifdef WITH_DRAMSIM3 #include "cosimulation.h"