diff --git a/.github/workflows/emu.yml b/.github/workflows/emu.yml index 68f7c783bd7c1261abb6fa0d573f650a5c8d9ce5..e1167de24eaae9583ec7967a7803be4bb8b35390 100644 --- a/.github/workflows/emu.yml +++ b/.github/workflows/emu.yml @@ -3,20 +3,34 @@ name: EMU Test on: push: - branches: [ master, update-ci] + branches: [ master ] pull_request: branches: [ master ] jobs: - build-emu: + generate-verilog: runs-on: self-hosted - name: Make EMU + name: Generate Verilog steps: - uses: actions/checkout@v2 with: submodules: 'recursive' - name: Check Wiring run: bash .github/workflows/check-usage.sh "BoringUtils" $GITHUB_WORKSPACE + - name: set env + run: | + echo "NEMU_HOME=/home/ci-runner/xsenv/NEMU" >> $GITHUB_ENV + echo "NOOP_HOME=$GITHUB_WORKSPACE" >> $GITHUB_ENV + - name: generate verilog file + run: + make verilog SIM_ARGS=--dual-core + build-emu: + runs-on: self-hosted + name: Make EMU + steps: + - uses: actions/checkout@v2 + with: + submodules: 'recursive' - name: Set env run: | echo "NEMU_HOME=/home/ci-runner/xsenv/NEMU" >> $GITHUB_ENV @@ -25,7 +39,7 @@ jobs: echo "AM_HOME=/home/ci-runner/xsenv/nexus-am" >> $GITHUB_ENV - name: Build EMU run: - make ./build/emu SIM_ARGS=--disable-all NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j60 + make ./build/emu SIM_ARGS=--disable-all NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j220 - name: Run cputest run: | CPU_TEST_DIR=$AM_HOME/tests/cputest diff --git a/build.sc b/build.sc index 14524033e7288596d0c191b89ca3797391cf7c54..c2eb5a72e035c22a856c057bdf7f21387c640f00 100644 --- a/build.sc +++ b/build.sc @@ -29,7 +29,7 @@ trait CommonModule extends ScalaModule { } val chisel = Agg( - ivy"edu.berkeley.cs::chisel3:3.4.0" + ivy"edu.berkeley.cs::chisel3:3.4.1" ) object `api-config-chipsalliance` extends CommonModule { @@ -103,4 +103,4 @@ object XiangShan extends CommonModule with SbtModule { } } -} \ No newline at end of file +} diff --git a/src/main/scala/utils/DataModuleTemplate.scala b/src/main/scala/utils/DataModuleTemplate.scala index 
f0fd314c2de659e5107bc88d3bdd31bffea46b2d..cfc72d6a0e245605fe1ecc03998f18cd4d180e8a 100644 --- a/src/main/scala/utils/DataModuleTemplate.scala +++ b/src/main/scala/utils/DataModuleTemplate.scala @@ -3,7 +3,7 @@ package utils import chisel3._ import chisel3.util._ -class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends Module { +class DataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int, isSync: Boolean) extends Module { val io = IO(new Bundle { val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W))) val rdata = Vec(numRead, Output(gen)) @@ -15,8 +15,9 @@ class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, val data = Mem(numEntries, gen) // read ports + val raddr = if (isSync) (RegNext(io.raddr)) else io.raddr for (i <- 0 until numRead) { - io.rdata(i) := data(io.raddr(i)) + io.rdata(i) := data(raddr(i)) } // below is the write ports (with priorities) @@ -34,34 +35,5 @@ class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, } } -class SyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends Module { - val io = IO(new Bundle { - val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W))) - val rdata = Vec(numRead, Output(gen)) - val wen = Vec(numWrite, Input(Bool())) - val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W))) - val wdata = Vec(numWrite, Input(gen)) - }) - - val data = Mem(numEntries, gen) - - // read ports - val raddr_reg = RegNext(io.raddr) - for (i <- 0 until numRead) { - io.rdata(i) := data(raddr_reg(i)) - } - - // below is the write ports (with priorities) - for (i <- 0 until numWrite) { - when (io.wen(i)) { - data(io.waddr(i)) := io.wdata(i) - } - } - - // DataModuleTemplate should not be used when there're any write conflicts - for (i <- 0 until numWrite) { - for (j <- i+1 until numWrite) { - assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) - } - } -} 
+class SyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends DataModuleTemplate(gen, numEntries, numRead, numWrite, true) +class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends DataModuleTemplate(gen, numEntries, numRead, numWrite, false) diff --git a/src/main/scala/xiangshan/Bundle.scala b/src/main/scala/xiangshan/Bundle.scala index f8a66dede8fefacae5e2bca61c8109f8e90595f2..0bded8800ed6d70b7542476ae35c57a47e45c940 100644 --- a/src/main/scala/xiangshan/Bundle.scala +++ b/src/main/scala/xiangshan/Bundle.scala @@ -284,6 +284,7 @@ class ReplayPregReq extends XSBundle { class DebugBundle extends XSBundle{ val isMMIO = Bool() + val isPerfCnt = Bool() } class ExuInput extends XSBundle { diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index 8338bd3b4b8111ecdd87a59336a6112e21b8bc76..4bd6f22ce3bda67d0853fcdef45b40709300cdd1 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -425,6 +425,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer) integerBlock.io.csrio.memExceptionVAddr <> memBlock.io.lsqio.exceptionAddr.vaddr integerBlock.io.csrio.externalInterrupt <> io.externalInterrupt integerBlock.io.csrio.tlb <> memBlock.io.tlbCsr + integerBlock.io.csrio.perfinfo <> ctrlBlock.io.roqio.toCSR.perfinfo integerBlock.io.fenceio.sfence <> memBlock.io.sfence integerBlock.io.fenceio.sbuffer <> memBlock.io.fenceToSbuffer diff --git a/src/main/scala/xiangshan/backend/IntegerBlock.scala b/src/main/scala/xiangshan/backend/IntegerBlock.scala index 41b253d34c76599c33e929457809eac95d2fe9f2..ad12583c4ee1a37e84536ca27217418869e8bbbd 100644 --- a/src/main/scala/xiangshan/backend/IntegerBlock.scala +++ b/src/main/scala/xiangshan/backend/IntegerBlock.scala @@ -82,6 +82,9 @@ class IntegerBlock val memExceptionVAddr = Input(UInt(VAddrBits.W)) // from lsq val externalInterrupt = new ExternalInterruptIO // from 
outside val tlb = Output(new TlbCsrBundle) // from tlb + val perfinfo = new Bundle { + val retiredInstr = Input(UInt(3.W)) + } } val fenceio = new Bundle { val sfence = Output(new SfenceBundle) // to front,mem diff --git a/src/main/scala/xiangshan/backend/brq/Brq.scala b/src/main/scala/xiangshan/backend/brq/Brq.scala index a6a1101dd983228f0a724c91ce82c1621fc487fd..12e9e947a215e03e40b1dbd14c569d0fe04853f7 100644 --- a/src/main/scala/xiangshan/backend/brq/Brq.scala +++ b/src/main/scala/xiangshan/backend/brq/Brq.scala @@ -306,16 +306,16 @@ class Brq extends XSModule with HasCircularQueuePtrHelper { val mbpRWrong = predWrong && isRType if(!env.FPGAPlatform){ - ExcitingUtils.addSource(mbpInstr, "perfCntCondMbpInstr", Perf) - ExcitingUtils.addSource(mbpRight, "perfCntCondMbpRight", Perf) - ExcitingUtils.addSource(mbpWrong, "perfCntCondMbpWrong", Perf) - ExcitingUtils.addSource(mbpBRight, "perfCntCondMbpBRight", Perf) - ExcitingUtils.addSource(mbpBWrong, "perfCntCondMbpBWrong", Perf) - ExcitingUtils.addSource(mbpJRight, "perfCntCondMbpJRight", Perf) - ExcitingUtils.addSource(mbpJWrong, "perfCntCondMbpJWrong", Perf) - ExcitingUtils.addSource(mbpIRight, "perfCntCondMbpIRight", Perf) - ExcitingUtils.addSource(mbpIWrong, "perfCntCondMbpIWrong", Perf) - ExcitingUtils.addSource(mbpRRight, "perfCntCondMbpRRight", Perf) - ExcitingUtils.addSource(mbpRWrong, "perfCntCondMbpRWrong", Perf) + ExcitingUtils.addSource(mbpInstr, "perfCntCondBpInstr", Perf) + ExcitingUtils.addSource(mbpRight, "perfCntCondBpRight", Perf) + ExcitingUtils.addSource(mbpWrong, "perfCntCondBpWrong", Perf) + ExcitingUtils.addSource(mbpBRight, "perfCntCondBpBRight", Perf) + ExcitingUtils.addSource(mbpBWrong, "perfCntCondBpBWrong", Perf) + ExcitingUtils.addSource(mbpJRight, "perfCntCondBpJRight", Perf) + ExcitingUtils.addSource(mbpJWrong, "perfCntCondBpJWrong", Perf) + ExcitingUtils.addSource(mbpIRight, "perfCntCondBpIRight", Perf) + ExcitingUtils.addSource(mbpIWrong, "perfCntCondBpIWrong", Perf) + 
ExcitingUtils.addSource(mbpRRight, "perfCntCondBpRRight", Perf) + ExcitingUtils.addSource(mbpRWrong, "perfCntCondBpRWrong", Perf) } } diff --git a/src/main/scala/xiangshan/backend/exu/Exu.scala b/src/main/scala/xiangshan/backend/exu/Exu.scala index 2dfb3cb13df33c5e2339a37fd3768a97b1c47e60..4d88adbdad63af836fe3fa61bf6d9eb98f43e92b 100644 --- a/src/main/scala/xiangshan/backend/exu/Exu.scala +++ b/src/main/scala/xiangshan/backend/exu/Exu.scala @@ -183,6 +183,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule { out.fflags := DontCare out.debug <> DontCare out.debug.isMMIO := false.B + out.debug.isPerfCnt := false.B out.redirect <> DontCare out.redirectValid := false.B } diff --git a/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala b/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala index 022621a54d9e35c2dc237264ed0a634c2f6b6db1..cf8e424c833b1d537a4afdbe7856f6d3e86c7b37 100644 --- a/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala @@ -21,6 +21,9 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg) val memExceptionVAddr = Input(UInt(VAddrBits.W)) val externalInterrupt = new ExternalInterruptIO val tlb = Output(new TlbCsrBundle) + val perfinfo = new Bundle { + val retiredInstr = Input(UInt(3.W)) + } }) val fenceio = IO(new Bundle { val sfence = Output(new SfenceBundle) @@ -42,6 +45,7 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg) }.get csr.csrio.perf <> DontCare + csr.csrio.perf.retiredInstr <> csrio.perfinfo.retiredInstr csr.csrio.fpu.fflags <> csrio.fflags csr.csrio.fpu.isIllegal := false.B csr.csrio.fpu.dirty_fs <> csrio.dirty_fs @@ -73,6 +77,7 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg) io.toInt.bits.redirect.roqIdx := uop.roqIdx io.toInt.bits.redirect.target := csr.csrio.redirectOut.bits io.toInt.bits.redirect.pc := uop.cf.pc + io.toInt.bits.debug.isPerfCnt := csr.csrio.isPerfCnt }.elsewhen(jmp.io.out.valid){ io.toInt.bits.redirectValid := jmp.redirectOutValid 
io.toInt.bits.redirect := jmp.redirectOut diff --git a/src/main/scala/xiangshan/backend/fu/CSR.scala b/src/main/scala/xiangshan/backend/fu/CSR.scala index 2674d4bcf646dbbb43adbaa9e2940949dbe7c282..d85f9907a3d5af63994cff176e487bd663b50b29 100644 --- a/src/main/scala/xiangshan/backend/fu/CSR.scala +++ b/src/main/scala/xiangshan/backend/fu/CSR.scala @@ -6,136 +6,14 @@ import chisel3.util._ import utils._ import xiangshan._ import xiangshan.backend._ +import xiangshan.backend.fu.util._ import utils.XSDebug -object debugId extends Function0[Integer] { - var x = 0 - def apply(): Integer = { - x = x + 1 - return x - } -} - -object hartId extends Function0[Int] { +object hartId extends (() => Int) { var x = 0 def apply(): Int = { x = x + 1 - return x-1 - } -} - -trait HasCSRConst { - // User Trap Setup - val Ustatus = 0x000 - val Uie = 0x004 - val Utvec = 0x005 - - // User Trap Handling - val Uscratch = 0x040 - val Uepc = 0x041 - val Ucause = 0x042 - val Utval = 0x043 - val Uip = 0x044 - - // User Floating-Point CSRs (not implemented) - val Fflags = 0x001 - val Frm = 0x002 - val Fcsr = 0x003 - - // User Counter/Timers - val Cycle = 0xC00 - val Time = 0xC01 - val Instret = 0xC02 - - // Supervisor Trap Setup - val Sstatus = 0x100 - val Sedeleg = 0x102 - val Sideleg = 0x103 - val Sie = 0x104 - val Stvec = 0x105 - val Scounteren = 0x106 - - // Supervisor Trap Handling - val Sscratch = 0x140 - val Sepc = 0x141 - val Scause = 0x142 - val Stval = 0x143 - val Sip = 0x144 - - // Supervisor Protection and Translation - val Satp = 0x180 - - // Machine Information Registers - val Mvendorid = 0xF11 - val Marchid = 0xF12 - val Mimpid = 0xF13 - val Mhartid = 0xF14 - - // Machine Trap Setup - val Mstatus = 0x300 - val Misa = 0x301 - val Medeleg = 0x302 - val Mideleg = 0x303 - val Mie = 0x304 - val Mtvec = 0x305 - val Mcounteren = 0x306 - - // Machine Trap Handling - val Mscratch = 0x340 - val Mepc = 0x341 - val Mcause = 0x342 - val Mtval = 0x343 - val Mip = 0x344 - - // Machine Memory 
Protection - // TBD - val Pmpcfg0 = 0x3A0 - val Pmpcfg1 = 0x3A1 - val Pmpcfg2 = 0x3A2 - val Pmpcfg3 = 0x3A3 - val PmpaddrBase = 0x3B0 - - // Machine Counter/Timers - // Currently, we uses perfcnt csr set instead of standard Machine Counter/Timers - // 0xB80 - 0x89F are also used as perfcnt csr - - // Machine Counter Setup (not implemented) - // Debug/Trace Registers (shared with Debug Mode) (not implemented) - // Debug Mode Registers (not implemented) - - def privEcall = 0x000.U - def privEbreak = 0x001.U - def privMret = 0x302.U - def privSret = 0x102.U - def privUret = 0x002.U - - def ModeM = 0x3.U - def ModeH = 0x2.U - def ModeS = 0x1.U - def ModeU = 0x0.U - - def IRQ_UEIP = 0 - def IRQ_SEIP = 1 - def IRQ_MEIP = 3 - - def IRQ_UTIP = 4 - def IRQ_STIP = 5 - def IRQ_MTIP = 7 - - def IRQ_USIP = 8 - def IRQ_SSIP = 9 - def IRQ_MSIP = 11 - - val IntPriority = Seq( - IRQ_MEIP, IRQ_MSIP, IRQ_MTIP, - IRQ_SEIP, IRQ_SSIP, IRQ_STIP, - IRQ_UEIP, IRQ_USIP, IRQ_UTIP - ) - - def csrAccessPermissionCheck(addr: UInt, wen: Bool, mode: UInt): Bool = { - val readOnly = addr(11,10) === "b11".U - val lowestAccessPrivilegeLevel = addr(9,8) - mode >= lowestAccessPrivilegeLevel && !(wen && readOnly) + x-1 } } @@ -156,18 +34,18 @@ trait HasExceptionNO { def storePageFault = 15 val ExcPriority = Seq( - breakPoint, // TODO: different BP has different priority - instrPageFault, - instrAccessFault, - illegalInstr, - instrAddrMisaligned, - ecallM, ecallS, ecallU, - storePageFault, - loadPageFault, - storeAccessFault, - loadAccessFault, - storeAddrMisaligned, - loadAddrMisaligned + breakPoint, // TODO: different BP has different priority + instrPageFault, + instrAccessFault, + illegalInstr, + instrAddrMisaligned, + ecallM, ecallS, ecallU, + storePageFault, + loadPageFault, + storeAccessFault, + loadAccessFault, + storeAddrMisaligned, + loadAddrMisaligned ) } @@ -180,6 +58,7 @@ class FpuCsrIO extends XSBundle { class PerfCounterIO extends XSBundle { + val retiredInstr = Input(UInt(3.W)) val value 
= Input(UInt(XLEN.W)) } @@ -188,7 +67,8 @@ class CSR extends FunctionUnit with HasCSRConst val csrio = IO(new Bundle { // output (for func === CSROpType.jmp) val redirectOut = ValidIO(UInt(VAddrBits.W)) - val perf = Vec(NumPerfCounters, new PerfCounterIO) + val perf = new PerfCounterIO + val isPerfCnt = Output(Bool()) // to FPU val fpu = Flipped(new FpuCsrIO) // from rob @@ -281,11 +161,12 @@ class CSR extends FunctionUnit with HasCSRConst def getMisaMxl(mxl: Int): UInt = {mxl.U << (XLEN-2)}.asUInt() def getMisaExt(ext: Char): UInt = {1.U << (ext.toInt - 'a'.toInt)}.asUInt() var extList = List('a', 's', 'i', 'u') - if(HasMExtension){ extList = extList :+ 'm'} - if(HasCExtension){ extList = extList :+ 'c'} - if(HasFPU){ extList = extList ++ List('f', 'd')} + if (HasMExtension) { extList = extList :+ 'm' } + if (HasCExtension) { extList = extList :+ 'c' } + if (HasFPU) { extList = extList ++ List('f', 'd') } val misaInitVal = getMisaMxl(2) | extList.foldLeft(0.U)((sum, i) => sum | getMisaExt(i)) //"h8000000000141105".U val misa = RegInit(UInt(XLEN.W), misaInitVal) + // MXL = 2 | 0 | EXT = b 00 0000 0100 0001 0001 0000 0101 // (XLEN-1, XLEN-2) | |(25, 0) ZY XWVU TSRQ PONM LKJI HGFE DCBA @@ -294,8 +175,8 @@ class CSR extends FunctionUnit with HasCSRConst val mimpid = RegInit(UInt(XLEN.W), 0.U) // provides a unique encoding of the version of the processor implementation val mhartNo = hartId() val mhartid = RegInit(UInt(XLEN.W), mhartNo.asUInt) // the hardware thread running the code - val mstatus = RegInit(UInt(XLEN.W), "h00001800".U) - // val mstatus = RegInit(UInt(XLEN.W), "h8000c0100".U) + val mstatus = RegInit(UInt(XLEN.W), "h00001800".U) // another option: "h8000c0100".U + // mstatus Value Table // | sd | // | pad1 | @@ -315,6 +196,7 @@ class CSR extends FunctionUnit with HasCSRConst // | spp | 0 | // | pie | 0000 | pie.h is used as UBE // | ie | 0000 | uie hardlinked to 0, as N ext is not implemented + val mstatusStruct = mstatus.asTypeOf(new MstatusStruct) def 
mstatusUpdateSideEffect(mstatus: UInt): UInt = { val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct)) @@ -361,7 +243,6 @@ class CSR extends FunctionUnit with HasCSRConst val satp = RegInit(0.U(XLEN.W)) // val satp = RegInit(UInt(XLEN.W), "h8000000000087fbe".U) // only use for tlb naive debug val satpMask = "h80000fffffffffff".U // disable asid, mode can only be 8 / 0 - // val satp = RegInit(UInt(XLEN.W), 0.U) val sepc = RegInit(UInt(XLEN.W), 0.U) val scause = RegInit(UInt(XLEN.W), 0.U) val stval = Reg(UInt(XLEN.W)) @@ -376,7 +257,7 @@ class CSR extends FunctionUnit with HasCSRConst val uepc = Reg(UInt(XLEN.W)) // fcsr - class FcsrStruct extends Bundle{ + class FcsrStruct extends Bundle { val reserved = UInt((XLEN-3-5).W) val frm = UInt(3.W) val fflags = UInt(5.W) @@ -398,7 +279,7 @@ class CSR extends FunctionUnit with HasCSRConst val fcsrOld = fcsr.asTypeOf(new FcsrStruct) val fcsrNew = WireInit(fcsrOld) csrw_dirty_fp_state := true.B - if(update){ + if (update) { fcsrNew.fflags := wdata(4,0) | fcsrOld.fflags } else { fcsrNew.fflags := wdata(4,0) @@ -420,74 +301,87 @@ class CSR extends FunctionUnit with HasCSRConst ) // Atom LR/SC Control Bits -// val setLr = WireInit(Bool(), false.B) -// val setLrVal = WireInit(Bool(), false.B) -// val setLrAddr = WireInit(UInt(AddrBits.W), DontCare) //TODO : need check -// val lr = RegInit(Bool(), false.B) -// val lrAddr = RegInit(UInt(AddrBits.W), 0.U) -// -// when(setLr){ -// lr := setLrVal -// lrAddr := setLrAddr -// } + // val setLr = WireInit(Bool(), false.B) + // val setLrVal = WireInit(Bool(), false.B) + // val setLrAddr = WireInit(UInt(AddrBits.W), DontCare) //TODO : need check + // val lr = RegInit(Bool(), false.B) + // val lrAddr = RegInit(UInt(AddrBits.W), 0.U) + // + // when (setLr) { + // lr := setLrVal + // lrAddr := setLrAddr + // } // Hart Priviledge Mode val priviledgeMode = RegInit(UInt(2.W), ModeM) - // perfcnt - val hasPerfCnt = !env.FPGAPlatform - val nrPerfCnts = if (hasPerfCnt) 0x80 else 0x3 - val 
perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) - val perfCntsLoMapping = (0 until nrPerfCnts).map(i => MaskedRegMap(0xb00 + i, perfCnts(i))) - val perfCntsHiMapping = (0 until nrPerfCnts).map(i => MaskedRegMap(0xb80 + i, perfCnts(i)(63, 32))) - println(s"CSR: hasPerfCnt:${hasPerfCnt}") + // Emu perfcnt + val hasEmuPerfCnt = !env.FPGAPlatform + val nrEmuPerfCnts = if (hasEmuPerfCnt) 0x80 else 0x3 + + val emuPerfCnts = List.fill(nrEmuPerfCnts)(RegInit(0.U(XLEN.W))) + val emuPerfCntCond = List.fill(nrEmuPerfCnts)(WireInit(false.B)) + (emuPerfCnts zip emuPerfCntCond).map { case (c, e) => when (e) { c := c + 1.U } } + + val emuPerfCntsLoMapping = (0 until nrEmuPerfCnts).map(i => MaskedRegMap(0x1000 + i, emuPerfCnts(i))) + val emuPerfCntsHiMapping = (0 until nrEmuPerfCnts).map(i => MaskedRegMap(0x1080 + i, emuPerfCnts(i)(63, 32))) + println(s"CSR: hasEmuPerfCnt:${hasEmuPerfCnt}") + + // Perf Counter + val nrPerfCnts = 29 // 3...31 + val perfCnts = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) + val perfEvents = List.fill(nrPerfCnts)(RegInit(0.U(XLEN.W))) + val mcountinhibit = RegInit(0.U(XLEN.W)) + val mcycle = RegInit(0.U(XLEN.W)) + mcycle := mcycle + 1.U + val minstret = RegInit(0.U(XLEN.W)) + minstret := minstret + RegNext(csrio.perf.retiredInstr) + // CSR reg map - val mapping = Map( + val basicPrivMapping = Map( - // User Trap Setup + //--- User Trap Setup --- // MaskedRegMap(Ustatus, ustatus), // MaskedRegMap(Uie, uie, 0.U, MaskedRegMap.Unwritable), // MaskedRegMap(Utvec, utvec), - // User Trap Handling + //--- User Trap Handling --- // MaskedRegMap(Uscratch, uscratch), // MaskedRegMap(Uepc, uepc), // MaskedRegMap(Ucause, ucause), // MaskedRegMap(Utval, utval), // MaskedRegMap(Uip, uip), - // User Counter/Timers + //--- User Counter/Timers --- // MaskedRegMap(Cycle, cycle), // MaskedRegMap(Time, time), // MaskedRegMap(Instret, instret), - // Supervisor Trap Setup + //--- Supervisor Trap Setup --- MaskedRegMap(Sstatus, mstatus, sstatusWmask, 
mstatusUpdateSideEffect, sstatusRmask), - // MaskedRegMap(Sedeleg, Sedeleg), // MaskedRegMap(Sideleg, Sideleg), MaskedRegMap(Sie, mie, sieMask, MaskedRegMap.NoSideEffect, sieMask), MaskedRegMap(Stvec, stvec), MaskedRegMap(Scounteren, scounteren), - // Supervisor Trap Handling + //--- Supervisor Trap Handling --- MaskedRegMap(Sscratch, sscratch), MaskedRegMap(Sepc, sepc), MaskedRegMap(Scause, scause), MaskedRegMap(Stval, stval), MaskedRegMap(Sip, mip.asUInt, sipMask, MaskedRegMap.Unwritable, sipMask), - // Supervisor Protection and Translation + //--- Supervisor Protection and Translation --- MaskedRegMap(Satp, satp, satpMask, MaskedRegMap.NoSideEffect, satpMask), - // Machine Information Registers + //--- Machine Information Registers --- MaskedRegMap(Mvendorid, mvendorid, 0.U, MaskedRegMap.Unwritable), MaskedRegMap(Marchid, marchid, 0.U, MaskedRegMap.Unwritable), MaskedRegMap(Mimpid, mimpid, 0.U, MaskedRegMap.Unwritable), MaskedRegMap(Mhartid, mhartid, 0.U, MaskedRegMap.Unwritable), - // Machine Trap Setup - // MaskedRegMap(Mstatus, mstatus, "hffffffffffffffee".U, (x=>{printf("mstatus write: %x time: %d\n", x, GTimer()); x})), + //--- Machine Trap Setup --- MaskedRegMap(Mstatus, mstatus, mstatusMask, mstatusUpdateSideEffect, mstatusMask), MaskedRegMap(Misa, misa), // now MXL, EXT is not changeable MaskedRegMap(Medeleg, medeleg, "hf3ff".U), @@ -496,14 +390,16 @@ class CSR extends FunctionUnit with HasCSRConst MaskedRegMap(Mtvec, mtvec), MaskedRegMap(Mcounteren, mcounteren), - // Machine Trap Handling + //--- Machine Trap Handling --- MaskedRegMap(Mscratch, mscratch), MaskedRegMap(Mepc, mepc), MaskedRegMap(Mcause, mcause), MaskedRegMap(Mtval, mtval), MaskedRegMap(Mip, mip.asUInt, 0.U, MaskedRegMap.Unwritable), + ) - // Machine Memory Protection + // PMP is unimplemented yet + val pmpMapping = Map( MaskedRegMap(Pmpcfg0, pmpcfg0), MaskedRegMap(Pmpcfg1, pmpcfg1), MaskedRegMap(Pmpcfg2, pmpcfg2), @@ -512,11 +408,27 @@ class CSR extends FunctionUnit with HasCSRConst 
MaskedRegMap(PmpaddrBase + 1, pmpaddr1), MaskedRegMap(PmpaddrBase + 2, pmpaddr2), MaskedRegMap(PmpaddrBase + 3, pmpaddr3) + ) - ) ++ - perfCntsLoMapping ++ (if (XLEN == 32) perfCntsHiMapping else Nil) ++ - (if(HasFPU) fcsrMapping else Nil) + var perfCntMapping = Map( + MaskedRegMap(Mcountinhibit, mcountinhibit), + MaskedRegMap(Mcycle, mcycle), + MaskedRegMap(Minstret, minstret), + ) + val MhpmcounterStart = Mhpmcounter3 + val MhpmeventStart = Mhpmevent3 + for (i <- 0 until nrPerfCnts) { + perfCntMapping += MaskedRegMap(MhpmcounterStart + i, perfCnts(i)) + perfCntMapping += MaskedRegMap(MhpmeventStart + i, perfEvents(i)) + } + val mapping = basicPrivMapping ++ + perfCntMapping ++ + pmpMapping ++ + emuPerfCntsLoMapping ++ + (if (XLEN == 32) emuPerfCntsHiMapping else Nil) ++ + (if (HasFPU) fcsrMapping else Nil) + val addr = src2(11, 0) val csri = src2(16, 12) val rdata = Wire(UInt(XLEN.W)) @@ -529,6 +441,8 @@ class CSR extends FunctionUnit with HasCSRConst CSROpType.clri -> (rdata & (~csri).asUInt()) )) + csrio.isPerfCnt := (addr >= Mcycle.U) && (addr <= Mhpmcounter31.U) + // satp wen check val satpLegalMode = (wdata.asTypeOf(new SatpStruct).mode===0.U) || (wdata.asTypeOf(new SatpStruct).mode===8.U) @@ -551,11 +465,11 @@ class CSR extends FunctionUnit with HasCSRConst val rdataDummy = Wire(UInt(XLEN.W)) MaskedRegMap.generate(fixMapping, addr, rdataDummy, wen, wdata) - when(csrio.fpu.fflags.valid){ + when (csrio.fpu.fflags.valid) { fcsr := fflags_wfn(update = true)(csrio.fpu.fflags.bits) } // set fs and sd in mstatus - when(csrw_dirty_fp_state || csrio.fpu.dirty_fs){ + when (csrw_dirty_fp_state || csrio.fpu.dirty_fs) { val mstatusNew = WireInit(mstatus.asTypeOf(new MstatusStruct)) mstatusNew.fs := "b11".U mstatusNew.sd := true.B @@ -565,10 +479,10 @@ class CSR extends FunctionUnit with HasCSRConst // CSR inst decode val isEbreak = addr === privEbreak && func === CSROpType.jmp - val isEcall = addr === privEcall && func === CSROpType.jmp - val isMret = addr === privMret 
&& func === CSROpType.jmp - val isSret = addr === privSret && func === CSROpType.jmp - val isUret = addr === privUret && func === CSROpType.jmp + val isEcall = addr === privEcall && func === CSROpType.jmp + val isMret = addr === privMret && func === CSROpType.jmp + val isSret = addr === privSret && func === CSROpType.jmp + val isUret = addr === privUret && func === CSROpType.jmp XSDebug(wen, "csr write: pc %x addr %x rdata %x wdata %x func %x\n", cfIn.pc, addr, rdata, wdata, func) XSDebug(wen, "pc %x mstatus %x mideleg %x medeleg %x mode %x\n", cfIn.pc, mstatus, mideleg , medeleg, priviledgeMode) @@ -630,7 +544,7 @@ class CSR extends FunctionUnit with HasCSRConst mstatusNew.mpp := ModeU mstatusNew.mprv := 0.U mstatus := mstatusNew.asUInt -// lr := false.B + // lr := false.B retTarget := mepc(VAddrBits-1, 0) } @@ -715,12 +629,6 @@ class CSR extends FunctionUnit with HasCSRConst val raiseExceptionVec = csrio.exception.bits.cf.exceptionVec.asUInt() val exceptionNO = ExcPriority.foldRight(0.U)((i: Int, sum: UInt) => Mux(raiseExceptionVec(i), i.U, sum)) val causeNO = (raiseIntr << (XLEN-1)).asUInt() | Mux(raiseIntr, intrNO, exceptionNO) - // if (!env.FPGAPlatform) { - val id = debugId() - val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U) - ExcitingUtils.addSource(difftestIntrNO, s"difftestIntrNOfromCSR$id") - ExcitingUtils.addSource(causeNO, s"difftestCausefromCSR$id") - // } val raiseExceptionIntr = csrio.exception.valid XSDebug(raiseExceptionIntr, "int/exc: pc %x int (%d):%x exc: (%d):%x\n", @@ -737,7 +645,7 @@ class CSR extends FunctionUnit with HasCSRConst // mtval write logic val memExceptionAddr = SignExt(csrio.memExceptionVAddr, XLEN) - when(hasInstrPageFault || hasLoadPageFault || hasStorePageFault){ + when (hasInstrPageFault || hasLoadPageFault || hasStorePageFault) { val tval = Mux( hasInstrPageFault, Mux( @@ -747,15 +655,14 @@ class CSR extends FunctionUnit with HasCSRConst ), memExceptionAddr ) - when(priviledgeMode === ModeM){ + when (priviledgeMode === 
ModeM) { mtval := tval - }.otherwise{ + }.otherwise { stval := tval } } - when(hasLoadAddrMisaligned || hasStoreAddrMisaligned) - { + when (hasLoadAddrMisaligned || hasStoreAddrMisaligned) { mtval := memExceptionAddr } @@ -776,8 +683,7 @@ class CSR extends FunctionUnit with HasCSRConst mstatusNew.pie.s := mstatusOld.ie.s mstatusNew.ie.s := false.B priviledgeMode := ModeS - when(tvalWen){stval := 0.U} - // trapTarget := stvec(VAddrBits-1. 0) + when (tvalWen) { stval := 0.U } }.otherwise { mcause := causeNO mepc := SignExt(csrio.exception.bits.cf.pc, XLEN) @@ -785,8 +691,7 @@ class CSR extends FunctionUnit with HasCSRConst mstatusNew.pie.m := mstatusOld.ie.m mstatusNew.ie.m := false.B priviledgeMode := ModeM - when(tvalWen){mtval := 0.U} - // trapTarget := mtvec(VAddrBits-1. 0) + when (tvalWen) { mtval := 0.U } } mstatus := mstatusNew.asUInt @@ -801,95 +706,77 @@ class CSR extends FunctionUnit with HasCSRConst /** - * Performance counters + * Emu Performance counters */ - val perfCntList = Map( -// "Mcycle" -> (0xb00, "perfCntCondMcycle" ), -// "Minstret" -> (0xb02, "perfCntCondMinstret" ), - "MbpInstr" -> (0xb03, "perfCntCondMbpInstr" ), - "MbpRight" -> (0xb04, "perfCntCondMbpRight" ), - "MbpWrong" -> (0xb05, "perfCntCondMbpWrong" ), - "MbpBRight" -> (0xb06, "perfCntCondMbpBRight" ), - "MbpBWrong" -> (0xb07, "perfCntCondMbpBWrong" ), - "MbpJRight" -> (0xb08, "perfCntCondMbpJRight" ), - "MbpJWrong" -> (0xb09, "perfCntCondMbpJWrong" ), - "MbpIRight" -> (0xb0a, "perfCntCondMbpIRight" ), - "MbpIWrong" -> (0xb0b, "perfCntCondMbpIWrong" ), - "MbpRRight" -> (0xb0c, "perfCntCondMbpRRight" ), - "MbpRWrong" -> (0xb0d, "perfCntCondMbpRWrong" ), - "RoqWalk" -> (0xb0f, "perfCntCondRoqWalk" ), - "DTlbReqCnt0" -> (0xb15, "perfCntDtlbReqCnt0" ), - "DTlbReqCnt1" -> (0xb16, "perfCntDtlbReqCnt1" ), - "DTlbReqCnt2" -> (0xb17, "perfCntDtlbReqCnt2" ), - "DTlbReqCnt3" -> (0xb18, "perfCntDtlbReqCnt3" ), - "DTlbMissCnt0"-> (0xb19, "perfCntDtlbMissCnt0" ), - "DTlbMissCnt1"-> (0xb20, 
"perfCntDtlbMissCnt1" ), - "DTlbMissCnt2"-> (0xb21, "perfCntDtlbMissCnt2" ), - "DTlbMissCnt3"-> (0xb22, "perfCntDtlbMissCnt3" ), - "ITlbReqCnt0" -> (0xb23, "perfCntItlbReqCnt0" ), - "ITlbMissCnt0"-> (0xb24, "perfCntItlbMissCnt0" ), - "PtwReqCnt" -> (0xb25, "perfCntPtwReqCnt" ), - "PtwCycleCnt" -> (0xb26, "perfCntPtwCycleCnt" ), - "PtwL2TlbHit" -> (0xb27, "perfCntPtwL2TlbHit" ), - "ICacheReq" -> (0xb28, "perfCntIcacheReqCnt" ), - "ICacheMiss" -> (0xb29, "perfCntIcacheMissCnt" ), - "LoopExit" -> (0xb2a, "perfCntLoopExit" ), - "DCacheMiss" -> (0xb2b, "perfCntDCacheMiss" ), - "L1+PrefetchCnt"->(0xb2c, "perfCntL1plusPrefetchReqCnt"), - "L2PrefetchCnt"->(0xb2d, "perfCntL2PrefetchReqCnt") - // "FetchFromICache" -> (0xb2a, "CntFetchFromICache"), - // "FetchFromLoopBuffer" -> (0xb2b, "CntFetchFromLoopBuffer"), - // "ExitLoop1" -> (0xb2c, "CntExitLoop1"), - // "ExitLoop2" -> (0xb2d, "CntExitLoop2"), - // "ExitLoop3" -> (0xb2e, "CntExitLoop3") -// "Custom1" -> (0xb1b, "Custom1" ), -// "Custom2" -> (0xb1c, "Custom2" ), -// "Custom3" -> (0xb1d, "Custom3" ), -// "Custom4" -> (0xb1e, "Custom4" ), -// "Custom5" -> (0xb1f, "Custom5" ), -// "Custom6" -> (0xb20, "Custom6" ), -// "Custom7" -> (0xb21, "Custom7" ), -// "Custom8" -> (0xb22, "Custom8" ), -// "Ml2cacheHit" -> (0xb23, "perfCntCondMl2cacheHit") + val emuPerfCntList = Map( + // "Mcycle" -> (0x1000, "perfCntCondMcycle" ), + // "Minstret" -> (0x1002, "perfCntCondMinstret" ), + "BpInstr" -> (0x1003, "perfCntCondBpInstr" ), + "BpRight" -> (0x1004, "perfCntCondBpRight" ), + "BpWrong" -> (0x1005, "perfCntCondBpWrong" ), + "BpBRight" -> (0x1006, "perfCntCondBpBRight"), + "BpBWrong" -> (0x1007, "perfCntCondBpBWrong"), + "BpJRight" -> (0x1008, "perfCntCondBpJRight"), + "BpJWrong" -> (0x1009, "perfCntCondBpJWrong"), + "BpIRight" -> (0x100a, "perfCntCondBpIRight"), + "BpIWrong" -> (0x100b, "perfCntCondBpIWrong"), + "BpRRight" -> (0x100c, "perfCntCondBpRRight"), + "BpRWrong" -> (0x100d, "perfCntCondBpRWrong"), + "RoqWalk" -> (0x100f, 
"perfCntCondRoqWalk" ), + "DTlbReqCnt0" -> (0x1015, "perfCntDtlbReqCnt0" ), + "DTlbReqCnt1" -> (0x1016, "perfCntDtlbReqCnt1" ), + "DTlbReqCnt2" -> (0x1017, "perfCntDtlbReqCnt2" ), + "DTlbReqCnt3" -> (0x1018, "perfCntDtlbReqCnt3" ), + "DTlbMissCnt0"-> (0x1019, "perfCntDtlbMissCnt0" ), + "DTlbMissCnt1"-> (0x1020, "perfCntDtlbMissCnt1" ), + "DTlbMissCnt2"-> (0x1021, "perfCntDtlbMissCnt2" ), + "DTlbMissCnt3"-> (0x1022, "perfCntDtlbMissCnt3" ), + "ITlbReqCnt0" -> (0x1023, "perfCntItlbReqCnt0" ), + "ITlbMissCnt0"-> (0x1024, "perfCntItlbMissCnt0" ), + "PtwReqCnt" -> (0x1025, "perfCntPtwReqCnt" ), + "PtwCycleCnt" -> (0x1026, "perfCntPtwCycleCnt" ), + "PtwL2TlbHit" -> (0x1027, "perfCntPtwL2TlbHit" ), + "ICacheReq" -> (0x1028, "perfCntIcacheReqCnt" ), + "ICacheMiss" -> (0x1029, "perfCntIcacheMissCnt") + // "FetchFromICache" -> (0x102a, "CntFetchFromICache"), + // "FetchFromLoopBuffer" -> (0x102b, "CntFetchFromLoopBuffer"), + // "ExitLoop1" -> (0x102c, "CntExitLoop1"), + // "ExitLoop2" -> (0x102d, "CntExitLoop2"), + // "ExitLoop3" -> (0x102e, "CntExitLoop3") + // "L2cacheHit" -> (0x1023, "perfCntCondL2cacheHit") ) ++ ( (0 until dcacheParameters.nMissEntries).map(i => - ("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0xb2d + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) + ("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x102d + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( (0 until icacheParameters.nMissEntries).map(i => - ("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0xb2d + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) + ("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x102d + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( (0 until l1plusPrefetcherParameters.nEntries).map(i => - ("L1+PrefetchPenalty" + Integer.toString(i, 10), (0xb2d + dcacheParameters.nMissEntries + 
icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10))) + ("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x102d + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10))) ).toMap ) ++ ( (0 until l2PrefetcherParameters.nEntries).map(i => - ("L2PrefetchPenalty" + Integer.toString(i, 10), (0xb2d + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10))) + ("L2PrefetchPenalty" + Integer.toString(i, 10), (0x102d + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10))) ).toMap ) - // (0 until dcacheParameters.nMissEntries).foreach(i => - // perfCntList = perfCntList ++ Map(("DCacheMissQueuePenalty" + Integer.toString(i, 10)) -> (0xb2a + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))) - val perfCntCond = List.fill(0x80)(WireInit(false.B)) - (perfCnts zip perfCntCond).map { case (c, e) => when (e) { c := c + 1.U } } - -// ExcitingUtils.addSource(WireInit(true.B), "perfCntCondMcycle", ConnectionType.Perf) - perfCntList.foreach { + emuPerfCntList.foreach { case (_, (address, boringId)) => - if(hasPerfCnt){ - ExcitingUtils.addSink(perfCntCond(address & 0x7f), boringId, ConnectionType.Perf) + if (hasEmuPerfCnt) { + ExcitingUtils.addSink(emuPerfCntCond(address & 0x7f), boringId, ConnectionType.Perf) } -// if (!hasPerfCnt) { -// // do not enable perfcnts except for Mcycle and Minstret -// if (address != perfCntList("Mcycle")._1 && address != perfCntList("Minstret")._1) { -// perfCntCond(address & 0x7f) := false.B -// } -// } + // if (!hasEmuPerfCnt) { + // // do not enable perfcnts except for Mcycle and Minstret + // if (address != emuPerfCntList("Mcycle")._1 && address != emuPerfCntList("Minstret")._1) { + // 
perfCntCond(address & 0x7f) := false.B + // } + // } } val xstrap = WireInit(false.B) - if(!env.FPGAPlatform && EnableBPU){ + if (!env.FPGAPlatform && EnableBPU) { ExcitingUtils.addSink(xstrap, "XSTRAP", ConnectionType.Debug) } def readWithScala(addr: Int): UInt = mapping(addr)._1 @@ -899,11 +786,14 @@ class CSR extends FunctionUnit with HasCSRConst // display all perfcnt when nooptrap is executed when (xstrap) { printf("======== PerfCnt =========\n") - perfCntList.toSeq.sortBy(_._2._1).foreach { case (str, (address, boringId)) => + emuPerfCntList.toSeq.sortBy(_._2._1).foreach { case (str, (address, _)) => printf("%d <- " + str + "\n", readWithScala(address)) } } + val difftestIntrNO = Mux(raiseIntr, causeNO, 0.U) + ExcitingUtils.addSource(difftestIntrNO, "difftestIntrNOfromCSR") + ExcitingUtils.addSource(causeNO, "difftestCausefromCSR") ExcitingUtils.addSource(priviledgeMode, "difftestMode", Debug) ExcitingUtils.addSource(mstatus, "difftestMstatus", Debug) ExcitingUtils.addSource(mstatus & sstatusRmask, "difftestSstatus", Debug) @@ -922,6 +812,5 @@ class CSR extends FunctionUnit with HasCSRConst ExcitingUtils.addSource(sscratch, "difftestSscratch", Debug) ExcitingUtils.addSource(mideleg, "difftestMideleg", Debug) ExcitingUtils.addSource(medeleg, "difftestMedeleg", Debug) - } else { } } diff --git a/src/main/scala/xiangshan/backend/fu/util/CSRConst.scala b/src/main/scala/xiangshan/backend/fu/util/CSRConst.scala new file mode 100644 index 0000000000000000000000000000000000000000..d84f0a66d36ea9f2b2d0b2731d6815443dee6bf3 --- /dev/null +++ b/src/main/scala/xiangshan/backend/fu/util/CSRConst.scala @@ -0,0 +1,188 @@ +package xiangshan.backend.fu.util + +import chisel3._ +import chisel3.ExcitingUtils.{ConnectionType, Debug} +import chisel3.util._ +import utils._ +import xiangshan._ +import xiangshan.backend._ +import utils.XSDebug + +trait HasCSRConst { + + // User Trap Setup + val Ustatus = 0x000 + val Uie = 0x004 + val Utvec = 0x005 + + // User Trap Handling + val 
Uscratch = 0x040 + val Uepc = 0x041 + val Ucause = 0x042 + val Utval = 0x043 + val Uip = 0x044 + + // User Floating-Point CSRs (not implemented) + val Fflags = 0x001 + val Frm = 0x002 + val Fcsr = 0x003 + + // User Counter/Timers + val Cycle = 0xC00 + val Time = 0xC01 + val Instret = 0xC02 + + // Supervisor Trap Setup + val Sstatus = 0x100 + val Sedeleg = 0x102 + val Sideleg = 0x103 + val Sie = 0x104 + val Stvec = 0x105 + val Scounteren = 0x106 + + // Supervisor Trap Handling + val Sscratch = 0x140 + val Sepc = 0x141 + val Scause = 0x142 + val Stval = 0x143 + val Sip = 0x144 + + // Supervisor Protection and Translation + val Satp = 0x180 + + // Machine Information Registers + val Mvendorid = 0xF11 + val Marchid = 0xF12 + val Mimpid = 0xF13 + val Mhartid = 0xF14 + + // Machine Trap Setup + val Mstatus = 0x300 + val Misa = 0x301 + val Medeleg = 0x302 + val Mideleg = 0x303 + val Mie = 0x304 + val Mtvec = 0x305 + val Mcounteren = 0x306 + + // Machine Trap Handling + val Mscratch = 0x340 + val Mepc = 0x341 + val Mcause = 0x342 + val Mtval = 0x343 + val Mip = 0x344 + + // Machine Memory Protection + // TBD + val Pmpcfg0 = 0x3A0 + val Pmpcfg1 = 0x3A1 + val Pmpcfg2 = 0x3A2 + val Pmpcfg3 = 0x3A3 + val PmpaddrBase = 0x3B0 + + // Machine Counter/Timers + // Currently, we uses perfcnt csr set instead of standard Machine Counter/Timers + // 0xB80 - 0x89F are also used as perfcnt csr + val Mcycle = 0xb00 + val Minstret = 0xb02 + + val Mhpmcounter3 = 0xB03 + val Mhpmcounter4 = 0xB04 + val Mhpmcounter5 = 0xB05 + val Mhpmcounter6 = 0xB06 + val Mhpmcounter7 = 0xB07 + val Mhpmcounter8 = 0xB08 + val Mhpmcounter9 = 0xB09 + val Mhpmcounter10 = 0xB0A + val Mhpmcounter11 = 0xB0B + val Mhpmcounter12 = 0xB0C + val Mhpmcounter13 = 0xB0D + val Mhpmcounter14 = 0xB0E + val Mhpmcounter15 = 0xB0F + val Mhpmcounter16 = 0xB10 + val Mhpmcounter17 = 0xB11 + val Mhpmcounter18 = 0xB12 + val Mhpmcounter19 = 0xB13 + val Mhpmcounter20 = 0xB14 + val Mhpmcounter21 = 0xB15 + val Mhpmcounter22 = 0xB16 + val 
Mhpmcounter23 = 0xB17 + val Mhpmcounter24 = 0xB18 + val Mhpmcounter25 = 0xB19 + val Mhpmcounter26 = 0xB1A + val Mhpmcounter27 = 0xB1B + val Mhpmcounter28 = 0xB1C + val Mhpmcounter29 = 0xB1D + val Mhpmcounter30 = 0xB1E + val Mhpmcounter31 = 0xB1F + + // Machine Counter Setup (not implemented) + val Mcountinhibit = 0x320 + val Mhpmevent3 = 0x323 + val Mhpmevent4 = 0x324 + val Mhpmevent5 = 0x325 + val Mhpmevent6 = 0x326 + val Mhpmevent7 = 0x327 + val Mhpmevent8 = 0x328 + val Mhpmevent9 = 0x329 + val Mhpmevent10 = 0x32A + val Mhpmevent11 = 0x32B + val Mhpmevent12 = 0x32C + val Mhpmevent13 = 0x32D + val Mhpmevent14 = 0x32E + val Mhpmevent15 = 0x32F + val Mhpmevent16 = 0x330 + val Mhpmevent17 = 0x331 + val Mhpmevent18 = 0x332 + val Mhpmevent19 = 0x333 + val Mhpmevent20 = 0x334 + val Mhpmevent21 = 0x335 + val Mhpmevent22 = 0x336 + val Mhpmevent23 = 0x337 + val Mhpmevent24 = 0x338 + val Mhpmevent25 = 0x339 + val Mhpmevent26 = 0x33A + val Mhpmevent27 = 0x33B + val Mhpmevent28 = 0x33C + val Mhpmevent29 = 0x33D + val Mhpmevent30 = 0x33E + val Mhpmevent31 = 0x33F + + // Debug/Trace Registers (shared with Debug Mode) (not implemented) + // Debug Mode Registers (not implemented) + + def privEcall = 0x000.U + def privEbreak = 0x001.U + def privMret = 0x302.U + def privSret = 0x102.U + def privUret = 0x002.U + + def ModeM = 0x3.U + def ModeH = 0x2.U + def ModeS = 0x1.U + def ModeU = 0x0.U + + def IRQ_UEIP = 0 + def IRQ_SEIP = 1 + def IRQ_MEIP = 3 + + def IRQ_UTIP = 4 + def IRQ_STIP = 5 + def IRQ_MTIP = 7 + + def IRQ_USIP = 8 + def IRQ_SSIP = 9 + def IRQ_MSIP = 11 + + val IntPriority = Seq( + IRQ_MEIP, IRQ_MSIP, IRQ_MTIP, + IRQ_SEIP, IRQ_SSIP, IRQ_STIP, + IRQ_UEIP, IRQ_USIP, IRQ_UTIP + ) + + def csrAccessPermissionCheck(addr: UInt, wen: Bool, mode: UInt): Bool = { + val readOnly = addr(11,10) === "b11".U + val lowestAccessPrivilegeLevel = addr(9,8) + mode >= lowestAccessPrivilegeLevel && !(wen && readOnly) + } +} \ No newline at end of file diff --git 
a/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala b/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala index 74011fa2b8576cd46be6b0f0566071d6f7a2c9a2..ab43642ed657ab99fabb8feb76a604f62a7fb90b 100644 --- a/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala +++ b/src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala @@ -358,7 +358,7 @@ class ReservationStationData // Data // ------------------------ - val data = List.tabulate(srcNum)(_ => Module(new SyncDataModuleTemplate(UInt((XLEN + 1).W), iqSize, if (!env.FPGAPlatform) iqSize else 1, iqSize))) + val data = List.tabulate(srcNum)(_ => Module(new SyncDataModuleTemplate(UInt((XLEN + 1).W), iqSize, numRead = iqSize + 1, numWrite = iqSize))) data.foreach(_.io <> DontCare) data.foreach(_.io.wen.foreach(_ := false.B)) @@ -366,14 +366,13 @@ class ReservationStationData // ! warning: reading has 1 cycle delay, so input addr is used in next cycle // luckily, for fpga platform, read port has fixed value // otherwise, read port has same value as read addr - def dataRead(iqIdx: UInt, srcIdx: Int): UInt = { - if (env.FPGAPlatform) { - data(srcIdx).io.raddr(0) := iqIdx - data(srcIdx).io.rdata(0) - } else { - data(srcIdx).io.raddr(iqIdx) := iqIdx - data(srcIdx).io.rdata(iqIdx) - } + def dataDebugRead(iqIdx: UInt, srcIdx: Int): UInt = { + data(srcIdx).io.raddr(iqIdx + 1.U) := iqIdx + data(srcIdx).io.rdata(iqIdx + 1.U) + } + def dataRead(nextIqIdx: UInt, srcIdx: Int): UInt = { + data(srcIdx).io.raddr(0) := nextIqIdx + data(srcIdx).io.rdata(0) } def dataWrite(iqIdx: UInt, srcIdx: Int, wdata: UInt) = { data(srcIdx).io.waddr(iqIdx) := iqIdx @@ -381,7 +380,7 @@ class ReservationStationData data(srcIdx).io.wen(iqIdx) := true.B } // debug data: only for XSDebug log printing! 
- val debug_data = if (!env.FPGAPlatform) List.tabulate(srcNum)(i => WireInit(VecInit((0 until iqSize).map(j => dataRead(j.U, i))))) else null + val debug_data = List.tabulate(srcNum)(i => WireInit(VecInit((0 until iqSize).map(j => dataDebugRead(j.U, i))))) // Uop // ------------------------ @@ -501,7 +500,7 @@ class ReservationStationData val exuInput = io.deq.bits exuInput := DontCare exuInput.uop := uop(deq) - val regValues = List.tabulate(srcNum)(i => dataRead(/* Mux(sel.valid, sel.bits, deq), i */deq, i)) + val regValues = List.tabulate(srcNum)(i => dataRead(Mux(sel.valid, sel.bits, deq), i)) XSDebug(io.deq.fire(), p"[regValues] " + List.tabulate(srcNum)(idx => p"reg$idx: ${Hexadecimal(regValues(idx))}").reduce((p1, p2) => p1 + " " + p2) + "\n") exuInput.src1 := regValues(0) if (srcNum > 1) exuInput.src2 := regValues(1) diff --git a/src/main/scala/xiangshan/backend/roq/Roq.scala b/src/main/scala/xiangshan/backend/roq/Roq.scala index d9703047aef03a238a38347fa7fe19d3cd7df0d8..84cc130e1f24be10d912339cba38edce506da7cf 100644 --- a/src/main/scala/xiangshan/backend/roq/Roq.scala +++ b/src/main/scala/xiangshan/backend/roq/Roq.scala @@ -38,6 +38,9 @@ class RoqCSRIO extends XSBundle { val fflags = Output(Valid(UInt(5.W))) val dirty_fs = Output(Bool()) + val perfinfo = new Bundle { + val retiredInstr = Output(UInt(3.W)) + } } class RoqEnqIO extends XSBundle { @@ -671,11 +674,10 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { if(i % 4 == 3) XSDebug(false, true.B, "\n") } - val id = roqDebugId() - val difftestIntrNO = WireInit(0.U(XLEN.W)) - val difftestCause = WireInit(0.U(XLEN.W)) - ExcitingUtils.addSink(difftestIntrNO, s"difftestIntrNOfromCSR$id") - ExcitingUtils.addSink(difftestCause, s"difftestCausefromCSR$id") + val instrCnt = RegInit(0.U(64.W)) + val retireCounter = Mux(state === s_idle, commitCnt, 0.U) + instrCnt := instrCnt + retireCounter + io.csr.perfinfo.retiredInstr := RegNext(retireCounter) if(!env.FPGAPlatform) { @@ -696,10 
+698,11 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { val uop = debug_microOp(idx) val DifftestSkipSC = false if(!DifftestSkipSC){ - skip(i) := debug_exuDebug(idx).isMMIO && io.commits.valid(i) + skip(i) := (debug_exuDebug(idx).isMMIO || debug_exuDebug(idx).isPerfCnt) && io.commits.valid(i) }else{ skip(i) := ( debug_exuDebug(idx).isMMIO || + debug_exuDebug(idx).isPerfCnt || uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d || uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w ) && io.commits.valid(i) @@ -717,10 +720,10 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper { debug_deqUop.ctrl.fuType === FuType.mou && (debug_deqUop.ctrl.fuOpType === LSUOpType.sc_d || debug_deqUop.ctrl.fuOpType === LSUOpType.sc_w) - val instrCnt = RegInit(0.U(64.W)) - val retireCounter = Mux(state === s_idle, commitCnt, 0.U) - instrCnt := instrCnt + retireCounter - + val difftestIntrNO = WireInit(0.U(XLEN.W)) + val difftestCause = WireInit(0.U(XLEN.W)) + ExcitingUtils.addSink(difftestIntrNO, "difftestIntrNOfromCSR") + ExcitingUtils.addSink(difftestCause, "difftestCausefromCSR") XSDebug(difftestIntrNO =/= 0.U, "difftest intrNO set %x\n", difftestIntrNO) val retireCounterFix = Mux(io.redirectOut.valid, 1.U, retireCounter) val retirePCFix = SignExt(Mux(io.redirectOut.valid, debug_deqUop.cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN) diff --git a/src/main/scala/xiangshan/cache/dcacheWrapper.scala b/src/main/scala/xiangshan/cache/dcacheWrapper.scala index 9ac451171be24c855109c6ecdaa4ba2fce0cf493..cb20424bbe61c271b48aa1faeba45aed47da85c2 100644 --- a/src/main/scala/xiangshan/cache/dcacheWrapper.scala +++ b/src/main/scala/xiangshan/cache/dcacheWrapper.scala @@ -441,28 +441,28 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === 
get_block_addr(addr))) val atomics_addr_match = atomics_addr_matches.reduce(_||_) - val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr) + val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr) val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr))) val miss_idx_match = miss_idx_matches.reduce(_||_) - store_addr_match || atomics_addr_match || prober_addr_match || miss_idx_match + store_addr_match || atomics_addr_match || prober_idx_match || miss_idx_match } def block_store(addr: UInt) = { - val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr) + val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr) val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr))) val miss_idx_match = miss_idx_matches.reduce(_||_) - prober_addr_match || miss_idx_match + prober_idx_match || miss_idx_match } def block_atomics(addr: UInt) = { - val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr) + val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr) val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr))) val miss_idx_match = miss_idx_matches.reduce(_||_) - prober_addr_match || miss_idx_match + prober_idx_match || miss_idx_match } def block_miss(addr: UInt) = { @@ -475,11 +475,11 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame } def block_probe(addr: UInt) = { - val store_addr_matches = 
VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr))) - val store_addr_match = store_addr_matches.reduce(_||_) + val store_idx_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr))) + val store_idx_match = store_idx_matches.reduce(_||_) - val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr))) - val atomics_addr_match = atomics_addr_matches.reduce(_||_) + val atomics_idx_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr))) + val atomics_idx_match = atomics_idx_matches.reduce(_||_) val lrsc_addr_match = atomics.io.block_probe_addr.valid && atomics.io.block_probe_addr.bits === get_block_addr(addr) @@ -489,7 +489,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame // the missed req val miss_req_idx_match = missReq.fire() && get_idx(missReq.bits.addr) === get_idx(addr) - store_addr_match || atomics_addr_match || lrsc_addr_match || miss_idx_match || miss_req_idx_match + store_idx_match || atomics_idx_match || lrsc_addr_match || miss_idx_match || miss_req_idx_match } def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = { diff --git a/src/main/scala/xiangshan/cache/dtlb.scala b/src/main/scala/xiangshan/cache/dtlb.scala index 1dea41e719cefda3f25e8084fb54cf2e2ab18ea7..af7353ddfd788e9daab3cffdba2d1066e525544d 100644 --- a/src/main/scala/xiangshan/cache/dtlb.scala +++ b/src/main/scala/xiangshan/cache/dtlb.scala @@ -5,7 +5,7 @@ import chisel3.util._ import xiangshan._ import utils._ import xiangshan.backend.roq.RoqPtr -import xiangshan.backend.fu.HasCSRConst +import xiangshan.backend.fu.util.HasCSRConst import chisel3.ExcitingUtils._ trait HasTlbConst extends HasXSParameter { diff --git 
a/src/main/scala/xiangshan/frontend/LoopPredictor.scala b/src/main/scala/xiangshan/frontend/LoopPredictor.scala index 1d43e9fb9a31a01903eb4764384cbef177ec47c1..1600d2628a546ba41d82f4346a3a8018fe2f7968 100644 --- a/src/main/scala/xiangshan/frontend/LoopPredictor.scala +++ b/src/main/scala/xiangshan/frontend/LoopPredictor.scala @@ -403,7 +403,9 @@ class LoopPredictor extends BasePredictor with LTBParams { io.meta.specCnts(i) := ltbResps(i).meta } - ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf) + if (!env.FPGAPlatform) { + ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf) + } if (BPUDebug && debug) { // debug info @@ -422,4 +424,4 @@ class LoopPredictor extends BasePredictor with LTBParams { XSDebug(false, out_fire && (i.U === 3.U || i.U === 7.U || i.U === 11.U || i.U === 15.U), "\n") } } -} \ No newline at end of file +} diff --git a/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala b/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala index 9cbe6d3fec43bc8721af821446b62dc76e401259..95c9c5bcb44e92cb557bc84ed4f9cfd4dab9c884 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala @@ -16,210 +16,11 @@ class ExceptionAddrIO extends XSBundle { val vaddr = Output(UInt(VAddrBits.W)) } - -class LsqEntry extends XSBundle { - val vaddr = UInt(VAddrBits.W) // TODO: need opt - val paddr = UInt(PAddrBits.W) - val mask = UInt(8.W) - val data = UInt(XLEN.W) - val exception = UInt(16.W) // TODO: opt size - val fwdMask = Vec(8, Bool()) - val fwdData = Vec(8, UInt(8.W)) -} - class FwdEntry extends XSBundle { val mask = Vec(8, Bool()) val data = Vec(8, UInt(8.W)) } - -class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper { - val io = IO(new Bundle() { - val wb = Vec(nchannel, new Bundle() { - val wen = Input(Bool()) - val index = Input(UInt(log2Up(size).W)) - val wdata = Input(new LsqEntry) - 
}) - val uncache = new Bundle() { - val wen = Input(Bool()) - val index = Input(UInt(log2Up(size).W)) - val wdata = Input(UInt(XLEN.W)) - } - val refill = new Bundle() { - val wen = Input(Vec(size, Bool())) - val data = Input(UInt((cfg.blockBytes * 8).W)) - } - val needForward = Input(Vec(nchannel, Vec(2, UInt(size.W)))) - val forward = Vec(nchannel, Flipped(new LoadForwardQueryIO)) - val rdata = Output(Vec(size, new LsqEntry)) - - // val debug = new Bundle() { - // val debug_data = Vec(LoadQueueSize, new LsqEntry) - // } - - def wbWrite(channel: Int, index: UInt, wdata: LsqEntry): Unit = { - require(channel < nchannel && channel >= 0) - // need extra "this.wb(channel).wen := true.B" - this.wb(channel).index := index - this.wb(channel).wdata := wdata - } - - def uncacheWrite(index: UInt, wdata: UInt): Unit = { - // need extra "this.uncache.wen := true.B" - this.uncache.index := index - this.uncache.wdata := wdata - } - - def forwardQuery(channel: Int, paddr: UInt, needForward1: Data, needForward2: Data): Unit = { - this.needForward(channel)(0) := needForward1 - this.needForward(channel)(1) := needForward2 - this.forward(channel).paddr := paddr - } - - // def refillWrite(ldIdx: Int): Unit = { - // } - // use "this.refill.wen(ldIdx) := true.B" instead - }) - - io := DontCare - - val data = Reg(Vec(size, new LsqEntry)) - - // writeback to lq/sq - (0 until 2).map(i => { - when(io.wb(i).wen){ - data(io.wb(i).index) := io.wb(i).wdata - } - }) - - when(io.uncache.wen){ - data(io.uncache.index).data := io.uncache.wdata - } - - // refill missed load - def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = { - val res = Wire(Vec(8, UInt(8.W))) - (0 until 8).foreach(i => { - res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i)) - }) - res.asUInt - } - - // split dcache result into words - val words = VecInit((0 until blockWords) map { i => io.refill.data(DataBits * (i + 1) - 1, DataBits * i)}) - - - (0 until size).map(i => { - 
when(io.refill.wen(i) ){ - val refillData = words(get_word(data(i).paddr)) - data(i).data := mergeRefillData(refillData, data(i).fwdData.asUInt, data(i).fwdMask.asUInt) - XSDebug("miss resp: pos %d addr %x data %x + %x(%b)\n", i.U, data(i).paddr, refillData, data(i).fwdData.asUInt, data(i).fwdMask.asUInt) - } - }) - - // forwarding - // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases: - // (1) if they have the same flag, we need to check range(tail, sqIdx) - // (2) if they have different flags, we need to check range(tail, LoadQueueSize) and range(0, sqIdx) - // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, LoadQueueSize)) - // Forward2: Mux(same_flag, 0.U, range(0, sqIdx) ) - // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise - - // entry with larger index should have higher priority since it's data is younger - - // FIXME: old fwd logic for assertion, remove when rtl freeze - (0 until nchannel).map(i => { - - val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B))) - val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W)))) - val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B))) - val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W)))) - - for (j <- 0 until size) { - val needCheck = io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) - (0 until XLEN / 8).foreach(k => { - when (needCheck && data(j).mask(k)) { - when (io.needForward(i)(0)(j)) { - forwardMask1(k) := true.B - forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k) - } - when (io.needForward(i)(1)(j)) { - forwardMask2(k) := true.B - forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k) - } - XSDebug(io.needForward(i)(0)(j) || io.needForward(i)(1)(j), - p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " + - p"from ptr $j\n") - } - }) - } - - // merge forward lookup results - // forward2 is younger than forward1 and should have higher priority - val oldFwdResult = Wire(new 
FwdEntry) - (0 until XLEN / 8).map(k => { - oldFwdResult.mask(k) := RegNext(forwardMask1(k) || forwardMask2(k)) - oldFwdResult.data(k) := RegNext(Mux(forwardMask2(k), forwardData2(k), forwardData1(k))) - }) - - // parallel fwd logic - val paddrMatch = Wire(Vec(size, Bool())) - val matchResultVec = Wire(Vec(size * 2, new FwdEntry)) - - def parallelFwd(xs: Seq[Data]): Data = { - ParallelOperation(xs, (a: Data, b: Data) => { - val l = a.asTypeOf(new FwdEntry) - val r = b.asTypeOf(new FwdEntry) - val res = Wire(new FwdEntry) - (0 until 8).map(p => { - res.mask(p) := l.mask(p) || r.mask(p) - res.data(p) := Mux(r.mask(p), r.data(p), l.data(p)) - }) - res - }) - } - - for (j <- 0 until size) { - paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) - } - - for (j <- 0 until size) { - val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j)) - val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j)) - (0 until XLEN / 8).foreach(k => { - matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k) - matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k) - matchResultVec(size + j).mask(k) := needCheck1 && data(j).mask(k) - matchResultVec(size + j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k) - }) - } - - val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry) - - io.forward(i).forwardMask := parallelFwdResult.mask - io.forward(i).forwardData := parallelFwdResult.data - - when( - oldFwdResult.mask.asUInt =/= parallelFwdResult.mask.asUInt - ){ - printf("%d: mask error: right: %b false %b\n", GTimer(), oldFwdResult.mask.asUInt, parallelFwdResult.mask.asUInt) - } - - for (p <- 0 until 8) { - when( - oldFwdResult.data(p) =/= parallelFwdResult.data(p) && oldFwdResult.mask(p) - ){ - printf("%d: data "+p+" error: right: %x false %x\n", GTimer(), oldFwdResult.data(p), parallelFwdResult.data(p)) - } - } - - }) - - // data read - io.rdata := data - // io.debug.debug_data := data -} - // inflight miss 
block reqs class InflightBlockInfo extends XSBundle { val block_addr = UInt(PAddrBits.W) diff --git a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala index dd85625d448567532580a06e1133d1c5d431253f..31523f8d8eb8bda279164d89a2203e0ec5ad9aa1 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala @@ -76,8 +76,10 @@ class LoadQueue extends XSModule val uop = Reg(Vec(LoadQueueSize, new MicroOp)) // val data = Reg(Vec(LoadQueueSize, new LsRoqEntry)) - val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth)) + val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth)) dataModule.io := DontCare + val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth)) + vaddrModule.io := DontCare val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB @@ -144,7 +146,8 @@ class LoadQueue extends XSModule * After cache refills, it will write back through arbiter with loadUnit. 
*/ for (i <- 0 until LoadPipelineWidth) { - dataModule.io.wb(i).wen := false.B + dataModule.io.wb.wen(i) := false.B + vaddrModule.io.wen(i) := false.B when(io.loadIn(i).fire()) { when(io.loadIn(i).bits.miss) { XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n", @@ -179,16 +182,18 @@ class LoadQueue extends XSModule datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio - val loadWbData = Wire(new LsqEntry) + val loadWbData = Wire(new LQDataEntry) loadWbData.paddr := io.loadIn(i).bits.paddr - loadWbData.vaddr := io.loadIn(i).bits.vaddr loadWbData.mask := io.loadIn(i).bits.mask - loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug + loadWbData.data := io.loadIn(i).bits.data // fwd data loadWbData.fwdMask := io.loadIn(i).bits.forwardMask - loadWbData.fwdData := io.loadIn(i).bits.forwardData loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt dataModule.io.wbWrite(i, loadWbIndex, loadWbData) - dataModule.io.wb(i).wen := true.B + dataModule.io.wb.wen(i) := true.B + + vaddrModule.io.waddr(i) := loadWbIndex + vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr + vaddrModule.io.wen(i) := true.B debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio @@ -270,13 +275,13 @@ class LoadQueue extends XSModule // Refill 64 bit in a cycle // Refill data comes back from io.dcache.resp + dataModule.io.refill.valid := io.dcache.valid + dataModule.io.refill.paddr := io.dcache.bits.addr dataModule.io.refill.data := io.dcache.bits.data (0 until LoadQueueSize).map(i => { - val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === get_block_addr(io.dcache.bits.addr) - dataModule.io.refill.wen(i) := false.B - when(allocated(i) && miss(i) && blockMatch && io.dcache.valid) { - dataModule.io.refill.wen(i) := true.B + dataModule.io.refill.refillMask(i) := 
allocated(i) && miss(i) + when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) { datavalid(i) := true.B miss(i) := false.B } @@ -290,7 +295,7 @@ class LoadQueue extends XSModule // Stage 0 // Generate writeback indexes val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => { - allocated(i) && !writebacked(i) && (datavalid(i) || dataModule.io.refill.wen(i)) + allocated(i) && !writebacked(i) && datavalid(i) })).asUInt() // use uint instead vec to reduce verilog lines val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)})) val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)})) @@ -329,10 +334,11 @@ class LoadQueue extends XSModule // writeback data to cdb (0 until LoadPipelineWidth).map(i => { // data select - val rdata = dataModule.io.rdata(loadWbSel(i)).data + dataModule.io.wb.raddr(i) := loadWbSel(i) + val rdata = dataModule.io.wb.rdata(i).data val seluop = uop(loadWbSel(i)) val func = seluop.ctrl.fuOpType - val raddr = dataModule.io.rdata(loadWbSel(i)).paddr + val raddr = dataModule.io.wb.rdata(i).paddr val rdataSel = LookupTree(raddr(2, 0), List( "b000".U -> rdata(63, 0), "b001".U -> rdata(63, 8), @@ -349,13 +355,14 @@ class LoadQueue extends XSModule // // Int load writeback will finish (if not blocked) in one cycle io.ldout(i).bits.uop := seluop - io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools + io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.wb.rdata(i).exception.asBools io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr) io.ldout(i).bits.data := rdataPartialLoad io.ldout(i).bits.redirectValid := false.B io.ldout(i).bits.redirect := DontCare io.ldout(i).bits.brUpdate := DontCare io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i)) + io.ldout(i).bits.debug.isPerfCnt := false.B io.ldout(i).bits.fflags := DontCare io.ldout(i).valid := loadWbSelV(i) @@ -364,8 +371,8 @@ class 
LoadQueue extends XSModule io.ldout(i).bits.uop.roqIdx.asUInt, io.ldout(i).bits.uop.lqIdx.asUInt, io.ldout(i).bits.uop.cf.pc, - dataModule.io.rdata(loadWbSel(i)).paddr, - dataModule.io.rdata(loadWbSel(i)).data, + dataModule.io.debug(loadWbSel(i)).paddr, + dataModule.io.debug(loadWbSel(i)).data, debug_mmio(loadWbSel(i)) ) } @@ -433,18 +440,14 @@ class LoadQueue extends XSModule val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask) // check if load already in lq needs to be rolledback - val addrMatch = RegNext(VecInit((0 until LoadQueueSize).map(j => { - io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3) - }))) + dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr + dataModule.io.violation(i).mask := io.storeIn(i).bits.mask + val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask) val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => { allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j)) }))) - val overlap = RegNext(VecInit((0 until LoadQueueSize).map(j => { - val overlapVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k)) - Cat(overlapVec).orR() - }))) val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => { - addrMatch(j) && entryNeedCheck(j) && overlap(j) + addrMaskMatch(j) && entryNeedCheck(j) })) val lqViolation = lqViolationVec.asUInt().orR() val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask)) @@ -552,18 +555,20 @@ class LoadQueue extends XSModule io.roqDeqPtr === uop(deqPtr).roqIdx && !io.commits.isWalk + dataModule.io.uncache.raddr := deqPtr + io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD - io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr - io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data - io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask + io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr + io.uncache.req.bits.data := dataModule.io.uncache.rdata.data + 
io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask io.uncache.req.bits.meta.id := DontCare io.uncache.req.bits.meta.vaddr := DontCare - io.uncache.req.bits.meta.paddr := dataModule.io.rdata(deqPtr).paddr + io.uncache.req.bits.meta.paddr := dataModule.io.uncache.rdata.paddr io.uncache.req.bits.meta.uop := uop(deqPtr) io.uncache.req.bits.meta.mmio := true.B io.uncache.req.bits.meta.tlb_miss := false.B - io.uncache.req.bits.meta.mask := dataModule.io.rdata(deqPtr).mask + io.uncache.req.bits.meta.mask := dataModule.io.uncache.rdata.mask io.uncache.req.bits.meta.replay := false.B io.uncache.resp.ready := true.B @@ -590,7 +595,8 @@ class LoadQueue extends XSModule } // Read vaddr for mem exception - io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr + vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value + io.exceptionAddr.vaddr := vaddrModule.io.rdata(0) // misprediction recovery / exception redirect // invalidate lq term using robIdx @@ -646,7 +652,7 @@ class LoadQueue extends XSModule for (i <- 0 until LoadQueueSize) { if (i % 4 == 0) XSDebug("") - XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr) + XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr) PrintFlag(allocated(i), "a") PrintFlag(allocated(i) && datavalid(i), "v") PrintFlag(allocated(i) && writebacked(i), "w") diff --git a/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala b/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala new file mode 100644 index 0000000000000000000000000000000000000000..1e4cecb154e3bba703ad02e2dc720357a9249f9d --- /dev/null +++ b/src/main/scala/xiangshan/mem/lsqueue/LoadQueueData.scala @@ -0,0 +1,349 @@ +package xiangshan.mem + +import chisel3._ +import chisel3.util._ +import utils._ +import xiangshan._ +import xiangshan.cache._ +import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants} +import xiangshan.backend.LSUOpType +import 
xiangshan.mem._ +import xiangshan.backend.roq.RoqPtr + +class LQDataEntry extends XSBundle { + // val vaddr = UInt(VAddrBits.W) + val paddr = UInt(PAddrBits.W) + val mask = UInt(8.W) + val data = UInt(XLEN.W) + val exception = UInt(16.W) // TODO: opt size + val fwdMask = Vec(8, Bool()) +} + +// Data module define +// These data modules are like SyncDataModuleTemplate, but support cam-like ops +class PaddrModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters { + val io = IO(new Bundle { + val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W))) + val rdata = Output(Vec(numRead, UInt((PAddrBits).W))) + val wen = Input(Vec(numWrite, Bool())) + val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W))) + val wdata = Input(Vec(numWrite, UInt((PAddrBits).W))) + val violationMdata = Input(Vec(2, UInt((PAddrBits).W))) + val violationMmask = Output(Vec(2, Vec(numEntries, Bool()))) + val refillMdata = Input(UInt((PAddrBits).W)) + val refillMmask = Output(Vec(numEntries, Bool())) + }) + + val data = Reg(Vec(numEntries, UInt((PAddrBits).W))) + + // read ports + for (i <- 0 until numRead) { + io.rdata(i) := data(io.raddr(i)) + } + + // below is the write ports (with priorities) + for (i <- 0 until numWrite) { + when (io.wen(i)) { + data(io.waddr(i)) := io.wdata(i) + } + } + + // content addressed match + for (i <- 0 until 2) { + for (j <- 0 until numEntries) { + io.violationMmask(i)(j) := io.violationMdata(i)(PAddrBits-1, 3) === data(j)(PAddrBits-1, 3) + } + } + + for (j <- 0 until numEntries) { + io.refillMmask(j) := get_block_addr(io.refillMdata) === get_block_addr(data(j)) + } + + // DataModuleTemplate should not be used when there're any write conflicts + for (i <- 0 until numWrite) { + for (j <- i+1 until numWrite) { + assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) + } + } +} + +class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule { + val io = IO(new Bundle { + val raddr = 
Input(Vec(numRead, UInt(log2Up(numEntries).W))) + val rdata = Output(Vec(numRead, UInt(8.W))) + val wen = Input(Vec(numWrite, Bool())) + val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W))) + val wdata = Input(Vec(numWrite, UInt(8.W))) + val violationMdata = Input(Vec(2, UInt((PAddrBits).W))) + val violationMmask = Output(Vec(2, Vec(numEntries, Bool()))) + }) + + val data = Reg(Vec(numEntries, UInt(8.W))) + + // read ports + for (i <- 0 until numRead) { + io.rdata(i) := data(io.raddr(i)) + } + + // below is the write ports (with priorities) + for (i <- 0 until numWrite) { + when (io.wen(i)) { + data(io.waddr(i)) := io.wdata(i) + } + } + + // content addressed match + for (i <- 0 until 2) { + for (j <- 0 until numEntries) { + io.violationMmask(i)(j) := (io.violationMdata(i) & data(j)).orR + } + } + + // DataModuleTemplate should not be used when there're any write conflicts + for (i <- 0 until numWrite) { + for (j <- i+1 until numWrite) { + assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) + } + } +} + +class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters { + val io = IO(new Bundle { + // data io + // read + val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W))) + val rdata = Output(Vec(numRead, UInt(XLEN.W))) + // address indexed write + val wen = Input(Vec(numWrite, Bool())) + val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W))) + val wdata = Input(Vec(numWrite, UInt(XLEN.W))) + // masked write + val mwmask = Input(Vec(numEntries, Bool())) + val refillData = Input(UInt((cfg.blockBytes * 8).W)) + + // fwdMask io + val fwdMaskWdata = Input(Vec(numWrite, UInt(8.W))) + val fwdMaskWen = Input(Vec(numWrite, Bool())) + // fwdMaskWaddr = waddr + + // paddr io + // 3 bits in paddr need to be stored in CoredataModule for refilling + val paddrWdata = Input(Vec(numWrite, UInt((PAddrBits).W))) + val paddrWen = Input(Vec(numWrite, Bool())) + }) + + val data = Reg(Vec(numEntries, 
UInt(XLEN.W))) + val fwdMask = Reg(Vec(numEntries, UInt(8.W))) + val wordIndex = Reg(Vec(numEntries, UInt((blockOffBits - wordOffBits).W))) + + // read ports + for (i <- 0 until numRead) { + io.rdata(i) := data(io.raddr(i)) + } + + // below is the write ports (with priorities) + for (i <- 0 until numWrite) { + when (io.wen(i)) { + data(io.waddr(i)) := io.wdata(i) + } + when (io.fwdMaskWen(i)) { + fwdMask(io.waddr(i)) := io.fwdMaskWdata(i) + } + when (io.paddrWen(i)) { + wordIndex(io.waddr(i)) := get_word(io.paddrWdata(i)) + } + } + + + // masked write + // refill missed load + def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = { + val res = Wire(Vec(8, UInt(8.W))) + (0 until 8).foreach(i => { + res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i)) + }) + res.asUInt + } + + // split dcache result into words + val words = VecInit((0 until blockWords) map { i => io.refillData(DataBits * (i + 1) - 1, DataBits * i)}) + + // refill data according to matchMask, refillMask and refill.vald + for (j <- 0 until numEntries) { + when (io.mwmask(j)) { + val refillData = words(wordIndex(j)) // TODO + data(j) := mergeRefillData(refillData, data(j), fwdMask(j)) + } + } + + // DataModuleTemplate should not be used when there're any write conflicts + for (i <- 0 until numWrite) { + for (j <- i+1 until numWrite) { + assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j))) + } + } +} + +class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper { + val io = IO(new Bundle() { + val wb = new Bundle() { + val wen = Vec(wbNumWrite, Input(Bool())) + val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W))) + val wdata = Input(Vec(wbNumWrite, new LQDataEntry)) + val raddr = Input(Vec(wbNumRead, UInt(log2Up(size).W))) + val rdata = Output(Vec(wbNumRead, new LQDataEntry)) + } + val uncache = new Bundle() { + val wen = Input(Bool()) + val waddr = 
Input(UInt(log2Up(size).W)) + val wdata = Input(UInt(XLEN.W)) // only write back uncache data + val raddr = Input(UInt(log2Up(size).W)) + val rdata = Output(new LQDataEntry) + } + val refill = new Bundle() { + val valid = Input(Bool()) + val paddr = Input(UInt(PAddrBits.W)) + val data = Input(UInt((cfg.blockBytes * 8).W)) + val refillMask = Input(Vec(size, Bool())) + val matchMask = Output(Vec(size, Bool())) + } + val violation = Vec(StorePipelineWidth, new Bundle() { + val paddr = Input(UInt(PAddrBits.W)) + val mask = Input(UInt(8.W)) + val violationMask = Output(Vec(size, Bool())) + }) + val debug = Output(Vec(size, new LQDataEntry)) + + def wbWrite(channel: Int, waddr: UInt, wdata: LQDataEntry): Unit = { + require(channel < wbNumWrite && wbNumWrite >= 0) + // need extra "this.wb(channel).wen := true.B" + this.wb.waddr(channel) := waddr + this.wb.wdata(channel) := wdata + } + + def uncacheWrite(waddr: UInt, wdata: UInt): Unit = { + // need extra "this.uncache.wen := true.B" + this.uncache.waddr := waddr + this.uncache.wdata := wdata + } + + // def refillWrite(ldIdx: Int): Unit = { + // } + // use "this.refill.wen(ldIdx) := true.B" instead + }) + + // val data = Reg(Vec(size, new LQDataEntry)) + // data module + val paddrModule = Module(new PaddrModule(size, numRead = 3, numWrite = 2)) + val maskModule = Module(new MaskModule(size, numRead = 3, numWrite = 2)) + val exceptionModule = Module(new AsyncDataModuleTemplate(UInt(16.W), size, numRead = 3, numWrite = 2)) + val coredataModule = Module(new CoredataModule(size, numRead = 3, numWrite = 3)) + + // read data + // read port 0 -> wbNumRead-1 + (0 until wbNumRead).map(i => { + paddrModule.io.raddr(i) := io.wb.raddr(i) + maskModule.io.raddr(i) := io.wb.raddr(i) + exceptionModule.io.raddr(i) := io.wb.raddr(i) + coredataModule.io.raddr(i) := io.wb.raddr(i) + + io.wb.rdata(i).paddr := paddrModule.io.rdata(i) + io.wb.rdata(i).mask := maskModule.io.rdata(i) + io.wb.rdata(i).data := coredataModule.io.rdata(i) + 
io.wb.rdata(i).exception := exceptionModule.io.rdata(i) + io.wb.rdata(i).fwdMask := DontCare + }) + + // read port wbNumRead + paddrModule.io.raddr(wbNumRead) := io.uncache.raddr + maskModule.io.raddr(wbNumRead) := io.uncache.raddr + exceptionModule.io.raddr(wbNumRead) := io.uncache.raddr + coredataModule.io.raddr(wbNumRead) := io.uncache.raddr + + io.uncache.rdata.paddr := paddrModule.io.rdata(wbNumRead) + io.uncache.rdata.mask := maskModule.io.rdata(wbNumRead) + io.uncache.rdata.data := coredataModule.io.rdata(wbNumRead) + io.uncache.rdata.exception := exceptionModule.io.rdata(wbNumRead) + io.uncache.rdata.fwdMask := DontCare + + // write data + // write port 0 -> wbNumWrite-1 + (0 until wbNumWrite).map(i => { + paddrModule.io.wen(i) := false.B + maskModule.io.wen(i) := false.B + exceptionModule.io.wen(i) := false.B + coredataModule.io.wen(i) := false.B + coredataModule.io.fwdMaskWen(i) := false.B + coredataModule.io.paddrWen(i) := false.B + + paddrModule.io.waddr(i) := io.wb.waddr(i) + maskModule.io.waddr(i) := io.wb.waddr(i) + exceptionModule.io.waddr(i) := io.wb.waddr(i) + coredataModule.io.waddr(i) := io.wb.waddr(i) + + paddrModule.io.wdata(i) := io.wb.wdata(i).paddr + maskModule.io.wdata(i) := io.wb.wdata(i).mask + exceptionModule.io.wdata(i) := io.wb.wdata(i).exception + coredataModule.io.wdata(i) := io.wb.wdata(i).data + coredataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt + coredataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr + + when(io.wb.wen(i)){ + paddrModule.io.wen(i) := true.B + maskModule.io.wen(i) := true.B + exceptionModule.io.wen(i) := true.B + coredataModule.io.wen(i) := true.B + coredataModule.io.fwdMaskWen(i) := true.B + coredataModule.io.paddrWen(i) := true.B + } + }) + + // write port wbNumWrite + // exceptionModule.io.wen(wbNumWrite) := false.B + coredataModule.io.wen(wbNumWrite) := io.uncache.wen + coredataModule.io.fwdMaskWen(wbNumWrite) := false.B + coredataModule.io.paddrWen(wbNumWrite) := false.B + + 
coredataModule.io.waddr(wbNumWrite) := io.uncache.waddr + + coredataModule.io.fwdMaskWdata(wbNumWrite) := DontCare + coredataModule.io.paddrWdata(wbNumWrite) := DontCare + coredataModule.io.wdata(wbNumWrite) := io.uncache.wdata + + // mem access violation check, gen violationMask + (0 until StorePipelineWidth).map(i => { + paddrModule.io.violationMdata(i) := io.violation(i).paddr + maskModule.io.violationMdata(i) := io.violation(i).mask + io.violation(i).violationMask := (paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt).asBools + // VecInit((0 until size).map(j => { + // val addrMatch = io.violation(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3) + // val violationVec = (0 until 8).map(k => data(j).mask(k) && io.violation(i).mask(k)) + // Cat(violationVec).orR() && addrMatch + // })) + }) + + // refill missed load + def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = { + val res = Wire(Vec(8, UInt(8.W))) + (0 until 8).foreach(i => { + res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i)) + }) + res.asUInt + } + + // gen paddr match mask + paddrModule.io.refillMdata := io.refill.paddr + (0 until size).map(i => { + io.refill.matchMask := paddrModule.io.refillMmask + // io.refill.matchMask(i) := get_block_addr(data(i).paddr) === get_block_addr(io.refill.paddr) + }) + + // refill data according to matchMask, refillMask and refill.valid + coredataModule.io.refillData := io.refill.data + (0 until size).map(i => { + coredataModule.io.mwmask(i) := io.refill.valid && io.refill.matchMask(i) && io.refill.refillMask(i) + }) + + // debug data read + io.debug := DontCare +} diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala index 07a4181befc9b7a648a84b6330e6eb8c85ff3878..c80bd70eaf0af0147e6a86094745ba1d8c17b0d4 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala +++ 
b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala @@ -132,7 +132,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue writebacked(stWbIndex) := hasWritebacked pending(stWbIndex) := !hasWritebacked // valid mmio require - val storeWbData = Wire(new LsqEntry) + val storeWbData = Wire(new SQDataEntry) storeWbData := DontCare storeWbData.paddr := io.storeIn(i).bits.paddr storeWbData.mask := io.storeIn(i).bits.mask @@ -264,6 +264,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue io.mmioStout.bits.redirect := DontCare io.mmioStout.bits.brUpdate := DontCare io.mmioStout.bits.debug.isMMIO := true.B + io.mmioStout.bits.debug.isPerfCnt := false.B io.mmioStout.bits.fflags := DontCare when (io.mmioStout.fire()) { writebacked(deqPtr) := true.B @@ -322,7 +323,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue } // Read vaddr for mem exception - io.exceptionAddr.vaddr := exceptionModule.io.rdata(0) + io.exceptionAddr.vaddr := vaddrModule.io.rdata(0) // misprediction recovery / exception redirect // invalidate sq term using robIdx diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 8301d07fe604afcc456890957db87e96cc45c42d..133176d4c6d69b35a33a1dda0f1a8485887bd48f 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -281,6 +281,7 @@ class LoadUnit extends XSModule with HasLoadHelper { intHitLoadOut.bits.redirect := DontCare intHitLoadOut.bits.brUpdate := DontCare intHitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio + intHitLoadOut.bits.debug.isPerfCnt := false.B intHitLoadOut.bits.fflags := DontCare load_s2.io.out.ready := true.B diff --git a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala index 
99469a45322478f981791cdba648bf8f739ee318..5c1403ea635752f35ab7f16cacc1ca1ea53866cd 100644 --- a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala @@ -115,6 +115,7 @@ class StoreUnit_S2 extends XSModule { io.stout.bits.redirect := DontCare io.stout.bits.brUpdate := DontCare io.stout.bits.debug.isMMIO := io.in.bits.mmio + io.stout.bits.debug.isPerfCnt := false.B io.stout.bits.fflags := DontCare } diff --git a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala index e1a00ecc7ee8a170cf660112b33b7eb24a463b22..5d56705a417dc39d9dbd5f89ac4917185d6d7c99 100644 --- a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala +++ b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala @@ -13,6 +13,10 @@ trait HasSbufferCst extends HasXSParameter { def s_prepare = 2.U(2.W) def s_inflight = 3.U(2.W) + val evictCycle = 8192 + require(isPow2(evictCycle)) + val countBits = 1 + log2Up(evictCycle) + val SbufferIndexWidth: Int = log2Up(StoreBufferSize) // paddr = tag + offset val CacheLineBytes: Int = CacheLineSize / 8 @@ -37,7 +41,6 @@ class SbufferLine extends SbufferBundle { class ChooseReplace(nWay: Int) extends XSModule { val io = IO(new Bundle{ val mask = Vec(nWay, Input(Bool())) - val fire = Input(Bool()) val way = Output(UInt(nWay.W)) val flush = Input(Bool()) }) @@ -49,12 +52,9 @@ class ChooseReplace(nWay: Int) extends XSModule { val nextWay = PriorityEncoder(Cat(stateMask, loMask))(log2Up(nWay)-1, 0) XSDebug(p"nextWay[${nextWay}]\n") + wayReg := nextWay io.way := wayReg - when(io.fire){ - wayReg := nextWay - } - when(io.flush){ wayReg := 0.U } @@ -116,11 +116,11 @@ class NewSbuffer extends XSModule with HasSbufferCst { val buffer = Mem(StoreBufferSize, new SbufferLine) val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid))) - + val cohCount = Reg(Vec(StoreBufferSize, UInt(countBits.W))) /* idle --[flush]--> drian_sbuffer --[buf empty]--> idle 
--[buf full]--> replace --[dcache resp]--> idle - */ + */ val x_idle :: x_drain_sbuffer :: x_replace :: Nil = Enum(3) val sbuffer_state = RegInit(x_idle) @@ -150,7 +150,6 @@ class NewSbuffer extends XSModule with HasSbufferCst { val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W)) val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W)) val full = invalidCount === 0.U - // val oneSpace = invalidCount === 1.U val bufferRead = VecInit((0 until StoreBufferSize).map(i => buffer(i))) val stateRead = VecInit((0 until StoreBufferSize).map(i => stateVec(i))) @@ -171,8 +170,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { val lru = Module(new ChooseReplace(StoreBufferSize)) val evictionIdx = lru.io.way - - lru.io.fire := false.B + lru.io.mask := stateRead.map(_ === s_valid) val tags = io.in.map(in => getTag(in.bits.addr)) @@ -212,6 +210,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { def wordReqToBufLine(req: DCacheWordReq, tag: UInt, insertIdx: UInt, wordOffset: UInt, flushMask: Bool): Unit = { stateUpdate(insertIdx) := s_valid tagUpdate(insertIdx) := tag + cohCount(insertIdx) := 0.U when(flushMask){ for(j <- 0 until CacheLineWords){ @@ -230,6 +229,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { } def mergeWordReq(req: DCacheWordReq, mergeIdx:UInt, wordOffset:UInt): Unit = { + cohCount(mergeIdx) := 0.U for(i <- 0 until DataBytes){ when(req.mask(i)){ maskUpdate(mergeIdx)(wordOffset)(i) := true.B @@ -267,7 +267,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { for(i <- 0 until StoreBufferSize){ XSDebug(stateVec(i)=/=s_invalid, - p"[$i] state:${stateVec(i)} buf:${bufferRead(i)}\n" + p"[$i] timeout:${cohCount(i)(countBits-1)} state:${stateVec(i)} buf:${bufferRead(i)}\n" ) } @@ -338,16 +338,14 @@ class NewSbuffer extends XSModule with HasSbufferCst { // // evictionEntry.bits := evictionIdx - val prepareValid = ((do_eviction && sbuffer_state === x_replace)|| (sbuffer_state === x_drain_sbuffer)) && + val 
prepareValid = ((do_eviction && sbuffer_state === x_replace) || (sbuffer_state === x_drain_sbuffer)) && stateVec(evictionIdx)===s_valid && noSameBlockInflight(evictionIdx) when(prepareValid){ stateVec(evictionIdx) := s_prepare - lru.io.fire := true.B } - val prepareMask = stateVec.map(s => s === s_prepare) val (prepareIdx, prepareEn) = PriorityEncoderWithFlag(prepareMask) @@ -383,6 +381,21 @@ class NewSbuffer extends XSModule with HasSbufferCst { XSDebug(p"needSpace[$needSpace] invalidCount[$invalidCount] validCount[$validCount]\n") + + //-------------------------cohCount----------------------------- + // insert and merge: cohCount=0 + // every cycle cohCount+=1 + // if cohCount(countBits-1)==1,evict + for(i <- 0 until StoreBufferSize){ + when(stateVec(i) === s_valid){ + when(cohCount(i)(countBits-1)){ + assert(stateVec(i) === s_valid) + stateUpdate(i) := s_prepare + } + cohCount(i) := cohCount(i)+1.U + } + } + // ---------------------- Load Data Forward --------------------- for ((forward, i) <- io.forward.zipWithIndex) { diff --git a/src/test/scala/xiangshan/memend/SbufferTest.scala b/src/test/scala/xiangshan/memend/SbufferTest.scala index 95bbfbc11f057bdaa3d91413c9c4e8cb90b68d54..868c41694ef139c0ef27079684f4378b4c74994a 100644 --- a/src/test/scala/xiangshan/memend/SbufferTest.scala +++ b/src/test/scala/xiangshan/memend/SbufferTest.scala @@ -44,9 +44,87 @@ class SbufferTest extends AnyFlatSpec top.Parameters.set(top.Parameters.debugParameters) - it should "random req" in { +// it should "random req" in { +// test(new SbufferWapper{AddSinks()}){ c => +// +// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={ +// (0 until StorePipelineWidth).map { i => +// c.io.in(i).valid.poke(true.B) +// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit( +// _.mask -> mask(i), +// _.addr -> addr(i), +// _.data -> data(i) +// )) +// } +// c.clock.step(1) +// for (in <- c.io.in){ in.valid.poke(false.B)} +// } +// +// def forward_req_and_resp(addr: 
Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = { +// (0 until LoadPipelineWidth).map{ i => +// c.io.forward(i).paddr.poke(addr(i)) +// c.io.forward(i).mask.poke(mask(i)) +// if(c.io.in(i).ready.peek() == true.B) { +// (0 until 8).map { j => +// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8)) +// } +// } +// } +// } +// +// val TEST_SIZE = 100 +// for(i <- 0 until TEST_SIZE) { +// val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size +// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U) +// val mask = Seq.fill(StorePipelineWidth)(0xff.U) +// store_enq(addr, data, mask) +// forward_req_and_resp(addr, data, mask) +// } +// } +// } +// +// it should "sequence req" in { +// test(new SbufferWapper{AddSinks()}){ c => +// +// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) = { +// (0 until StorePipelineWidth).map { i => +// c.io.in(i).valid.poke(true.B) +// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit( +// _.mask -> mask(i), +// _.addr -> addr(i), +// _.data -> data(i) +// )) +// } +// c.clock.step(1) +// for (in <- c.io.in){ in.valid.poke(false.B)} +// } +// +// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = { +// (0 until LoadPipelineWidth).map{ i => +// c.io.forward(i).paddr.poke(addr(i)) +// c.io.forward(i).mask.poke(mask(i)) +// if(c.io.in(i).ready.peek() == true.B) { +// (0 until 8).map { j => +// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8)) +// } +// } +// } +// } +// +// val TEST_SIZE = 100 +// val start_addr = Random.nextLong() & 0x7ffffffff8L +// for(i <- 0 until TEST_SIZE) { +// val addr = Seq(((i<<4) + start_addr).U,((i<<4)+8+start_addr).U) +// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U) +// val mask = Seq.fill(StorePipelineWidth)(0xff.U) +// store_enq(addr, data, mask) +// forward_req_and_resp(addr, data, mask) +// } +// } +// } + + it 
should "sbuffer coherence" in { test(new SbufferWapper{AddSinks()}){ c => - def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={ (0 until StorePipelineWidth).map { i => c.io.in(i).valid.poke(true.B) @@ -59,7 +137,6 @@ class SbufferTest extends AnyFlatSpec c.clock.step(1) for (in <- c.io.in){ in.valid.poke(false.B)} } - def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = { (0 until LoadPipelineWidth).map{ i => c.io.forward(i).paddr.poke(addr(i)) @@ -71,55 +148,16 @@ class SbufferTest extends AnyFlatSpec } } } - - val TEST_SIZE = 100 + val TEST_SIZE = 10 for(i <- 0 until TEST_SIZE) { - val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size + val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U) val mask = Seq.fill(StorePipelineWidth)(0xff.U) store_enq(addr, data, mask) forward_req_and_resp(addr, data, mask) } - } - } - - it should "sequence req" in { - test(new SbufferWapper{AddSinks()}){ c => - - def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) = { - (0 until StorePipelineWidth).map { i => - c.io.in(i).valid.poke(true.B) - c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit( - _.mask -> mask(i), - _.addr -> addr(i), - _.data -> data(i) - )) - } - c.clock.step(1) - for (in <- c.io.in){ in.valid.poke(false.B)} - } - def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = { - (0 until LoadPipelineWidth).map{ i => - c.io.forward(i).paddr.poke(addr(i)) - c.io.forward(i).mask.poke(mask(i)) - if(c.io.in(i).ready.peek() == true.B) { - (0 until 8).map { j => - c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8)) - } - } - } - } - - val TEST_SIZE = 100 - val start_addr = Random.nextLong() & 0x7ffffffff8L - for(i <- 0 until TEST_SIZE) { - val addr = Seq(((i<<4) + start_addr).U,((i<<4)+8+start_addr).U) - val data = 
Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U) - val mask = Seq.fill(StorePipelineWidth)(0xff.U) - store_enq(addr, data, mask) - forward_req_and_resp(addr, data, mask) - } + c.clock.step(512 + 10) } } }