提交 51f54365 编写于 作者: Z zoujr

Merge branch 'master' into perf-debug

......@@ -39,7 +39,7 @@ jobs:
echo "AM_HOME=/home/ci-runner/xsenv/nexus-am" >> $GITHUB_ENV
- name: Build EMU
make ./build/emu SIM_ARGS=--disable-all NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j220
make ./build/emu SIM_ARGS=--disable-log NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME B=0 E=0 -j220
- name: Run cputest
run: |
......@@ -49,7 +49,7 @@ jobs:
echo $t
if [[ $? != 0 ]];
echo $t fail
......@@ -59,10 +59,10 @@ jobs:
exit $ret
- name: Run riscv-tests
run: |
make -C $RVTEST_HOME/isa/ SUITES+=rv64ui SUITES+=rv64um SUITES+=rv64ua SUITES+=rv64uf SUITES+=rv64ud NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME noop_run 2> /dev/null
make -C $RVTEST_HOME/isa/ SUITES+=rv64ui SUITES+=rv64um SUITES+=rv64ua SUITES+=rv64uf SUITES+=rv64ud NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME noop_run B=0 E=0
- name: Run microbench
run: |
make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run 2> /dev/null
make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run B=0 E=0
- name: Run coremark
run: |
make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run 2> /dev/null
make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run B=0 E=0
......@@ -27,17 +27,16 @@ help:
mkdir -p $(@D)
mill XiangShan.test.runMain $(SIMTOP) -X verilog -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform --remove-assert $(SIM_ARGS)
# mill XiangShan.runMain top.$(TOP) -X verilog -td $(@D) --output-file $(@F) --infer-rw $(FPGATOP) --repl-seq-mem -c:$(FPGATOP):-o:$(@D)/$(@F).conf
# $(MEM_GEN) $(@D)/$(@F).conf >> $@
mill XiangShan.test.runMain $(SIMTOP) -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform --remove-assert --infer-rw --repl-seq-mem -c:$(SIMTOP):-o:$(@D)/$(@F).conf $(SIM_ARGS)
$(MEM_GEN) $(@D)/$(@F).conf >> $@
# sed -i -e 's/_\(aw\|ar\|w\|r\|b\)_\(\|bits_\)/_\1/g' $@
# @git log -n 1 >> .__head__
# @git diff >> .__diff__
# @sed -i 's/^/\/\// ' .__head__
# @sed -i 's/^/\/\//' .__diff__
# @cat .__head__ .__diff__ $@ > .__out__
# @mv .__out__ $@
# @rm .__head__ .__diff__
@git log -n 1 >> .__head__
@git diff >> .__diff__
@sed -i 's/^/\/\// ' .__head__
@sed -i 's/^/\/\//' .__diff__
@cat .__head__ .__diff__ $@ > .__out__
@mv .__out__ $@
@rm .__head__ .__diff__
deploy: build/top.zip
Subproject commit ab2a8e8afd162b601d9f749e6e6af452cccc03a7
Subproject commit cf429e420be6702a2e24b9b91910366187c103b4
......@@ -3,7 +3,7 @@ NANOS_HOME ?= $(AM_HOME)/../nanos-lite
B ?= 0
E ?= 0
E ?= -1
V ?= OFF
#V ?= OFF
EMU_ARGS = B=$(B) E=$(E) V=$(V)
export NOOP_HOME=$(pwd)/..
......@@ -23,7 +23,9 @@ int main(int argc, char* argv[]){
fd = tryLock(argv[1]);
if(fd > 0){
getlogin_r(user, BUF_SIZE);
write(fd, user, strlen(user));
int len = strlen(user);
user[len] = '\0';
write(fd, user, len+1);
} else {
// someone is holding the lock...
......@@ -25,10 +25,12 @@ class AXI4Flash
def getOffset(addr: UInt) = addr(12,0)
val rdata = Wire(UInt(64.W))
RegMap.generate(mapping, getOffset(raddr), rdata,
getOffset(waddr), in.w.fire(), in.w.bits.data, MaskExpand(in.w.bits.strb))
val rdata = Wire(Vec(2,UInt(32.W)))
(0 until 2).map{ i =>
RegMap.generate(mapping, getOffset(raddr + (i * 4).U), rdata(i),
getOffset(waddr), in.w.fire(), in.w.bits.data, MaskExpand(in.w.bits.strb))
in.r.bits.data := Fill(2, rdata(31,0))
in.r.bits.data := rdata.asUInt
......@@ -6,7 +6,7 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{BankBinder, TLBuffer, TLBundleParameters, TLCacheCork, TLClientNode, TLFilter, TLFuzzer, TLIdentityNode, TLToAXI4, TLWidthWidget, TLXbar}
import utils.DebugIdentityNode
import utils.{DebugIdentityNode, DataDontCareNode}
import utils.XSInfo
import xiangshan.{HasXSParameter, XSCore, HasXSLog}
import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters}
......@@ -61,7 +61,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
cacheName = s"L2"
writeBytes = 8
writeBytes = 32
......@@ -79,7 +79,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
cacheName = "L3"
writeBytes = 8
writeBytes = 32
......@@ -101,7 +101,8 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
l2_xbar(i) := TLBuffer() := DebugIdentityNode() := xs_core(i).ptw.node
l2_xbar(i) := TLBuffer() := DebugIdentityNode() := xs_core(i).l2Prefetcher.clientNode
mmioXbar := TLBuffer() := DebugIdentityNode() := xs_core(i).memBlock.uncache.clientNode
l2cache(i).node := TLBuffer() := DebugIdentityNode() := l2_xbar(i)
mmioXbar := TLBuffer() := DebugIdentityNode() := xs_core(i).frontend.instrUncache.clientNode
l2cache(i).node := DataDontCareNode(a = true, b = true) := TLBuffer() := DebugIdentityNode() := l2_xbar(i)
l3_xbar := TLBuffer() := DebugIdentityNode() := l2cache(i).node
......@@ -169,6 +170,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
xs_core(i).module.io.externalInterrupt.msip := clint.module.io.msip(i)
// xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i)))
xs_core(i).module.io.externalInterrupt.meip := plic.module.io.extra.get.meip(i)
xs_core(i).module.io.l2ToPrefetcher <> l2cache(i).module.io
// do not let the DMA AXI signals be optimized out
......@@ -5,6 +5,7 @@ import firrtl.AnnotationSeq
import firrtl.annotations.NoTargetAnnotation
import firrtl.options.{HasShellOptions, Shell, ShellOption}
import firrtl.stage.{FirrtlCli, RunFirrtlTransformAnnotation}
import freechips.rocketchip.transforms.naming.{OverrideDesiredNameAnnotation, RenameDesiredNames}
import xstransforms.ShowPrintTransform
import xstransforms.PrintModuleName
......@@ -93,7 +94,8 @@ object XiangShanStage {
annotations ++ Seq(
RunFirrtlTransformAnnotation(new ShowPrintTransform),
RunFirrtlTransformAnnotation(new PrintModuleName)
RunFirrtlTransformAnnotation(new PrintModuleName),
RunFirrtlTransformAnnotation(new RenameDesiredNames)
package utils
import chisel3._
import chipsalliance.rocketchip.config.Parameters
import chisel3.util.DecoupledIO
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{TLBundle, TLClientNode, TLIdentityNode, TLMasterParameters, TLMasterPortParameters}
import xiangshan.HasXSLog
class DataDontCareNode(a: Boolean = false, b: Boolean = false, c: Boolean = false, d: Boolean = false)(implicit p: Parameters) extends LazyModule {
val node = TLIdentityNode()
val n = TLClientNode(Seq(TLMasterPortParameters.v1(
lazy val module = new LazyModuleImp(this) with HasXSLog with HasTLDump{
val (out, _) = node.out(0)
val (in, _) = node.in(0)
if (a) {
out.a.bits.data := DontCare
if (b) {
in.b.bits.data := DontCare
if (c) {
out.c.bits.data := DontCare
if (d) {
in.d.bits.data := DontCare
object DataDontCareNode {
def apply(a: Boolean = false, b: Boolean = false, c: Boolean = false, d: Boolean = false)(implicit p: Parameters): TLIdentityNode = {
val dataDontCareNode = LazyModule(new DataDontCareNode(a, b, c, d))
......@@ -23,14 +23,15 @@ object ExcitingUtils {
var connType: ConnectionType,
var sourceModule: Option[String] = None,
var sinkModule: Option[String] = None
var sinkModule: Option[String] = None,
var warned: Boolean = false
override def toString: String =
s"type:[$connType] source location:[${sourceModule.getOrElse(strToErrorMsg("Not Found"))}]" +
s" sink location:[${sinkModule.getOrElse(strToErrorMsg("Not Found"))}]"
def isLeagleConnection: Boolean = sourceModule.nonEmpty && sinkModule.nonEmpty
def isLegalConnection: Boolean = sourceModule.nonEmpty && sinkModule.nonEmpty
private val map = mutable.LinkedHashMap[String, Connection]()
......@@ -44,6 +45,10 @@ object ExcitingUtils {
uniqueName: Boolean = false
): String = {
val conn = map.getOrElseUpdate(name, new Connection(connType))
if (!conn.sourceModule.isEmpty && !conn.warned) {
println(s"[WARN] Signal |$name| has multiple sources")
conn.warned = true
require(conn.connType == connType)
conn.sourceModule = Some(component.parentModName)
BoringUtils.addSource(component, name, disableDedup, uniqueName)
......@@ -58,6 +63,10 @@ object ExcitingUtils {
forceExists: Boolean = false
): Unit = {
val conn = map.getOrElseUpdate(name, new Connection(connType))
if (!conn.sinkModule.isEmpty && !conn.warned) {
println(s"[WARN] Signal |$name| has multiple sinks")
conn.warned = true
require(conn.connType == connType)
conn.sinkModule = Some(component.parentModName)
BoringUtils.addSink(component, name, disableDedup, forceExists)
......@@ -77,14 +86,14 @@ object ExcitingUtils {
def checkAndDisplay(): Unit = {
var leagle = true
var legal = true
val buf = new mutable.StringBuilder()
for((id, conn) <- map){
buf ++= s"Connection:[$id] $conn\n"
if(!conn.isLeagleConnection) leagle = false
if(!conn.isLegalConnection) legal = false
require(leagle, strToErrorMsg("Error: Illeagle connection found!"))
require(legal, strToErrorMsg("Error: Illegal connection found!"))
* Copyright (c) 2020 Institute of Computing Technology, CAS
* Copyright (c) 2020 University of Chinese Academy of Sciences
* NutShell is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* See the Mulan PSL v2 for more details.
package utils
import chisel3._
......@@ -85,14 +101,6 @@ class SRAMTemplate[T <: Data](gen: T, set: Int, way: Int = 1,
io.r.req.ready := !resetState && (if (singlePort) !wen else true.B)
io.w.req.ready := true.B
// Debug(false) {
// when (wen) {
// printf("%d: SRAMTemplate: write %x to idx = %d\n", GTimer(), wdata.asUInt, setIdx)
// }
// when (RegNext(realRen)) {
// printf("%d: SRAMTemplate: read %x at idx = %d\n", GTimer(), VecInit(rdata).asUInt, RegNext(io.r.req.bits.setIdx))
// }
// }
class SRAMTemplateWithArbiter[T <: Data](nRead: Int, gen: T, set: Int, way: Int = 1,
......@@ -196,7 +196,7 @@ class CfiUpdateInfo extends XSBundle with HasBPUParameter {
class CtrlFlow extends XSBundle {
val instr = UInt(32.W)
val pc = UInt(VAddrBits.W)
val exceptionVec = Vec(16, Bool())
val exceptionVec = ExceptionVec()
val intrVec = Vec(12, Bool())
val brUpdate = new CfiUpdateInfo
val crossPageIPFFix = Bool()
package xiangshan
import chisel3._
import chisel3.util._
import utils._
import Chisel.experimental.chiselName
import xiangshan.cache.{DCache, HasDCacheParameters, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, Uncache}
object MemMap {
def apply (base: String, top: String, width: String, description: String, mode: String): ((String, String), Map[String, String]) = {
((base, top) -> Map(
"width" -> width, // 0 means no limitation
"description" -> description,
"mode" -> mode,
object AddressSpace {
def MemMapList = List(
// Base address Top address Width Description Mode (RWXIDSAC)
MemMap("h00_0000_0000", "h00_0FFF_FFFF", "h0", "Reserved", ""),
MemMap("h00_1000_0000", "h00_1FFF_FFFF", "h0", "QSPI_Flash", "RX"),
MemMap("h00_2000_0000", "h00_2FFF_FFFF", "h0", "Reserved", ""),
MemMap("h00_3000_0000", "h00_3000_FFFF", "h0", "DMA", "RW"),
MemMap("h00_3001_0000", "h00_3004_FFFF", "h0", "GPU", "RWC"),
MemMap("h00_3005_0000", "h00_3005_FFFF", "h0", "USB", "RW"),
MemMap("h00_3006_0000", "h00_3006_FFFF", "h0", "SDMMC", "RW"),
MemMap("h00_3007_0000", "h00_30FF_FFFF", "h0", "Reserved", ""),
MemMap("h00_3100_0000", "h00_3100_FFFF", "h0", "QSPI", "RW"),
MemMap("h00_3101_0000", "h00_3101_FFFF", "h0", "GMAC", "RW"),
MemMap("h00_3102_0000", "h00_3102_FFFF", "h0", "HDMI", "RW"),
MemMap("h00_3103_0000", "h00_3103_FFFF", "h0", "HDMI_PHY", "RW"),
MemMap("h00_3104_0000", "h00_3105_FFFF", "h0", "DP", "RW"),
MemMap("h00_3106_0000", "h00_3106_FFFF", "h0", "DDR0", "RW"),
MemMap("h00_3107_0000", "h00_3107_FFFF", "h0", "DDR0_PHY", "RW"),
MemMap("h00_3108_0000", "h00_3108_FFFF", "h0", "DDR1", "RW"),
MemMap("h00_3109_0000", "h00_3109_FFFF", "h0", "DDR1_PHY", "RW"),
MemMap("h00_310A_0000", "h00_310A_FFFF", "h0", "IIS", "RW"),
MemMap("h00_310B_0000", "h00_310B_FFFF", "h0", "UART0", "RW"),
MemMap("h00_310C_0000", "h00_310C_FFFF", "h0", "UART1", "RW"),
MemMap("h00_310D_0000", "h00_310D_FFFF", "h0", "IIC0", "RW"),
MemMap("h00_310E_0000", "h00_310E_FFFF", "h0", "IIC1", "RW"),
MemMap("h00_310F_0000", "h00_310F_FFFF", "h0", "IIC2", "RW"),
MemMap("h00_3110_0000", "h00_3110_FFFF", "h0", "GPIO", "RW"),
MemMap("h00_3111_0000", "h00_3111_FFFF", "h0", "CRU", "RW"),
MemMap("h00_3112_0000", "h00_37FF_FFFF", "h0", "Reserved", ""),
MemMap("h00_3800_0000", "h00_3800_FFFF", "h0", "CLINT", "RW"),
MemMap("h00_3801_0000", "h00_3BFF_FFFF", "h0", "Reserved", ""),
MemMap("h00_3C00_0000", "h00_3FFF_FFFF", "h0", "PLIC", "RW"),
MemMap("h00_4000_0000", "h00_4FFF_FFFF", "h0", "PCIe0", "RW"),
MemMap("h00_5000_0000", "h00_5FFF_FFFF", "h0", "PCIe1", "RW"),
MemMap("h00_6000_0000", "h00_6FFF_FFFF", "h0", "PCIe2", "RW"),
MemMap("h00_7000_0000", "h00_7FFF_FFFF", "h0", "PCIe3", "RW"),
MemMap("h00_8000_0000", "h1F_FFFF_FFFF", "h0", "DDR", "RWXIDSA"),
def printMemmap(){
println("-------------------- memory map --------------------")
for(i <- MemMapList){
println(i._1._1 + "->" + i._1._2 + " width " + (if(i._2.get("width").get == "0") "unlimited" else i._2.get("width").get) + " " + i._2.get("description").get + " [" + i._2.get("mode").get + "]")
def genMemmapMatchVec(addr: UInt): UInt = {
VecInit(MemMapList.map(i => {
i._1._1.U <= addr && addr < i._1._2.U
def queryMode(matchVec: UInt): UInt = {
Mux1H(matchVec, VecInit(MemMapList.map(i => {
def queryWidth(matchVec: UInt): UInt = {
Mux1H(matchVec, VecInit(MemMapList.map(i => {
def memmapAddrMatch(addr: UInt): (UInt, UInt) = {
val matchVec = genMemmapMatchVec(addr)
(queryMode(matchVec), queryWidth(matchVec))
def isDMMIO(addr: UInt): Bool = !PMAMode.dcache(memmapAddrMatch(addr)._1)
def isIMMIO(addr: UInt): Bool = !PMAMode.icache(memmapAddrMatch(addr)._1)
def isConfigableAddr(addr: UInt): Bool = {
VecInit(MemMapList.map(i => {
i._1._1.U <= addr && addr < i._1._2.U && (i._2.get("mode").get.toUpperCase.indexOf("C") >= 0).B
class PMAChecker extends XSModule with HasDCacheParameters
val io = IO(new Bundle() {
val paddr = Input(UInt(VAddrBits.W))
val mode = Output(PMAMode())
val widthLimit = Output(UInt(8.W)) // TODO: fixme
val updateCConfig = Input(Valid(Bool()))
val enableConfigableCacheZone = RegInit(false.B)
val updateCConfig = RegNext(RegNext(RegNext(io.updateCConfig)))
when(updateCConfig.valid) {
enableConfigableCacheZone := updateCConfig.bits
val (mode, widthLimit) = AddressSpace.memmapAddrMatch(io.paddr)
io.mode := Mux(AddressSpace.isConfigableAddr(io.paddr) && enableConfigableCacheZone, mode | PMAMode.D, mode)
io.widthLimit := widthLimit
\ No newline at end of file
......@@ -10,7 +10,7 @@ import xiangshan.backend.exu.Exu._
import xiangshan.frontend._
import xiangshan.mem._
import xiangshan.backend.fu.HasExceptionNO
import xiangshan.cache.{DCache, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, Uncache}
import xiangshan.cache.{DCache,InstrUncache, DCacheParameters, ICache, ICacheParameters, L1plusCache, L1plusCacheParameters, PTW, Uncache, MemoryOpConstants, MissReq}
import xiangshan.cache.prefetch._
import chipsalliance.rocketchip.config
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
......@@ -19,6 +19,7 @@ import freechips.rocketchip.devices.tilelink.{DevNullParams, TLError}
import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters}
import freechips.rocketchip.amba.axi4.{AXI4Deinterleaver, AXI4Fragmenter, AXI4IdIndexer, AXI4IdentityNode, AXI4ToTL, AXI4UserYanker}
import freechips.rocketchip.tile.HasFPUParameters
import sifive.blocks.inclusivecache.PrefetcherIO
import utils._
case class XSCoreParameters
......@@ -89,7 +90,9 @@ case class XSCoreParameters
StoreBufferSize: Int = 16,
RefillSize: Int = 512,
TlbEntrySize: Int = 32,
TlbSPEntrySize: Int = 4,
TlbL2EntrySize: Int = 256, // or 512
TlbL2SPEntrySize: Int = 16,
PtwL1EntrySize: Int = 16,
PtwL2EntrySize: Int = 256,
NumPerfCounters: Int = 16,
......@@ -165,7 +168,9 @@ trait HasXSParameter {
val RefillSize = core.RefillSize
val DTLBWidth = core.LoadPipelineWidth + core.StorePipelineWidth
val TlbEntrySize = core.TlbEntrySize
val TlbSPEntrySize = core.TlbSPEntrySize
val TlbL2EntrySize = core.TlbL2EntrySize
val TlbL2SPEntrySize = core.TlbL2SPEntrySize
val PtwL1EntrySize = core.PtwL1EntrySize
val PtwL2EntrySize = core.PtwL2EntrySize
val NumPerfCounters = core.NumPerfCounters
......@@ -183,32 +188,6 @@ trait HasXSParameter {
nMissEntries = 8
// icache prefetcher
val l1plusPrefetcherParameters = L1plusPrefetcherParameters(
enable = false,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
ageWidth = 4,
blockBytes = l1plusCacheParameters.blockBytes,
reallocStreamOnMissInstantly = true
// dcache prefetcher
val l2PrefetcherParameters = L2PrefetcherParameters(
enable = true,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
ageWidth = 4,
blockBytes = L2BlockSize,
reallocStreamOnMissInstantly = true
val dcacheParameters = DCacheParameters(
tagECC = Some("secded"),
dataECC = Some("secded"),
......@@ -240,6 +219,43 @@ trait HasXSParameter {
// on chip network configurations
val L3BusWidth = 256
// icache prefetcher
val l1plusPrefetcherParameters = L1plusPrefetcherParameters(
enable = true,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 2,
streamSize = 4,
ageWidth = 4,
blockBytes = l1plusCacheParameters.blockBytes,
reallocStreamOnMissInstantly = true,
cacheName = "icache"
// dcache prefetcher
val l2PrefetcherParameters = L2PrefetcherParameters(
enable = true,
_type = "bop",// "stream" or "bop"
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
ageWidth = 4,
blockBytes = L2BlockSize,
reallocStreamOnMissInstantly = true,
cacheName = "dcache"
bopParams = BOPParameters(
rrTableEntries = 256,
rrTagBits = 12,
scoreBits = 5,
roundMax = 50,
badScore = 1,
blockBytes = L2BlockSize,
nEntries = dcacheParameters.nMissEntries * 2 // TODO: this is too large
trait HasXSLog { this: RawModule =>
......@@ -272,23 +288,23 @@ case class EnviromentParameters
FPGAPlatform: Boolean = true,
EnableDebug: Boolean = false,
EnablePerfDebug: Boolean = true
EnablePerfDebug: Boolean = false
object AddressSpace extends HasXSParameter {
// (start, size)
// any address outside the MMIO ranges below is treated as DRAM
def mmio = List(
(0x00000000L, 0x40000000L), // internal devices, such as CLINT and PLIC
(0x40000000L, 0x40000000L) // external devices
// object AddressSpace extends HasXSParameter {
// // (start, size)
// // address out of MMIO will be considered as DRAM
// def mmio = List(
// (0x00000000L, 0x40000000L), // internal devices, such as CLINT and PLIC
// (0x40000000L, 0x40000000L) // external devices
// )
def isMMIO(addr: UInt): Bool = mmio.map(range => {
val bits = log2Up(range._2)
(addr ^ range._1.U)(PAddrBits-1, bits) === 0.U
}).reduce(_ || _)
// def isMMIO(addr: UInt): Bool = mmio.map(range => {
// require(isPow2(range._2))
// val bits = log2Up(range._2)
// (addr ^ range._1.U)(PAddrBits-1, bits) === 0.U
// }).reduce(_ || _)
// }
......@@ -309,6 +325,7 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule
val fpBlockSlowWakeUpInt = fpExuConfigs.filter(intSlowFilter)
// outer facing nodes
val frontend = LazyModule(new Frontend())
val l1pluscache = LazyModule(new L1plusCache())
val ptw = LazyModule(new PTW())
val l2Prefetcher = LazyModule(new L2Prefetcher())
......@@ -330,9 +347,11 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
val io = IO(new Bundle {
val externalInterrupt = new ExternalInterruptIO
val l2ToPrefetcher = Flipped(new PrefetcherIO(PAddrBits))
println(s"FPGAPlatform:${env.FPGAPlatform} EnableDebug:${env.EnableDebug}")
// to fast wake up fp, mem rs
val intBlockFastWakeUpFp = intExuConfigs.filter(fpFastFilter)
......@@ -345,7 +364,6 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
val fpBlockFastWakeUpInt = fpExuConfigs.filter(intFastFilter)
val fpBlockSlowWakeUpInt = fpExuConfigs.filter(intSlowFilter)
val frontend = Module(new Frontend)
val ctrlBlock = Module(new CtrlBlock)
val integerBlock = Module(new IntegerBlock(
fastWakeUpIn = fpBlockFastWakeUpInt,
......@@ -364,6 +382,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
slowIntOut = fpBlockSlowWakeUpInt
val frontend = outer.frontend.module
val memBlock = outer.memBlock.module
val l1pluscache = outer.l1pluscache.module
val ptw = outer.ptw.module
......@@ -443,7 +462,16 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ptw.io.sfence <> integerBlock.io.fenceio.sfence
ptw.io.csr <> integerBlock.io.csrio.tlb
l2Prefetcher.io.in <> memBlock.io.toDCachePrefetch
val l2PrefetcherIn = Wire(Decoupled(new MissReq))
if (l2PrefetcherParameters.enable && l2PrefetcherParameters._type == "bop") {
l2PrefetcherIn.valid := io.l2ToPrefetcher.acquire.valid
l2PrefetcherIn.bits := DontCare
l2PrefetcherIn.bits.addr := io.l2ToPrefetcher.acquire.bits.address
l2PrefetcherIn.bits.cmd := Mux(io.l2ToPrefetcher.acquire.bits.write, MemoryOpConstants.M_XWR, MemoryOpConstants.M_XRD)
} else {
l2PrefetcherIn <> memBlock.io.toDCachePrefetch
l2Prefetcher.io.in <> l2PrefetcherIn
if (!env.FPGAPlatform) {
val debugIntReg, debugFpReg = WireInit(VecInit(Seq.fill(32)(0.U(XLEN.W))))
......@@ -16,7 +16,7 @@ import xiangshan.mem.LsqEnqIO
class CtrlToIntBlockIO extends XSBundle {
val enqIqCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
val readRf = Vec(NRIntReadPorts, Flipped(new RfReadPort(XLEN)))
val readRf = Vec(NRIntReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val jumpPc = Output(UInt(VAddrBits.W))
// int block only uses port 0~7
val readPortIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W))) // TODO parameterize 8 here
......@@ -25,7 +25,7 @@ class CtrlToIntBlockIO extends XSBundle {
class CtrlToFpBlockIO extends XSBundle {
val enqIqCtrl = Vec(exuParameters.FpExuCnt, DecoupledIO(new MicroOp))
val readRf = Vec(NRFpReadPorts, Flipped(new RfReadPort(XLEN + 1)))
val readRf = Vec(NRFpReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// fp block uses port 0~11
val readPortIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
val redirect = ValidIO(new Redirect)
......@@ -131,10 +131,8 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
setPhyRegRdy.valid := wb.valid && wb.bits.uop.ctrl.fpWen
setPhyRegRdy.bits := wb.bits.uop.pdest
intBusyTable.io.rfReadAddr <> dispatch.io.readIntRf.map(_.addr)
intBusyTable.io.pregRdy <> dispatch.io.intPregRdy
fpBusyTable.io.rfReadAddr <> dispatch.io.readFpRf.map(_.addr)
fpBusyTable.io.pregRdy <> dispatch.io.fpPregRdy
intBusyTable.io.read <> dispatch.io.readIntState
fpBusyTable.io.read <> dispatch.io.readFpState
roq.io.redirect.valid := brq.io.redirectOut.valid || io.fromLsBlock.replay.valid
roq.io.redirect.bits <> redirectArb
......@@ -152,7 +152,7 @@ class FloatBlock
// read fp rf from ctrl block
fpRf.io.readPorts <> io.fromCtrlBlock.readRf
fpRf.io.readPorts.zipWithIndex.map{ case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until exuParameters.StuCnt).foreach(i => io.toMemBlock.readFpRf(i).data := fpRf.io.readPorts(i + 12).data)
// write fp rf arbiter
val fpWbArbiter = Module(new Wb(
......@@ -218,7 +218,7 @@ class IntegerBlock
jmpExeUnit.fenceio <> io.fenceio
// read int rf from ctrl block
intRf.io.readPorts <> io.fromCtrlBlock.readRf
intRf.io.readPorts.zipWithIndex.map{ case(r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until NRMemReadPorts).foreach(i => io.toMemBlock.readIntRf(i).data := intRf.io.readPorts(i + 8).data)
// write int rf arbiter
val intWbArbiter = Module(new Wb(
......@@ -237,4 +237,4 @@ class IntegerBlock
rf.addr := wb.bits.uop.pdest
rf.data := wb.bits.data
\ No newline at end of file
......@@ -250,6 +250,7 @@ class MemBlockImp
// LSQ to store buffer
lsq.io.sbuffer <> sbuffer.io.in
lsq.io.sqempty <> sbuffer.io.sqempty
// Sbuffer
sbuffer.io.dcache <> dcache.io.lsu.store
......@@ -5,6 +5,7 @@ import chisel3.util._
import xiangshan._
import utils._
import chisel3.ExcitingUtils._
import xiangshan.backend.JumpOpType
import xiangshan.backend.decode.ImmUnion
......@@ -75,7 +76,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val exuOut = new ExuOutput
val s_idle :: s_wb :: Nil = Enum(2)
val s_idle :: s_wb :: s_auipc_wb :: Nil = Enum(3)
class DecodeEnqBrqData extends Bundle {
val cfiUpdateInfo = new CfiUpdateInfo
......@@ -107,7 +108,9 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
* write back
val wbValid = stateQueue(writebackIdx) === s_wb
val wbState = stateQueue(writebackIdx)
val wbValid = wbState === s_wb
val wbIsAuipc = wbState === s_auipc_wb
val wbEntry = Wire(new ExuOutput)
val wbIsMisPred = wbEntry.redirect.target =/= wbEntry.brUpdate.pnpc
......@@ -115,9 +118,9 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
io.redirectOut.bits := wbEntry.redirect
io.redirectOut.bits.brTag := BrqPtr(ptrFlagVec(writebackIdx), writebackIdx)
io.out.valid := wbValid
io.out.valid := wbValid || wbIsAuipc
io.out.bits := wbEntry
when (wbValid) {
when (io.out.valid) {
stateQueue(writebackIdx) := s_idle
writebackPtr_next := writebackPtr + 1.U
......@@ -164,7 +167,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
* exu write back
for (exuWb <- io.exuRedirectWb) {
for ((exuWb, i) <- io.exuRedirectWb.zipWithIndex) {
when (exuWb.valid) {
val wbIdx = exuWb.bits.redirect.brTag.value
......@@ -174,8 +177,14 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
assert(stateQueue(wbIdx) === s_idle)
stateQueue(wbIdx) := s_wb
if(i == 0){ // jump
stateQueue(wbIdx) := Mux(JumpOpType.jumpOpisAuipc(exuWb.bits.uop.ctrl.fuOpType),
} else { // alu
stateQueue(wbIdx) := s_wb
......@@ -5,6 +5,7 @@ import chisel3.util._
import xiangshan._
import xiangshan.backend.brq.BrqEnqIO
import utils._
import xiangshan.backend.decode.Instructions.{AUIPC, MRET, SRET}
class DecodeStage extends XSModule {
val io = IO(new Bundle() {
......@@ -31,12 +32,14 @@ class DecodeStage extends XSModule {
for (i <- 0 until DecodeWidth) {
decoders(i).io.enq.ctrl_flow <> io.in(i).bits
val isMret = io.in(i).bits.instr === BitPat("b001100000010_00000_000_00000_1110011")
val isSret = io.in(i).bits.instr === BitPat("b000100000010_00000_000_00000_1110011")
val thisBrqValid = !io.in(i).bits.brUpdate.pd.notCFI || isMret || isSret
val isMret = io.in(i).bits.instr === MRET
val isSret = io.in(i).bits.instr === SRET
val isAuiPc = io.in(i).bits.instr === AUIPC
val thisBrqValid = !io.in(i).bits.brUpdate.pd.notCFI || isMret || isSret || isAuiPc
io.enqBrq.needAlloc(i) := thisBrqValid
io.enqBrq.req(i).valid := io.in(i).valid && thisBrqValid && io.out(i).ready
io.enqBrq.req(i).bits := decoders(i).io.deq.cf_ctrl.cf
io.enqBrq.req(i).bits := io.in(i).bits
io.enqBrq.req(i).bits.instr := decoders(i).io.deq.cf_ctrl.cf.instr
io.out(i).valid := io.in(i).valid && io.enqBrq.req(i).ready
io.out(i).bits := decoders(i).io.deq.cf_ctrl
......@@ -135,7 +135,7 @@ object XDecode extends DecodeConstants {
REMW -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.div, MDUOpType.remw, Y, N, N, N, N, N, N, SelImm.IMM_X),
REMUW -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.div, MDUOpType.remuw, Y, N, N, N, N, N, N, SelImm.IMM_X),
AUIPC -> List(SrcType.pc, SrcType.imm, SrcType.DC, FuType.alu, ALUOpType.add, Y, N, N, N, N, N, N, SelImm.IMM_U),
AUIPC -> List(SrcType.pc , SrcType.imm, SrcType.DC, FuType.jmp, JumpOpType.auipc, Y, N, N, N, N, N, N, SelImm.IMM_U),
JAL -> List(SrcType.pc , SrcType.imm, SrcType.DC, FuType.jmp, JumpOpType.jal, Y, N, N, N, N, N, N, SelImm.IMM_UJ),
JALR -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.jmp, JumpOpType.jalr, Y, N, N, N, N, N, N, SelImm.IMM_I),
BEQ -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.alu, ALUOpType.beq, N, N, N, N, N, N, N, SelImm.IMM_SB),
......@@ -155,9 +155,9 @@ object XDecode extends DecodeConstants {
CSRRCI -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.clri, Y, N, N, Y, Y, N, N, SelImm.IMM_Z),
SFENCE_VMA->List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.fence, FenceOpType.sfence, N, N, N, Y, Y, Y, N, SelImm.IMM_X),
ECALL -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.jmp, Y, N, N, Y, Y, N, N, SelImm.IMM_X),
SRET -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.jmp, Y, N, N, Y, Y, N, N, SelImm.IMM_X),
MRET -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.jmp, Y, N, N, Y, Y, N, N, SelImm.IMM_X),
ECALL -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.jmp, Y, N, N, Y, Y, N, N, SelImm.IMM_I),
SRET -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.jmp, Y, N, N, Y, Y, N, N, SelImm.IMM_I),
MRET -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.csr, CSROpType.jmp, Y, N, N, Y, Y, N, N, SelImm.IMM_I),
WFI -> List(SrcType.pc, SrcType.imm, SrcType.DC, FuType.alu, ALUOpType.sll, Y, N, N, N, N, N, N, SelImm.IMM_X),
......@@ -300,22 +300,6 @@ object XSTrapDecode extends DecodeConstants {
class RVCExpander extends XSModule {
val io = IO(new Bundle {
val in = Input(UInt(32.W))
val out = Output(new ExpandedInstruction)
val rvc = Output(Bool())
if (HasCExtension) {
io.rvc := io.in(1,0) =/= 3.U
io.out := new RVCDecoder(io.in, XLEN).decode
} else {
io.rvc := false.B
io.out := new RVCDecoder(io.in, XLEN).passthrough
//object Imm32Gen {
// def apply(sel: UInt, inst: UInt) = {
// val sign = Mux(sel === SelImm.IMM_Z, 0.S, inst(31).asSInt)
......@@ -425,19 +409,7 @@ class DecodeUnit extends XSModule with DecodeUnitConstants {
val ctrl_flow = Wire(new CtrlFlow) // input with RVC Expanded
val cf_ctrl = Wire(new CfCtrl)
val exp = Module(new RVCExpander())
exp.io.in := io.enq.ctrl_flow.instr
ctrl_flow := io.enq.ctrl_flow
when (exp.io.rvc) {
ctrl_flow.instr := exp.io.out.bits
// save rvc decode info
// TODO: maybe rvc_info is useless?
val rvc_info = Wire(new ExpandedInstruction())
val is_rvc = Wire(Bool())
rvc_info := exp.io.out
is_rvc := exp.io.rvc
var decode_table = XDecode.table ++ FDecode.table ++ FDivSqrtDecode.table ++ X64Decode.table ++ XSTrapDecode.table
......@@ -458,10 +430,8 @@ class DecodeUnit extends XSModule with DecodeUnitConstants {
cs.ldest := Mux(cs.fpWen || cs.rfWen, ctrl_flow.instr(RD_MSB,RD_LSB), 0.U)
// fill in exception vector
cf_ctrl.cf.exceptionVec.map(_ := false.B)
cf_ctrl.cf.exceptionVec := io.enq.ctrl_flow.exceptionVec
cf_ctrl.cf.exceptionVec(illegalInstr) := cs.selImm === SelImm.INVALID_INSTR
cf_ctrl.cf.exceptionVec(instrPageFault) := io.enq.ctrl_flow.exceptionVec(instrPageFault)
cf_ctrl.cf.exceptionVec(instrAccessFault) := io.enq.ctrl_flow.exceptionVec(instrAccessFault)
// fix frflags
// fflags zero csrrs rd csr
......@@ -27,7 +27,7 @@ class FPDecoder extends XSModule{
FCVT_S_WU-> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_L -> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_LU-> List(N,s,s,Y,Y,Y,N,N,Y),
FMV_X_W -> List(N,s,X,N,N,N,N,N,N),
FMV_X_W -> List(N,d,X,N,N,N,N,N,N),
FCLASS_S -> List(N,s,X,N,N,N,N,N,N),
FCVT_W_S -> List(N,s,X,N,Y,N,N,N,Y),
FCVT_WU_S-> List(N,s,X,N,Y,N,N,N,Y),
......@@ -7,7 +7,7 @@ import utils._
import xiangshan.backend.regfile.RfReadPort
import chisel3.ExcitingUtils._
import xiangshan.backend.roq.{RoqPtr, RoqEnqIO}
import xiangshan.backend.rename.RenameBypassInfo
import xiangshan.backend.rename.{RenameBypassInfo, BusyTableReadIO}
import xiangshan.mem.LsqEnqIO
case class DispatchParameters
......@@ -34,11 +34,11 @@ class Dispatch extends XSModule {
// enq Lsq
val enqLsq = Flipped(new LsqEnqIO)
// read regfile
val readIntRf = Vec(NRIntReadPorts, Flipped(new RfReadPort(XLEN)))
val readFpRf = Vec(NRFpReadPorts, Flipped(new RfReadPort(XLEN + 1)))
// read reg status (busy/ready)
val intPregRdy = Vec(NRIntReadPorts, Input(Bool()))
val fpPregRdy = Vec(NRFpReadPorts, Input(Bool()))
val readIntRf = Vec(NRIntReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val readFpRf = Vec(NRFpReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// to busytable: read physical registers' state (busy/ready)
val readIntState= Vec(NRIntReadPorts, Flipped(new BusyTableReadIO))
val readFpState = Vec(NRFpReadPorts, Flipped(new BusyTableReadIO))
// to reservation stations
val numExist = Input(Vec(exuParameters.ExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.ExuCnt, DecoupledIO(new MicroOp))
......@@ -82,7 +82,7 @@ class Dispatch extends XSModule {
val intDispatch = Module(new Dispatch2Int)
intDispatch.io.fromDq <> intDq.io.deq
intDispatch.io.readRf.zipWithIndex.map({case (r, i) => r <> io.readIntRf(i)})
intDispatch.io.regRdy.zipWithIndex.map({case (r, i) => r <> io.intPregRdy(i)})
intDispatch.io.readState.zipWithIndex.map({case (r, i) => r <> io.readIntState(i)})
intDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(i)})
intDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(i)})
// intDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i)})
......@@ -92,7 +92,7 @@ class Dispatch extends XSModule {
val fpDispatch = Module(new Dispatch2Fp)
fpDispatch.io.fromDq <> fpDq.io.deq
fpDispatch.io.readRf.zipWithIndex.map({case (r, i) => r <> io.readFpRf(i)})
fpDispatch.io.regRdy.zipWithIndex.map({case (r, i) => r <> io.fpPregRdy(i)})
fpDispatch.io.readState.zipWithIndex.map({case (r, i) => r <> io.readFpState(i)})
fpDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(i + exuParameters.IntExuCnt)})
fpDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(i + exuParameters.IntExuCnt)})
// fpDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i + exuParameters.IntExuCnt)})
......@@ -103,8 +103,8 @@ class Dispatch extends XSModule {
lsDispatch.io.fromDq <> lsDq.io.deq
lsDispatch.io.readIntRf.zipWithIndex.map({case (r, i) => r <> io.readIntRf(i + 8)})
lsDispatch.io.readFpRf.zipWithIndex.map({case (r, i) => r <> io.readFpRf(i + 12)})
lsDispatch.io.intRegRdy.zipWithIndex.map({case (r, i) => r <> io.intPregRdy(i + 8)})
lsDispatch.io.fpRegRdy.zipWithIndex.map({case (r, i) => r <> io.fpPregRdy(i + 12)})
lsDispatch.io.readIntState.zipWithIndex.map({case (r, i) => r <> io.readIntState(i + 8)})
lsDispatch.io.readFpState.zipWithIndex.map({case (r, i) => r <> io.readFpState(i + 12)})
lsDispatch.io.numExist.zipWithIndex.map({case (num, i) => num := io.numExist(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
lsDispatch.io.enqIQCtrl.zipWithIndex.map({case (enq, i) => enq <> io.enqIQCtrl(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
// lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
......@@ -8,9 +8,10 @@ import utils._
import xiangshan.backend.roq.{RoqPtr, RoqEnqIO}
import xiangshan.backend.rename.RenameBypassInfo
import xiangshan.mem.LsqEnqIO
import xiangshan.backend.fu.HasExceptionNO
// read rob and enqueue
class Dispatch1 extends XSModule {
class Dispatch1 extends XSModule with HasExceptionNO {
val io = IO(new Bundle() {
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
......@@ -45,7 +46,10 @@ class Dispatch1 extends XSModule {
// valid bits for different dispatch queues
val isInt = VecInit(io.fromRename.map(req => FuType.isIntExu(req.bits.ctrl.fuType)))
val isBranch = VecInit(io.fromRename.map(req => !req.bits.cf.brUpdate.pd.notCFI))
val isBranch = VecInit(io.fromRename.map(req =>
// cover auipc (a fake branch)
!req.bits.cf.brUpdate.pd.notCFI || FuType.isJumpExu(req.bits.ctrl.fuType)
val isFp = VecInit(io.fromRename.map(req => FuType.isFpExu (req.bits.ctrl.fuType)))
val isLs = VecInit(io.fromRename.map(req => FuType.isMemExu(req.bits.ctrl.fuType)))
val isStore = VecInit(io.fromRename.map(req => FuType.isStoreExu(req.bits.ctrl.fuType)))
......@@ -113,6 +117,7 @@ class Dispatch1 extends XSModule {
// thisIsBlocked: this instruction is blocked by itself (based on noSpecExec)
// nextCanOut: next instructions can out (based on blockBackward)
// notBlockedByPrevious: previous instructions can enqueue
val hasException = VecInit(io.fromRename.map(r => selectFrontend(r.bits.cf.exceptionVec).asUInt.orR))
val thisIsBlocked = VecInit((0 until RenameWidth).map(i => {
// for i > 0, when Roq is empty but dispatch1 has valid instructions to enqueue, it's blocked
if (i > 0) isNoSpecExec(i) && (!io.enqRoq.isEmpty || Cat(io.fromRename.take(i).map(_.valid)).orR)
......@@ -153,17 +158,17 @@ class Dispatch1 extends XSModule {
// We use notBlockedByPrevious here.
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i)
io.toIntDq.req(i).bits := updatedUop(i)
io.toIntDq.req(i).valid := io.fromRename(i).valid && isInt(i) && thisCanActualOut(i) &&
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept
io.toFpDq.needAlloc(i) := io.fromRename(i).valid && isFp(i)
io.toFpDq.req(i).bits := updatedUop(i)
io.toFpDq.req(i).valid := io.fromRename(i).valid && isFp(i) && thisCanActualOut(i) &&
io.toFpDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isFp(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toIntDq.canAccept && io.toLsDq.canAccept
io.toLsDq.needAlloc(i) := io.fromRename(i).valid && isLs(i)
io.toLsDq.req(i).bits := updatedUop(i)
io.toLsDq.req(i).valid := io.fromRename(i).valid && isLs(i) && thisCanActualOut(i) &&
io.toLsDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isLs(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toIntDq.canAccept && io.toFpDq.canAccept
XSDebug(io.toIntDq.req(i).valid, p"pc 0x${Hexadecimal(io.toIntDq.req(i).bits.cf.pc)} int index $i\n")
......@@ -5,13 +5,14 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.regfile.RfReadPort
import xiangshan.backend.rename.BusyTableReadIO
import xiangshan.backend.exu.Exu._
class Dispatch2Fp extends XSModule {
val io = IO(new Bundle() {
val fromDq = Flipped(Vec(dpParams.FpDqDeqWidth, DecoupledIO(new MicroOp)))
val readRf = Vec(NRFpReadPorts - exuParameters.StuCnt, Flipped(new RfReadPort(XLEN + 1)))
val regRdy = Vec(NRFpReadPorts - exuParameters.StuCnt, Input(Bool()))
val readRf = Vec(NRFpReadPorts - exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val readState = Vec(NRFpReadPorts - exuParameters.StuCnt, Flipped(new BusyTableReadIO))
val numExist = Input(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.FpExuCnt, DecoupledIO(new MicroOp))
val readPortIndex = Vec(exuParameters.FpExuCnt, Output(UInt(log2Ceil((NRFpReadPorts - exuParameters.StuCnt) / 3).W)))
......@@ -51,14 +52,20 @@ class Dispatch2Fp extends XSModule {
val fpDynamicMapped = fpDynamicIndex.map(i => indexVec(i))
for (i <- fpStaticIndex.indices) {
val index = WireInit(VecInit(fpStaticMapped(i) +: fpDynamicMapped))
io.readRf(3*i ).addr := io.fromDq(index(fpReadPortSrc(i))).bits.psrc1
io.readRf(3*i+1).addr := io.fromDq(index(fpReadPortSrc(i))).bits.psrc2
io.readRf(3*i+2).addr := io.fromDq(index(fpReadPortSrc(i))).bits.psrc3
io.readRf(3*i ) := io.fromDq(index(fpReadPortSrc(i))).bits.psrc1
io.readRf(3*i+1) := io.fromDq(index(fpReadPortSrc(i))).bits.psrc2
io.readRf(3*i+2) := io.fromDq(index(fpReadPortSrc(i))).bits.psrc3
val readPortIndex = Wire(Vec(exuParameters.FpExuCnt, UInt(2.W)))
fpStaticIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := i.U})
fpDynamicIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := fpDynamicExuSrc(i)})
for (i <- 0 until dpParams.IntDqDeqWidth) {
io.readState(3*i ).req := io.fromDq(i).bits.psrc1
io.readState(3*i+1).req := io.fromDq(i).bits.psrc2
io.readState(3*i+2).req := io.fromDq(i).bits.psrc3
* Part 3: dispatch to reservation stations
......@@ -74,12 +81,12 @@ class Dispatch2Fp extends XSModule {
enq.bits := io.fromDq(indexVec(i)).bits
val src1Ready = VecInit((0 until 4).map(i => io.regRdy(i * 3)))
val src2Ready = VecInit((0 until 4).map(i => io.regRdy(i * 3 + 1)))
val src3Ready = VecInit((0 until 4).map(i => io.regRdy(i * 3 + 2)))
enq.bits.src1State := src1Ready(readPortIndex(i))
enq.bits.src2State := src2Ready(readPortIndex(i))
enq.bits.src3State := src3Ready(readPortIndex(i))
val src1Ready = VecInit((0 until 4).map(i => io.readState(i * 3).resp))
val src2Ready = VecInit((0 until 4).map(i => io.readState(i * 3 + 1).resp))
val src3Ready = VecInit((0 until 4).map(i => io.readState(i * 3 + 2).resp))
enq.bits.src1State := src1Ready(indexVec(i))
enq.bits.src2State := src2Ready(indexVec(i))
enq.bits.src3State := src3Ready(indexVec(i))
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State} ${enq.bits.src3State}) " +
......@@ -6,12 +6,13 @@ import xiangshan._
import utils._
import xiangshan.backend.exu.Exu._
import xiangshan.backend.regfile.RfReadPort
import xiangshan.backend.rename.BusyTableReadIO
class Dispatch2Int extends XSModule {
val io = IO(new Bundle() {
val fromDq = Flipped(Vec(dpParams.IntDqDeqWidth, DecoupledIO(new MicroOp)))
val readRf = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new RfReadPort(XLEN)))
val regRdy = Vec(NRIntReadPorts - NRMemReadPorts, Input(Bool()))
val readRf = Vec(NRIntReadPorts - NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val readState = Vec(NRIntReadPorts - NRMemReadPorts, Flipped(new BusyTableReadIO))
val numExist = Input(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.IntExuCnt, DecoupledIO(new MicroOp))
val readPortIndex = Vec(exuParameters.IntExuCnt, Output(UInt(log2Ceil(8 / 2).W)))
......@@ -58,13 +59,18 @@ class Dispatch2Int extends XSModule {
val intDynamicMapped = intDynamicIndex.map(i => indexVec(i))
for (i <- intStaticIndex.indices) {
val index = WireInit(VecInit(intStaticMapped(i) +: intDynamicMapped))
io.readRf(2*i ).addr := io.fromDq(index(intReadPortSrc(i))).bits.psrc1
io.readRf(2*i+1).addr := io.fromDq(index(intReadPortSrc(i))).bits.psrc2
io.readRf(2*i ) := io.fromDq(index(intReadPortSrc(i))).bits.psrc1
io.readRf(2*i+1) := io.fromDq(index(intReadPortSrc(i))).bits.psrc2
val readPortIndex = Wire(Vec(exuParameters.IntExuCnt, UInt(2.W)))
intStaticIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := i.U})
intDynamicIndex.zipWithIndex.map({case (index, i) => readPortIndex(index) := intDynamicExuSrc(i)})
for (i <- 0 until dpParams.IntDqDeqWidth) {
io.readState(2*i ).req := io.fromDq(i).bits.psrc1
io.readState(2*i+1).req := io.fromDq(i).bits.psrc2
* Part 3: dispatch to reservation stations
......@@ -84,10 +90,11 @@ class Dispatch2Int extends XSModule {
enq.bits := io.fromDq(indexVec(i)).bits
val src1Ready = VecInit((0 until 4).map(i => io.regRdy(i * 2)))
val src2Ready = VecInit((0 until 4).map(i => io.regRdy(i * 2 + 1)))
enq.bits.src1State := src1Ready(readPortIndex(i))
enq.bits.src2State := src2Ready(readPortIndex(i))
val src1Ready = VecInit((0 until 4).map(i => io.readState(i * 2).resp))
val src2Ready = VecInit((0 until 4).map(i => io.readState(i * 2 + 1).resp))
enq.bits.src1State := src1Ready(indexVec(i))
enq.bits.src2State := src2Ready(indexVec(i))
enq.bits.src3State := DontCare
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State}) " +
......@@ -5,17 +5,18 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.regfile.RfReadPort
import xiangshan.backend.rename.BusyTableReadIO
import xiangshan.backend.exu.Exu._
class Dispatch2Ls extends XSModule {
val io = IO(new Bundle() {
val fromDq = Flipped(Vec(dpParams.LsDqDeqWidth, DecoupledIO(new MicroOp)))
val readIntRf = Vec(NRMemReadPorts, Flipped(new RfReadPort(XLEN)))
val readFpRf = Vec(exuParameters.StuCnt, Flipped(new RfReadPort(XLEN + 1)))
val readIntRf = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
val readFpRf = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
// val intRegAddr = Vec(NRMemReadPorts, Output(UInt(PhyRegIdxWidth.W)))
// val fpRegAddr = Vec(exuParameters.StuCnt, Output(UInt(PhyRegIdxWidth.W)))
val intRegRdy = Vec(NRMemReadPorts, Input(Bool()))
val fpRegRdy = Vec(exuParameters.StuCnt, Input(Bool()))
val readIntState = Vec(NRMemReadPorts, Flipped(new BusyTableReadIO))
val readFpState = Vec(exuParameters.StuCnt, Flipped(new BusyTableReadIO))
val numExist = Input(Vec(exuParameters.LsExuCnt, UInt(log2Ceil(IssQueSize).W)))
val enqIQCtrl = Vec(exuParameters.LsExuCnt, DecoupledIO(new MicroOp))
......@@ -52,12 +53,16 @@ class Dispatch2Ls extends XSModule {
val readPort = Seq(0, 1, 2, 4)
for (i <- 0 until exuParameters.LsExuCnt) {
if (i < exuParameters.LduCnt) {
io.readIntRf(readPort(i)).addr := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)) := io.fromDq(indexVec(i)).bits.psrc1
io.readIntState(readPort(i)).req := io.fromDq(indexVec(i)).bits.psrc1
else {
io.readFpRf(i - exuParameters.LduCnt).addr := io.fromDq(indexVec(i)).bits.psrc2
io.readIntRf(readPort(i) ).addr := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)+1).addr := io.fromDq(indexVec(i)).bits.psrc2
io.readFpRf(i - exuParameters.LduCnt) := io.fromDq(indexVec(i)).bits.psrc2
io.readIntRf(readPort(i) ) := io.fromDq(indexVec(i)).bits.psrc1
io.readIntRf(readPort(i)+1) := io.fromDq(indexVec(i)).bits.psrc2
io.readFpState(i - exuParameters.LduCnt).req := io.fromDq(indexVec(i)).bits.psrc2
io.readIntState(readPort(i) ).req := io.fromDq(indexVec(i)).bits.psrc1
io.readIntState(readPort(i)+1).req := io.fromDq(indexVec(i)).bits.psrc2
......@@ -75,14 +80,15 @@ class Dispatch2Ls extends XSModule {
enq.valid := storeIndexGen.io.mapping(i - exuParameters.LduCnt).valid && storeReady
enq.bits := io.fromDq(indexVec(i)).bits
enq.bits.src1State := io.intRegRdy(readPort(i))
enq.bits.src1State := io.readIntState(readPort(i)).resp
if (i < exuParameters.LduCnt) {
enq.bits.src2State := DontCare
else {
enq.bits.src2State := Mux(io.fromDq(indexVec(i)).bits.ctrl.src2Type === SrcType.fp,
io.fpRegRdy(i - exuParameters.LduCnt), io.intRegRdy(readPort(i) + 1))
io.readFpState(i - exuParameters.LduCnt).resp, io.readIntState(readPort(i) + 1).resp)
enq.bits.src3State := DontCare
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State}) " +
......@@ -100,7 +100,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
val src2 = in.bits.src2
val src3 = in.bits.src3
fu.io.in.valid := in.valid && sel && !in.bits.uop.roqIdx.needFlush(io.redirect)
fu.io.in.valid := in.valid && sel
fu.io.in.bits.uop := in.bits.uop
fu.io.in.bits.src.foreach(_ <> DontCare)
if (fuCfg.srcCnt > 0) {
......@@ -120,13 +120,21 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
def writebackArb(in: Seq[DecoupledIO[FuOutput]], out: DecoupledIO[ExuOutput]): Arbiter[FuOutput] = {
if (needArbiter) {
val arb = Module(new Arbiter(new FuOutput(in.head.bits.len), in.size))
arb.io.in <> in
arb.io.out.ready := out.ready
out.bits.data := arb.io.out.bits.data
out.bits.uop := arb.io.out.bits.uop
out.valid := arb.io.out.valid
if(in.size == 1){
in.head.ready := out.ready
out.bits.data := in.head.bits.data
out.bits.uop := in.head.bits.uop
out.valid := in.head.valid
} else {
val arb = Module(new Arbiter(new FuOutput(in.head.bits.len), in.size))
arb.io.in <> in
arb.io.out.ready := out.ready
out.bits.data := arb.io.out.bits.data
out.bits.uop := arb.io.out.bits.uop
out.valid := arb.io.out.valid
} else {
in.foreach(_.ready := out.ready)
val sel = Mux1H(in.map(x => x.valid -> x))
......@@ -14,8 +14,8 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
val toFpUnits = Seq(f2f, fdivSqrt)
val toIntUnits = Seq(f2i)
assert(fpArb.io.in.length == toFpUnits.size)
assert(intArb.io.in.length == toIntUnits.size)
assert(toFpUnits.size == 1 || fpArb.io.in.length == toFpUnits.size)
assert(toIntUnits.size == 1 || intArb.io.in.length == toIntUnits.size)
val input = io.fromFp
val isRVF = input.bits.uop.ctrl.isRVF
......@@ -56,15 +56,20 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
val arbiters = for(i <- mulReq.indices) yield {
val other = arbReq(i).getOrElse(Seq())
val arb = Module(new Arbiter(new ExuOutput, 1+other.size))
arb.io.in <> mulReq(i) +: other
for(i <- mulReq.indices) {
val out = io.out(directConnect.size + i)
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
val other = arbReq(i).getOrElse(Seq())
out.valid := mulReq(i).valid
out.bits := mulReq(i).bits
mulReq(i).ready := true.B
} else {
val arb = Module(new Arbiter(new ExuOutput, 1+other.size))
arb.io.in <> mulReq(i) +: other
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
if(portUsed < numOut){
......@@ -78,10 +83,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
for(i <- mulReq.indices){
sb.append(s"[ ${cfgs(io.in.indexOf(mulReq(i))).name} ")
val useArb = arbReq(i).nonEmpty
for(req <- arbReq(i).getOrElse(Nil)){
sb.append(s"${cfgs(io.in.indexOf(req)).name} ")
sb.append(s"] -> arb -> out #${directConnect.size + i}\n")
sb.append(s"] -> ${if(useArb) "arb ->" else ""} out #${directConnect.size + i}\n")
......@@ -47,6 +47,67 @@ trait HasExceptionNO {
val frontendSet = List(
// instrAddrMisaligned,
val csrSet = List(
val loadUnitSet = List(
val storeUnitSet = List(
val atomicsUnitSet = (loadUnitSet ++ storeUnitSet).distinct
val allPossibleSet = (frontendSet ++ csrSet ++ loadUnitSet ++ storeUnitSet).distinct
val csrWbCount = (0 until 16).map(i => if (csrSet.contains(i)) 1 else 0)
val loadWbCount = (0 until 16).map(i => if (loadUnitSet.contains(i)) 1 else 0)
val storeWbCount = (0 until 16).map(i => if (storeUnitSet.contains(i)) 1 else 0)
val atomicsWbCount = (0 until 16).map(i => if (atomicsUnitSet.contains(i)) 1 else 0)
val writebackCount = (0 until 16).map(i => csrWbCount(i) + atomicsWbCount(i) + loadWbCount(i) + 2 * storeWbCount(i))
// Build a new vector that carries only the bits of `vec` whose indices are listed in `select`.
//   vec          - source vector of Bool flags
//   select       - indices of `vec` to keep
//   dontCareBits - when true (default), the result is an ExceptionVec() wire whose
//                  non-selected entries are left as DontCare; checked first, so it
//                  takes precedence over `falseBits`
//   falseBits    - when true (and dontCareBits is false), the result is an
//                  ExceptionVec() wire whose non-selected entries are driven to false.B
// When both flags are false, the result is a compact Vec of length `select.length`
// whose i-th element is vec(select(i)).
// NOTE(review): ExceptionVec() is defined elsewhere; presumably a Vec[Bool] indexed by
// exception number — confirm.
def partialSelect(vec: Vec[Bool], select: Seq[Int], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] = {
if (dontCareBits) {
// Full-width result; every entry is DontCare unless overridden below.
val new_vec = Wire(ExceptionVec())
new_vec := DontCare
// Later connections override the DontCare for the selected indices only.
select.map(i => new_vec(i) := vec(i))
return new_vec
else if (falseBits) {
// Full-width result; every entry defaults to false unless overridden below.
val new_vec = Wire(ExceptionVec())
new_vec.map(_ := false.B)
select.map(i => new_vec(i) := vec(i))
return new_vec
else {
// Compact result: position i holds the bit taken from source index select(i).
val new_vec = Wire(Vec(select.length, Bool()))
select.zipWithIndex.map{ case(s, i) => new_vec(i) := vec(s) }
return new_vec
// Convenience wrappers around partialSelect, one per exception index set.
// Each forwards `vec`, `dontCareBits` and `falseBits` unchanged and only fixes the
// `select` argument to the named set.

// Keep only the indices listed in frontendSet.
def selectFrontend(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, frontendSet, dontCareBits, falseBits)
// Keep only the indices listed in csrSet.
def selectCSR(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, csrSet, dontCareBits, falseBits)
// Keep only the indices listed in loadUnitSet.
def selectLoad(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, loadUnitSet, dontCareBits, falseBits)
// Keep only the indices listed in storeUnitSet.
def selectStore(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, storeUnitSet, dontCareBits, falseBits)
// Keep only the indices listed in atomicsUnitSet (the distinct union of loadUnitSet and storeUnitSet).
def selectAtomics(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, atomicsUnitSet, dontCareBits, falseBits)
// Keep only the indices listed in allPossibleSet (the distinct union of the frontend, CSR, load and store sets).
def selectAll(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, allPossibleSet, dontCareBits, falseBits)
class FpuCsrIO extends XSBundle {
......@@ -166,7 +227,7 @@ class CSR extends FunctionUnit with HasCSRConst
if (HasFPU) { extList = extList ++ List('f', 'd') }
val misaInitVal = getMisaMxl(2) | extList.foldLeft(0.U)((sum, i) => sum | getMisaExt(i)) //"h8000000000141105".U
val misa = RegInit(UInt(XLEN.W), misaInitVal)
// MXL = 2 | 0 | EXT = b 00 0000 0100 0001 0001 0000 0101
......@@ -175,8 +236,8 @@ class CSR extends FunctionUnit with HasCSRConst
val mimpid = RegInit(UInt(XLEN.W), 0.U) // provides a unique encoding of the version of the processor implementation
val mhartNo = hartId()
val mhartid = RegInit(UInt(XLEN.W), mhartNo.asUInt) // the hardware thread running the code
val mstatus = RegInit(UInt(XLEN.W), "h00001800".U) // another option: "h8000c0100".U
val mstatus = RegInit(UInt(XLEN.W), 0.U)
// mstatus Value Table
// | sd |
// | pad1 |
......@@ -196,7 +257,7 @@ class CSR extends FunctionUnit with HasCSRConst
// | spp | 0 |
// | pie | 0000 | pie.h is used as UBE
// | ie | 0000 | uie hardlinked to 0, as N ext is not implemented
val mstatusStruct = mstatus.asTypeOf(new MstatusStruct)
def mstatusUpdateSideEffect(mstatus: UInt): UInt = {
val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct))
......@@ -318,11 +379,11 @@ class CSR extends FunctionUnit with HasCSRConst
// Emu perfcnt
val hasEmuPerfCnt = !env.FPGAPlatform
val nrEmuPerfCnts = if (hasEmuPerfCnt) 0x80 else 0x3
val emuPerfCnts = List.fill(nrEmuPerfCnts)(RegInit(0.U(XLEN.W)))
val emuPerfCntCond = List.fill(nrEmuPerfCnts)(WireInit(false.B))
(emuPerfCnts zip emuPerfCntCond).map { case (c, e) => when (e) { c := c + 1.U } }
val emuPerfCntsLoMapping = (0 until nrEmuPerfCnts).map(i => MaskedRegMap(0x1000 + i, emuPerfCnts(i)))
val emuPerfCntsHiMapping = (0 until nrEmuPerfCnts).map(i => MaskedRegMap(0x1080 + i, emuPerfCnts(i)(63, 32)))
println(s"CSR: hasEmuPerfCnt:${hasEmuPerfCnt}")
......@@ -336,7 +397,7 @@ class CSR extends FunctionUnit with HasCSRConst
mcycle := mcycle + 1.U
val minstret = RegInit(0.U(XLEN.W))
minstret := minstret + RegNext(csrio.perf.retiredInstr)
// CSR reg map
val basicPrivMapping = Map(
......@@ -424,13 +485,13 @@ class CSR extends FunctionUnit with HasCSRConst
val mapping = basicPrivMapping ++
perfCntMapping ++
pmpMapping ++
emuPerfCntsLoMapping ++
pmpMapping ++
emuPerfCntsLoMapping ++
(if (XLEN == 32) emuPerfCntsHiMapping else Nil) ++
(if (HasFPU) fcsrMapping else Nil)
val addr = src2(11, 0)
val csri = src2(16, 12)
val csri = ZeroExt(src2(16, 12), XLEN)
val rdata = Wire(UInt(XLEN.W))
val wdata = LookupTree(func, List(
CSROpType.wrt -> src1,
......@@ -441,14 +502,17 @@ class CSR extends FunctionUnit with HasCSRConst
CSROpType.clri -> (rdata & (~csri).asUInt())
csrio.isPerfCnt := (addr >= Mcycle.U) && (addr <= Mhpmcounter31.U)
val addrInPerfCnt = (addr >= Mcycle.U) && (addr <= Mhpmcounter31.U)
csrio.isPerfCnt := addrInPerfCnt
// satp wen check
val satpLegalMode = (wdata.asTypeOf(new SatpStruct).mode===0.U) || (wdata.asTypeOf(new SatpStruct).mode===8.U)
// general CSR wen check
val wen = valid && func =/= CSROpType.jmp && (addr=/=Satp.U || satpLegalMode)
val permitted = csrAccessPermissionCheck(addr, false.B, priviledgeMode)
val modePermitted = csrAccessPermissionCheck(addr, false.B, priviledgeMode)
val perfcntPermitted = perfcntPermissionCheck(addr, priviledgeMode, mcounteren, scounteren)
val permitted = Mux(addrInPerfCnt, perfcntPermitted, modePermitted)
// Writeable check is ignored.
// Currently, write to illegal csr addr will be ignored
MaskedRegMap.generate(mapping, addr, rdata, wen && permitted, wdata)
......@@ -580,6 +644,17 @@ class CSR extends FunctionUnit with HasCSRConst
io.in.ready := true.B
io.out.valid := valid
val csrExceptionVec = WireInit(cfIn.exceptionVec)
csrExceptionVec(breakPoint) := io.in.valid && isEbreak
csrExceptionVec(ecallM) := priviledgeMode === ModeM && io.in.valid && isEcall
csrExceptionVec(ecallS) := priviledgeMode === ModeS && io.in.valid && isEcall
csrExceptionVec(ecallU) := priviledgeMode === ModeU && io.in.valid && isEcall
// Trigger an illegal instr exception when:
// * unimplemented csr is being read/written
// * csr access is illegal
csrExceptionVec(illegalInstr) := (isIllegalAddr || isIllegalAccess) && wen
cfOut.exceptionVec := csrExceptionVec
* Exception and Intr
......@@ -609,24 +684,11 @@ class CSR extends FunctionUnit with HasCSRConst
val hasStorePageFault = csrio.exception.bits.cf.exceptionVec(storePageFault) && raiseException
val hasStoreAddrMisaligned = csrio.exception.bits.cf.exceptionVec(storeAddrMisaligned) && raiseException
val hasLoadAddrMisaligned = csrio.exception.bits.cf.exceptionVec(loadAddrMisaligned) && raiseException
val hasInstrAccessFault = csrio.exception.bits.cf.exceptionVec(instrAccessFault) && raiseException
val hasLoadAccessFault = csrio.exception.bits.cf.exceptionVec(loadAccessFault) && raiseException
val hasStoreAccessFault = csrio.exception.bits.cf.exceptionVec(storeAccessFault) && raiseException
val csrExceptionVec = Wire(Vec(16, Bool()))
csrExceptionVec.map(_ := false.B)
csrExceptionVec(breakPoint) := io.in.valid && isEbreak
csrExceptionVec(ecallM) := priviledgeMode === ModeM && io.in.valid && isEcall
csrExceptionVec(ecallS) := priviledgeMode === ModeS && io.in.valid && isEcall
csrExceptionVec(ecallU) := priviledgeMode === ModeU && io.in.valid && isEcall
// Trigger an illegal instr exception when:
// * unimplemented csr is being read/written
// * csr access is illegal
csrExceptionVec(illegalInstr) := (isIllegalAddr || isIllegalAccess) && wen
csrExceptionVec(loadPageFault) := hasLoadPageFault
csrExceptionVec(storePageFault) := hasStorePageFault
val iduExceptionVec = cfIn.exceptionVec
val exceptionVec = csrExceptionVec.asUInt() | iduExceptionVec.asUInt()
cfOut.exceptionVec.zipWithIndex.map{case (e, i) => e := exceptionVec(i) }
val raiseExceptionVec = csrio.exception.bits.cf.exceptionVec.asUInt()
val raiseExceptionVec = csrio.exception.bits.cf.exceptionVec
val exceptionNO = ExcPriority.foldRight(0.U)((i: Int, sum: UInt) => Mux(raiseExceptionVec(i), i.U, sum))
val causeNO = (raiseIntr << (XLEN-1)).asUInt() | Mux(raiseIntr, intrNO, exceptionNO)
......@@ -738,6 +800,11 @@ class CSR extends FunctionUnit with HasCSRConst
"PtwL2TlbHit" -> (0x1027, "perfCntPtwL2TlbHit" ),
"ICacheReq" -> (0x1028, "perfCntIcacheReqCnt" ),
"ICacheMiss" -> (0x1029, "perfCntIcacheMissCnt"),
"ICacheMMIO" -> (0x102a, "perfCntIcacheMMIOCnt"),
// "FetchFromLoopBuffer" -> (0x102b, "CntFetchFromLoopBuffer"),
// "ExitLoop1" -> (0x102c, "CntExitLoop1"),
// "ExitLoop2" -> (0x102d, "CntExitLoop2"),
// "ExitLoop3" -> (0x102e, "CntExitLoop3")
"ubtbRight" -> (0x1030, "perfCntubtbRight"),
"ubtbWrong" -> (0x1031, "perfCntubtbWrong"),
......@@ -20,41 +20,51 @@ class Fence extends FunctionUnit{ // TODO: check it
val fencei = IO(Output(Bool()))
val toSbuffer = IO(new FenceToSbuffer)
val (valid, src1, uop, func, lsrc1, lsrc2) = (
val (valid, src1) = (
val s_sb :: s_tlb :: s_icache :: s_none :: Nil = Enum(4)
val state = RegInit(s_sb)
val s_idle :: s_wait :: s_tlb :: s_icache :: s_fence :: Nil = Enum(5)
val state = RegInit(s_idle)
/* fsm
* s_idle : init state, send sbflush
* s_wait : send sbflush, wait for sbEmpty
* s_tlb : flush tlb, just hold one cycle
* s_icache: flush icache, just hold one cycle
* s_fence : do nothing, for timing optimization
val sbuffer = toSbuffer.flushSb
val sbEmpty = toSbuffer.sbIsEmpty
val uop = RegEnable(io.in.bits.uop, io.in.fire())
val func = uop.ctrl.fuOpType
val lsrc1 = uop.ctrl.lsrc1
val lsrc2 = uop.ctrl.lsrc2
// NOTE: icache & tlb & sbuffer must receive flush signal at any time
sbuffer := valid && state === s_sb && !sbEmpty
fencei := (state === s_icache && sbEmpty) || (state === s_sb && valid && sbEmpty && func === FenceOpType.fencei)
sfence.valid := (state === s_tlb && sbEmpty) || (state === s_sb && valid && sbEmpty && func === FenceOpType.sfence)
sfence.bits.rs1 := Mux(state === s_sb, lsrc1 === 0.U, RegEnable(lsrc1 === 0.U, io.in.fire()))
sfence.bits.rs2 := Mux(state === s_sb, lsrc2 === 0.U, RegEnable(lsrc2 === 0.U, io.in.fire()))
sfence.bits.addr := Mux(state === s_sb, src1, RegEnable(src1, io.in.fire()))
when (state === s_sb && valid && func === FenceOpType.fencei && !sbEmpty) { state := s_icache }
when (state === s_sb && valid && func === FenceOpType.sfence && !sbEmpty) { state := s_tlb }
when (state === s_sb && valid && func === FenceOpType.fence && !sbEmpty) { state := s_none }
when (state =/= s_sb && sbEmpty) { state := s_sb }
sbuffer := state === s_wait
fencei := state === s_icache
sfence.valid := state === s_tlb
sfence.bits.rs1 := lsrc1 === 0.U
sfence.bits.rs2 := lsrc2 === 0.U
sfence.bits.addr := RegEnable(src1, io.in.fire())
assert(!(io.out.valid && io.out.bits.uop.ctrl.rfWen))
io.in.ready := state === s_sb
io.out.valid := (state =/= s_sb && sbEmpty) || (state === s_sb && sbEmpty && valid)
when (state === s_idle && valid) { state := s_wait }
when (state === s_wait && func === FenceOpType.fencei && sbEmpty) { state := s_icache }
when (state === s_wait && func === FenceOpType.sfence && sbEmpty) { state := s_tlb }
when (state === s_wait && func === FenceOpType.fence && sbEmpty) { state := s_fence }
when (state =/= s_idle && state =/= s_wait) { state := s_idle }
io.in.ready := state === s_idle
io.out.valid := state =/= s_idle && state =/= s_wait
io.out.bits.data := DontCare
io.out.bits.uop := Mux(state === s_sb, uop, RegEnable(uop, io.in.fire()))
io.out.bits.uop := uop
assert(!(valid || state =/= s_sb) || io.out.ready) // NOTE: fence instr must be the first(only one) instr, so io.out.ready must be true
XSDebug(valid, p"In(${io.in.valid} ${io.in.ready}) state:${state} Inpc:0x${Hexadecimal(io.in.bits.uop.cf.pc)} InroqIdx:${io.in.bits.uop.roqIdx}\n")
XSDebug(state =/= s_idle, p"state:${state} sbuffer(flush:${sbuffer} empty:${sbEmpty}) fencei:${fencei} sfence:${sfence}\n")
XSDebug(io.out.valid, p" Out(${io.out.valid} ${io.out.ready}) state:${state} Outpc:0x${Hexadecimal(io.out.bits.uop.cf.pc)} OutroqIdx:${io.out.bits.uop.roqIdx}\n")
XSDebug(valid || state=/=s_sb || io.out.valid, p"In(${io.in.valid} ${io.in.ready}) Out(${io.out.valid} ${io.out.ready}) state:${state} sbuffer(flush:${sbuffer} empty:${sbEmpty}) fencei:${fencei} sfence:${sfence} Inpc:0x${Hexadecimal(io.in.bits.uop.cf.pc)} InroqIdx:${io.in.bits.uop.roqIdx} Outpc:0x${Hexadecimal(io.out.bits.uop.cf.pc)} OutroqIdx:${io.out.bits.uop.roqIdx}\n")
assert(!(io.out.valid && io.out.bits.uop.ctrl.rfWen))
assert(!io.out.valid || io.out.ready, "when fence is out valid, out ready should always be true")
\ No newline at end of file
......@@ -25,10 +25,13 @@ class Jump extends FunctionUnit with HasRedirectOut {
val offset = SignExt(Mux(JumpOpType.jumpOpIsJal(func),
), XLEN)
val isJalr = JumpOpType.jumpOpisJalr(func)
val isAuipc = JumpOpType.jumpOpisAuipc(func)
val offset = SignExt(Mux1H(Seq(
isJalr -> ImmUnion.I.toImm32(immMin),
isAuipc -> ImmUnion.U.toImm32(immMin),
!(isJalr || isAuipc) -> ImmUnion.J.toImm32(immMin)
)), XLEN)
val redirectHit = uop.roqIdx.needFlush(io.redirectIn)
val valid = io.in.valid
......@@ -53,7 +56,7 @@ class Jump extends FunctionUnit with HasRedirectOut {
brUpdate.taken := true.B
// Output
val res = snpc
val res = Mux(JumpOpType.jumpOpisAuipc(func), target, snpc)
io.in.ready := io.out.ready
io.out.valid := valid
......@@ -41,7 +41,7 @@ class Radix2Divider(len: Int) extends AbstractDivider(len) {
val uopReg = RegEnable(uop, newReq)
val cnt = Counter(len)
when (newReq) {
when (newReq && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn)) {
state := s_log2
} .elsewhen (state === s_log2) {
// `canSkipShift` is calculated as following:
......@@ -85,6 +85,6 @@ class Radix2Divider(len: Int) extends AbstractDivider(len) {
io.out.bits.data := Mux(ctrlReg.isW, SignExt(res(31,0),xlen), res)
io.out.bits.uop := uopReg
io.out.valid := state === s_finish && !kill
io.out.valid := state === s_finish
io.in.ready := state === s_idle
\ No newline at end of file
......@@ -37,7 +37,9 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
when(io.in.fire()){ state := Mux(divZero, s_finish, s_lzd) }
when (io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn)) {
state := Mux(divZero, s_finish, s_lzd)
is(s_lzd){ // leading zero detection
state := s_normlize
......@@ -220,7 +222,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
io.in.ready := state===s_idle
io.out.valid := state===s_finish && !kill
io.out.valid := state===s_finish
io.out.bits.data := Mux(ctrlReg.isW,
SignExt(res(31, 0), len),
......@@ -47,7 +47,7 @@ class FDivSqrt extends FPUSubModule {
val src1 = unbox(io.in.bits.src(0), tag, None)
val src2 = unbox(io.in.bits.src(1), tag, None)
divSqrt.io.inValid := io.in.fire()
divSqrt.io.inValid := io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn)
divSqrt.io.sqrtOp := fpCtrl.sqrt
divSqrt.io.a := src1
divSqrt.io.b := src2
......@@ -74,7 +74,7 @@ class FDivSqrt extends FPUSubModule {
val flags = Mux(single, round32.io.exceptionFlags, round64.io.exceptionFlags)
io.in.ready := state===s_idle
io.out.valid := state===s_finish && !(killReg || kill)
io.out.valid := state===s_finish && !killReg
io.out.bits.uop := uopReg
io.out.bits.data := RegNext(data, divSqrtRawValid)
fflags := RegNext(flags, divSqrtRawValid)
......@@ -12,14 +12,16 @@ class FPToFP extends FPUPipelineModule{
override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get
val ctrl = io.in.bits.uop.ctrl.fpu
val ctrlIn = io.in.bits.uop.ctrl.fpu
val ctrl = S1Reg(ctrlIn)
val inTag = ctrl.typeTagIn
val outTag = ctrl.typeTagOut
val src1 = unbox(io.in.bits.src(0), inTag, None)
val src2 = unbox(io.in.bits.src(1), inTag, None)
val wflags = ctrl.wflags
val src1 = S1Reg(unbox(io.in.bits.src(0), ctrlIn.typeTagIn, None))
val src2 = S1Reg(unbox(io.in.bits.src(1), ctrlIn.typeTagIn, None))
val rmReg = S1Reg(rm)
val signNum = Mux(rm(1), src1 ^ src2, Mux(rm(0), ~src2, src2))
val signNum = Mux(rmReg(1), src1 ^ src2, Mux(rmReg(0), ~src2, src2))
val fsgnj = Cat(signNum(fLen), src1(fLen-1, 0))
val fsgnjMux = Wire(new Bundle() {
......@@ -32,7 +34,7 @@ class FPToFP extends FPUPipelineModule{
val dcmp = Module(new CompareRecFN(maxExpWidth, maxSigWidth))
dcmp.io.a := src1
dcmp.io.b := src2
dcmp.io.signaling := !rm(1)
dcmp.io.signaling := !rmReg(1)
val lt = dcmp.io.lt || (dcmp.io.a.asSInt() < 0.S && dcmp.io.b.asSInt() >= 0.S)
......@@ -41,7 +43,7 @@ class FPToFP extends FPUPipelineModule{
val isnan2 = maxType.isNaN(src2)
val isInvalid = maxType.isSNaN(src1) || maxType.isSNaN(src2)
val isNaNOut = isnan1 && isnan2
val isLHS = isnan2 || rm(0) =/= lt && !isnan1
val isLHS = isnan2 || rmReg(0) =/= lt && !isnan1
fsgnjMux.exc := isInvalid << 4
fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, src1, src2))
......@@ -67,7 +69,7 @@ class FPToFP extends FPUPipelineModule{
when(outTag === typeTag(outType).U && (typeTag(outType) == 0).B || (outTag < inTag)){
val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig))
narrower.io.in := src1
narrower.io.roundingMode := rm
narrower.io.roundingMode := rmReg
narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding
val narrowed = sanitizeNaN(narrower.io.out, outType)
mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed)
......@@ -77,11 +79,6 @@ class FPToFP extends FPUPipelineModule{
var resVec = Seq(mux)
for(i <- 1 to latency){
resVec = resVec :+ PipelineReg(i)(resVec(i-1))
io.out.bits.data := resVec.last.data
fflags := resVec.last.exc
io.out.bits.data := S2Reg(mux.data)
fflags := S2Reg(mux.exc)
......@@ -18,30 +18,37 @@ class FPToInt extends FPUPipelineModule {
val ctrl = io.in.bits.uop.ctrl.fpu
val src1_s = unbox(src1, S, Some(FType.S))
val src1_d = unbox(src1, ctrl.typeTagIn, None)
val src2_d = unbox(src2, ctrl.typeTagIn, None)
val src1_ieee = ieee(src1)
val move_out = Mux(ctrl.typeTagIn === S, src1_ieee(31, 0), src1_ieee)
// stage 1: unbox inputs
val src1_d = S1Reg(unbox(src1, ctrl.typeTagIn, None))
val src2_d = S1Reg(unbox(src2, ctrl.typeTagIn, None))
val ctrl_reg = S1Reg(ctrl)
val rm_reg = S1Reg(rm)
// stage2
val src1_ieee = ieee(src1_d)
val move_out = Mux(ctrl_reg.typeTagIn === S,
src1_ieee(FType.S.ieeeWidth - 1, 0),
val classify_out = Mux(ctrl.typeTagIn === S,
val classify_out = Mux(ctrl_reg.typeTagIn === S,
FType.S.classify(maxType.unsafeConvert(src1_d, FType.S)),
val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
dcmp.io.a := src1_d
dcmp.io.b := src2_d
dcmp.io.signaling := !rm(1)
dcmp.io.signaling := !rm_reg(1)
val dcmp_out = ((~rm).asUInt() & Cat(dcmp.io.lt, dcmp.io.eq)).orR()
val dcmp_out = ((~rm_reg).asUInt() & Cat(dcmp.io.lt, dcmp.io.eq)).orR()
val dcmp_exc = dcmp.io.exceptionFlags
val conv = Module(new RecFNToIN(maxExpWidth, maxSigWidth, XLEN))
conv.io.in := src1_d
conv.io.roundingMode := rm
conv.io.signedOut := ~ctrl.typ(0)
conv.io.roundingMode := rm_reg
conv.io.signedOut := ~ctrl_reg.typ(0)
val conv_out = WireInit(conv.io.out)
val conv_exc = WireInit(Cat(
......@@ -52,10 +59,10 @@ class FPToInt extends FPUPipelineModule {
val narrow = Module(new RecFNToIN(maxExpWidth, maxSigWidth, 32))
narrow.io.in := src1_d
narrow.io.roundingMode := rm
narrow.io.signedOut := ~ctrl.typ(0)
narrow.io.roundingMode := rm_reg
narrow.io.signedOut := ~ctrl_reg.typ(0)
when(!ctrl.typ(1)) { // fcvt.w/wu.fp
when(!ctrl_reg.typ(1)) { // fcvt.w/wu.fp
val excSign = src1_d(maxExpWidth + maxSigWidth) && !maxType.isNaN(src1_d)
val excOut = Cat(conv.io.signedOut === excSign, Fill(32 - 1, !excSign))
val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1)
......@@ -67,26 +74,18 @@ class FPToInt extends FPUPipelineModule {
val intData = Wire(UInt(XLEN.W))
intData := Mux(ctrl.wflags,
Mux(ctrl.fcvt, conv_out, dcmp_out),
Mux(rm(0), classify_out, move_out)
intData := Mux(ctrl_reg.wflags,
Mux(ctrl_reg.fcvt, conv_out, dcmp_out),
Mux(rm_reg(0), classify_out, move_out)
val doubleOut = Mux(ctrl.fcvt, ctrl.typ(1), ctrl.fmt(0))
val intValue = Mux(doubleOut,
val doubleOut = Mux(ctrl_reg.fcvt, ctrl_reg.typ(1), ctrl_reg.fmt(0))
val intValue = S2Reg(Mux(doubleOut,
SignExt(intData, XLEN),
SignExt(intData(31, 0), XLEN)
val exc = Mux(ctrl.fcvt, conv_exc, dcmp_exc)
var dataVec = Seq(intValue)
var excVec = Seq(exc)
for (i <- 1 to latency) {
dataVec = dataVec :+ PipelineReg(i)(dataVec(i - 1))
excVec = excVec :+ PipelineReg(i)(excVec(i - 1))
val exc = S2Reg(Mux(ctrl_reg.fcvt, conv_exc, dcmp_exc))
io.out.bits.data := dataVec.last
fflags := excVec.last
io.out.bits.data := intValue
fflags := exc
......@@ -185,4 +185,9 @@ trait HasCSRConst {
val lowestAccessPrivilegeLevel = addr(9,8)
mode >= lowestAccessPrivilegeLevel && !(wen && readOnly)
// Permission check for an access to a performance-counter CSR.
// `index` is the one-hot encoding of the low 5 bits of `addr` (addr & 31), i.e. it selects
// one bit position in the mask arguments.
//   - mode === ModeM : always permitted.
//   - mode === ModeS : permitted when the selected bit is set in `mmask`.
//   - any other mode : permitted only when the selected bit is set in both `mmask` and `smask`.
// NOTE(review): `mmask`/`smask` are presumably the mcounteren/scounteren values -- confirm at the call site.
def perfcntPermissionCheck(addr: UInt, mode: UInt, mmask: UInt, smask: UInt): Bool = {
// one-hot select of the counter addressed by the low 5 address bits
val index = UIntToOH(addr & 31.U)
Mux(mode === ModeM, true.B, Mux(mode === ModeS, (index & mmask) =/= 0.U, (index & mmask & smask) =/= 0.U))
\ No newline at end of file
......@@ -4,7 +4,8 @@ import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.decode.ImmUnion
import xiangshan.backend.SelImm
import xiangshan.backend.decode.{ImmUnion, Imm_U}
import xiangshan.backend.exu.{Exu, ExuConfig}
import xiangshan.backend.regfile.RfReadPort
......@@ -22,12 +23,13 @@ class BypassQueue(number: Int) extends XSModule {
} else if(number == 0) {
io.in <> io.out
io.out.valid := io.in.valid
// NOTE: a zero-delay bypass does not need to consider redirect
} else {
val queue = Seq.fill(number)(RegInit(0.U.asTypeOf(new Bundle{
val valid = Bool()
val bits = new MicroOp
queue(0).valid := io.in.valid
queue(0).valid := io.in.valid && !io.in.bits.roqIdx.needFlush(io.redirect)
queue(0).bits := io.in.bits
(0 until (number-1)).map{i =>
queue(i+1) := queue(i)
......@@ -50,7 +52,7 @@ class RSCtrlDataIO(srcNum: Int) extends XSBundle {
val fuReady = Input(Bool())
val srcUpdate = Input(Vec(IssQueSize+1, Vec(srcNum, Bool()))) // Note: the last one for enq
val redVec = Input(UInt(IssQueSize.W))
val redirectVec = Input(Vec(IssQueSize, Bool()))
val feedback = Input(Vec(IssQueSize+1, Bool())) // Note: the last one for hit
override def cloneType: RSCtrlDataIO.this.type = new RSCtrlDataIO(srcNum).asInstanceOf[this.type]
......@@ -100,16 +102,14 @@ class ReservationStationCtrl
* valid queue : from state queue, valid or not
* empty queue : from state queue, empty or not(not valid and not replay)
* src queue : record rdy or not
* cnt queue : record replay cycle
* count queue : record replay cycle
val s_idle :: s_valid :: s_selected :: s_bubble :: s_wait :: s_replay :: Nil = Enum(6)
val s_idle :: s_valid :: s_wait :: s_replay :: Nil = Enum(4)
/* state machine
* s_idle : empty slot, init state, set when deq
* s_valid : ready to be selected
* s_selected : the not bubble that selected
* s_bubble : the bubble that selected
* s_wait : wait for feedback
* s_replay : replay after some particular cycle
......@@ -117,64 +117,69 @@ class ReservationStationCtrl
val validQueue = VecInit(stateQueue.map(_ === s_valid))
val emptyQueue = VecInit(stateQueue.map(_ === s_idle))
val srcQueue = Reg(Vec(iqSize, Vec(srcNum, Bool())))
val cntQueue = Reg(Vec(iqSize, UInt(log2Up(replayDelay).W)))
val countQueue = Reg(Vec(iqSize, UInt(log2Up(replayDelay).W)))
// rs queue part:
// val tailPtr = RegInit(0.U((iqIdxWidth+1).W))
val tailPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(iqSize)))
val idxQueue = RegInit(VecInit((0 until iqSize).map(_.U(iqIdxWidth.W))))
val indexQueue = RegInit(VecInit((0 until iqSize).map(_.U(iqIdxWidth.W))))
// turn to indexed index
def widthMap[T <: Data](f: Int => T) = VecInit((0 until iqSize).map(f))
val stateIdxQue = widthMap(i => stateQueue(idxQueue(i))) // NOTE: only use for debug, remove it later
val validIdxQue = widthMap(i => validQueue(idxQueue(i)))
val emptyIdxQue = widthMap(i => emptyQueue(idxQueue(i)))
val srcIdxQue = widthMap(i => srcQueue(idxQueue(i)))
val cntIdxQue = widthMap(i => cntQueue(idxQueue(i))) // NOTE: only use for debug, remove it later
val stateIdxQue = widthMap(i => stateQueue(indexQueue(i))) // NOTE: only use for debug, remove it later
val validIdxQue = widthMap(i => validQueue(indexQueue(i)))
val emptyIdxQue = widthMap(i => emptyQueue(indexQueue(i)))
val srcIdxQue = widthMap(i => srcQueue(indexQueue(i)))
val cntIdxQue = widthMap(i => countQueue(indexQueue(i))) // NOTE: only use for debug, remove it later
val readyIdxQue = VecInit(srcIdxQue.zip(validIdxQue).map{ case (a,b) => Cat(a).andR & b })
// redirect
val redVec = io.data.redVec
val redVecPtr = widthMap(i => io.data.redVec(idxQueue(i)))
val fbMatchVec = Wire(UInt(iqSize.W))
val redirectVec = io.data.redirectVec
val redirectVecPtr = widthMap(i => io.data.redirectVec(indexQueue(i)))
val feedbackMatchVec = Wire(UInt(iqSize.W))
if (feedback) {
fbMatchVec := widthMap(i => io.data.feedback(i) && (stateQueue(i) === s_wait || stateQueue(i)===s_valid)).asUInt
feedbackMatchVec := widthMap(i => io.data.feedback(i) && (stateQueue(i) === s_wait || stateQueue(i)===s_valid)).asUInt
} else {
fbMatchVec := 0.U
feedbackMatchVec := 0.U
val fbHit = io.data.feedback(IssQueSize)
val feedbackHit = io.data.feedback(IssQueSize)
// select ready
// for no replay, select just equal to deq (attached)
// with replay, select is just two stage with deq.
val issFire = Wire(Bool())
val issueFire = Wire(Bool())
val moveMask = WireInit(0.U(iqSize.W))
val selectMask = WireInit(VecInit((0 until iqSize).map(i => readyIdxQue(i))))
// val selIdx = ParallelMux(selectMask zip idxQueue) // NOTE: the idx in the idxQueue
val (selPtr, haveReady) = PriorityEncoderWithFlag(selectMask) // NOTE: the idx of idxQueue
val selIdx = idxQueue(selPtr)
val selIdxReg = RegNext(selIdx) // NOTE: may dup with other signal, fix it later
val redSel = redVec(selIdx)
val selValid = !redSel && haveReady
val selReg = RegNext(selValid)
val selPtrReg = RegNext(Mux(moveMask(selPtr), selPtr-1.U, selPtr))
val lastSelMask = Wire(UInt(iqSize.W))
val selectMask = WireInit(VecInit((0 until iqSize).map(i => readyIdxQue(i)))).asUInt & lastSelMask
val selectIndex = ParallelPriorityMux(selectMask.asBools zip indexQueue) // NOTE: the idx in the indexQueue
val selectPtr = ParallelPriorityMux(selectMask.asBools.zipWithIndex.map{ case (a,i) => (a, i.U)}) // NOTE: the idx of indexQueue
val haveReady = Cat(selectMask).orR
val selectIndexReg = RegNext(selectIndex)
val selectValid = haveReady
val selectReg = RegNext(selectValid)
val selectPtrReg = RegNext(Mux(moveMask(selectPtr), selectPtr-1.U, selectPtr))
lastSelMask := ~Mux(selectReg, UIntToOH(selectPtrReg), 0.U)
assert(RegNext(!(haveReady && selectPtr >= tailPtr.asUInt)), "bubble should not have valid state like s_valid or s_wait")
// sel bubble
val bubMask = WireInit(VecInit((0 until iqSize).map(i => emptyIdxQue(i))))
// val bubIdx = ParallelMux(bubMask zip idxQueue) // NOTE: the idx in the idxQueue
val (bubPtr, findBubble) = PriorityEncoderWithFlag(bubMask) // NOTE: the idx of the idxQueue
val haveBubble = findBubble && (bubPtr < tailPtr.asUInt)
val bubIdx = idxQueue(bubPtr)
val bubIdxReg = RegNext(bubIdx) // NOTE: may dup with other signal, fix it later
val bubValid = haveBubble && (if (feedback) true.B else !selValid)
val bubReg = RegNext(bubValid)
val bubPtrReg = RegNext(Mux(moveMask(bubPtr), bubPtr-1.U, bubPtr))
val lastbubbleMask = Wire(UInt(iqSize.W))
val bubbleMask = WireInit(VecInit((0 until iqSize).map(i => emptyIdxQue(i)))).asUInt & lastbubbleMask
// val bubbleIndex = ParallelMux(bubbleMask zip indexQueue) // NOTE: the idx in the indexQueue
val bubblePtr= ParallelPriorityMux(bubbleMask.asBools.zipWithIndex.map{ case (a,i) => (a, i.U)}) // NOTE: the idx of the indexQueue
val findBubble = Cat(bubbleMask).orR
val haveBubble = findBubble && (bubblePtr < tailPtr.asUInt)
val bubbleIndex = indexQueue(bubblePtr)
val bubbleValid = haveBubble && (if (feedback) true.B else !selectValid)
val bubbleReg = RegNext(bubbleValid)
val bubblePtrReg = RegNext(Mux(moveMask(bubblePtr), bubblePtr-1.U, bubblePtr))
lastbubbleMask := ~Mux(bubbleReg, UIntToOH(bubblePtrReg), 0.U) & (if(feedback) ~(0.U(iqSize.W))
else Mux(RegNext(selectValid && io.redirect.valid), 0.U, ~(0.U(iqSize.W))))
// deq
val dequeue = if (feedback) bubReg
else bubReg || issFire
val deqPtr = if (feedback) bubPtrReg
else Mux(selReg, selPtrReg, bubPtrReg)
val dequeue = if (feedback) bubbleReg
else bubbleReg || issueFire
val deqPtr = if (feedback) bubblePtrReg
else Mux(selectReg, selectPtrReg, bubblePtrReg)
moveMask := {
(Fill(iqSize, 1.U(1.W)) << deqPtr)(iqSize-1, 0)
} & Fill(iqSize, dequeue)
......@@ -182,31 +187,30 @@ class ReservationStationCtrl
// move, move happens when deq
for(i <- 0 until iqSize-1){
idxQueue(i) := idxQueue(i+1)
indexQueue(i) := indexQueue(i+1)
idxQueue.last := idxQueue(deqPtr)
indexQueue.last := indexQueue(deqPtr)
when (selValid) {
stateQueue(selIdx) := s_selected
when (bubValid) {
stateQueue(bubIdx) := s_bubble
when (issueFire) {
if (feedback) { when (stateQueue(selectIndexReg) === s_valid) { stateQueue(selectIndexReg) := s_wait } }
else { stateQueue(selectIndexReg) := s_idle } // NOTE: reset the state for seclectMask timing to avoid operaion '<'
// redirect and feedback && wakeup
for (i <- 0 until iqSize) {
// replay
val cnt = cntQueue(i)
val count = countQueue(i)
when (stateQueue(i) === s_replay) {
cnt := cnt - 1.U
when (cnt === 0.U) { stateQueue(i) := s_valid }
count := count - 1.U
when (count === 0.U) { stateQueue(i) := s_valid }
// feedback
when (fbMatchVec(i)) {
stateQueue(i) := Mux(fbHit, s_idle, s_replay)
cntQueue(i) := Mux(fbHit, cnt, (replayDelay-1).U)
when (feedbackMatchVec(i)) {
stateQueue(i) := Mux(!feedbackHit && (stateQueue(i) === s_wait || stateQueue(i) === s_valid), s_replay, s_idle)
countQueue(i) := Mux(feedbackHit, count, (replayDelay-1).U)
// wakeup
val hitVec = io.data.srcUpdate(i)
......@@ -216,33 +220,19 @@ class ReservationStationCtrl
XSDebug(p"srcHit: i:${i.U} j:${j.U} src:${srcQueue(i)(j)}\n")
// mask last selected slot and deal with the mask
// TODO: state queue change may have long 'when' chain -> long latency
when (stateQueue(i) === s_selected) {
when (io.data.fuReady) {
if (feedback) {
stateQueue(i) := s_wait
} else {
stateQueue(i) := s_idle
}.otherwise { stateQueue(i) := s_valid }
when (stateQueue(i) === s_bubble) {
stateQueue(i) := s_idle
// redirect
when (redVec(i) && stateQueue(i) =/= s_idle) {
when (redirectVec(i) && stateQueue(i) =/= s_idle) {
stateQueue(i) := s_idle
// output
val issValid = selReg && !redVecPtr(selPtrReg)
val issueValid = selectReg
if (nonBlocked) {
issFire := issValid
issueFire := issueValid
assert(RegNext(io.data.fuReady), "if fu wanna fast wakeup, it should not block")
} else {
issFire := issValid && io.data.fuReady
issueFire := issueValid && io.data.fuReady
// enq
......@@ -253,14 +243,14 @@ class ReservationStationCtrl
val tailDec = tailPtr-1.U
tailPtr := Mux(dequeue === enqueue, tailPtr, Mux(dequeue, tailDec, tailInc))
io.enqCtrl.ready := !isFull || dequeue
io.enqCtrl.ready := !isFull || (if(feedback || nonBlocked) dequeue else false.B)
val enqUop = io.enqCtrl.bits
val srcSeq = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3)
val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
val srcStateSeq = Seq(enqUop.src1State, enqUop.src2State, enqUop.src3State)
val enqPtr = Mux(tailPtr.flag, deqPtr, tailPtr.value)
val enqIdx = idxQueue(enqPtr)
val enqIdx = indexQueue(enqPtr)
val enqBpVec = io.data.srcUpdate(IssQueSize)
def stateCheck(src: UInt, srcType: UInt): Bool = {
......@@ -280,8 +270,8 @@ class ReservationStationCtrl
// other to Data
io.data.enqPtr := enqIdx
io.data.deqPtr.valid := selValid
io.data.deqPtr.bits := selIdx
io.data.deqPtr.valid := selectValid
io.data.deqPtr.bits := selectIndex
io.data.enqCtrl.valid := enqueue
io.data.enqCtrl.bits := io.enqCtrl.bits
......@@ -292,20 +282,20 @@ class ReservationStationCtrl
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
val print = !(tailPtr.asUInt===0.U) || io.enqCtrl.valid || enqueue || dequeue
XSDebug(print || true.B, p"In(${io.enqCtrl.valid} ${io.enqCtrl.ready}) Out(${issValid} ${io.data.fuReady}) nonBlocked:${nonBlocked.B} needfb:${feedback.B}\n")
XSDebug(print , p"tailPtr:${tailPtr} enq:${enqueue} deq:${dequeue} isFull:${isFull} " +
XSDebug(print || true.B, p"In(${io.enqCtrl.valid} ${io.enqCtrl.ready}) Out(${issueValid} ${io.data.fuReady}) nonBlocked:${nonBlocked.B} needfb:${feedback.B}\n")
XSDebug(print || true.B, p"tailPtr:${tailPtr} enq:${enqueue} deq:${dequeue} isFull:${isFull} " +
p"vIdxQue:${Binary(validIdxQue.asUInt)} rIdxQue:${Binary(readyIdxQue.asUInt)}\n")
XSDebug(print && Cat(redVecPtr).orR, p"Redirect: ${Hexadecimal(redVecPtr.asUInt)}\n")
XSDebug(print && Cat(fbMatchVec).orR, p"Feedback: ${Hexadecimal(fbMatchVec.asUInt)} Hit:${fbHit}\n")
XSDebug(print, p"moveMask:${Binary(moveMask)} selMask:${Binary(selectMask.asUInt)} bubMask:${Binary(bubMask.asUInt)}\n")
XSDebug(print, p"selIdxWire:${selPtr} haveReady:${haveReady} redSel:${redSel}" +
p"selV:${selValid} selReg:${selReg} selPtrReg:${selPtrReg} selIdx:${selIdx} selIdxReg:${selIdxReg}\n")
XSDebug(print, p"bubValid:${bubValid} haveBub:${haveBubble} bubPtr:${bubPtr} findBub:${findBubble} " +
p"bubReg:${bubReg} bubPtrReg:${bubPtrReg} bubIdx:${bubIdx} bubIdxReg:${bubIdxReg}\n")
XSDebug(print, p"issValid:${issValid} issueFire:${issFire} dequeue:${dequeue} deqPtr:${deqPtr}\n")
XSDebug(p" :Idx|v|r|s |cnt|s1:s2:s3\n")
XSDebug(print && Cat(redirectVecPtr).orR, p"Redirect: ${Hexadecimal(redirectVecPtr.asUInt)}\n")
XSDebug(print && Cat(feedbackMatchVec).orR, p"Feedback: ${Hexadecimal(feedbackMatchVec.asUInt)} Hit:${feedbackHit}\n")
XSDebug(print || true.B, p"moveMask:${Binary(moveMask)} selMask:${Binary(selectMask.asUInt)} bubbleMask:${Binary(bubbleMask.asUInt)}\n")
XSDebug(print || true.B, p"selectPtr:${selectPtr} haveReady:${haveReady} " +
p"selV:${selectValid} selectReg:${selectReg} selectPtrReg:${selectPtrReg} selectIndex:${selectIndex} lastSelMask:${Hexadecimal(lastSelMask)}\n")
XSDebug(print || true.B, p"bubbleValid:${bubbleValid} haveBub:${haveBubble} bubblePtr:${bubblePtr} findBub:${findBubble} " +
p"bubbleReg:${bubbleReg} bubblePtrReg:${bubblePtrReg} bubbleIndex:${bubbleIndex} lastbubbleMask:${Hexadecimal(lastbubbleMask)}\n")
XSDebug(print || true.B, p"issueValid:${issueValid} issueFire:${issueFire} dequeue:${dequeue} deqPtr:${deqPtr}\n")
XSDebug(p" :Idx|v|r|s |count|s1:s2:s3\n")
for(i <- srcQueue.indices) {
XSDebug(p"${i.U}: ${idxQueue(i)}|${validIdxQue(i)}|${readyIdxQue(i)}|${stateIdxQue(i)}|${cntIdxQue(i)}|" +
XSDebug(p"${i.U}: ${indexQueue(i)}|${validIdxQue(i)}|${readyIdxQue(i)}|${stateIdxQue(i)}|${cntIdxQue(i)}|" +
List.tabulate(srcNum)(j => p"${srcIdxQue(i)(j)}").reduce(_ + ":" + _) + "\n")
......@@ -387,7 +377,7 @@ class ReservationStationData
val uopMem = Module(new SyncDataModuleTemplate(new MicroOp, iqSize, iqSize, 1))
uopMem.io <> DontCare
uopMem.io.wen.foreach(_ := false.B)
// uop -- read = iqSize write = 1
// uopMem 's read ports have fixed values
uopMem.io.raddr.zipWithIndex.foreach{ case(r, i) => r := i.U }
......@@ -402,6 +392,7 @@ class ReservationStationData
val uop = WireInit(VecInit((0 until iqSize).map(i => uopRead(i.U))))
val redirectHit = WireInit(false.B)
val enq = io.ctrl.enqPtr
val sel = io.ctrl.deqPtr
val deq = RegEnable(sel.bits, sel.valid)
......@@ -435,10 +426,10 @@ class ReservationStationData
dataWrite(enqPtrReg, 0, src1Mux)
// TODO: opt this, a full map is not necessary here
val imm32 = LookupTree(
ImmUnion.immSelMap.map(x => x._1 -> x._2.toImm32(enqUopReg.ctrl.imm))
// alu only need U type and I type imm
val imm32 = Mux(enqUopReg.ctrl.selImm === SelImm.IMM_U,
val imm64 = SignExt(imm32, XLEN)
val src2Mux = Mux(enqUopReg.ctrl.src2Type === SrcType.imm,
......@@ -476,23 +467,36 @@ class ReservationStationData
(hit, RegNext(hit), ParallelMux(hitVec.map(RegNext(_)) zip io.writeBackedData))
// NOTE: special case for fast bypass at enqueue: the bypassing uop arrives one cycle later,
// so io.broadcastedUops is registered here and compared against the sources on the following cycle.
val lastFastUops = Reg(Vec(wakeupCnt, Valid(new MicroOp)))
for (i <- 0 until wakeupCnt) {
lastFastUops(i) := io.broadcastedUops(i)
// Checks `src`/`srcType` against the registered (one-cycle-old) broadcasted uops.
// Returns a tuple of:
//   1. hit           -- some registered uop matched in this cycle (qualified by `valid`),
//   2. RegNext(hit)  -- the same hit delayed by one cycle,
//   3. data          -- the entry of io.writeBackedData selected by the hit vector, registered one cycle.
// NOTE(review): `wbHit` is defined outside this view; presumably it matches the uop's destination against `src` -- confirm.
def lastBypass(src: UInt, srcType: UInt, valid: Bool = true.B) : (Bool, Bool, UInt) = {
val hitVec = lastFastUops.map(port => wbHit(port.bits, src, srcType) && port.valid && valid)
// at most one registered uop may hit the same source
assert(RegNext(PopCount(hitVec)===0.U || PopCount(hitVec)===1.U))
val hit = ParallelOR(hitVec)
(hit, RegNext(hit), RegNext(ParallelMux(hitVec zip io.writeBackedData)))
io.ctrl.srcUpdate.map(a => a.map(_ := false.B))
for (i <- 0 until iqSize) {
val srcSeq = Seq(uop(i).psrc1, uop(i).psrc2, uop(i).psrc3)
val srcTypeSeq = Seq(uop(i).ctrl.src1Type, uop(i).ctrl.src2Type, uop(i).ctrl.src3Type)
for (j <- 0 until srcNum) {
val (wuHit, wuData) = wakeup(srcSeq(j), srcTypeSeq(j))
val (bpHit, bpHitReg, bpData) = bypass(srcSeq(j), srcTypeSeq(j))
when (wuHit || bpHit) { io.ctrl.srcUpdate(i)(j) := true.B }
when (wuHit) { /* data(i)(j) := wuData */dataWrite(i.U, j, wuData) }
when (bpHitReg && !(enqPtrReg===i.U && enqEnReg)) { /* data(i)(j) := bpData */dataWrite(i.U, j, bpData) }
val (wakeupHit, wakeupData) = wakeup(srcSeq(j), srcTypeSeq(j))
val (bypassHit, bypassHitReg, bypassData) = bypass(srcSeq(j), srcTypeSeq(j))
when (wakeupHit || bypassHit) { io.ctrl.srcUpdate(i)(j) := true.B }
when (wakeupHit) { dataWrite(i.U, j, wakeupData) }
when (bypassHitReg && !(enqPtrReg===i.U && enqEnReg)) { dataWrite(i.U, j, bypassData) }
// NOTE: the hit is computed from the stored data's info, so there is an error:
// at enqueue, the hit uses the previous instruction's info, not the enqueuing instruction's info.
// Correcting it here would add long latency, so it is handled in ctrl or somewhere else.
// The enqueue bypass is done below.
XSDebug(wuHit, p"WUHit: (${i.U})(${j.U}) Data:0x${Hexadecimal(wuData)}\n")
XSDebug(bpHit, p"BPHit: (${i.U})(${j.U})\n")
XSDebug(bpHitReg, p"BPHitData: (${i.U})(${j.U}) Data:0x${Hexadecimal(bpData)}\n")
XSDebug(wakeupHit, p"wakeupHit: (${i.U})(${j.U}) Data:0x${Hexadecimal(wakeupData)}\n")
XSDebug(bypassHit, p"bypassHit: (${i.U})(${j.U})\n")
XSDebug(bypassHitReg, p"bypassHitData: (${i.U})(${j.U}) Data:0x${Hexadecimal(bypassData)}\n")
......@@ -500,29 +504,41 @@ class ReservationStationData
val exuInput = io.deq.bits
exuInput := DontCare
exuInput.uop := uop(deq)
exuInput.uop.cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
val regValues = List.tabulate(srcNum)(i => dataRead(Mux(sel.valid, sel.bits, deq), i))
XSDebug(io.deq.fire(), p"[regValues] " + List.tabulate(srcNum)(idx => p"reg$idx: ${Hexadecimal(regValues(idx))}").reduce((p1, p2) => p1 + " " + p2) + "\n")
exuInput.src1 := regValues(0)
if (srcNum > 1) exuInput.src2 := regValues(1)
if (srcNum > 2) exuInput.src3 := regValues(2)
io.deq.valid := RegNext(sel.valid)
io.deq.valid := RegNext(sel.valid && ~redirectHit)
if (nonBlocked) { assert(RegNext(io.deq.ready), s"${name} if fu wanna fast wakeup, it should not block")}
// to ctrl
val srcSeq = Seq(enqUop.psrc1, enqUop.psrc2, enqUop.psrc3)
val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
io.ctrl.srcUpdate(IssQueSize).zipWithIndex.map{ case (h, i) =>
val (bpHit, bpHitReg, bpData)= bypass(srcSeq(i), srcTypeSeq(i), enqCtrl.valid)
when (bpHitReg) { /* data(enqPtrReg)(i) := bpData */dataWrite(enqPtrReg, i, bpData) }
h := bpHit
io.ctrl.srcUpdate(IssQueSize).zipWithIndex.map{ case (h, i) => // h: port, i: 0~srcNum-1
val (bypassHit, bypassHitReg, bypassData) = bypass(srcSeq(i), srcTypeSeq(i), enqCtrl.valid)
val (wakeupHit, wakeupData) = wakeup(srcSeq(i), srcTypeSeq(i), enqCtrl.valid)
val (lastBypassHit, lastBypassHitReg, lastBypassDataReg) = lastBypass(srcSeq(i), srcTypeSeq(i), enqCtrl.valid)
val wakeupHitReg = RegNext(wakeupHit)
val wakeupDataReg = RegNext(wakeupData)
when (bypassHitReg) { dataWrite(enqPtrReg, i, bypassData) }
when (wakeupHitReg) { dataWrite(enqPtrReg, i, wakeupDataReg) }
when (lastBypassHitReg) { dataWrite(enqPtrReg, i, lastBypassDataReg) }
h := bypassHit || wakeupHit || lastBypassHit
// NOTE: enq bp is done here
XSDebug(bpHit, p"EnqBPHit: (${i.U})\n")
XSDebug(bpHitReg, p"EnqBPHitData: (${i.U}) data:${Hexadecimal(bpData)}\n")
XSDebug(bypassHit, p"EnqbypassHit: (${i.U})\n")
XSDebug(lastBypassHit, p"EnqLbypassHit: (${i.U})\n")
XSDebug(wakeupHit, p"EnqwakeupHit: (${Binary(io.ctrl.srcUpdate(iqSize).asUInt())})\n")
XSDebug(bypassHitReg, p"EnqbypassHitData: (${i.U}) data:${Hexadecimal(bypassData)}\n")
XSDebug(lastBypassHitReg, p"EnqLbypassHitData: (${i.U}) data:${Hexadecimal(lastBypassDataReg)}\n")
XSDebug(wakeupHitReg, p"EnqwakeupHitData: (${i.U}) data:${Hexadecimal(wakeupDataReg)}\n")
if (nonBlocked) { io.ctrl.fuReady := true.B }
else { io.ctrl.fuReady := io.deq.ready }
io.ctrl.redVec := VecInit(uop.map(_.roqIdx.needFlush(io.redirect))).asUInt
io.ctrl.redirectVec := uop.map(_.roqIdx.needFlush(io.redirect))
redirectHit := io.ctrl.redirectVec(sel.bits)
io.ctrl.feedback := DontCare
if (feedback) {
......@@ -535,12 +551,20 @@ class ReservationStationData
// bypass send
io.selectedUop <> DontCare
if (fastWakeup) {
val bpQueue = Module(new BypassQueue(fixedDelay))
bpQueue.io.in.valid := sel.valid // FIXME: error when function is blocked => fu should not be blocked
bpQueue.io.in.bits := uop(sel.bits)
bpQueue.io.redirect := io.redirect
io.selectedUop.valid := bpQueue.io.out.valid
io.selectedUop.bits := bpQueue.io.out.bits
if (fixedDelay == 0) {
io.selectedUop.valid := sel.valid
io.selectedUop.bits := uop(sel.bits)
io.selectedUop.bits.cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
} else {
val bpQueue = Module(new BypassQueue(fixedDelay))
bpQueue.io.in.valid := sel.valid // FIXME: error when function is blocked => fu should not be blocked
bpQueue.io.in.bits := uop(sel.bits)
bpQueue.io.redirect := io.redirect
io.selectedUop.valid := bpQueue.io.out.valid
io.selectedUop.bits := bpQueue.io.out.bits
io.selectedUop.bits.cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
XSDebug(io.selectedUop.valid, p"SelUop: pc:0x${Hexadecimal(io.selectedUop.bits.cf.pc)}" +
p" roqIdx:${io.selectedUop.bits.roqIdx} pdest:${io.selectedUop.bits.pdest} " +
......@@ -549,7 +573,7 @@ class ReservationStationData
// log
XSDebug(io.ctrl.redVec.orR, p"Red: ${Binary(io.ctrl.redVec)}\n")
XSDebug(Cat(io.ctrl.redirectVec).orR, p"Red: ${io.ctrl.redirectVec}\n")
XSDebug(io.feedback.valid && feedback.B, p"feedback: roqIdx:${io.feedback.bits.roqIdx} hit:${io.feedback.bits.hit}\n")
XSDebug(true.B, p"out(${io.deq.valid} ${io.deq.ready})\n")
XSDebug(io.deq.valid, p"Deq(${io.deq.valid} ${io.deq.ready}): deqPtr:${deq} pc:${Hexadecimal(io.deq.bits.uop.cf.pc)}" +
......@@ -17,12 +17,13 @@ package object backend {
// jump
object JumpOpType {
def jal = "b11_000".U
def jalr = "b11_010".U
def jal = "b00".U
def jalr = "b01".U
def auipc = "b10".U
// def call = "b11_011".U
// def ret = "b11_100".U
def jumpOpIsJal(op: UInt) = !op(1)
def jumpOpisJalr(op: UInt) = op(1)
def jumpOpisJalr(op: UInt) = op(0)
def jumpOpisAuipc(op: UInt) = op(1)
object FenceOpType {
......@@ -35,9 +35,8 @@ class Regfile
if (!useBlackBox) {
val mem = Mem(NRPhyRegs, UInt(len.W))
for (r <- io.readPorts) {
val raddr_reg = RegNext(r.addr)
val rdata = if (hasZero) Mux(raddr_reg === 0.U, 0.U, mem(raddr_reg)) else mem(raddr_reg)
r.data := rdata
val rdata = if (hasZero) Mux(r.addr === 0.U, 0.U, mem(r.addr)) else mem(r.addr)
r.data := RegNext(rdata)
for (w <- io.writePorts) {
when(w.wen) {
......@@ -5,6 +5,11 @@ import chisel3.util._
import xiangshan._
import utils.{ParallelOR, XSDebug}
// One read port of the busy table; BusyTable instantiates a Vec of these as `io.read`.
class BusyTableReadIO extends XSBundle {
// index of the register whose state is queried (PhyRegIdxWidth bits wide)
val req = Input(UInt(PhyRegIdxWidth.W))
// query result; BusyTable drives it with the inverted table bit selected by `req`
val resp = Output(Bool())
class BusyTable(numReadPorts: Int, numWritePorts: Int) extends XSModule {
val io = IO(new Bundle() {
val flush = Input(Bool())
......@@ -13,8 +18,7 @@ class BusyTable(numReadPorts: Int, numWritePorts: Int) extends XSModule {
// set preg state to ready (write back regfile + roq walk)
val wbPregs = Vec(numWritePorts, Flipped(ValidIO(UInt(PhyRegIdxWidth.W))))
// read preg state
val rfReadAddr = Vec(numReadPorts, Input(UInt(PhyRegIdxWidth.W)))
val pregRdy = Vec(numReadPorts, Output(Bool()))
val read = Vec(numReadPorts, new BusyTableReadIO)
val table = RegInit(0.U(NRPhyRegs.W))
......@@ -29,27 +33,10 @@ class BusyTable(numReadPorts: Int, numWritePorts: Int) extends XSModule {
val tableAfterWb = table & (~wbMask).asUInt
val tableAfterAlloc = tableAfterWb | allocMask
for((raddr, rdy) <- io.rfReadAddr.zip(io.pregRdy)){
rdy := !tableAfterWb(raddr)
io.read.map(r => r.resp := !table(r.req))
table := tableAfterAlloc
// for((alloc, i) <- io.allocPregs.zipWithIndex){
// when(alloc.valid){
// table(alloc.bits) := true.B
// }
// XSDebug(alloc.valid, "Allocate %d\n", alloc.bits)
// }
// for((wb, i) <- io.wbPregs.zipWithIndex){
// when(wb.valid){
// table(wb.bits) := false.B
// }
// XSDebug(wb.valid, "writeback %d\n", wb.bits)
// }
table := 0.U(NRPhyRegs.W)
......@@ -54,12 +54,9 @@ class RoqEnqIO extends XSBundle {
class RoqDispatchData extends RoqCommitInfo {
val crossPageIPFFix = Bool()
val exceptionVec = Vec(16, Bool())
class RoqWbData extends XSBundle {
// mostly for exceptions
val exceptionVec = Vec(16, Bool())
val fflags = UInt(5.W)
val flushPipe = Bool()
......@@ -70,7 +67,7 @@ class RoqDeqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
val state = Input(UInt(2.W))
val deq_v = Vec(CommitWidth, Input(Bool()))
val deq_w = Vec(CommitWidth, Input(Bool()))
val deq_exceptionVec = Vec(CommitWidth, Input(UInt(16.W)))
val deq_exceptionVec = Vec(CommitWidth, Input(ExceptionVec()))
val deq_flushPipe = Vec(CommitWidth, Input(Bool()))
// for flush: when exception occurs, reset deqPtrs to range(0, CommitWidth)
val intrBitSetReg = Input(Bool())
......@@ -83,19 +80,21 @@ class RoqDeqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
val deqPtrVec = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new RoqPtr))))
val possibleException = VecInit(io.deq_exceptionVec.map(selectAll(_, false)))
// for exceptions (flushPipe included) and interrupts:
// only consider the first instruction
val intrEnable = io.intrBitSetReg && !io.hasNoSpecExec && !CommitType.isLoadStore(io.commitType)
val exceptionEnable = io.deq_w(0) && (io.deq_exceptionVec(0).orR || io.deq_flushPipe(0))
val exceptionEnable = io.deq_w(0) && (possibleException(0).asUInt.orR || io.deq_flushPipe(0))
val redirectOutValid = io.state === 0.U && io.deq_v(0) && (intrEnable || exceptionEnable)
// for normal commits: only to consider when there're no exceptions
// we don't need to consider whether the first instruction has exceptions since it will trigger exceptions.
val commitBlocked = VecInit((0 until CommitWidth).map(i => if (i == 0) false.B else io.deq_exceptionVec(i).orR || io.deq_flushPipe(i)))
val canCommit = VecInit((0 until CommitWidth).map(i => io.deq_v(i) && io.deq_w(i) && !commitBlocked(i)))
val commitBlocked = VecInit((0 until CommitWidth).map(i => if (i == 0) false.B else possibleException(i).asUInt.orR || io.deq_flushPipe(i)))
val canCommit = VecInit((0 until CommitWidth).map(i => io.deq_v(i) && io.deq_w(i) /*&& !commitBlocked(i)*/))
val normalCommitCnt = PriorityEncoder(canCommit.map(c => !c) :+ true.B)
// when io.intrBitSetReg, only one instruction is allowed to commit
val commitCnt = Mux(io.intrBitSetReg, io.deq_v(0) && io.deq_w(0), normalCommitCnt)
// when io.intrBitSetReg or there're possible exceptions in these instructions, only one instruction is allowed to commit
val allowOnlyOne = VecInit(commitBlocked.drop(1)).asUInt.orR || io.intrBitSetReg
val commitCnt = Mux(allowOnlyOne, io.deq_v(0) && io.deq_w(0), normalCommitCnt)
val resetDeqPtrVec = VecInit((0 until CommitWidth).map(_.U.asTypeOf(new RoqPtr)))
val commitDeqPtrVec = VecInit(deqPtrVec.map(_ + commitCnt))
......@@ -118,7 +117,7 @@ class RoqEnqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
val state = Input(UInt(2.W))
val deq_v = Input(Bool())
val deq_w = Input(Bool())
val deq_exceptionVec = Input(UInt(16.W))
val deq_exceptionVec = Input(ExceptionVec())
val deq_flushPipe = Input(Bool())
val intrBitSetReg = Input(Bool())
val hasNoSpecExec = Input(Bool())
......@@ -137,7 +136,7 @@ class RoqEnqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
// for exceptions (flushPipe included) and interrupts:
// only consider the first instruction
val intrEnable = io.intrBitSetReg && !io.hasNoSpecExec && !CommitType.isLoadStore(io.commitType)
val exceptionEnable = io.deq_w && (io.deq_exceptionVec.orR || io.deq_flushPipe)
val exceptionEnable = io.deq_w && (selectAll(io.deq_exceptionVec, false).asUInt.orR || io.deq_flushPipe)
val redirectOutValid = io.state === 0.U && io.deq_v && (intrEnable || exceptionEnable)
// enqueue
......@@ -264,28 +263,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val writebackData = Module(new SyncDataModuleTemplate(new RoqWbData, RoqSize, CommitWidth, numWbPorts))
val writebackDataRead = writebackData.io.rdata
def mergeExceptionVec(dpData: RoqDispatchData, wbData: RoqWbData) = {
// these exceptions can be determined before dispatch.
// by default, let all exceptions be determined by dispatch.
// mergeVec(instrAddrMisaligned) := dpData(instrAddrMisaligned)
// mergeVec(instrAccessFault) := dpData(instrAccessFault)
// mergeVec(instrPageFault) := dpData(instrPageFault)
val mergeVec = WireInit(dpData.exceptionVec)
// these exceptions are determined in execution units
mergeVec(illegalInstr) := wbData.exceptionVec(illegalInstr)
mergeVec(breakPoint) := wbData.exceptionVec(breakPoint)
mergeVec(loadAddrMisaligned) := wbData.exceptionVec(loadAddrMisaligned)
mergeVec(loadAccessFault) := wbData.exceptionVec(loadAccessFault)
mergeVec(storeAddrMisaligned) := wbData.exceptionVec(storeAddrMisaligned)
mergeVec(storeAccessFault) := wbData.exceptionVec(storeAccessFault)
mergeVec(ecallU) := wbData.exceptionVec(ecallU)
mergeVec(ecallS) := wbData.exceptionVec(ecallS)
mergeVec(ecallM) := wbData.exceptionVec(ecallM)
mergeVec(loadPageFault) := wbData.exceptionVec(loadPageFault)
mergeVec(storePageFault) := wbData.exceptionVec(storePageFault)
// returns the merged exception vector
val exceptionDataRead = Wire(Vec(CommitWidth, ExceptionVec()))
io.roqDeqPtr := deqPtr
......@@ -303,8 +281,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
when (io.commits.valid.asUInt.orR && state =/= s_extrawalk) { hasNoSpecExec:= false.B }
io.enq.canAccept := allowEnqueue && !hasBlockBackward
io.enq.isEmpty := isEmpty
io.enq.resp := enqPtrVec
io.enq.resp := enqPtrVec
val canEnqueue = VecInit(io.enq.req.map(_.valid && io.enq.canAccept))
for (i <- 0 until RenameWidth) {
// we don't check whether io.redirect is valid here since redirect has higher priority
......@@ -319,9 +296,10 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val dispatchNum = Mux(io.enq.canAccept, PopCount(Cat(io.enq.req.map(_.valid))), 0.U)
io.enq.isEmpty := RegNext(isEmpty && dispatchNum === 0.U)
// debug info for enqueue (dispatch)
val dispatchNum = Mux(io.enq.canAccept, PopCount(Cat(io.enq.req.map(_.valid))), 0.U)
XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")
XSInfo(dispatchNum =/= 0.U, p"dispatched $dispatchNum insts\n")
......@@ -359,7 +337,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val deqWritebackData = writebackDataRead(0)
val debug_deqUop = debug_microOp(deqPtr.value)
val deqExceptionVec = mergeExceptionVec(deqDispatchData, deqWritebackData)
val deqExceptionVec = exceptionDataRead(0)
// For MMIO instructions, they should not trigger interrupts since they may be sent to lower level before it writes back.
// However, we cannot determine whether a load/store instruction is MMIO.
// Thus, we don't allow load/store instructions to trigger an interrupt.
......@@ -369,9 +347,9 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val isFlushPipe = writebacked(deqPtr.value) && deqWritebackData.flushPipe
io.redirectOut := DontCare
io.redirectOut.valid := (state === s_idle) && valid(deqPtr.value) && (intrEnable || exceptionEnable || isFlushPipe)
io.redirectOut.bits.level := Mux(isFlushPipe, RedirectLevel.flushAll, RedirectLevel.exception)
io.redirectOut.bits.level := Mux(intrEnable || exceptionEnable, RedirectLevel.exception, RedirectLevel.flushAll)
io.redirectOut.bits.interrupt := intrEnable
io.redirectOut.bits.target := Mux(isFlushPipe, deqDispatchData.pc + 4.U, io.csr.trapTarget)
io.redirectOut.bits.target := Mux(intrEnable || exceptionEnable, io.csr.trapTarget, deqDispatchData.pc + 4.U)
io.exception := debug_deqUop
io.exception.ctrl.commitType := deqDispatchData.commitType
......@@ -421,13 +399,15 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.commits.isWalk := state =/= s_idle
val commit_v = Mux(state === s_idle, VecInit(deqPtrVec.map(ptr => valid(ptr.value))), VecInit(walkPtrVec.map(ptr => valid(ptr.value))))
val commit_w = VecInit(deqPtrVec.map(ptr => writebacked(ptr.value)))
val commit_exception = dispatchDataRead.zip(writebackDataRead).map{ case (d, w) => mergeExceptionVec(d, w).asUInt.orR }
val commit_block = VecInit((0 until CommitWidth).map(i => !commit_w(i) || commit_exception(i) || writebackDataRead(i).flushPipe))
val commit_exception = exceptionDataRead.zip(writebackDataRead.map(_.flushPipe)).map{ case (e, f) => e.asUInt.orR || f }
val commit_block = VecInit((0 until CommitWidth).map(i => !commit_w(i)))
val allowOnlyOneCommit = VecInit(commit_exception.drop(1)).asUInt.orR || intrBitSetReg
// for instructions that may block others, we don't allow them to commit
for (i <- 0 until CommitWidth) {
// defaults: state === s_idle and instructions commit
// when intrBitSetReg, allow only one instruction to commit at each clock cycle
val isBlocked = if (i != 0) Cat(commit_block.take(i)).orR || intrBitSetReg else intrEnable
io.commits.valid(i) := commit_v(i) && commit_w(i) && !isBlocked && !commit_exception(i)
val isBlocked = if (i != 0) Cat(commit_block.take(i)).orR || allowOnlyOneCommit else intrEnable || commit_exception(0)
io.commits.valid(i) := commit_v(i) && commit_w(i) && !isBlocked
io.commits.info(i) := dispatchDataRead(i)
when (state === s_walk) {
......@@ -495,7 +475,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
deqPtrGenModule.io.state := state
deqPtrGenModule.io.deq_v := commit_v
deqPtrGenModule.io.deq_w := commit_w
deqPtrGenModule.io.deq_exceptionVec := VecInit(dispatchDataRead.zip(writebackDataRead).map{ case (d, w) => mergeExceptionVec(d, w).asUInt })
deqPtrGenModule.io.deq_exceptionVec := exceptionDataRead
deqPtrGenModule.io.deq_flushPipe := writebackDataRead.map(_.flushPipe)
deqPtrGenModule.io.intrBitSetReg := intrBitSetReg
deqPtrGenModule.io.hasNoSpecExec := hasNoSpecExec
......@@ -507,7 +487,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
enqPtrGenModule.io.state := state
enqPtrGenModule.io.deq_v := commit_v(0)
enqPtrGenModule.io.deq_w := commit_w(0)
enqPtrGenModule.io.deq_exceptionVec := deqExceptionVec.asUInt
enqPtrGenModule.io.deq_exceptionVec := deqExceptionVec
enqPtrGenModule.io.deq_flushPipe := writebackDataRead(0).flushPipe
enqPtrGenModule.io.intrBitSetReg := intrBitSetReg
enqPtrGenModule.io.hasNoSpecExec := hasNoSpecExec
......@@ -600,7 +580,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// enqueue logic set 6 writebacked to false
for (i <- 0 until RenameWidth) {
when (canEnqueue(i)) {
writebacked(enqPtrVec(i).value) := false.B
writebacked(enqPtrVec(i).value) := selectFrontend(io.enq.req(i).bits.cf.exceptionVec, false).asUInt.orR
// writeback logic set numWbPorts writebacked to true
......@@ -641,19 +621,59 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
wdata.sqIdx := req.sqIdx
wdata.pc := req.cf.pc
wdata.crossPageIPFFix := req.cf.crossPageIPFFix
wdata.exceptionVec := req.cf.exceptionVec
// wdata.exceptionVec := req.cf.exceptionVec
dispatchData.io.raddr := commitReadAddr_next
writebackData.io.wen := io.exeWbResults.map(_.valid)
writebackData.io.waddr := io.exeWbResults.map(_.bits.uop.roqIdx.value)
writebackData.io.wdata.zip(io.exeWbResults.map(_.bits)).map{ case (wdata, wb) =>
wdata.exceptionVec := wb.uop.cf.exceptionVec
wdata.fflags := wb.fflags
wdata.flushPipe := wb.uop.ctrl.flushPipe
writebackData.io.raddr := commitReadAddr_next
for (i <- 0 until 16) {
val exceptionData = Module(new SyncDataModuleTemplate(Bool(), RoqSize, CommitWidth, RenameWidth + writebackCount(i)))
var wPortIdx = 0
for (j <- 0 until RenameWidth) {
exceptionData.io.wen (wPortIdx) := canEnqueue(j)
exceptionData.io.waddr(wPortIdx) := enqPtrVec(j).value
exceptionData.io.wdata(wPortIdx) := (if (allPossibleSet.contains(i)) io.enq.req(j).bits.cf.exceptionVec(i) else false.B)
wPortIdx = wPortIdx + 1
if (csrWbCount(i) > 0) {
exceptionData.io.wen (wPortIdx) := io.exeWbResults(6).valid
exceptionData.io.waddr(wPortIdx) := io.exeWbResults(6).bits.uop.roqIdx.value
exceptionData.io.wdata(wPortIdx) := io.exeWbResults(6).bits.uop.cf.exceptionVec(i)
wPortIdx = wPortIdx + 1
if (atomicsWbCount(i) > 0) {
exceptionData.io.wen (wPortIdx) := io.exeWbResults(4).valid
exceptionData.io.waddr(wPortIdx) := io.exeWbResults(4).bits.uop.roqIdx.value
exceptionData.io.wdata(wPortIdx) := io.exeWbResults(4).bits.uop.cf.exceptionVec(i)
wPortIdx = wPortIdx + 1
if (loadWbCount(i) > 0) {
exceptionData.io.wen (wPortIdx) := io.exeWbResults(5).valid
exceptionData.io.waddr(wPortIdx) := io.exeWbResults(5).bits.uop.roqIdx.value
exceptionData.io.wdata(wPortIdx) := io.exeWbResults(5).bits.uop.cf.exceptionVec(i)
wPortIdx = wPortIdx + 1
if (storeWbCount(i) > 0) {
exceptionData.io.wen (wPortIdx) := io.exeWbResults(16).valid
exceptionData.io.waddr(wPortIdx) := io.exeWbResults(16).bits.uop.roqIdx.value
exceptionData.io.wdata(wPortIdx) := io.exeWbResults(16).bits.uop.cf.exceptionVec(i)
wPortIdx = wPortIdx + 1
exceptionData.io.wen (wPortIdx) := io.exeWbResults(17).valid
exceptionData.io.waddr(wPortIdx) := io.exeWbResults(17).bits.uop.roqIdx.value
exceptionData.io.wdata(wPortIdx) := io.exeWbResults(17).bits.uop.cf.exceptionVec(i)
wPortIdx = wPortIdx + 1
exceptionData.io.raddr := VecInit(deqPtrVec_next.map(_.value))
exceptionDataRead.zip(exceptionData.io.rdata).map{ case (d, r) => d(i) := r }
* debug info
package xiangshan.cache
import chisel3._
import chisel3.util._
import utils._
import chipsalliance.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
import xiangshan._
import xiangshan.frontend._
// Request bundle for one uncached (MMIO) instruction fetch.
class InsUncacheReq extends ICacheBundle
// Address to fetch from, PAddrBits wide.
val addr = UInt(PAddrBits.W)
// 3-bit request tag; InstrMMIOEntry copies it into the id field of the matching response.
val id = UInt(3.W)
// Response bundle returned for an InsUncacheReq.
class InsUncacheResp extends ICacheBundle
// All fetched beats flattened into one word: mmioBeats beats of mmioBusWidth bits each.
val data = UInt((mmioBeats * mmioBusWidth).W)
// Tag taken from the id field of the request this response answers.
val id = UInt(3.W)
/** One entry services one MMIO (uncached) instruction-fetch request at a time.
  *
  * State machine:
  *  - s_invalid:     idle; accepts a request and latches it into `req`.
  *  - s_refill_req:  issues a TileLink Get for the beat selected by `beatCounter`.
  *  - s_refill_resp: waits for the grant, stores the beat in `respDataReg`, then
  *                   either requests the next beat or moves on to s_send_resp.
  *  - s_send_resp:   presents all collected beats on `io.resp`.
  *
  * A flush seen while a refill is in flight is remembered in `needFlush`; the
  * entry abandons the request when the outstanding grant arrives.
  *
  * @param edge TileLink edge used to build the Get messages and size the A/D bundles
  */
class InstrMMIOEntry(edge: TLEdgeOut) extends XSModule with HasICacheParameters
{
  val io = IO(new Bundle {
    // index of this entry; used as the TileLink source id of its requests
    val id = Input(UInt(log2Up(cacheParams.nMMIOs).W))
    // client requests
    val req = Flipped(DecoupledIO(new InsUncacheReq ))
    val resp = DecoupledIO(new InsUncacheResp)

    val mmio_acquire = DecoupledIO(new TLBundleA(edge.bundle))
    val mmio_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val flush = Input(Bool())
  })

  val s_invalid :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)

  val state = RegInit(s_invalid)

  val req = Reg(new InsUncacheReq )
  val respDataReg = Reg(Vec(mmioBeats,UInt(mmioBusWidth.W)))
  val beatCounter = Counter(mmioBeats)

  // assign default values to output signals
  io.req.ready := false.B
  io.resp.valid := false.B
  io.resp.bits := DontCare

  io.mmio_acquire.valid := false.B
  io.mmio_acquire.bits := DontCare

  io.mmio_grant.ready := false.B

  val needFlush = RegInit(false.B)

  XSDebug("[ICache MMIO]entry: %d state: %d needFlush%d flush:%d\n", io.id, state, needFlush,io.flush)
  XSDebug("[ICache MMIO]req.addr: %x req.id: %d\n", req.addr, req.id)
  XSDebug("[ICache MMIO]mmio_acquire:(v:%d r:%d) mmio_grant:(v:%d r:%d)\n", io.mmio_acquire.valid, io.mmio_acquire.ready, io.mmio_grant.valid, io.mmio_grant.ready)
  XSDebug("[ICache MMIO]respReg: %x\n",respDataReg.asUInt)

  // Remember a flush that arrives while a refill is outstanding: the grant still
  // has to be consumed before the entry can be reused.
  when(io.flush && (state =/= s_invalid) && (state =/= s_send_resp)){ needFlush := true.B }
  .elsewhen((state=== s_send_resp) && needFlush){ needFlush := false.B }

  // --------------------------------------------
  // s_invalid: receive requests
  when (state === s_invalid) {
    io.req.ready := true.B
    beatCounter.value := 0.U

    when (io.req.fire()) {
      req := io.req.bits
      state := s_refill_req
    }
  }

  // --------------------------------------------
  // s_refill_req: request the beat selected by beatCounter
  when (state === s_refill_req) {
    io.mmio_acquire.valid := true.B
    // edge.Get returns (legal, bundle); only the bundle is driven onto the channel
    io.mmio_acquire.bits := edge.Get(
      fromSource = io.id,
      toAddress = req.addr + (beatCounter.value << log2Ceil(mmioBusBytes).U),
      lgSize = log2Ceil(mmioBusBytes).U
    )._2

    when (io.mmio_acquire.fire()) {
      state := s_refill_resp
    }
  }

  // NOTE(review): refill_done is not used anywhere in this module.
  val (_, _, refill_done, _) = edge.addr_inc(io.mmio_grant)

  // --------------------------------------------
  // s_refill_resp: collect the granted beat
  when (state === s_refill_resp) {
    io.mmio_grant.ready := true.B

    when (io.mmio_grant.fire()) {
      respDataReg(beatCounter.value) := io.mmio_grant.bits.data
      when (needFlush || io.flush) {
        // Abandon the request. needFlush must be cleared on this path: s_send_resp
        // is skipped, so nothing else would clear it and every later request on
        // this entry would be dropped as well. This connection comes after the
        // set above, so it takes priority in the same cycle.
        state := s_invalid
        needFlush := false.B
      }.elsewhen (beatCounter.value === (mmioBeats - 1).U) {
        state := s_send_resp
      }.otherwise {
        state := s_refill_req
      }
      // advance to the next beat; without this the same beat is requested forever
      beatCounter.inc()
    }
  }

  // --------------------------------------------
  // s_send_resp: hand the collected data back to the client
  when (state === s_send_resp) {
    io.resp.valid := true.B
    io.resp.bits.data := respDataReg.asUInt
    io.resp.bits.id := req.id
    // meta data should go with the response
    when (io.resp.fire() || needFlush) {
      state := s_invalid
      beatCounter.value := 0.U
    }
  }
}
// IO of the instruction uncache module: one request port, one response port and a flush input.
class icacheUncacheIO extends DCacheBundle {
// Incoming fetch requests (Flipped: this module is the consumer).
val req = Flipped(DecoupledIO(new InsUncacheReq ))
// Outgoing responses.
val resp = DecoupledIO(new InsUncacheResp)
// Flush input; icacheUncacheImp forwards it unchanged to every InstrMMIOEntry.
val flush = Input(Bool())
// LazyModule wrapper that converts instruction uncache requests (icacheUncacheIO) to TileLink.
// For now, we only deal with TL-UL.
class InstrUncache()(implicit p: Parameters) extends LazyModule with HasICacheParameters {
// A single TileLink master whose source ids cover IdRange(0, cacheParams.nMMIOs);
// icacheUncacheImp gives each InstrMMIOEntry its own index as source id.
val clientParameters = TLMasterPortParameters.v1(
clients = Seq(TLMasterParameters.v1(
sourceId = IdRange(0, cacheParams.nMMIOs)
// Client node built from the parameters above; the implementation takes its bus from this node.
val clientNode = TLClientNode(Seq(clientParameters))
// Hardware implementation of this wrapper.
lazy val module = new icacheUncacheImp(this)
// Implementation of InstrUncache: instantiates cacheParams.nMMIOs InstrMMIOEntry modules,
// hands each incoming request to one entry that is ready, routes TileLink grants back to
// the entry whose index matches the grant's source id, and arbitrates the entries' responses
// and acquire requests onto the single response port and the single A channel.
class icacheUncacheImp(outer: InstrUncache)
extends LazyModuleImp(outer)
with HasICacheParameters
with HasXSLog
with HasTLDump
val io = IO(new icacheUncacheIO)
// The first (head) outward connection of the client node.
val (bus, edge) = outer.clientNode.out.head
// Elaboration-time check that the D-channel data width equals wordBits.
require(bus.d.bits.data.getWidth == wordBits, "Uncache: tilelink width does not match")
// Merges the response ports of all entries into io.resp.
val resp_arb = Module(new Arbiter(new InsUncacheResp, cacheParams.nMMIOs))
val req = io.req
val resp = io.resp
val mmio_acquire = bus.a
val mmio_grant = bus.d
// Index of the entry that receives the current request (driven after the entries are built).
val entry_alloc_idx = Wire(UInt())
// Ready of the selected entry; defaults to false when no entry index matches.
val req_ready = WireInit(false.B)
// assign default values to output signals
bus.b.ready := false.B
bus.c.valid := false.B
bus.c.bits := DontCare
// Default; overridden by the `<>` connection below for the entry whose id matches the grant's source.
bus.d.ready := false.B
bus.e.valid := false.B
bus.e.bits := DontCare
val entries = (0 until cacheParams.nMMIOs) map { i =>
val entry = Module(new InstrMMIOEntry(edge))
// The entry index is also the TileLink source id the entry uses for its Get requests.
entry.io.id := i.U(log2Up(cacheParams.nMMIOs).W)
entry.io.flush := io.flush
// entry req
// Only the selected entry sees the request as valid; all entries see the same request bits.
entry.io.req.valid := (i.U === entry_alloc_idx) && req.valid
entry.io.req.bits := req.bits
when (i.U === entry_alloc_idx) {
req_ready := entry.io.req.ready
// entry resp
resp_arb.io.in(i) <> entry.io.resp
// By default an entry sees no grant; it is connected to the D channel only when the
// grant's source id equals this entry's index.
entry.io.mmio_grant.valid := false.B
entry.io.mmio_grant.bits := DontCare
when (mmio_grant.bits.source === i.U) {
entry.io.mmio_grant <> mmio_grant
// Pick the lowest-indexed entry whose request port is ready.
entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.req.ready))
req.ready := req_ready
resp <> resp_arb.io.out
// All entries share the A channel through TLArbiter.lowestFromSeq.
TLArbiter.lowestFromSeq(edge, mmio_acquire, entries.map(_.io.mmio_acquire))
......@@ -130,23 +130,35 @@ class L1plusCacheDataArray extends L1plusCacheModule {
io.read.ready := !rwhazard
for (w <- 0 until nWays) {
val array = Module(new SRAMTemplate(Bits((blockRows * encRowBits).W), set=nSets, way=1,
shouldReset=false, holdRead=false, singlePort=singlePort))
// data write
array.io.w.req.valid := io.write.bits.way_en(w) && io.write.valid
// data read
array.io.r.req.valid := io.read.bits.way_en(w) && io.read.valid
for (r <- 0 until blockRows) {
val array = Module(new SRAMTemplate(Bits(encRowBits.W), set=nSets, way=1,
shouldReset=false, holdRead=false, singlePort=singlePort))
// data write
array.io.w.req.valid := io.write.bits.way_en(w) && io.write.bits.wmask(r).asBool && io.write.valid
// data read
array.io.r.req.valid := io.read.bits.way_en(w) && io.read.bits.rmask(r) && io.read.valid
io.resp(w)(r) := RegNext(array.io.r.resp.data(0))
io.resp(w)(r) := RegNext(array.io.r.resp.data(0)((r + 1) * encRowBits - 1, r * encRowBits))
// since we use a RAM of block width
// we must do full read and write
when (io.write.valid) {
assert (io.write.bits.wmask.andR)
// since we use a RAM of block width
// we must do full read and write
when (io.read.valid) {
assert (io.read.bits.rmask.andR)
// debug output
def dumpRead() = {
when (io.read.valid) {
......@@ -230,7 +242,7 @@ class L1plusCacheMetadataArray extends L1plusCacheModule {
for (i <- 0 until nWays) {
io.resp(i).valid := RegNext(valid_array(io.read.bits.idx)(i))
io.resp(i).valid := valid_array(RegNext(io.read.bits.idx))(i)
io.resp(i).tag := rtags(i)
......@@ -197,15 +197,22 @@ class DuplicatedDataArray extends AbstractDataArray
io.resp(j)(w)(r) := Cat((0 until rowWords).reverse map (k => resp(k)))
for (k <- 0 until rowWords) {
val array = Module(new SRAMTemplate(Bits(encWordBits.W), set=nSets, way=1,
shouldReset=false, holdRead=false, singlePort=singlePort))
val array = Module(new SRAMTemplate(
// data write
val wen = io.write.valid && io.write.bits.way_en(w) && io.write.bits.wmask(r)(k)
array.io.w.req.valid := wen
// data read
val ren = io.read(j).valid && io.read(j).bits.way_en(w) && io.read(j).bits.rmask(r)
......@@ -43,7 +43,7 @@ trait HasTlbConst extends HasXSParameter {
abstract class TlbBundle extends XSBundle with HasTlbConst
abstract class TlbModule extends XSModule with HasTlbConst
class PermBundle(val hasV: Boolean = true) extends TlbBundle {
class PtePermBundle extends TlbBundle {
val d = Bool()
val a = Bool()
val g = Bool()
......@@ -51,7 +51,6 @@ class PermBundle(val hasV: Boolean = true) extends TlbBundle {
val x = Bool()
val w = Bool()
val r = Bool()
if (hasV) { val v = Bool() }
override def toPrintable: Printable = {
p"d:${d} a:${a} g:${g} u:${u} x:${x} w:${w} r:${r}"// +
......@@ -59,6 +58,27 @@ class PermBundle(val hasV: Boolean = true) extends TlbBundle {
// Permission bits stored with each TLB entry: a page-fault flag plus the d/a/g/u/x/w/r bits
// that TlbEntry.apply copies field by field from a PtePermBundle.
class TlbPermBundle extends TlbBundle {
val pf = Bool() // NOTE: if this is true, just raise pf
val d = Bool()
val a = Bool()
val g = Bool()
val u = Bool()
val x = Bool()
val w = Bool()
val r = Bool()
// pma perm check
// val at = Bool() // Access Type
// val as = Bool() // Atomic Swap
// val al = Bool() // Atomic Logical
// val aa = Bool() // Atomic Arithmetic
// TODO: add pma check
// Prints every field as name:value, pf first.
override def toPrintable: Printable = {
p"pf:${pf} d:${d} a:${a} g:${g} u:${u} x:${x} w:${w} r:${r}"
class comBundle extends TlbBundle with HasCircularQueuePtrHelper{
val roqIdx = new RoqPtr
val valid = Bool()
......@@ -73,111 +93,94 @@ object Compare {
class TlbEntry extends TlbBundle {
val vpn = UInt(vpnLen.W) // tag is vpn
val ppn = UInt(ppnLen.W)
val level = UInt(log2Up(Level).W) // 2 for 4KB, 1 for 2MB, 0 for 1GB
// val asid = UInt(asidLen.W), asid maybe expensive to support, but useless
// val v = Bool() // v&g is special, may need separate storage?
val perm = new PermBundle(hasV = false)
def vpnHit(vpn: UInt):Bool = {
val fullMask = VecInit((Seq.fill(vpnLen)(true.B))).asUInt
val maskLevel = VecInit((Level-1 to 0 by -1).map{i => // NOTE: level 2 for 4KB, 1 for 2MB, 0 for 1GB
Reverse(VecInit(Seq.fill(vpnLen-i*vpnnLen)(true.B) ++ Seq.fill(i*vpnnLen)(false.B)).asUInt)})
val mask = maskLevel(level)
(mask&this.vpn) === (mask&vpn)
// def asidHit(asid: UInt) = {
// this.asid === asid
// }
// multi-read && single-write
// input is data, output is hot-code(not one-hot)
class CAMTemplate[T <: Data](val gen: T, val set: Int, val readWidth: Int) extends TlbModule {
val io = IO(new Bundle {
val r = new Bundle {
val req = Input(Vec(readWidth, gen))
val resp = Output(Vec(readWidth, UInt(set.W)))
val w = Flipped(ValidIO(new Bundle {
val index = UInt(log2Up(set).W)
val data = gen
def hit(vpn: UInt/*, asid: UInt*/):Bool = {
vpnHit(vpn) // && asidHit(asid)
val wordType = UInt(gen.getWidth.W)
val array = Reg(Vec(set, wordType))
def genTlbEntry(pte: UInt, level: UInt, vpn: UInt/*, asid: UInt*/) = {
val e = Wire(new TlbEntry)
e.ppn := pte.asTypeOf(pteBundle).ppn
e.level := level
e.vpn := vpn
e.perm := pte.asTypeOf(pteBundle).perm
// e.asid := asid
io.r.resp.zipWithIndex.map{ case (a,i) =>
a := VecInit(array.map(io.r.req(i).asUInt === _)).asUInt
override def toPrintable: Printable = {
p"vpn:0x${Hexadecimal(vpn)} ppn:0x${Hexadecimal(ppn)} level:${level} perm:${perm}"
when (io.w.valid) {
array(io.w.bits.index) := io.w.bits.data
class TlbEntires(num: Int, tagLen: Int) extends TlbBundle {
/* vpn can be divided into three parts */
// vpn: tagPart(17bit) + addrPart(8bit) + cutLenPart(2bit)
val cutLen = log2Up(num)
val tag = UInt(tagLen.W) // NOTE: high part of vpn
val level = UInt(log2Up(Level).W)
val ppns = Vec(num, UInt(ppnLen.W))
val perms = Vec(num, new PermBundle(hasV = false))
val vs = Vec(num, Bool())
def tagClip(vpn: UInt, level: UInt) = { // full vpn => tagLen
val tmp = Mux(level===0.U, Cat(vpn(vpnLen-1, vpnnLen*2+cutLen), 0.U(vpnnLen*2)),
Mux(level===1.U, Cat(vpn(vpnLen-1, vpnnLen*1+cutLen), 0.U(vpnnLen*1)),
Cat(vpn(vpnLen-1, vpnnLen*0+cutLen), 0.U(vpnnLen*0))))
tmp(tmp.getWidth-1, tmp.getWidth-tagLen)
class TlbEntryData extends TlbBundle {
val ppn = UInt(ppnLen.W)
val perm = new TlbPermBundle
// TODO: change perm to every kinds of pf check
// NOTE: get insize idx
def idxClip(vpn: UInt, level: UInt) = {
Mux(level===0.U, vpn(vpnnLen*2+cutLen-1, vpnnLen*2),
Mux(level===1.U, vpn(vpnnLen*1+cutLen-1, vpnnLen*1),
vpn(vpnnLen*0+cutLen-1, vpnnLen*0)))
override def toPrintable: Printable = {
p"ppn:0x${Hexadecimal(ppn)} perm:${perm}"
def hit(vpn: UInt) = {
(tag === tagClip(vpn, level)) && vs(idxClip(vpn, level)) && (level === 2.U)
class TlbEntry(superpage: Boolean = false) extends TlbBundle {
val tag = UInt(vpnLen.W) // tag is vpn
val level = if(superpage) Some(UInt(1.W)) else None // /*2 for 4KB,*/ 1 for 2MB, 0 for 1GB
val data = new TlbEntryData
def hit(vpn: UInt): Bool = {
if (superpage) {
val insideLevel = level.getOrElse(0.U)
val a = tag(vpnnLen*3-1, vpnnLen*2) === vpn(vpnnLen*3-1, vpnnLen*2)
val b = tag(vpnnLen*2-1, vpnnLen*1) === vpn(vpnnLen*2-1, vpnnLen*1)
XSDebug(Mux(insideLevel.asBool, a&b, a), p"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n")("TlbEntrySuperpage")
Mux(insideLevel.asBool, a&b, a)
} else {
XSDebug(tag === vpn, p"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n")("TlbEntryNormalpage")
tag === vpn
def genEntries(data: UInt, level: UInt, vpn: UInt): TlbEntires = {
require((data.getWidth / XLEN) == num,
"input data length must be multiple of pte length")
assert(level=/=3.U, "level should not be 3")
val ts = Wire(new TlbEntires(num, tagLen))
ts.tag := tagClip(vpn, level)
ts.level := level
for (i <- 0 until num) {
val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)
ts.ppns(i) := pte.ppn
ts.perms(i):= pte.perm // this.perms has no v
ts.vs(i) := !pte.isPf(level) && pte.isLeaf() // legal and leaf, store to l2Tlb
def ppn(vpn: UInt): UInt = {
if (superpage) {
val insideLevel = level.getOrElse(0.U)
Mux(insideLevel.asBool, Cat(data.ppn(data.ppn.getWidth-1, vpnnLen*1), vpn(vpnnLen*1-1, 0)),
Cat(data.ppn(data.ppn.getWidth-1, vpnnLen*2), vpn(vpnnLen*2-1, 0)))
} else {
def get(vpn: UInt): TlbEntry = {
val t = Wire(new TlbEntry())
val idx = idxClip(vpn, level)
t.vpn := vpn // Note: Use input vpn, not vpn in TlbL2
t.ppn := ppns(idx)
t.level := level
t.perm := perms(idx)
def apply(vpn: UInt, ppn: UInt, level: UInt, perm: UInt, pf: Bool) = {
this.tag := vpn
this.level.map(_ := level(0))
this.data.ppn := ppn
val ptePerm = perm.asTypeOf(new PtePermBundle)
this.data.perm.pf:= pf
this.data.perm.d := ptePerm.d
this.data.perm.a := ptePerm.a
this.data.perm.g := ptePerm.g
this.data.perm.u := ptePerm.u
this.data.perm.x := ptePerm.x
this.data.perm.w := ptePerm.w
this.data.perm.r := ptePerm.r
override def cloneType: this.type = (new TlbEntires(num, tagLen)).asInstanceOf[this.type]
override def toPrintable: Printable = {
require(num == 4, "if num is not 4, please comment this toPrintable")
// NOTE: if num is not 4, please comment this toPrintable
p"tag:${Hexadecimal(tag)} level:${level} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))}" +
p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} " +
p"perms(0):${perms(0)} perms(1):${perms(1)} perms(2):${perms(2)} perms(3):${perms(3)} vs:${Binary(vs.asUInt)}"
val insideLevel = level.getOrElse(0.U)
p"vpn:0x${Hexadecimal(tag)} level:${insideLevel} data:${data}"
override def cloneType: this.type = (new TlbEntry(superpage)).asInstanceOf[this.type]
object TlbCmd {
......@@ -185,10 +188,15 @@ object TlbCmd {
def write = "b01".U
def exec = "b10".U
def apply() = UInt(2.W)
def isRead(a: UInt) = a===read
def isWrite(a: UInt) = a===write
def isExec(a: UInt) = a===exec
def atom_read = "b100".U // lr
def atom_write = "b101".U // sc / amo
def apply() = UInt(3.W)
def isRead(a: UInt) = a(1,0)===read
def isWrite(a: UInt) = a(1,0)===write
def isExec(a: UInt) = a(1,0)===exec
def isAtom(a: UInt) = a(2)
class TlbReq extends TlbBundle {
......@@ -207,12 +215,18 @@ class TlbReq extends TlbBundle {
class TlbResp extends TlbBundle {
val paddr = UInt(PAddrBits.W)
val miss = Bool()
val mmio = Bool()
val excp = new Bundle {
val pf = new Bundle {
val ld = Bool()
val st = Bool()
val instr = Bool()
val af = new Bundle {
val ld = Bool()
val st = Bool()
val instr = Bool()
override def toPrintable: Printable = {
p"paddr:0x${Hexadecimal(paddr)} miss:${miss} excp.pf: ld:${excp.pf.ld} st:${excp.pf.st} instr:${excp.pf.instr}"
......@@ -267,34 +281,63 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
def widthMapSeq[T <: Seq[Data]](f: Int => T) = (0 until Width).map(f)
def widthMap[T <: Data](f: Int => T) = (0 until Width).map(f)
val v = RegInit(0.U(TlbEntrySize.W))
val pf = RegInit(0.U(TlbEntrySize.W)) // TODO: when ptw resp a pf(now only page not found), store here
val entry = Reg(Vec(TlbEntrySize, new TlbEntry))
val g = VecInit(entry.map(_.perm.g)).asUInt // TODO: need check if reverse is needed
// Normal page && Super page
val nv = RegInit(VecInit(Seq.fill(TlbEntrySize)(false.B)))
val nentry = Reg(Vec(TlbEntrySize, new TlbEntry(false)))
val sv = RegInit(VecInit(Seq.fill(TlbSPEntrySize)(false.B)))
val sentry = Reg(Vec(TlbSPEntrySize, new TlbEntry(true)))
val v = nv ++ sv
val entry = nentry ++ sentry
val g = VecInit(entry.map(_.data.perm.g))
val pf = VecInit(entry.zip(v).map{ case(e, vi) => e.data.perm.pf & vi })
* PTW refill
val refill = ptw.resp.fire()
val randIdx = LFSR64()(log2Up(TlbEntrySize)-1,0)
val priorIdx = PriorityEncoder(~(v|pf))
val tlbfull = ParallelAND((v|pf).asBools)
val refillIdx = Mux(tlbfull, randIdx, priorIdx)
val refillIdxOH = UIntToOH(refillIdx)
def randReplace(v: UInt) = {
val width = v.getWidth
val randIdx = LFSR64()(log2Up(width)-1, 0)
val priorIdx = PriorityEncoder(~(v))
val full = Cat(v).andR
Mux(full, randIdx, priorIdx)
when (refill) {
v := Mux(ptw.resp.bits.pf, v & ~refillIdxOH, v | refillIdxOH)
entry(refillIdx) := ptw.resp.bits.entry
XSDebug(p"Refill: idx:${refillIdx} entry:${ptw.resp.bits.entry}\n")
val resp = ptw.resp.bits
when (resp.entry.level === 2.U) {
val refillIdx = randReplace(nv.asUInt)
nv(refillIdx) := true.B
vpn = resp.entry.tag,
ppn = resp.entry.ppn,
level = resp.entry.level,
perm = VecInit(resp.entry.perm).asUInt,
pf = resp.pf
XSDebug(p"Refill normal: idx:${refillIdx} entry:${resp.entry} pf:${resp.pf}\n")
}.otherwise {
val refillIdx = randReplace(sv.asUInt)
sv(refillIdx) := true.B
vpn = resp.entry.tag,
ppn = resp.entry.ppn,
level = resp.entry.level,
perm = VecInit(resp.entry.perm).asUInt,
pf = resp.pf
XSDebug(p"Refill superpage: idx:${refillIdx} entry:${resp.entry} pf:${resp.pf}\n")
* L1 TLB read
val tlb_read_mask = Mux(refill, refillIdxOH, 0.U(TlbEntrySize.W))
def TLBRead(i: Int) = {
// val tlb_read_mask = Mux(refill, ((1<<(TlbEntrySize+TlbSPEntrySize))-1).U, 0.U((TlbEntrySize+TlbSPEntrySize).W))
def TLBNormalRead(i: Int) = {
val entryHitVec = (
if (isDtlb)
VecInit((tlb_read_mask.asBools zip entry).map{ case (r, e) => !r && e.hit(reqAddr(i).vpn/*, satp.asid*/)})
VecInit(entry.map{ e => ~refill && e.hit(reqAddr(i).vpn/*, satp.asid*/)})
VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
......@@ -304,26 +347,24 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val validReg = if (isDtlb) RegNext(valid(i)) else valid(i)
val entryHitVecReg = if (isDtlb) RegNext(entryHitVec) else entryHitVec
val hitVec = (v.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfHitVec = (pf.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val hitVec = (v zip entryHitVecReg).map{ case (a,b) => a&b }
val pfHitVec = (pf zip entryHitVecReg).map{ case (a,b) => a&b }
val pfArray = ParallelOR(pfHitVec).asBool && validReg && vmEnable
val hit = ParallelOR(hitVec).asBool && validReg && vmEnable && ~pfArray
val miss = !hit && validReg && vmEnable && ~pfArray
val hitppn = ParallelMux(hitVec zip entry.map(_.ppn))
val hitPerm = ParallelMux(hitVec zip entry.map(_.perm))
val hitLevel= ParallelMux(hitVec zip entry.map(_.level))
val hitppn = ParallelMux(hitVec zip entry.map(_.ppn(reqAddrReg.vpn)))
val hitPerm = ParallelMux(hitVec zip entry.map(_.data.perm))
XSDebug(valid(i), p"(${i.U}) entryHit:${Hexadecimal(entryHitVec.asUInt)}\n")
XSDebug(validReg, p"(${i.U}) entryHitReg:${Hexadecimal(entryHitVecReg.asUInt)} hitVec:${Hexadecimal(VecInit(hitVec).asUInt)} pfHitVec:${Hexadecimal(VecInit(pfHitVec).asUInt)} pfArray:${Hexadecimal(pfArray.asUInt)} hit:${hit} miss:${miss} hitppn:${Hexadecimal(hitppn)} hitPerm:${hitPerm}\n")
val multiHit = {
val hitSum = PopCount(hitVec)
val pfHitSum = PopCount(pfHitVec)
!(hitSum===0.U || hitSum===1.U) || !(pfHitSum===0.U || pfHitSum===1.U)
!(hitSum===0.U || hitSum===1.U)
// resp // TODO: A/D bits are not handled yet
val paddr = LookupTreeDefault(hitLevel, Cat(hitppn, reqAddrReg.off), List(
0.U -> Cat(hitppn(ppnLen - 1, 2*vpnnLen), reqAddrReg.vpn(2*vpnnLen - 1, 0), reqAddrReg.off),
1.U -> Cat(hitppn(ppnLen - 1, vpnnLen), reqAddrReg.vpn(vpnnLen - 1, 0), reqAddrReg.off),
2.U -> Cat(hitppn, reqAddrReg.off)
val paddr = Cat(hitppn, reqAddrReg.off)
val vaddr = SignExt(req(i).bits.vaddr, PAddrBits)
req(i).ready := resp(i).ready
......@@ -341,10 +382,16 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
resp(i).bits.excp.pf.st := stPf || update
resp(i).bits.excp.pf.instr := instrPf || update
val (pmaMode, accessWidth) = AddressSpace.memmapAddrMatch(resp(i).bits.paddr)
resp(i).bits.mmio := Mux(TlbCmd.isExec(cmdReg), !PMAMode.icache(pmaMode), !PMAMode.dcache(pmaMode))
resp(i).bits.excp.af.ld := Mux(TlbCmd.isAtom(cmdReg), !PMAMode.atomic(pmaMode), !PMAMode.read(pmaMode)) && TlbCmd.isRead(cmdReg)
resp(i).bits.excp.af.st := Mux(TlbCmd.isAtom(cmdReg), !PMAMode.atomic(pmaMode), !PMAMode.write(pmaMode)) && TlbCmd.isWrite(cmdReg)
resp(i).bits.excp.af.instr := Mux(TlbCmd.isAtom(cmdReg), false.B, !PMAMode.execute(pmaMode))
(hit, miss, pfHitVec, multiHit)
val readResult = (0 until Width).map(TLBRead(_))
val readResult = (0 until Width).map(TLBNormalRead(_))
val hitVec = readResult.map(res => res._1)
val missVec = readResult.map(res => res._2)
val pfHitVecVec = readResult.map(res => res._3)
......@@ -352,12 +399,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val hasMissReq = Cat(missVec).orR
// ptw
val state_idle :: state_wait :: Nil = Enum(2)
val state = RegInit(state_idle)
ptw <> DontCare // TODO: need check it
ptw.req.valid := hasMissReq && state===state_idle && !sfence.valid
ptw.resp.ready := state===state_wait
val waiting = RegInit(false.B)
when (ptw.req.fire()) {
waiting := true.B
}.elsewhen (sfence.valid || ptw.resp.valid) {
waiting := false.B
// ptw <> DontCare // TODO: need check it
ptw.req.valid := hasMissReq && !sfence.valid && !waiting && !RegNext(refill)
ptw.resp.ready := waiting
// val ptwReqSeq = Wire(Seq.fill(Width)(new comBundle()))
val ptwReqSeq = Seq.fill(Width)(Wire(new comBundle()))
......@@ -368,82 +418,49 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
ptw.req.bits := Compare(ptwReqSeq).bits
switch (state) {
is (state_idle) {
when (hasMissReq && ptw.req.fire()) {
state := state_wait
is (state_wait) {
when (ptw.resp.fire()) {
state := state_idle
// reset pf when pf hit
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVecVec(i)).asUInt, 0.U) })
val pfHitRefill = false.B//ParallelOR(pfHitReset.asBools)
// pf update
when (refill) {
when (pfHitRefill) {
pf := Mux(ptw.resp.bits.pf, pf | refillIdxOH, pf & ~refillIdxOH) & ~pfHitReset
} .otherwise {
pf := Mux(ptw.resp.bits.pf, pf | refillIdxOH, pf & ~refillIdxOH)
} .otherwise {
when (pfHitRefill) {
pf := pf & ~pfHitReset
when (PopCount(pf) > 10.U) { // when too much pf, just clear
pf := Mux(refill && ptw.resp.bits.pf, refillIdxOH, 0.U)
val tooManyPf = PopCount(pf) > 5.U
when (tooManyPf) { // when too much pf, just clear
XSDebug(p"Too many pf just flush all the pf v:${Hexadecimal(VecInit(v).asUInt)} pf:${Hexadecimal(pf.asUInt)}\n")
v.zipWithIndex.map{ case (a, i) => a := a & !pf(i) }
// sfence (flush)
when (sfence.valid) {
state := state_idle
ptw.req.valid := false.B
when (sfence.bits.rs1) { // virtual address *.rs1 <- (rs1===0.U)
when (sfence.bits.rs2) { // asid, but i do not want to support asid, *.rs2 <- (rs2===0.U)
// all addr and all asid
v := 0.U
pf := 0.U
v.map(_ := false.B)
}.otherwise {
// all addr but specific asid
v := v & g // TODO: need check if reverse is needed
pf := pf & g
v.zipWithIndex.map{ case (a,i) => a := a & g(i) }
}.otherwise {
val sfenceVpn = sfence.bits.addr.asTypeOf(vaBundle).vpn
when (sfence.bits.rs2) {
// specific addr but all asid
v := v & ~VecInit(entry.map(_.hit(sfence.bits.addr.asTypeOf(vaBundle).vpn))).asUInt
pf := pf & ~VecInit(entry.map(_.hit(sfence.bits.addr.asTypeOf(vaBundle).vpn))).asUInt
v.zipWithIndex.map{ case (a,i) => a := a & !entry(i).hit(sfenceVpn) }
}.otherwise {
// specific addr and specific asid
v := v & ~VecInit(entry.map(e => e.hit(sfence.bits.addr.asTypeOf(vaBundle).vpn) && (/*e.asid === sfence.bits.asid && */!e.perm.g))).asUInt
pf := pf & ~VecInit(entry.map(e => e.hit(sfence.bits.addr.asTypeOf(vaBundle).vpn) && (/*e.asid === sfence.bits.asid && */!e.perm.g))).asUInt
v.zipWithIndex.map{ case (a,i) => a := a & !(entry(i).hit(sfenceVpn) && !g(i))}
if (!env.FPGAPlatform && isDtlb) {
ExcitingUtils.addSource(valid(0)/* && vmEnable*/, "perfCntDtlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/, "perfCntDtlbReqCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/, "perfCntDtlbReqCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/, "perfCntDtlbReqCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && missVec(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && missVec(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && missVec(3), "perfCntDtlbMissCnt3", Perf)
ExcitingUtils.addSource(valid(0) && vmEnable, "perfCntDtlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(1) && vmEnable, "perfCntDtlbReqCnt1", Perf)
ExcitingUtils.addSource(valid(2) && vmEnable, "perfCntDtlbReqCnt2", Perf)
ExcitingUtils.addSource(valid(3) && vmEnable, "perfCntDtlbReqCnt3", Perf)
ExcitingUtils.addSource(valid(0) && vmEnable && missVec(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1) && vmEnable && missVec(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2) && vmEnable && missVec(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3) && vmEnable && missVec(3), "perfCntDtlbMissCnt3", Perf)
if (!env.FPGAPlatform && !isDtlb) {
ExcitingUtils.addSource(valid(0)/* && vmEnable*/, "perfCntItlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntItlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(0) && vmEnable, "perfCntItlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(0) && vmEnable && missVec(0), "perfCntItlbMissCnt0", Perf)
// Log
......@@ -454,35 +471,20 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
XSDebug(sfence.valid, p"Sfence: ${sfence}\n")
XSDebug(ParallelOR(valid)|| ptw.resp.valid, p"CSR: ${csr}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(VecInit(v).asUInt)} pf:${Hexadecimal(pf.asUInt)}\n")
XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n")
XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n")
// // assert check, can be removed once the tlb works
// for(i <- 0 until Width) {
// assert((hit(i)&pfArray(i))===false.B, "hit(%d):%d pfArray(%d):%d v:0x%x pf:0x%x", i.U, hit(i), i.U, pfArray(i), v, pf)
// }
// for(i <- 0 until Width) {
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVecVec:0x${Hexadecimal(VecInit(pfHitVecVec(i)).asUInt)}\n")
// }
// for(i <- 0 until TlbEntrySize) {
// XSDebug(multiHit, p"entry(${i.U}): v:${v(i)} ${entry(i)}\n")
// }
// assert(!multiHit) // add multiHit here, later it should be removed (maybe), turn to miss and flush
// for (i <- 0 until Width) {
// XSDebug(resp(i).valid && hit(i) && !(req(i).bits.vaddr===resp(i).bits.paddr), p"vaddr:0x${Hexadecimal(req(i).bits.vaddr)} paddr:0x${Hexadecimal(resp(i).bits.paddr)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)}}\n")
// when (resp(i).valid && hit(i) && !(req(i).bits.vaddr===resp(i).bits.paddr)) {
// for (j <- 0 until TlbEntrySize) {
// XSDebug(true.B, p"TLBEntry(${j.U}): v:${v(j)} ${entry(j)}\n")
// }
// } // FIXME: remove me when tlb may be ok
// when(resp(i).valid && hit(i)) {
// assert(req(i).bits.vaddr===resp(i).bits.paddr, "vaddr:0x%x paddr:0x%x hitVec:%x ", req(i).bits.vaddr, resp(i).bits.paddr, VecInit(hitVec(i)).asUInt)
// } // FIXME: remove me when tlb may be ok
// }
// assert((v&pf)===0.U, "v and pf can't be true at same time: v:0x%x pf:0x%x", v, pf)
// // NOTE: just for simple tlb debug, comment it after tlb's debug
// for (i <- 0 until Width) {
// if(isDtlb) {
// XSDebug(!(!vmEnable || RegNext(req(i).bits.vaddr)===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR), p"Dtlb: vaddr:${Hexadecimal(RegNext(req(i).bits.vaddr))} paddr:${Hexadecimal(resp(i).bits.paddr)} should be equal\n")
// assert(!vmEnable || RegNext(req(i).bits.vaddr)===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR)
// } else {
// XSDebug(!(!vmEnable || req(i).bits.vaddr===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR), p"Itlb: vaddr:${Hexadecimal(RegNext(req(i).bits.vaddr))} paddr:${Hexadecimal(resp(i).bits.paddr)} should be equal\n")
// assert(!vmEnable || req(i).bits.vaddr===resp(i).bits.paddr || !resp(i).valid || resp(i).bits.miss || Cat(VecInit(resp(i).bits.excp.pf).asUInt).orR)
// }
// }
object TLB {
......@@ -7,6 +7,7 @@ import xiangshan._
import xiangshan.frontend._
import utils._
import chisel3.ExcitingUtils._
import bus.tilelink.TLParameters
case class ICacheParameters(
nSets: Int = 64,
......@@ -27,22 +28,34 @@ case class ICacheParameters(
def replacement = new RandomReplacement(nWays)
trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst {
trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst with HasInstrMMIOConst {
val cacheParams = icacheParameters
val groupAlign = log2Up(cacheParams.blockBytes)
val packetInstNum = packetBytes/instBytes
val packetInstNumBit = log2Up(packetInstNum)
val ptrHighBit = log2Up(groupBytes) - 1
val ptrLowBit = log2Up(packetBytes)
val encUnitBits = 8
val bankRows = 2
val bankBits = bankRows * rowBits
val nBanks = blockRows/bankRows
val bankUnitNum = (bankBits / encUnitBits)
def accessBorder = 0x80000000L
def cacheID = 0
def insLen = if (HasCExtension) 16 else 32
def RVCInsLen = 16
def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
def encRowBits = cacheParams.dataCode.width(rowBits)
def encTagBits = cacheParams.tagCode.width(tagBits)
// def encRowBits = cacheParams.dataCode.width(rowBits)
// def encTagBits = cacheParams.tagCode.width(tagBits)
def encMetaBits = cacheParams.tagCode.width(tagBits)
def metaEntryBits = encMetaBits
def encDataBits = cacheParams.dataCode.width(encUnitBits)
def dataEntryBits = encDataBits * bankUnitNum
// def encDataBits
// def encCacheline
require(isPow2(nSets), s"nSets($nSets) must be pow2")
require(isPow2(nWays), s"nWays($nWays) must be pow2")
......@@ -52,12 +65,18 @@ trait HasICacheParameters extends HasL1CacheParameters with HasIFUConst {
require(pgIdxBits >= untagBits, s"page aliasing problem: pgIdxBits($pgIdxBits) < untagBits($untagBits)")
// Bit positions of the front-end exceptions inside the ICache exception vector
// (used to index icacheExceptionVec in stage 2 and s3_exception_vec in stage 3).
trait HasFrontEndExceptionNo {
// index of the instruction access-fault flag (driven from tlb resp excp.af.instr)
def accessFault = 0
// index of the instruction page-fault flag (driven from tlb resp excp.pf.instr)
def pageFault = 1
abstract class ICacheBundle extends XSBundle
with HasICacheParameters
abstract class ICacheModule extends XSModule
with HasICacheParameters
with ICacheBase
with HasFrontEndExceptionNo
abstract class ICacheArray extends XSModule
with HasICacheParameters
......@@ -65,15 +84,6 @@ abstract class ICacheArray extends XSModule
abstract class ICachArray extends XSModule
with HasICacheParameters
// sealed class ICacheMetaBundle extends ICacheBundle
// {
// val tag = UInt(tagBits.W)
// }
// sealed class ICacheDataBundle extends ICacheBundle
// {
// val data = UInt(encRowBits.W)
// }
class ICacheReq extends ICacheBundle
......@@ -85,6 +95,7 @@ class ICacheResp extends ICacheBundle
val pc = UInt(VAddrBits.W)
val data = UInt((FetchWidth * 32).W)
val mmio = Bool()
val mask = UInt(PredictWidth.W)
val ipf = Bool()
val acf = Bool()
......@@ -97,6 +108,9 @@ class ICacheIO extends ICacheBundle
val resp = DecoupledIO(new ICacheResp)
val mem_acquire = DecoupledIO(new L1plusCacheReq)
val mem_grant = Flipped(DecoupledIO(new L1plusCacheResp))
val mmio_acquire = DecoupledIO(new InsUncacheReq)
val mmio_grant = Flipped(DecoupledIO(new InsUncacheResp))
val mmio_flush = Output(Bool())
val prefetchTrainReq = ValidIO(new IcacheMissReq)
val tlb = new BlockTlbRequestIO
val flush = Input(UInt(2.W))
......@@ -177,23 +191,35 @@ class ICacheMetaArray extends ICachArray
val readResp = Output(Vec(nWays,UInt(tagBits.W)))
val metaArray = Module(new SRAMTemplate(UInt(encTagBits.W), set=nSets, way=nWays, shouldReset = true))
val metaArray = Module(new SRAMTemplate(
shouldReset = true,
singlePort = true
// read
//do Parity decoding after way choose
// do not read and write in the same cycle: when write SRAM disable read
val readNextReg = RegNext(io.read.fire())
val rtags = metaArray.io.r.resp.asTypeOf(Vec(nWays,UInt(encMetaBits.W)))
val rtags_decoded = rtags.map{ wtag =>cacheParams.dataCode.decode(wtag)}
val rtags_wrong = rtags_decoded.map{ wtag_decoded => wtag_decoded.uncorrectable}
//assert(readNextReg && !ParallelOR(rtags_wrong))
val rtags_corrected = VecInit(rtags_decoded.map{ wtag_decoded => wtag_decoded.corrected})
metaArray.io.r.req.valid := io.read.valid
io.read.ready := metaArray.io.r.req.ready
io.write.ready := DontCare
io.read.ready := !io.write.valid
io.readResp := rtags_corrected.asTypeOf(Vec(nWays,UInt(tagBits.W)))
val rtag = metaArray.io.r.resp.asTypeOf(Vec(nWays,UInt(encTagBits.W)))
val tag_encoded = VecInit(rtag.map(wtag => cacheParams.tagCode.decode(wtag).corrected))
io.readResp :=tag_encoded.asTypeOf(Vec(nWays,UInt(tagBits.W)))
val write = io.write.bits
val wdata_encoded = cacheParams.tagCode.encode(write.phyTag.asUInt)
val wtag_encoded = cacheParams.tagCode.encode(write.phyTag.asUInt)
metaArray.io.w.req.valid := io.write.valid
metaArray.io.w.req.bits.apply(data=wdata_encoded, setIdx=write.virIdx, waymask=write.waymask)
metaArray.io.w.req.bits.apply(data=wtag_encoded, setIdx=write.virIdx, waymask=write.waymask)
io.write.ready := DontCare
......@@ -202,36 +228,77 @@ class ICacheDataArray extends ICachArray
val io=IO{new Bundle{
val write = Flipped(DecoupledIO(new ICacheDataWriteBundle))
val read = Flipped(DecoupledIO(UInt(idxBits.W)))
val readResp = Output(Vec(blockWords,Vec(nWays,UInt(encRowBits.W))))
val readResp = Output(Vec(nWays,Vec(blockRows,UInt(rowBits.W))))
val dataArray = List.fill(blockWords){ Module(new SRAMTemplate(UInt(encRowBits.W), set=nSets, way = nWays))}
//do ECC decoding after way choose
for(b <- 0 until blockWords){
dataArray(b).io.r.req.valid := io.read.valid
//dataEntryBits = 144
val dataArray = List.fill(nWays){List.fill(nBanks){Module(new SRAMTemplate(
way = 1,
singlePort = true
// read
// do Parity decoding after way choose
// do not read and write in the same cycle: when write SRAM disable read
val readNextReg = RegNext(io.read.fire())
val rdatas = VecInit((0 until nWays).map( w =>
VecInit( (0 until nBanks).map( b =>
dataArray(w)(b).io.r.resp.asTypeOf(Vec( bankUnitNum, UInt(encDataBits.W)))
for(w <- 0 until nWays){
for(b <- 0 until nBanks){
dataArray(w)(b).io.r.req.valid := io.read.valid
val rdatas_decoded = rdatas.map{wdata => wdata.map{ bdata => bdata.map{ unit => cacheParams.dataCode.decode(unit)}}}
val rdata_corrected = VecInit((0 until nWays).map{ w =>
VecInit((0 until nBanks).map{ b =>
VecInit((0 until bankUnitNum).map{ i =>
(0 until nWays).map{ w =>
(0 until blockRows).map{ r =>
io.readResp(w)(r) := Cat(
(0 until bankUnitNum/2).map{ i =>
//println("result: ",r,i)
rdata_corrected(w)(r >> 1)((r%2) * 8 + i).asUInt
}.reverse )
val dataArrayReadyVec = dataArray.map(b => b.io.r.req.ready)
io.read.ready := ParallelOR(dataArrayReadyVec)
io.write.ready := DontCare
io.readResp := VecInit(dataArray.map(b => b.io.r.resp.asTypeOf(Vec(nWays,UInt(encRowBits.W)))))
io.read.ready := !io.write.valid
val write = io.write.bits
val write_data = write.data.asTypeOf(Vec(blockWords,UInt(rowBits.W)))
val write_data_encoded = write_data.map(wdata => cacheParams.tagCode.encode(wdata))
val write_way = OHToUInt(write.waymask)
val write_data = write.data.asTypeOf(Vec(nBanks,Vec( bankUnitNum, UInt(encUnitBits.W))))
val write_data_encoded = write_data.map(b => b.map{ unit => cacheParams.dataCode.encode(unit) } )
val write_bank_data = Wire(Vec(nBanks,UInt((dataEntryBits).W)))
(0 until nBanks).map{ b =>
write_bank_data(b) := Cat(
(0 until bankUnitNum).map{ i =>
}.reverse )
for(b <- 0 until blockWords){
dataArray(b).io.w.req.valid := io.write.valid
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
for(w <- 0 until nWays){
for(b <- 0 until nBanks){
dataArray(w)(b).io.w.req.valid := io.write.valid && w.U === write_way
dataArray(w)(b).io.w.req.bits.setIdx := write.virIdx
dataArray(w)(b).io.w.req.bits.data := write_bank_data(b)
io.write.ready := DontCare
/* ------------------------------------------------------------
......@@ -243,10 +310,10 @@ class ICache extends ICacheModule
// cut a cacheline into a fetch packet
def cutHelper(sourceVec: Vec[UInt], pc: UInt, mask: UInt): UInt = {
val sourceVec_inst = Wire(Vec(blockWords*wordBytes/instBytes,UInt(insLen.W)))
(0 until blockWords).foreach{ i =>
(0 until wordBytes/instBytes).foreach{ j =>
sourceVec_inst(i*wordBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
val sourceVec_inst = Wire(Vec(blockRows*rowBytes/instBytes,UInt(insLen.W)))
(0 until blockRows).foreach{ i =>
(0 until rowBytes/instBytes).foreach{ j =>
sourceVec_inst(i*rowBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
......@@ -257,6 +324,23 @@ class ICache extends ICacheModule
// Cut the data returned by an MMIO (uncached) fetch into a fetch packet.
//   sourceVec: the MMIO beats, one UInt per beat (mmioBeats entries are read)
//   pc:        address of the request, used to pick the first instruction slot
//   mask:      instruction-valid mask before alignment to `pc`
// Returns a pair (packet bits, mask shifted right by the start slot).
def cutHelperMMIO(sourceVec: Vec[UInt], pc: UInt, mask: UInt) = {
// View the beats as a flat vector of insLen-bit instruction slots.
val sourceVec_inst = Wire(Vec(mmioBeats * mmioBusBytes/instBytes,UInt(insLen.W)))
(0 until mmioBeats).foreach{ i =>
(0 until mmioBusBytes/instBytes).foreach{ j =>
sourceVec_inst(i*mmioBusBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
// Output packet defaults to all zeros; one insLen-bit entry per predicted slot.
val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
// NOTE(review): insLen is a width in bits (16 or 32, see HasICacheParameters), so this
// shifts pc by log2(bits per instruction). If pc is a byte address, the slot index would
// normally be pc >> log2(instBytes) -- TODO confirm the intended shift amount.
val insLenLog = log2Ceil(insLen)
// Index of the first slot to emit, truncated to the width needed to address sourceVec_inst.
val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0)
// Align the mask with the selected start slot.
val outMask = mask >> start
(0 until PredictWidth ).foreach{ i =>
// Slots whose mask bit is clear are forced to zero.
cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U)
(cutPacket.asUInt, outMask.asUInt)
// generate the one hot code according to a UInt between 0-8
def PriorityMask(sourceVec: UInt) : UInt = {
val oneHot = Mux(sourceVec >= 8.U, "b1000".U,
......@@ -299,25 +383,25 @@ class ICache extends ICacheModule
// Stage 2
// Stage 2
val s2_idx = get_idx(s2_req_pc)
val s2_tlb_resp = WireInit(io.tlb.resp.bits)
val s2_tag = get_tag(s2_tlb_resp.paddr)
val s2_hit = WireInit(false.B)
val s2_access_fault = WireInit(false.B)
val s2_allValid = s2_valid && io.tlb.resp.valid
val s2_mmio = WireInit(false.B)
s2_fire := s2_allValid && s3_ready
s2_ready := s3_ready || !s2_valid
when(s1_fire) { s2_valid := true.B }
.elsewhen(s2_flush) { s2_valid := false.B }
.elsewhen(s2_fire) { s2_valid := false.B }
//physical address < 0x80000000
//TODO: May have bugs
s2_access_fault := (s2_tlb_resp.paddr < accessBorder.U) && s2_valid
// SRAM (Meta and Data) read response
val metas = metaArray.io.readResp
// TODO: parity error exception
val metas = metaArray.io.readResp
val datas =RegEnable(next=dataArray.io.readResp, enable=s2_fire)
val validMeta = Cat((0 until nWays).map{w => validArray(Cat(s2_idx, w.U(log2Ceil(nWays).W)))}.reverse).asUInt
......@@ -329,15 +413,26 @@ class ICache extends ICacheModule
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = PriorityMask(invalidVec)
val waymask = Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))
s2_hit := ParallelOR(hitVec) || s2_tlb_resp.excp.pf.instr || s2_access_fault
s2_ready := s3_ready || !s2_valid
//deal with icache exception
val icacheExceptionVec = Wire(Vec(8,Bool()))
val hasIcacheException = icacheExceptionVec.asUInt().orR()
icacheExceptionVec := DontCare
icacheExceptionVec(accessFault) := s2_tlb_resp.excp.af.instr && s2_allValid
icacheExceptionVec(pageFault) := s2_tlb_resp.excp.pf.instr && s2_allValid
s2_mmio := s2_valid && io.tlb.resp.valid && s2_tlb_resp.mmio && !hasIcacheException
s2_hit := s2_valid && ParallelOR(hitVec)
val waymask = Mux(hasIcacheException,1.U(nWays.W),Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask)))
XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b acf:%d\n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_access_fault)
assert(!(s2_hit && s2_mmio),"MMIO address should not hit in icache")
XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b mmio:%d \n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_mmio)
XSDebug("[Stage 2] exception: af:%d pf:%d \n",icacheExceptionVec(accessFault),icacheExceptionVec(pageFault))
XSDebug(p"[Stage 2] tlb req: v ${io.tlb.req.valid} r ${io.tlb.req.ready} ${io.tlb.req.bits}\n")
XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
XSDebug("[Stage 2] tag: %x hit:%d\n",s2_tag,s2_hit)
XSDebug("[Stage 2] tag: %x hit:%d mmio:%d\n",s2_tag,s2_hit,s2_mmio)
XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
......@@ -348,28 +443,21 @@ class ICache extends ICacheModule
val s3_data = datas
val s3_tag = RegEnable(s2_tag, s2_fire)
val s3_hit = RegEnable(next=s2_hit,init=false.B,enable=s2_fire)
val s3_mmio = RegEnable(next=s2_mmio,init=false.B,enable=s2_fire)
val s3_wayMask = RegEnable(next=waymask,init=0.U,enable=s2_fire)
val s3_miss = s3_valid && !s3_hit
val s3_idx = get_idx(s3_req_pc)
val s3_access_fault = RegEnable(s2_access_fault,init=false.B,enable=s2_fire)
val s3_exception_vec = RegEnable(next= icacheExceptionVec,init=0.U.asTypeOf(Vec(8,Bool())), enable=s2_fire)
val s3_has_exception = s3_exception_vec.asUInt.orR
val s3_miss = s3_valid && !s3_hit && !s3_mmio && !s3_has_exception
when(s3_flush) { s3_valid := false.B }
.elsewhen(s2_fire && !s2_flush) { s3_valid := true.B }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
val refillDataReg = Reg(Vec(refillCycles,UInt(beatBits.W)))
// icache hit
// data ECC encoding
// data Parity encoding
// simply cut the hit cacheline
val dataHitWay = VecInit(s3_data.map(b => Mux1H(s3_wayMask,b).asUInt))
val dataHitWay = Mux1H(s3_wayMask,s3_data)
val outPacket = Wire(UInt((FetchWidth * 32).W))
val dataHitWayDecoded = VecInit(
(0 until blockWords).map{r =>
val row = dataHitWay.asTypeOf(Vec(blockWords,UInt(encRowBits.W)))(r)
val decodedRow = cacheParams.dataCode.decode(row)
assert(!(s3_valid && s3_hit && decodedRow.uncorrectable))
outPacket := cutHelper(dataHitWay,s3_req_pc.asUInt,s3_req_mask.asUInt)
......@@ -378,13 +466,13 @@ class ICache extends ICacheModule
val icacheMissQueue = Module(new IcacheMissQueue)
val blocking = RegInit(false.B)
val isICacheResp = icacheMissQueue.io.resp.valid && icacheMissQueue.io.resp.bits.clientID === cacheID.U(2.W)
icacheMissQueue.io.req.valid := s3_miss && !s3_flush && !blocking//TODO: specificate flush condition
icacheMissQueue.io.req.valid := s3_miss && !s3_has_exception && !s3_flush && !blocking//TODO: specificate flush condition
icacheMissQueue.io.resp.ready := io.resp.ready
icacheMissQueue.io.flush := s3_flush
when(icacheMissQueue.io.req.fire()){blocking := true.B}
.elsewhen(blocking && ((icacheMissQueue.io.resp.fire() && isICacheResp) || s3_flush) ){blocking := false.B}
when(icacheMissQueue.io.req.fire() || io.mmio_acquire.fire()){blocking := true.B}
.elsewhen(blocking && ((icacheMissQueue.io.resp.fire() && isICacheResp) || io.mmio_grant.fire() || s3_flush) ){blocking := false.B}
XSDebug(blocking && s3_flush,"check for icache non-blocking")
//cache flush register
......@@ -421,66 +509,74 @@ class ICache extends ICacheModule
//icache flush: only flush valid Array register
when(icacheFlush){ validArray := 0.U }
val refillDataVec = icacheMissQueue.io.resp.bits.data.asTypeOf(Vec(blockWords,UInt(wordBits.W)))
val refillDataVec = icacheMissQueue.io.resp.bits.data.asTypeOf(Vec(blockRows,UInt(wordBits.W)))
val refillDataOut = cutHelper(refillDataVec, s3_req_pc,s3_req_mask )
s3_ready := ((io.resp.ready && s3_hit || !s3_valid) && !blocking) || (blocking && icacheMissQueue.io.resp.valid && io.resp.ready)
val is_same_cacheline = s3_miss && s2_valid && (groupAligned(s2_req_pc) ===groupAligned(s3_req_pc))
val useRefillReg = RegNext(is_same_cacheline && icacheMissQueue.io.resp.fire())
val refillDataVecReg = RegEnable(next=refillDataVec, enable= (is_same_cacheline && icacheMissQueue.io.resp.fire()))
val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W)))
val mmio_packet = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._1
val mmio_mask = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._2
XSDebug("mmio data %x\n", mmio_packet)
s3_ready := ((io.resp.ready && s3_hit || !s3_valid) && !blocking) || (blocking && ((icacheMissQueue.io.resp.fire()) || io.mmio_grant.fire()))
val pds = Seq.fill(nWays)(Module(new PreDecode))
for (i <- 0 until nWays) {
val wayResp = Wire(new ICacheResp)
val wayData = cutHelper(VecInit(s3_data.map(b => b(i).asUInt)), s3_req_pc, s3_req_mask)
val refillData = cutHelper(refillDataVec, s3_req_pc,s3_req_mask)
val wayData = cutHelper(s3_data(i), s3_req_pc, s3_req_mask)
val refillData = Mux(useRefillReg,cutHelper(refillDataVecReg, s3_req_pc,s3_req_mask),cutHelper(refillDataVec, s3_req_pc,s3_req_mask))
wayResp.pc := s3_req_pc
wayResp.data := Mux(s3_valid && s3_hit, wayData, refillData)
wayResp.mask := s3_req_mask
wayResp.ipf := s3_tlb_resp.excp.pf.instr
wayResp.acf := s3_access_fault
wayResp.data := Mux(s3_valid && s3_hit, wayData, Mux(s3_mmio ,mmio_packet ,refillData))
wayResp.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
wayResp.ipf := s3_exception_vec(pageFault)
wayResp.acf := s3_exception_vec(accessFault)
wayResp.mmio := s3_mmio
pds(i).io.in := wayResp
pds(i).io.prev <> io.prev
pds(i).io.prev_pc := io.prev_pc
// if a fetch packet triggers page fault, set the pf instruction to nop
when ((!(HasCExtension.B) || io.prev.valid) && s3_tlb_resp.excp.pf.instr ) {
val instrs = Wire(Vec(FetchWidth, UInt(32.W)))
(0 until FetchWidth).foreach(i => instrs(i) := ZeroExt("b0010011".U, 32)) // nop
pds(i).io.in.data := instrs.asUInt
}.elsewhen (HasCExtension.B && io.prev.valid && (io.prev_ipf || s3_tlb_resp.excp.pf.instr)) {
pds(i).io.prev.bits := ZeroExt("b0010011".U, 16)
val instrs = Wire(Vec(FetchWidth, UInt(32.W)))
(0 until FetchWidth).foreach(i => instrs(i) := Cat(ZeroExt("b0010011".U, 16), Fill(16, 0.U(1.W))))
pds(i).io.in.data := instrs.asUInt
// if a fetch packet triggers page fault, at least send a valid instruction
io.pd_out := Mux1H(s3_wayMask, pds.map(_.io.out))
val s3_noHit = s3_wayMask === 0.U
//TODO: coherence
XSDebug("[Stage 3] valid:%d pc: 0x%x mask: %b ipf:%d acf:%d \n",s3_valid,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr,s3_access_fault)
XSDebug("[Stage 3] valid:%d miss:%d pc: 0x%x mmio :%d mask: %b ipf:%d\n",s3_valid, s3_miss,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr, s3_mmio)
XSDebug("[Stage 3] hit:%d miss:%d waymask:%x blocking:%d\n",s3_hit,s3_miss,s3_wayMask.asUInt,blocking)
XSDebug("[Stage 3] tag: %x idx: %d\n",s3_tag,get_idx(s3_req_pc))
XSDebug(p"[Stage 3] tlb resp: ${s3_tlb_resp}\n")
XSDebug("[mem_acquire] valid:%d ready:%d\n",io.mem_acquire.valid,io.mem_acquire.ready)
XSDebug("[mem_grant] valid:%d ready:%d data:%x id:%d \n",io.mem_grant.valid,io.mem_grant.ready,io.mem_grant.bits.data,io.mem_grant.bits.id)
XSDebug("[Stage 3] ---------Hit Way--------- \n")
for(i <- 0 until blockWords){
for(i <- 0 until blockRows){
XSDebug("[Stage 3] %x\n",dataHitWay(i))
XSDebug("[Stage 3] outPacket :%x\n",outPacket)
XSDebug("[Stage 3] refillDataOut :%x\n",refillDataOut)
XSDebug("[Stage 3] refillDataOutVec :%x startPtr:%d\n",refillDataVec.asUInt, s3_req_pc(5,1).asUInt)
// Out Put
//icache request
io.req.ready := s2_ready
io.req.ready := s2_ready && metaArray.io.read.ready && dataArray.io.read.ready
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || icacheMissQueue.io.resp.valid)
io.resp.bits.data := Mux((s3_valid && s3_hit),outPacket,refillDataOut)
io.resp.bits.mask := s3_req_mask
io.resp.valid := s3_valid && (s3_hit || s3_has_exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid)
io.resp.bits.data := Mux(s3_mmio,mmio_packet,Mux((s3_valid && s3_hit),outPacket,refillDataOut))
io.resp.bits.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
io.resp.bits.pc := s3_req_pc
io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr
io.resp.bits.acf := s3_access_fault
io.resp.bits.acf := s3_exception_vec(accessFault)
io.resp.bits.mmio := s3_mmio
//to itlb
io.tlb.resp.ready := true.B // DontCare
......@@ -499,6 +595,15 @@ class ICache extends ICacheModule
io.prefetchTrainReq.bits := DontCare
io.prefetchTrainReq.bits.addr := groupPC(s3_tlb_resp.paddr)
//To icache Uncache
io.mmio_acquire.valid := s3_mmio && s3_valid
io.mmio_acquire.bits.addr := mmioBusAligned(s3_tlb_resp.paddr)
io.mmio_acquire.bits.id := cacheID.U
io.mmio_grant.ready := io.resp.ready
io.mmio_flush := io.flush(1)
io.l1plusflush := icacheFlush
XSDebug("[flush] flush_0:%d flush_1:%d\n",s2_flush,s3_flush)
......@@ -507,6 +612,6 @@ class ICache extends ICacheModule
if (!env.FPGAPlatform ) {
ExcitingUtils.addSource( s3_valid && !blocking, "perfCntIcacheReqCnt", Perf)
ExcitingUtils.addSource( s3_miss && blocking && io.resp.fire(), "perfCntIcacheMissCnt", Perf)
ExcitingUtils.addSource( s3_mmio && blocking && io.resp.fire(), "perfCntIcacheMMIOCnt", Perf)
\ No newline at end of file
......@@ -135,7 +135,7 @@ class IcacheMissEntry extends ICacheMissQueueModule
//TODO: Maybe this state is not necessary, so we don't need respDataReg
when(io.refill.fire() && io.meta_write.fire()){
when((io.refill.fire() && io.meta_write.fire()) || needFlush || io.flush){
state := s_wait_resp
......@@ -150,10 +150,10 @@ class IcacheMissEntry extends ICacheMissQueueModule
//refill write and meta write
//WARNING: the refill may not finish in 1 cycle
io.meta_write.valid := (state === s_write_back) && !needFlush
io.meta_write.valid := (state === s_write_back) && !needFlush && !io.flush
io.meta_write.bits.apply(tag=req_tag, setIdx=req_idx, waymask=req_waymask)
io.refill.valid := (state === s_write_back) && !needFlush
io.refill.valid := (state === s_write_back) && !needFlush && !io.flush
......@@ -495,8 +495,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
if (!env.FPGAPlatform) {
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
start = entry.io.block_idx.valid,
stop = !entry.io.block_idx.valid,
startHighPriority = true),
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
......@@ -12,21 +12,24 @@ case class BOPParameters(
scoreBits: Int,
roundMax: Int,
badScore: Int,
scores: Int = 52,
// TODO: Is 256-offset necessary, which will cross pages?
offsetList: Seq[Int] = Seq(
1, 2, 3, 4, 5, 6, 8, 9, 10, 12,
15, 16, 18, 20, 24, 25, 27, 30, 32, 36,
15, 16/*, 18, 20, 24, 25, 27, 30, 32, 36,
40, 45, 48, 50, 54, 60, 64, 72, 75, 80,
81, 90, 96, 100, 108, 120, 125, 128, 135, 144,
150, 160, 162, 180, 192, 200, 216, 225, 240, 243,
250, 256
250, 256*/
blockBytes: Int
blockBytes: Int,
nEntries: Int
) {
def scores = offsetList.length
def offsetWidth = log2Up(offsetList(scores - 1)) + 1
def rrIdxBits = log2Up(rrTableEntries)
def roundBits = log2Up(roundMax)
def scoreMax = (1 << scoreBits) - 1
def totalWidth = log2Up(nEntries) // id's width
class ScoreTableEntry(p: BOPParameters) extends PrefetchBundle {
......@@ -34,7 +37,7 @@ class ScoreTableEntry(p: BOPParameters) extends PrefetchBundle {
val score = UInt(p.scoreBits.W)
def apply(offset: UInt, score: UInt) = {
val entry = new ScoreTableEntry(p)
val entry = Wire(new ScoreTableEntry(p))
entry.offset := offset
entry.score := score
......@@ -78,9 +81,51 @@ class TestOffsetBundle(p: BOPParameters) extends PrefetchBundle {
override def cloneType: this.type = (new TestOffsetBundle(p)).asInstanceOf[this.type]
// Prefetch request of the best-offset prefetcher: a PrefetchReq extended with
// an id field identifying the entry that issued it.
class BestOffsetPrefetchReq(p: BOPParameters) extends PrefetchReq {
// Entry id, p.totalWidth bits wide.
val id = UInt(p.totalWidth.W)
// Debug formatting. addr and write are not declared in this class, so they are
// presumably inherited from PrefetchReq.
override def toPrintable: Printable = {
p"addr=0x${Hexadecimal(addr)} w=${write} id=0x${Hexadecimal(id)}"
// Re-creates the bundle with the same parameter object p.
override def cloneType: this.type = (new BestOffsetPrefetchReq(p)).asInstanceOf[this.type]
// Prefetch response of the best-offset prefetcher: a PrefetchResp extended with
// an id field (p.totalWidth bits) used to match the response to an entry.
class BestOffsetPrefetchResp(p: BOPParameters) extends PrefetchResp {
// Entry id, p.totalWidth bits wide.
val id = UInt(p.totalWidth.W)
// Debug formatting (the body is not visible in this view).
override def toPrintable: Printable = {
// Re-creates the bundle with the same parameter object p.
override def cloneType: this.type = (new BestOffsetPrefetchResp(p)).asInstanceOf[this.type]
// Finish message of the best-offset prefetcher: a PrefetchFinish extended with
// an id field (p.totalWidth bits).
class BestOffsetPrefetchFinish(p: BOPParameters) extends PrefetchFinish {
// Entry id, p.totalWidth bits wide.
val id = UInt(p.totalWidth.W)
// Debug formatting (the body is not visible in this view).
override def toPrintable: Printable = {
// Re-creates the bundle with the same parameter object p.
override def cloneType: this.type = (new BestOffsetPrefetchFinish(p)).asInstanceOf[this.type]
// Top-level port bundle of the best-offset prefetcher.
//   train  - incoming training events (valid-only, no back-pressure)
//   req    - outgoing prefetch requests (decoupled)
//   resp   - incoming prefetch responses (decoupled, flipped)
//   finish - outgoing finish messages (decoupled)
class BestOffsetPrefetchIO(p: BOPParameters) extends PrefetchBundle {
val train = Flipped(ValidIO(new PrefetchTrain))
val req = DecoupledIO(new BestOffsetPrefetchReq(p))
val resp = Flipped(DecoupledIO(new BestOffsetPrefetchResp(p)))
val finish = DecoupledIO(new BestOffsetPrefetchFinish(p))
// Debug formatting: prints valid (and ready, where present) plus the payload
// of each of the four channels.
override def toPrintable: Printable = {
p"train: v=${train.valid} ${train.bits} " +
p"req: v=${req.valid} r=${req.ready} ${req.bits} " +
p"resp: v=${resp.valid} r=${resp.ready} ${resp.bits} " +
p"finish: v=${finish.valid} r=${finish.ready} ${finish.bits}"
// Re-creates the bundle with the same parameter object p.
override def cloneType: this.type = (new BestOffsetPrefetchIO(p)).asInstanceOf[this.type]
class RecentRequestTable(p: BOPParameters) extends PrefetchModule {
val io = IO(new Bundle {
val w = Flipped(ValidIO(UInt(PAddrBits.W)))
val w = Flipped(DecoupledIO(UInt(PAddrBits.W)))
val r = Flipped(new TestOffsetBundle(p))
def rrIdxBits = p.rrIdxBits
......@@ -108,10 +153,10 @@ class RecentRequestTable(p: BOPParameters) extends PrefetchModule {
val rrTable = Module(new SRAMTemplate(rrTableEntry(), set = rrTableEntries, way = 1, shouldReset = true))
val rrTable = Module(new SRAMTemplate(rrTableEntry(), set = rrTableEntries, way = 1, shouldReset = true, singlePort = true))
val wAddr = io.w.bits
rrTable.io.w.req.valid := io.w.valid
rrTable.io.w.req.valid := io.w.valid && !io.r.req.valid
rrTable.io.w.req.bits.setIdx := idx(wAddr)
rrTable.io.w.req.bits.data.valid := true.B
rrTable.io.w.req.bits.data.tag := tag(wAddr)
......@@ -122,32 +167,35 @@ class RecentRequestTable(p: BOPParameters) extends PrefetchModule {
rrTable.io.r.req.bits.setIdx := idx(rAddr)
rData := rrTable.io.r.resp.data(0)
val rwConflict = io.w.valid && io.r.req.fire() && idx(wAddr) === idx(rAddr)
when (rwConflict) {
rrTable.io.r.req.valid := false.B
when (RegNext(rwConflict)) {
rData.valid := true.B
rData.tag := RegNext(tag(wAddr))
val rwConflict = io.w.fire() && io.r.req.fire() && idx(wAddr) === idx(rAddr)
// when (rwConflict) {
// rrTable.io.r.req.valid := false.B
// }
// when (RegNext(rwConflict)) {
// rData.valid := true.B
// rData.tag := RegNext(tag(wAddr))
// }
io.w.ready := rrTable.io.w.req.ready && !io.r.req.valid
io.r.req.ready := true.B
io.r.resp.valid := RegNext(io.r.req.fire())
io.r.resp.valid := RegNext(rrTable.io.r.req.fire())
io.r.resp.bits.testOffset := RegNext(io.r.req.bits.testOffset)
io.r.resp.bits.ptr := RegNext(io.r.req.bits.ptr)
io.r.resp.bits.hit := rData.valid && rData.tag === RegNext(tag(rAddr))
assert(!RegNext(rwConflict), "single port SRAM should not read and write at the same time")
// debug info
XSDebug(io.w.valid, p"io.write: v=${io.w.valid} addr=0x${Hexadecimal(io.w.bits)}\n")
XSDebug(io.w.fire(), p"io.write: v=${io.w.valid} addr=0x${Hexadecimal(io.w.bits)}\n")
XSDebug(p"io.read: ${io.r}\n")
XSDebug(io.w.valid, p"wAddr=0x${Hexadecimal(wAddr)} idx=${Hexadecimal(idx(wAddr))} tag=${Hexadecimal(tag(wAddr))}\n")
XSDebug(io.w.fire(), p"wAddr=0x${Hexadecimal(wAddr)} idx=${Hexadecimal(idx(wAddr))} tag=${Hexadecimal(tag(wAddr))}\n")
XSDebug(io.r.req.fire(), p"rAddr=0x${Hexadecimal(rAddr)} idx=${Hexadecimal(idx(rAddr))} rData=${rData}\n")
XSDebug(rwConflict, p"write and read conflict!\n")
class OffsetScoreTable(p: BOPParameters) extends PrefetchModule {
val io = IO(new Bundle {
val req = Flipped(DecoupledIO(UInt(PAddrBits.W))) // req addr from L1
val prefetchOffset = Output(UInt(p.offsetWidth.W))
val test = new TestOffsetBundle(p)
......@@ -158,33 +206,34 @@ class OffsetScoreTable(p: BOPParameters) extends PrefetchModule {
def roundBits = p.roundBits
def roundMax = p.roundMax
def scoreMax = p.scoreMax
def badScore = p.badScore
val prefetchOffset = RegInit(1.U(offsetWidth)) // best offset is 1, this is, a next-line prefetcher as initialization
val prefetchOffset = RegInit(2.U(offsetWidth.W)) // best offset is 1, that is, a next-line prefetcher as initialization
val st = RegInit(VecInit(offsetList.map(off => new ScoreTableEntry(p).apply(off.U, 0.U))))
val ptr = RegInit(0.U(log2Up(scores).W))
val round = RegInit(0.U(roundBits.W))
val bestOffset = RegInit(new ScoreTableEntry(p).apply(1.U, 0.U)) // the entry with the highest score while traversing
val testOffset = WireInit(0.U(offsetWidth.W))
val bestOffset = RegInit(new ScoreTableEntry(p).apply(2.U, 0.U)) // the entry with the highest score while traversing
val testOffset = WireInit(st(ptr).offset)
def winner(e1: ScoreTableEntry, e2: ScoreTableEntry): ScoreTableEntry = {
val w = new ScoreTableEntry(p)
val w = Wire(new ScoreTableEntry(p))
w := Mux(e1.score > e2.score, e1, e2)
val s_idle :: s_learn :: s_finish :: Nil = Enum(3)
val s_idle :: s_learn :: Nil = Enum(2)
val state = RegInit(s_idle)
// 1. At the start of a learning phase
// All the scores are reset to 0.
// At the end of every learning phase, the prefetch offset is updated as the one with the highest score.
when (state === s_idle) {
when (ptr =/= scores.U) {
st(ptr).score := 0.U
ptr := ptr + 1.U
}.otherwise {
ptr := 0.U
state := s_learn
st.foreach(_.score := 0.U)
ptr := 0.U
round := 0.U
bestOffset.score := badScore.U
prefetchOffset := bestOffset.offset
state := s_learn
// 2. During a learning phase
......@@ -196,16 +245,18 @@ class OffsetScoreTable(p: BOPParameters) extends PrefetchModule {
// (1) one of the score equals SCOREMAX, or
// (2) the number of rounds equals ROUNDMAX.
when (state === s_learn) {
testOffset := st(ptr).offset
when (io.test.req.fire()) {
val roundFinish = ptr === (scores - 1).U
ptr := Mux(roundFinish, 0.U, ptr + 1.U)
round := Mux(roundFinish, round + 1.U, round)
XSDebug(p"test offset ${testOffset} req fire\n")
// (2) the number of rounds equals ROUNDMAX.
when (round === roundMax.U) {
state := s_finish
when (round >= roundMax.U) {
state := s_idle
XSDebug(p"round reaches roundMax(${roundMax.U})\n")
when (io.test.resp.fire() && io.test.resp.bits.hit) {
......@@ -216,25 +267,148 @@ class OffsetScoreTable(p: BOPParameters) extends PrefetchModule {
st(io.test.resp.bits.ptr).score := newScore
bestOffset := winner(new ScoreTableEntry(p).apply(offset, newScore), bestOffset)
// (1) one of the score equals SCOREMAX
when (newScore === scoreMax.U) {
state := s_finish
when (newScore >= scoreMax.U) {
state := s_idle
XSDebug(p"newScore reaches scoreMax(${scoreMax.U})\n")
// 3. At the end of every learning phase, the prefetch offset is updated as the one with the highest score.
when (state === s_finish) {
prefetchOffset := bestOffset.offset
ptr := 0.U
round := 0.U
bestOffset.offset := 1.U
bestOffset.score := 0.U
state := s_idle
XSDebug(p"test offset ${offset} resp fire and hit. score ${oldScore} -> ${newScore}\n")
io.req.ready := true.B
io.prefetchOffset := prefetchOffset
io.test.req.valid := state === s_learn && round =/= roundMax.U
io.test.req.bits.addr := DontCare // assign this outside the score table
io.test.req.valid := state === s_learn && io.req.fire()
io.test.req.bits.addr := io.req.bits
io.test.req.bits.testOffset := testOffset
io.test.req.bits.ptr := ptr
io.test.resp.ready := true.B
XSDebug(p"state=${state} prefetchOffset=${prefetchOffset} ptr=${ptr} round=${round} bestOffset=${bestOffset} testOffset=${testOffset}\n")
// score table
XSDebug(p"OffsetScoreTable(idx:offset:score) as follows:\n")
for (i <- 0 until scores) {
if (i % 8 == 0) { XSDebug(p"${i.U}:${st(i)}\t") }
else if (i % 8 == 7 || i == scores - 1) { XSDebug(false, true.B, p"${i.U}:${st(i)}\n") }
else { XSDebug(false, true.B, p"${i.U}:${st(i)}\t") }
XSDebug(io.req.fire(), p"receive req from L1. io.req.bits=0x${Hexadecimal(io.req.bits)}\n")
// One in-flight prefetch slot of the best-offset prefetcher.
// It latches a training address, issues one prefetch request at
// (block address + prefetchOffset blocks), waits for the response, offers the
// base block address on writeRRTable, then sends a finish message.
// State sequence: s_idle -> s_req -> s_resp -> s_write_recent_req -> s_finish -> s_idle.
class BestOffsetPrefetchEntry(p: BOPParameters) extends PrefetchModule {
val io = IO(new Bundle {
// id stamped onto outgoing req / finish messages
val id = Input(UInt(p.totalWidth.W))
// offset, in units of blocks (it is shifted left by log2Up(blockBytes) below)
val prefetchOffset = Input(UInt(p.offsetWidth.W))
val pft = new BestOffsetPrefetchIO(p)
// valid whenever the entry is not idle; bits is the prefetch address held in req
val inflight = ValidIO(UInt(PAddrBits.W))
// base (trained) block address, offered while in s_write_recent_req
val writeRRTable = DecoupledIO(UInt(PAddrBits.W))
def blockBytes = p.blockBytes
// Clears the low log2Up(blockBytes) bits of addr, i.e. aligns it down to a block boundary.
def getBlockAddr(addr: UInt) = Cat(addr(PAddrBits - 1, log2Up(blockBytes)), 0.U(log2Up(blockBytes).W))
val s_idle :: s_req :: s_resp :: s_write_recent_req :: s_finish :: Nil = Enum(5)
val state = RegInit(s_idle)
// latched prefetch request (address and write flag)
val req = RegInit(0.U.asTypeOf(new PrefetchReq))
// latched block address of the training access
val baseAddr = RegInit(0.U(PAddrBits.W))
// s_idle: on a training event, latch the prefetch target and the base address.
when (state === s_idle) {
when (io.pft.train.valid) {
state := s_req
req.addr := getBlockAddr(io.pft.train.bits.addr) + (io.prefetchOffset << log2Up(blockBytes))
req.write := io.pft.train.bits.write
baseAddr := getBlockAddr(io.pft.train.bits.addr)
// s_req: wait until the request handshake completes.
when (state === s_req) {
when (io.pft.req.fire()) {
state := s_resp
// s_resp: wait until the response handshake completes.
when (state === s_resp) {
when (io.pft.resp.fire()) {
state := s_write_recent_req
// s_write_recent_req: wait until the writeRRTable handshake completes.
when (state === s_write_recent_req) {
when (io.writeRRTable.fire()) {
state := s_finish
// s_finish: wait until the finish handshake completes, then become idle again.
when (state === s_finish) {
when (io.pft.finish.fire()) {
state := s_idle
// Channel outputs are driven purely from the current state and latched registers.
io.pft.req.valid := state === s_req
io.pft.req.bits.addr := req.addr
io.pft.req.bits.write := req.write
io.pft.req.bits.id := io.id
io.pft.resp.ready := state === s_resp
io.pft.finish.valid := state === s_finish
io.pft.finish.bits.id := io.id
io.inflight.valid := state =/= s_idle
io.inflight.bits := req.addr
io.writeRRTable.valid := state === s_write_recent_req
io.writeRRTable.bits := baseAddr // write this into recent request table
// debug info
XSDebug(p"bopEntry ${io.id}: state=${state} prefetchOffset=${io.prefetchOffset} inflight=${io.inflight.valid} 0x${Hexadecimal(io.inflight.bits)} writeRRTable: ${io.writeRRTable.valid} 0x${Hexadecimal(io.writeRRTable.bits)} baseAddr=0x${Hexadecimal(baseAddr)} req: ${req}\n")
XSDebug(p"bopEntry ${io.id}: io.pft: ${io.pft}\n")
// Top level of the best-offset prefetcher.
// Instantiates the offset score table, the recent-request table and nEntries
// prefetch entries, and arbitrates the entries' req / finish / writeRRTable
// outputs onto the shared ports.
class BestOffsetPrefetch(p: BOPParameters) extends PrefetchModule {
val io = IO(new BestOffsetPrefetchIO(p))
def nEntries = p.nEntries
def blockBytes = p.blockBytes
// Clears the low log2Up(blockBytes) bits of addr, i.e. aligns it down to a block boundary.
def getBlockAddr(addr: UInt) = Cat(addr(PAddrBits - 1, log2Up(blockBytes)), 0.U(log2Up(blockBytes).W))
val scoreTable = Module(new OffsetScoreTable(p))
val rrTable = Module(new RecentRequestTable(p))
// One arbiter per shared output channel, each with one input per entry.
val reqArb = Module(new Arbiter(new BestOffsetPrefetchReq(p), nEntries))
val finishArb = Module(new Arbiter(new BestOffsetPrefetchFinish(p), nEntries))
val writeRRTableArb = Module(new Arbiter(UInt(PAddrBits.W), nEntries))
// index selected by the PriorityEncoder over the "not inflight" flags (assigned below)
val entryReadyIdx = Wire(UInt(log2Up(nEntries).W))
// per entry: inflight and its address equals the block address of the training access
val inflightMatchVec = Wire(Vec(nEntries, Bool()))
val bopEntries = (0 until nEntries).map { i =>
val bopEntry = Module(new BestOffsetPrefetchEntry(p))
bopEntry.io.id := i.U
bopEntry.io.prefetchOffset := scoreTable.io.prefetchOffset
// Train only the selected entry, and only when no inflight entry matches the address.
bopEntry.io.pft.train.valid := io.train.valid && i.U === entryReadyIdx && !inflightMatchVec.asUInt.orR
bopEntry.io.pft.train.bits := io.train.bits
reqArb.io.in(i) <> bopEntry.io.pft.req
// Route the response to the entry whose index equals the response id.
bopEntry.io.pft.resp.valid := io.resp.valid && i.U === io.resp.bits.id
bopEntry.io.pft.resp.bits := io.resp.bits
finishArb.io.in(i) <> bopEntry.io.pft.finish
writeRRTableArb.io.in(i) <> bopEntry.io.writeRRTable
// NOTE(review): when every entry is inflight the PriorityEncoder input is all-zero;
// the resulting index is not handled explicitly here - confirm the intended behaviour.
entryReadyIdx := PriorityEncoder(bopEntries.map { e => !e.io.inflight.valid })
(0 until nEntries).foreach(i =>
inflightMatchVec(i) := bopEntries(i).io.inflight.valid && bopEntries(i).io.inflight.bits === getBlockAddr(io.train.bits.addr)
io.req <> reqArb.io.out
// The response is accepted when the entry addressed by its id is ready for it.
io.resp.ready := VecInit(bopEntries.zipWithIndex.map { case (e, i) => i.U === io.resp.bits.id && e.io.pft.resp.ready }).asUInt.orR
io.finish <> finishArb.io.out
// Entries write the recent-request table; the score table reads it through its test port.
rrTable.io.w <> writeRRTableArb.io.out
rrTable.io.r <> scoreTable.io.test
// Every training event is also presented to the score table, block-aligned.
scoreTable.io.req.valid := io.train.valid
scoreTable.io.req.bits := getBlockAddr(io.train.bits.addr)
// debug info
XSDebug(p"io: ${io}\n")
XSDebug(p"entryReadyIdx=${entryReadyIdx} inflightMatchVec=${Binary(inflightMatchVec.asUInt)}\n")
......@@ -15,13 +15,30 @@ import freechips.rocketchip.tilelink.{TLClientNode, TLClientParameters,
TLEdgeOut, TLBundleA, TLBundleD,
ClientStates, ClientMetadata, TLHints
import sifive.blocks.inclusivecache.PrefetcherIO
// Configuration of the L2 prefetcher.
//   enable       - whether a prefetcher is used at all
//   _type        - which prefetcher; the values compared against below are "stream" and "bop"
//   streamParams - parameters used when _type == "stream"
//   bopParams    - parameters used when _type == "bop"
// The helper defs pick the value from the active prefetcher's parameters and
// fall back to a constant when the prefetcher is disabled or _type is neither value.
case class L2PrefetcherParameters(
enable: Boolean,
_type: String,
streamParams: StreamPrefetchParameters
streamParams: StreamPrefetchParameters,
bopParams: BOPParameters
) {
def nEntries: Int = streamParams.streamCnt * streamParams.streamSize
// def nEntries: Int = streamParams.streamCnt * streamParams.streamSize
// Number of prefetch entries; 1 as fallback.
def nEntries: Int = {
if (enable && _type == "stream") { streamParams.streamCnt * streamParams.streamSize }
else if (enable && _type == "bop") { bopParams.nEntries }
else 1
// Width taken from the active prefetcher's totalWidth; 1 as fallback.
def totalWidth: Int = {
if (enable && _type == "stream") streamParams.totalWidth
else if (enable && _type == "bop") bopParams.totalWidth
else 1
// Block size in bytes taken from the active prefetcher; 64 as fallback.
def blockBytes: Int = {
if (enable && _type == "stream") streamParams.blockBytes
else if (enable && _type == "bop") bopParams.blockBytes
else 64
class L2Prefetcher()(implicit p: Parameters) extends LazyModule with HasPrefetchParameters {
......@@ -37,18 +54,41 @@ class L2Prefetcher()(implicit p: Parameters) extends LazyModule with HasPrefetch
lazy val module = new L2PrefetcherImp(this)
// Port bundle of the L2 prefetcher: a single flipped decoupled MissReq port.
class L2PrefetcherIO extends XSBundle with HasPrefetchParameters {
val in = Flipped(DecoupledIO(new MissReq))
// prefetch DCache lines in L2 using StreamPrefetch
class L2PrefetcherImp(outer: L2Prefetcher) extends LazyModuleImp(outer) with HasPrefetchParameters with HasXSLog {
val io = IO(new Bundle {
val in = Flipped(DecoupledIO(new MissReq))
// prefetch
// val mem_acquire = Decoupled(new TLBundleA(edge.bundle))
// val mem_grant = Flipped(Decoupled(new TLBundleD(edge.bundle)))
// val mem_finish = Decoupled(new TLBundleE(edge.bundle))
val io = IO(new L2PrefetcherIO)
val (bus, edge) = outer.clientNode.out.head
if (l2PrefetcherParameters.enable && l2PrefetcherParameters._type == "stream") {
if (l2PrefetcherParameters.enable && l2PrefetcherParameters._type == "bop") {
val bopParams = l2PrefetcherParameters.bopParams
val dPrefetch = Module(new BestOffsetPrefetch(bopParams))
dPrefetch.io.train.valid := io.in.fire()
dPrefetch.io.train.bits.addr := io.in.bits.addr
dPrefetch.io.train.bits.write := MemoryOpConstants.isWrite(io.in.bits.cmd)
dPrefetch.io.train.bits.miss := true.B
io.in.ready := true.B
bus.a.valid := dPrefetch.io.req.valid
bus.a.bits := DontCare
bus.a.bits := edge.Hint(
fromSource = dPrefetch.io.req.bits.id,
toAddress = dPrefetch.io.req.bits.addr,
lgSize = log2Up(bopParams.blockBytes).U,
param = Mux(dPrefetch.io.req.bits.write, TLHints.PREFETCH_WRITE, TLHints.PREFETCH_READ)
dPrefetch.io.req.ready := bus.a.ready
dPrefetch.io.resp.valid := bus.d.valid
dPrefetch.io.resp.bits.id := bus.d.bits.source(bopParams.totalWidth - 1, 0)
bus.d.ready := dPrefetch.io.resp.ready
dPrefetch.io.finish.ready := true.B
} else if (l2PrefetcherParameters.enable && l2PrefetcherParameters._type == "stream") {
val streamParams = l2PrefetcherParameters.streamParams
val dPrefetch = Module(new StreamPrefetch(streamParams))
dPrefetch.io.train.valid := io.in.fire()
......@@ -62,49 +102,44 @@ class L2PrefetcherImp(outer: L2Prefetcher) extends LazyModuleImp(outer) with Has
bus.a.bits := edge.Hint(
fromSource = dPrefetch.io.req.bits.id,
toAddress = dPrefetch.io.req.bits.addr,
lgSize = log2Up(streamParams.blockBytes).U,
lgSize = log2Up(l2PrefetcherParameters.blockBytes).U,
param = Mux(dPrefetch.io.req.bits.write, TLHints.PREFETCH_WRITE, TLHints.PREFETCH_READ) // TODO
dPrefetch.io.req.ready := bus.a.ready
bus.b.ready := true.B
bus.c.valid := false.B
bus.c.bits := DontCare
dPrefetch.io.resp.valid := bus.d.valid
dPrefetch.io.resp.bits.id := bus.d.bits.source(streamParams.totalWidth - 1, 0)
dPrefetch.io.resp.bits.id := bus.d.bits.source(l2PrefetcherParameters.totalWidth - 1, 0)
bus.d.ready := dPrefetch.io.resp.ready
bus.e.valid := false.B
bus.e.bits := DontCare
dPrefetch.io.finish.ready := true.B
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(bus.a.fire(), "perfCntL2PrefetchReqCnt", Perf)
def idWidth = log2Up(l2PrefetcherParameters.nEntries)
(0 until l2PrefetcherParameters.nEntries).foreach(i =>
start = bus.a.fire() && dPrefetch.io.req.bits.id(streamParams.totalWidth - 1, 0) === i.U,
stop = bus.d.fire() && bus.d.bits.source(streamParams.totalWidth - 1, 0) === i.U,
startHighPriority = true
"perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10),
} else {
bus.a.valid := false.B
bus.a.bits := DontCare
bus.b.ready := true.B
bus.c.valid := false.B
bus.c.bits := DontCare
bus.d.ready := true.B
bus.e.valid := false.B
bus.e.bits := DontCare
bus.b.ready := true.B
bus.c.valid := false.B
bus.c.bits := DontCare
bus.e.valid := false.B
bus.e.bits := DontCare
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(bus.a.fire(), "perfCntL2PrefetchReqCnt", Perf)
(0 until l2PrefetcherParameters.nEntries).foreach(i =>
start = bus.a.fire() && bus.a.bits.source(l2PrefetcherParameters.totalWidth - 1, 0) === i.U,
stop = bus.d.fire() && bus.d.bits.source(l2PrefetcherParameters.totalWidth - 1, 0) === i.U,
startHighPriority = true
"perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10),
......@@ -40,11 +40,11 @@ class PrefetchTrain extends PrefetchBundle {
class PrefetchIO extends PrefetchBundle {
val train = Flipped(ValidIO(new PrefetchTrain))
val req = DecoupledIO(new PrefetchReq)
val resp = Flipped(DecoupledIO(new PrefetchResp))
// class PrefetchIO extends PrefetchBundle {
// val train = Flipped(ValidIO(new PrefetchTrain))
// val req = DecoupledIO(new PrefetchReq)
// val resp = Flipped(DecoupledIO(new PrefetchResp))
// }
// class FakePrefetcher extends PrefetchModule {
// val io = IO(new PrefetchIO)
......@@ -11,7 +11,8 @@ case class StreamPrefetchParameters(
streamSize: Int,
ageWidth: Int,
blockBytes: Int,
reallocStreamOnMissInstantly: Boolean
reallocStreamOnMissInstantly: Boolean,
cacheName: String // distinguish between different prefetchers
) {
def streamWidth = log2Up(streamCnt)
def idxWidth = log2Up(streamSize)
......@@ -107,7 +108,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
val buf = RegInit(VecInit(Seq.fill(streamSize)(0.U.asTypeOf(new PrefetchReq))))
val valid = RegInit(VecInit(Seq.fill(streamSize)(false.B)))
val head = RegInit(0.U(log2Up(streamSize).W))
val tail = RegInit(0.U(log2Up(streamCnt).W))
val tail = RegInit(0.U(log2Up(streamSize).W))
val s_idle :: s_req :: s_resp :: s_finish :: Nil = Enum(4)
val state = RegInit(VecInit(Seq.fill(streamSize)(s_idle)))
......@@ -122,7 +123,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
// dequeue
val hitIdx = io.update.bits.hitIdx
when (io.update.valid && !empty && valid(hitIdx)) {
when (io.update.valid && !empty && (isPrefetching(hitIdx) || valid(hitIdx))) {
val headBeforehitIdx = head <= hitIdx && (hitIdx < tail || tail <= head)
val hitIdxBeforeHead = hitIdx < tail && tail <= head
when (headBeforehitIdx) {
......@@ -132,6 +133,8 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
when (hitIdxBeforeHead) {
(0 until streamSize).foreach(i => deqLater(i) := Mux(i.U >= head || i.U <= hitIdx, true.B, deqLater(i)))
XSDebug(io.update.valid && !empty && (isPrefetching(hitIdx) || valid(hitIdx)), p"hitIdx=${hitIdx} headBeforehitIdx=${headBeforehitIdx} hitIdxBeforeHead=${hitIdxBeforeHead}\n")
val deqValid = WireInit(VecInit(Seq.fill(streamSize)(false.B)))
......@@ -143,8 +146,15 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
deqValid(idx) := deq
(0 until streamSize).foreach(i => valid(i) := valid(i) && !deqValid(i))
(0 until streamSize).foreach(i => deqLater(i) := deqLater(i) && !deqValid(i))
// (0 until streamSize).foreach(i => valid(i) := valid(i) && !deqValid(i))
// (0 until streamSize).foreach(i => deqLater(i) := deqLater(i) && !deqValid(i))
for (i <- 0 until streamSize) {
when (deqValid(i)) {
valid(i) := false.B
deqLater(i) := false.B
val nextHead = head + PopCount(deqValid)
when (deqValid.asUInt.orR) {
head := nextHead
......@@ -198,13 +208,17 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
val finishArb = Module(new Arbiter(new StreamPrefetchFinish(p), streamSize))
for (i <- 0 until streamSize) {
prefetchPrior(i) := head + i.U
reqs(i).ready := false.B
reqArb.io.in(i) <> reqs(prefetchPrior(i))
finishs(i).ready := false.B
reqs(i).ready := DontCare
finishArb.io.in(i) <> finishs(prefetchPrior(i))
finishs(i).ready := DontCare
resps(i).bits := io.resp.bits
resps(i).valid := io.resp.valid && io.resp.bits.idx === i.U
for (i <- 0 until streamSize) {
reqs(prefetchPrior(i)).ready := reqArb.io.in(i).ready
finishs(prefetchPrior(i)).ready := finishArb.io.in(i).ready
io.req <> reqArb.io.out
io.finish <> finishArb.io.out
io.resp.ready := VecInit(resps.zipWithIndex.map{ case (r, i) =>
......@@ -225,6 +239,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
needRealloc := false.B
state.foreach(_ := s_idle)
valid.foreach(_ := false.B)
deqLater.foreach(_ := false.B)
for (i <- 0 until streamSize) {
......@@ -233,20 +248,20 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
// debug info
XSDebug(p"StreamBuf ${io.streamBufId} io.req: v=${io.req.valid} r=${io.req.ready} ${io.req.bits}\n")
XSDebug(p"StreamBuf ${io.streamBufId} io.resp: v=${io.resp.valid} r=${io.resp.ready} ${io.resp.bits}\n")
XSDebug(p"StreamBuf ${io.streamBufId} io.finish: v=${io.finish.valid} r=${io.finish.ready} ${io.finish.bits}")
XSDebug(p"StreamBuf ${io.streamBufId} io.update: v=${io.update.valid} ${io.update.bits}\n")
XSDebug(p"StreamBuf ${io.streamBufId} io.alloc: v=${io.alloc.valid} ${io.alloc.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.req: v=${io.req.valid} r=${io.req.ready} ${io.req.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.resp: v=${io.resp.valid} r=${io.resp.ready} ${io.resp.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.finish: v=${io.finish.valid} r=${io.finish.ready} ${io.finish.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.update: v=${io.update.valid} ${io.update.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.alloc: v=${io.alloc.valid} ${io.alloc.bits}\n")
for (i <- 0 until streamSize) {
XSDebug(p"StreamBuf ${io.streamBufId} [${i.U}] io.addrs: ${io.addrs(i).valid} 0x${Hexadecimal(io.addrs(i).bits)} " +
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} [${i.U}] io.addrs: ${io.addrs(i).valid} 0x${Hexadecimal(io.addrs(i).bits)} " +
p"buf: ${buf(i)} valid: ${valid(i)} state: ${state(i)} isPfting: ${isPrefetching(i)} " +
p"deqLater: ${deqLater(i)} deqValid: ${deqValid(i)}\n")
XSDebug(p"StreamBuf ${io.streamBufId} head: ${head} tail: ${tail} full: ${full} empty: ${empty} nextHead: ${nextHead}\n")
XSDebug(p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq}\n")
XSDebug(needRealloc, p"StreamBuf ${io.streamBufId} needRealloc: ${needRealloc} reallocReq: ${reallocReq}\n")
XSDebug(p"StreamBuf ${io.streamBufId} prefetchPrior: ")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} head: ${head} tail: ${tail} full: ${full} empty: ${empty} nextHead: ${nextHead} blockBytes: ${blockBytes.U}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq}\n")
XSDebug(needRealloc, s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} needRealloc: ${needRealloc} reallocReq: ${reallocReq}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} prefetchPrior: ")
(0 until streamSize).foreach(i => XSDebug(false, true.B, p"${prefetchPrior(i)} "))
XSDebug(false, true.B, "\n")
......@@ -266,6 +281,8 @@ object ParallelMin {
class StreamPrefetch(p: StreamPrefetchParameters) extends PrefetchModule {
val io = IO(new StreamPrefetchIO(p))
require(p.blockBytes > 0)
// TODO: implement this
def streamCnt = p.streamCnt
......@@ -352,8 +369,8 @@ class StreamPrefetch(p: StreamPrefetchParameters) extends PrefetchModule {
i.U === io.resp.bits.stream && buf.io.resp.ready}).asUInt.orR
// debug info
XSDebug(p"io: ${io}\n")
XSDebug(p"bufValids: ${Binary(bufValids.asUInt)} hit: ${hit} ages: ")
XSDebug(s"${p.cacheName} " + p"io: ${io}\n")
XSDebug(s"${p.cacheName} " + p"bufValids: ${Binary(bufValids.asUInt)} hit: ${hit} ages: ")
(0 until streamCnt).foreach(i => XSDebug(false, true.B, p"${Hexadecimal(ages(i))} "))
XSDebug(false, true.B, "\n")
......@@ -156,6 +156,94 @@ class PtwEntries(num: Int, tagLen: Int) extends PtwBundle {
// A single L2 TLB entry: full-vpn tag, page level, ppn and permission bits.
class L2TlbEntry extends TlbBundle {
val tag = UInt(vpnLen.W) // tag is vpn
val level = UInt(log2Up(Level).W) // 2 for 4KB, 1 for 2MB, 0 for 1GB
val ppn = UInt(ppnLen.W)
val perm = new PtePermBundle

// Returns whether vpn matches this entry's tag under a mask selected by level.
// Each mask is built as (vpnLen - i*vpnnLen) true bits followed by i*vpnnLen false bits
// and then bit-reversed; presumably this leaves the upper bits set, so lower levels
// compare fewer vpn bits - confirm against Vec.asUInt bit ordering.
// NOTE(review): fullMask is not referenced again in the visible body.
def hit(vpn: UInt):Bool = {
val fullMask = VecInit((Seq.fill(vpnLen)(true.B))).asUInt
val maskLevel = VecInit((Level-1 to 0 by -1).map{i => // NOTE: level 2 for 4KB, 1 for 2MB, 0 for 1GB
Reverse(VecInit(Seq.fill(vpnLen-i*vpnnLen)(true.B) ++ Seq.fill(i*vpnnLen)(false.B)).asUInt)})
val mask = maskLevel(level)
(mask&this.tag) === (mask&vpn)

// Fills this entry from a raw pte: tag <- vpn, level <- level,
// ppn / perm <- the corresponding fields of pte reinterpreted as pteBundle.
def apply(pte: UInt, level: UInt, vpn: UInt) = {
this.tag := vpn
this.level := level
this.ppn := pte.asTypeOf(pteBundle).ppn
this.perm := pte.asTypeOf(pteBundle).perm

// Debug formatting of all four fields.
override def toPrintable: Printable = {
p"vpn:0x${Hexadecimal(tag)} level:${level} ppn:${Hexadecimal(ppn)} perm:${perm}"
// One line of the L2 TLB: num translations that share a single tag.
//   num    - number of translations stored in one line
//   tagLen - number of high vpn bits kept as the tag
class L2TlbEntires(num: Int, tagLen: Int) extends TlbBundle {
/* vpn can be divided into three parts */
// vpn: tagPart(17bit) + addrPart(8bit) + cutLenPart(2bit)
// number of low vpn bits that select a translation inside the line
val cutLen = log2Up(num)

val tag = UInt(tagLen.W) // NOTE: high part of vpn
val ppns = Vec(num, UInt(ppnLen.W))
val perms = Vec(num, new PtePermBundle)
// per-translation valid bits
val vs = Vec(num, Bool())

// Extracts the top tagLen bits of vpn.
def tagClip(vpn: UInt) = { // full vpn => tagLen
vpn(vpn.getWidth-1, vpn.getWidth-tagLen)

// NOTE: get the index inside the line (the low cutLen bits of vpn)
def idxClip(vpn: UInt) = {
vpn(cutLen-1, 0)

// Hit when the tag matches and the translation selected by vpn is valid.
def hit(vpn: UInt) = {
(tag === tagClip(vpn)) && vs(idxClip(vpn))

// Builds a line from data, treated as num consecutive XLEN-bit ptes.
// Elaboration-time check: data.getWidth / XLEN must equal num.
// Runtime assertion: level must be 2 (4K pages only).
def genEntries(data: UInt, level: UInt, vpn: UInt): L2TlbEntires = {
require((data.getWidth / XLEN) == num,
"input data length must be multiple of pte length")
assert(level===2.U, "tlb entries only support 4K pages")

val ts = Wire(new L2TlbEntires(num, tagLen))
ts.tag := tagClip(vpn)
for (i <- 0 until num) {
// i-th XLEN-bit slice of data, reinterpreted as a PteBundle
val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)
ts.ppns(i) := pte.ppn
ts.perms(i):= pte.perm // this.perms has no v
ts.vs(i) := !pte.isPf(level) && pte.isLeaf() // legal and leaf, store to l2Tlb

// Builds a single L2TlbEntry for vpn from the translation selected by idxClip(vpn).
def get(vpn: UInt): L2TlbEntry = {
val t = Wire(new L2TlbEntry)
val idx = idxClip(vpn)
t.tag := vpn // Note: Use input vpn, not vpn in TlbL2
t.level := 2.U // L2TlbEntries only support 4k page
t.ppn := ppns(idx)
t.perm := perms(idx)

// Re-creates the bundle with the same num and tagLen.
override def cloneType: this.type = (new L2TlbEntires(num, tagLen)).asInstanceOf[this.type]
// Debug formatting; hard-coded for exactly four translations per line.
override def toPrintable: Printable = {
require(num == 4, "if num is not 4, please comment this toPrintable")
// NOTE: if num is not 4, please comment this toPrintable
p"tag:${Hexadecimal(tag)} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))}" +
p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} " +
p"perms(0):${perms(0)} perms(1):${perms(1)} perms(2):${perms(2)} perms(3):${perms(3)} vs:${Binary(vs.asUInt)}"
class PtwReq extends PtwBundle {
val vpn = UInt(vpnLen.W)
......@@ -165,8 +253,8 @@ class PtwReq extends PtwBundle {
// Response of the page-table walker: the translated entry plus a page-fault flag.
// NOTE(review): entry and pf each appear twice below (TlbEntry and L2TlbEntry
// variants); only one declaration of each can be in effect - confirm which.
class PtwResp extends PtwBundle {
val entry = new TlbEntry
val pf = Bool() // simple pf no matter cmd
val entry = new L2TlbEntry
val pf = Bool()
// Debug formatting of both fields.
override def toPrintable: Printable = {
p"entry:${entry} pf:${pf}"
......@@ -235,13 +323,26 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
// two level: l2-tlb-cache && pde/pte-cache
// l2-tlb-cache is ram-larger-edition tlb
// pde/pte-cache is cache of page-table, speeding up ptw
val tlbl2 = Module(new SRAMTemplate(new TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen), set = TlbL2LineNum)) // (total 256, one line is 4 => 64 lines)
val tlbl2 = Module(new SRAMTemplate(
new L2TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen),
set = TlbL2LineNum,
singlePort = true
)) // (total 256, one line is 4 => 64 lines)
val tlbv = RegInit(0.U(TlbL2LineNum.W)) // valid
val tlbg = Reg(UInt(TlbL2LineNum.W)) // global
val sp = Reg(Vec(TlbL2SPEntrySize, new L2TlbEntry)) // (total 16, one is 4M or 1G)
val spv = RegInit(0.U(TlbL2SPEntrySize.W))
val spg = Reg(UInt(TlbL2SPEntrySize.W))
val ptwl1 = Reg(Vec(PtwL1EntrySize, new PtwEntry(tagLen = PtwL1TagLen)))
val l1v = RegInit(0.U(PtwL1EntrySize.W)) // valid
val l1g = Reg(UInt(PtwL1EntrySize.W))
val ptwl2 = Module(new SRAMTemplate(new PtwEntries(num = PtwL2LineSize, tagLen = PtwL2TagLen), set = PtwL2LineNum)) // (total 256, one line is 4 => 64 lines)
val ptwl2 = Module(new SRAMTemplate(
new PtwEntries(num = PtwL2LineSize, tagLen = PtwL2TagLen),
set = PtwL2LineNum,
singlePort = true
)) // (total 256, one line is 4 => 64 lines)
val l2v = RegInit(0.U(PtwL2LineNum.W)) // valid
val l2g = Reg(UInt(PtwL2LineNum.W)) // global
......@@ -268,7 +369,6 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
* tlbl2
val (tlbHit, tlbHitData) = {
val ridx = genTlbL2Idx(req.vpn)
val vidx = RegEnable(tlbv(ridx), validOneCycle)
......@@ -276,10 +376,22 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
tlbl2.io.r.req.bits.apply(setIdx = ridx)
val ramData = tlbl2.io.r.resp.data(0)
assert(tlbl2.io.r.req.ready || !tlbl2.io.r.req.valid)
XSDebug(tlbl2.io.r.req.valid, p"tlbl2 Read rIdx:${Hexadecimal(ridx)}\n")
XSDebug(RegNext(tlbl2.io.r.req.valid), p"tlbl2 RamData:${ramData}\n")
XSDebug(RegNext(tlbl2.io.r.req.valid), p"tlbl2 v:${vidx} hit:${ramData.hit(req.vpn)} tlbPte:${ramData.get(req.vpn)}\n")
(ramData.hit(req.vpn) && vidx, ramData.get(req.vpn))
val spHitVec = sp.zipWithIndex.map{ case (a,i) =>
RegEnable(a.hit(req.vpn) && spv(i), validOneCycle)
val spHitData = ParallelMux(spHitVec zip sp)
val spHit = Cat(spHitVec).orR
XSDebug(RegNext(validOneCycle), p"tlbl2 sp: spHit:${spHit} spPte:${spHitData}\n")
assert(RegNext(!(ramData.hit(req.vpn) && vidx && spHit && RegNext(validOneCycle))), "pages should not be normal page and super page as well")
(ramData.hit(req.vpn) && vidx || spHit, Mux(spHit, spHitData, ramData.get(req.vpn)))
......@@ -306,7 +418,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
val idx = RegEnable(l2addr(log2Up(PtwL2LineSize)+log2Up(XLEN/8)-1, log2Up(XLEN/8)), readRam)
val vidx = RegEnable(l2v(ridx), readRam)
assert(ptwl2.io.r.req.ready || !readRam)
ptwl2.io.r.req.valid := readRam
ptwl2.io.r.req.bits.apply(setIdx = ridx)
val ramData = ptwl2.io.r.resp.data(0)
......@@ -361,7 +473,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
state := state_idle
}.otherwise {
state := state_wait_ready
latch.entry := new TlbEntry().genTlbEntry(memRdata, level, req.vpn)
latch.entry := Wire(new L2TlbEntry()).apply(memRdata, level, req.vpn)
latch.pf := memPte.isPf(level)
}.otherwise {
......@@ -418,7 +530,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
for(i <- 0 until PtwWidth) {
resp(i).valid := valid && arbChosen===i.U && ptwFinish // TODO: add resp valid logic
resp(i).bits.entry := Mux(tlbHit, tlbHitData,
Mux(state===state_wait_ready, latch.entry, new TlbEntry().genTlbEntry(memSelData, Mux(level===3.U, 2.U, level), req.vpn)))
Mux(state===state_wait_ready, latch.entry, Wire(new L2TlbEntry()).apply(memSelData, Mux(level===3.U, 2.U, level), req.vpn)))
resp(i).bits.pf := Mux(level===3.U || notFound, true.B, Mux(tlbHit, false.B, Mux(state===state_wait_ready, latch.pf, memPte.isPf(level))))
// TODO: the pf must not be correct, check it
......@@ -434,13 +546,15 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
when (memRespFire && !memPte.isPf(level) && !sfenceLatch) {
when (level===0.U && !memPte.isLeaf) {
val refillIdx = LFSR64()(log2Up(PtwL1EntrySize)-1,0) // TODO: may be LRU
val rfOH = UIntToOH(refillIdx)
ptwl1(refillIdx).refill(l1addr, memSelData)
l1v := l1v | UIntToOH(refillIdx)
l1g := (l1g & ~UIntToOH(refillIdx)) | Mux(memPte.perm.g, UIntToOH(refillIdx), 0.U)
l1v := l1v | rfOH
l1g := (l1g & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U)
when (level===1.U && !memPte.isLeaf) {
val l2addrStore = RegEnable(l2addr, memReqFire && state===state_req && level===1.U)
val refillIdx = genPtwL2Idx(l2addrStore) //getVpnn(req.vpn, 1)(log2Up(PtwL2EntrySize)-1, 0)
val rfOH = UIntToOH(refillIdx)
//TODO: check why the old refillIdx is right
......@@ -451,26 +565,34 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
data = ps,
waymask = -1.S.asUInt
l2v := l2v | UIntToOH(refillIdx)
l2g := (l2g & ~UIntToOH(refillIdx)) | Mux(Cat(memPtes.map(_.perm.g)).andR, UIntToOH(refillIdx), 0.U)
l2v := l2v | rfOH
l2g := (l2g & ~rfOH) | Mux(Cat(memPtes.map(_.perm.g)).andR, rfOH, 0.U)
XSDebug(p"ptwl2 RefillIdx:${Hexadecimal(refillIdx)} ps:${ps}\n")
when (memPte.isLeaf() && (level===2.U)) {
val refillIdx = genTlbL2Idx(req.vpn)//getVpnn(req.vpn, 0)(log2Up(TlbL2EntrySize)-1, 0)
val rfOH = UIntToOH(refillIdx)
//TODO: check why the old refillIdx is right
val ts = new TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn)
val ts = new L2TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn)
valid = true.B,
setIdx = refillIdx,
data = ts,
waymask = -1.S.asUInt
tlbv := tlbv | UIntToOH(refillIdx)
tlbg := (tlbg & ~UIntToOH(refillIdx)) | Mux(Cat(memPtes.map(_.perm.g)).andR, UIntToOH(refillIdx), 0.U)
tlbv := tlbv | rfOH
tlbg := (tlbg & ~rfOH) | Mux(Cat(memPtes.map(_.perm.g)).andR, rfOH, 0.U)
XSDebug(p"tlbl2 refillIdx:${Hexadecimal(refillIdx)} ts:${ts}\n")
when (memPte.isLeaf() && (level===1.U || level===0.U)) {
val refillIdx = LFSR64()(log2Up(TlbL2SPEntrySize)-1,0) // TODO: may be LRU
val rfOH = UIntToOH(refillIdx)
sp(refillIdx) := Wire(new L2TlbEntry()).apply(memSelData, Mux(level===3.U, 2.U, level), req.vpn)
spv := spv | rfOH
spg := (spg & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U)
/* sfence
......@@ -488,25 +610,29 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
when (sfence.bits.rs2) {
// all va && all asid
tlbv := 0.U
tlbg := 0.U
spv := 0.U
// tlbg := 0.U
l1v := 0.U
l2v := 0.U
l2g := 0.U
// l2g := 0.U
} .otherwise {
// all va && specific asid except global
tlbv := tlbv & tlbg
spv := spv & spg
l1v := l1v & l1g
l2v := l2v & l2g
} .otherwise {
val sfenceTlbL2IdxOH = UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))
when (sfence.bits.rs2) {
// specific leaf of addr && all asid
tlbv := tlbv & ~UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))
tlbg := tlbg & ~UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))
tlbv := tlbv & ~sfenceTlbL2IdxOH
tlbg := tlbg & ~sfenceTlbL2IdxOH
} .otherwise {
// specific leaf of addr && specific asid
tlbv := tlbv & (~UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))| tlbg)
tlbv := tlbv & (~sfenceTlbL2IdxOH| tlbg)
spv := 0.U
......@@ -2,13 +2,28 @@ package xiangshan.frontend
import utils.XSInfo
import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import utils.PipelineConnect
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.prefetch.L1plusPrefetcher
import xiangshan.backend.fu.HasExceptionNO
class Frontend()(implicit p: Parameters) extends LazyModule with HasXSParameter{
class Frontend extends XSModule with HasL1plusCacheParameters {
val instrUncache = LazyModule(new InstrUncache())
lazy val module = new FrontendImp(this)
class FrontendImp (outer: Frontend) extends LazyModuleImp(outer)
with HasL1plusCacheParameters
with HasXSParameter
with HasExceptionNO
with HasXSLog
val io = IO(new Bundle() {
val icacheMemAcq = DecoupledIO(new L1plusCacheReq)
val icacheMemGrant = Flipped(DecoupledIO(new L1plusCacheResp))
......@@ -23,7 +38,7 @@ class Frontend extends XSModule with HasL1plusCacheParameters {
val ifu = Module(new IFU)
val ibuffer = Module(new Ibuffer)
val l1plusPrefetcher = Module(new L1plusPrefetcher)
val instrUncache = outer.instrUncache.module
val needFlush = io.backend.redirect.valid
......@@ -43,6 +58,11 @@ class Frontend extends XSModule with HasL1plusCacheParameters {
ifu.io.fencei := io.fencei
instrUncache.io.req <> ifu.io.mmio_acquire
instrUncache.io.resp <> ifu.io.mmio_grant
instrUncache.io.flush <> ifu.io.mmio_flush
// to tlb
ifu.io.sfence := io.sfence
ifu.io.tlbCsr := io.tlbCsr
......@@ -10,8 +10,16 @@ import chisel3.experimental.chiselName
import freechips.rocketchip.tile.HasLazyRoCC
import chisel3.ExcitingUtils._
// Bus-size constants and helpers for the instruction MMIO path.
// NOTE(review): presumably mixed into the instruction-uncache / icache MMIO logic — confirm against users of this trait.
trait HasInstrMMIOConst extends HasXSParameter with HasIFUConst{
// MMIO bus width in bits.
def mmioBusWidth = 64
// MMIO bus width in bytes.
def mmioBusBytes = mmioBusWidth /8
// Number of bus-width transfers needed to move FetchWidth * 4 bytes (FetchWidth * 4 * 8 bits).
def mmioBeats = FetchWidth * 4 * 8 / mmioBusWidth
// All-ones mask, PredictWidth bits wide.
def mmioMask = VecInit(List.fill(PredictWidth)(true.B)).asUInt
// pc aligned down to an MMIO bus-width boundary (delegates to `align` with mmioBusBytes).
def mmioBusAligned(pc :UInt): UInt = align(pc, mmioBusBytes)
trait HasIFUConst extends HasXSParameter {
val resetVector = 0x80000000L//TODO: set reset vec
val resetVector = 0x10000000L//TODO: set reset vec
def align(pc: UInt, bytes: Int): UInt = Cat(pc(VAddrBits-1, log2Ceil(bytes)), 0.U(log2Ceil(bytes).W))
val instBytes = if (HasCExtension) 2 else 4
val instOffsetBits = log2Ceil(instBytes)
......@@ -71,6 +79,10 @@ class IFUIO extends XSBundle
val tlbCsr = Input(new TlbCsrBundle)
// from tlb
val ptw = new TlbPtwIO
// icache uncache
val mmio_acquire = DecoupledIO(new InsUncacheReq)
val mmio_grant = Flipped(DecoupledIO(new InsUncacheResp))
val mmio_flush = Output(Bool())
class PrevHalfInstr extends XSBundle {
......@@ -119,7 +131,7 @@ class IFU extends XSModule with HasIFUConst
val if2_valid = RegInit(init = false.B)
val if2_allReady = WireInit(if2_ready && icache.io.req.ready)
val if1_fire = (if1_valid && if2_allReady) && (icache.io.tlb.resp.valid || !if2_valid)
val if1_can_go = if1_fire || if2_flush
val if1_can_go = if1_fire || if3_flush
val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory)
......@@ -418,6 +430,9 @@ class IFU extends XSModule with HasIFUConst
icache.io.prev.bits := if3_prevHalfInstr.bits.instr
icache.io.prev_ipf := if3_prevHalfInstr.bits.ipf
icache.io.prev_pc := if3_prevHalfInstr.bits.pc
icache.io.mmio_acquire <> io.mmio_acquire
icache.io.mmio_grant <> io.mmio_grant
icache.io.mmio_flush <> io.mmio_flush
io.icacheMemAcq <> icache.io.mem_acquire
io.l1plusFlush := icache.io.l1plusflush
io.prefetchTrainReq := icache.io.prefetchTrainReq
......@@ -443,10 +458,18 @@ class IFU extends XSModule with HasIFUConst
crossPageIPF := true.B // higher 16 bits page fault
//RVC expand
val expandedInstrs = Wire(Vec(PredictWidth, UInt(32.W)))
for(i <- 0 until PredictWidth){
val expander = Module(new RVCExpander)
expander.io.in := if4_pd.instrs(i)
expandedInstrs(i) := expander.io.out.bits
val fetchPacketValid = if4_valid && !io.redirect.valid
val fetchPacketWire = Wire(new FetchPacket)
fetchPacketWire.instrs := if4_pd.instrs
fetchPacketWire.instrs := expandedInstrs
fetchPacketWire.mask := if4_pd.mask & (Fill(PredictWidth, !if4_bp.taken) | (Fill(PredictWidth, 1.U(1.W)) >> (~if4_bp.jmpIdx)))
fetchPacketWire.pdmask := if4_pd.mask
......@@ -3,6 +3,7 @@ package xiangshan.frontend
import chisel3._
import chisel3.util._
import utils._
import freechips.rocketchip.rocket.{RVCDecoder, ExpandedInstruction}
import xiangshan._
import xiangshan.backend.decode.isa.predecode.PreDecodeInst
import xiangshan.cache._
......@@ -134,3 +135,16 @@ class PreDecode extends XSModule with HasPdconst with HasIFUConst {
// Module wrapper around rocket-chip's RVCDecoder.
// io.in : 32-bit instruction word.
// io.out : the ExpandedInstruction produced by the decoder for that word.
class RVCExpander extends XSModule {
val io = IO(new Bundle {
val in = Input(UInt(32.W))
val out = Output(new ExpandedInstruction)
// The choice is made at elaboration time (Scala `if`), not in hardware:
// with the C extension enabled use the decoder's `decode` result,
// otherwise use its `passthrough` result.
if (HasCExtension) {
io.out := new RVCDecoder(io.in, XLEN).decode
} else {
io.out := new RVCDecoder(io.in, XLEN).passthrough
......@@ -5,6 +5,10 @@ import chisel3.util._
import xiangshan._
import utils._
import chisel3.experimental.chiselName
import chisel3.stage.{ChiselGeneratorAnnotation, ChiselStage}
import firrtl.stage.RunFirrtlTransformAnnotation
import firrtl.transforms.RenameModules
import freechips.rocketchip.transforms.naming.RenameDesiredNames
import scala.math.min
import scala.util.matching.Regex
......@@ -373,14 +377,13 @@ class FakeTage extends BaseTage {
class Tage extends BaseTage {
val tables = TableInfo.map {
case (nRows, histLen, tagLen) => {
case (nRows, histLen, tagLen) =>
val t = if(EnableBPD) Module(new TageTable(nRows, histLen, tagLen, UBitPeriod)) else Module(new FakeTageTable)
t.io.req.valid := io.pc.valid
t.io.req.bits.pc := io.pc.bits
t.io.req.bits.hist := io.hist
t.io.req.bits.mask := io.inMask
val scTables = SCTableInfo.map {
......@@ -658,4 +661,13 @@ class Tage extends BaseTage {
XSDebug(io.update.valid && updateIsBr, p"update: sc: ${updateSCMeta}\n")
XSDebug(true.B, p"scThres: use(${useThreshold}), update(${updateThreshold})\n")
// Standalone entry point that elaborates the Tage module on its own through ChiselStage.
object TageTest extends App {
// Command-line args are forwarded unchanged to ChiselStage.execute.
override def main(args: Array[String]): Unit = {
(new ChiselStage).execute(args, Seq(
// Generator for the design under elaboration.
ChiselGeneratorAnnotation(() => new Tage),
// Additionally run rocket-chip's RenameDesiredNames FIRRTL transform.
RunFirrtlTransformAnnotation(new RenameDesiredNames)
\ No newline at end of file
......@@ -41,7 +41,6 @@ class LsPipelineBundle extends XSBundle {
val miss = Bool()
val tlbMiss = Bool()
val mmio = Bool()
val rollback = Bool()
val forwardMask = Vec(8, Bool())
val forwardData = Vec(8, UInt(8.W))
......@@ -51,6 +51,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool())
val loadQueue = Module(new LoadQueue)
......@@ -103,6 +104,8 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
loadQueue.io.load_s1 <> io.forward
storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE
storeQueue.io.sqempty <> io.sqempty
io.exceptionAddr.vaddr := Mux(io.exceptionAddr.isStore, storeQueue.io.exceptionAddr.vaddr, loadQueue.io.exceptionAddr.vaddr)
// naive uncache arbiter
......@@ -10,6 +10,7 @@ import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbReques
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.fu.HasExceptionNO
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }
......@@ -58,6 +59,7 @@ class LoadQueue extends XSModule
with HasDCacheParameters
with HasCircularQueuePtrHelper
with HasLoadHelper
with HasExceptionNO
val io = IO(new Bundle() {
val enq = new LqEnqIO
......@@ -92,6 +94,7 @@ class LoadQueue extends XSModule
val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new LqPtr))))
val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
val deqPtrExtNext = Wire(new LqPtr)
val validCounter = RegInit(0.U(log2Ceil(LoadQueueSize + 1).W))
val allowEnqueue = RegInit(true.B)
......@@ -150,7 +153,7 @@ class LoadQueue extends XSModule
vaddrModule.io.wen(i) := false.B
when(io.loadIn(i).fire()) {
when(io.loadIn(i).bits.miss) {
XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
......@@ -159,116 +162,44 @@ class LoadQueue extends XSModule
}.otherwise {
XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.data // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
vaddrModule.io.waddr(i) := loadWbIndex
vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
vaddrModule.io.wen(i) := true.B
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
// listening(loadWbIndex) := dcacheMissed
pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
}.otherwise {
XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.data // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
vaddrModule.io.waddr(i) := loadWbIndex
vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
vaddrModule.io.wen(i) := true.B
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
miss(loadWbIndex) := dcacheMissed
pending(loadWbIndex) := io.loadIn(i).bits.mmio
uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
* Cache miss request
* (1) writeback: miss
* (2) send to dcache: listening
* (3) dcache response: datavalid
* (4) writeback to ROB: writeback
// val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
// val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
// val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)
// val missRefillSelVec = VecInit(
// (0 until LoadQueueSize).map{ i =>
// val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
// allocated(i) && miss(i) && !inflight
// })
// val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
// val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
// io.dcache.req.valid := missRefillSelVec.asUInt.orR
// io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
// io.dcache.req.bits.addr := missRefillBlockAddr
// io.dcache.req.bits.data := DontCare
// io.dcache.req.bits.mask := DontCare
// io.dcache.req.bits.meta.id := DontCare
// io.dcache.req.bits.meta.vaddr := DontCare // dataModule.io.rdata(missRefillSel).vaddr
// io.dcache.req.bits.meta.paddr := missRefillBlockAddr
// io.dcache.req.bits.meta.uop := uop(missRefillSel)
// io.dcache.req.bits.meta.mmio := false.B // dataModule.io.rdata(missRefillSel).mmio
// io.dcache.req.bits.meta.tlb_miss := false.B
// io.dcache.req.bits.meta.mask := DontCare
// io.dcache.req.bits.meta.replay := false.B
// assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))
// when(io.dcache.req.fire()) {
// miss(missRefillSel) := false.B
// listening(missRefillSel) := true.B
// mark this block as inflight
// inflightReqs(reqBlockIndex).valid := true.B
// inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
// assert(!inflightReqs(reqBlockIndex).valid)
// }
// when(io.dcache.resp.fire()) {
// val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
// assert(inflight)
// for (i <- 0 until cfg.nLoadMissEntries) {
// when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
// inflightReqs(i).valid := false.B
// }
// }
// }
// when(io.dcache.req.fire()){
// XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
// io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
// io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
// )
// }
when(io.dcache.valid) {
XSDebug("miss resp: paddr:0x%x data %x\n", io.dcache.bits.addr, io.dcache.bits.data)
......@@ -295,47 +226,57 @@ class LoadQueue extends XSModule
// Stage 0
// Generate writeback indexes
// Collects the even-indexed bits of `input` (bits 0, 2, 4, ...) into a UInt of
// LoadQueueSize/2 bits; element i of the intermediate Vec is input(2*i).
def getEvenBits(input: UInt): UInt = {
// Elaboration-time check: input must be exactly LoadQueueSize bits wide.
require(input.getWidth == LoadQueueSize)
VecInit((0 until LoadQueueSize/2).map(i => {input(2*i)})).asUInt
// Collects the odd-indexed bits of `input` (bits 1, 3, 5, ...) into a UInt of
// LoadQueueSize/2 bits; element i of the intermediate Vec is input(2*i+1).
def getOddBits(input: UInt): UInt = {
// Elaboration-time check: input must be exactly LoadQueueSize bits wide.
require(input.getWidth == LoadQueueSize)
VecInit((0 until LoadQueueSize/2).map(i => {input(2*i+1)})).asUInt
val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W))) // index selected last cycle
val loadWbSelV = Wire(Vec(LoadPipelineWidth, Bool())) // index selected in last cycle is valid
val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
allocated(i) && !writebacked(i) && datavalid(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)}))
val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)}))
val evenDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i)})).asUInt
val oddDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i+1)})).asUInt
val evenDeqMask = getEvenBits(deqMask)
val oddDeqMask = getOddBits(deqMask)
// generate lastCycleSelect mask
val evenSelectMask = Mux(io.ldout(0).fire(), getEvenBits(UIntToOH(loadWbSel(0))), 0.U)
val oddSelectMask = Mux(io.ldout(1).fire(), getOddBits(UIntToOH(loadWbSel(1))), 0.U)
// generate real select vec
val loadEvenSelVec = getEvenBits(loadWbSelVec) & ~evenSelectMask
val loadOddSelVec = getOddBits(loadWbSelVec) & ~oddSelectMask
def toVec(a: UInt): Vec[Bool] = {
val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
loadWbSelGen(0) := Cat(getFirstOne(loadEvenSelVec, evenDeqMask), 0.U(1.W))
loadWbSelGen(0) := Cat(getFirstOne(toVec(loadEvenSelVec), evenDeqMask), 0.U(1.W))
loadWbSelVGen(0):= loadEvenSelVec.asUInt.orR
loadWbSelGen(1) := Cat(getFirstOne(loadOddSelVec, oddDeqMask), 1.U(1.W))
loadWbSelGen(1) := Cat(getFirstOne(toVec(loadOddSelVec), oddDeqMask), 1.U(1.W))
loadWbSelVGen(1) := loadOddSelVec.asUInt.orR
val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelV = RegInit(VecInit(List.fill(LoadPipelineWidth)(false.B)))
(0 until LoadPipelineWidth).map(i => {
val canGo = io.ldout(i).fire() || !loadWbSelV(i)
val valid = loadWbSelVGen(i)
// store selected index in pipeline reg
loadWbSel(i) := RegEnable(loadWbSelGen(i), valid && canGo)
// Mark them as writebacked, so they will not be selected in the next cycle
when(valid && canGo){
writebacked(loadWbSelGen(i)) := true.B
// update loadWbSelValidReg
loadWbSel(i) := RegNext(loadWbSelGen(i))
loadWbSelV(i) := RegNext(loadWbSelVGen(i), init = false.B)
loadWbSelV(i) := false.B
when(valid && canGo){
loadWbSelV(i) := true.B
// Mark them as writebacked, so they will not be selected in the next cycle
writebacked(loadWbSel(i)) := true.B
// Stage 1
// Use indexes generated in cycle 0 to read data
// writeback data to cdb
(0 until LoadPipelineWidth).map(i => {
// data select
dataModule.io.wb.raddr(i) := loadWbSel(i)
dataModule.io.wb.raddr(i) := loadWbSelGen(i)
val rdata = dataModule.io.wb.rdata(i).data
val seluop = uop(loadWbSel(i))
val func = seluop.ctrl.fuOpType
......@@ -353,10 +294,9 @@ class LoadQueue extends XSModule
val rdataPartialLoad = rdataHelper(seluop, rdataSel)
// writeback missed int/fp load
// Int load writeback will finish (if not blocked) in one cycle
io.ldout(i).bits.uop := seluop
io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.wb.rdata(i).exception.asBools
io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
io.ldout(i).bits.data := rdataPartialLoad
io.ldout(i).bits.redirectValid := false.B
......@@ -368,12 +308,10 @@ class LoadQueue extends XSModule
io.ldout(i).valid := loadWbSelV(i)
when(io.ldout(i).fire()) {
XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x paddr %x data %x mmio %x\n",
XSInfo("int load miss write to cbd roqidx %d lqidx %d pc 0x%x mmio %x\n",
......@@ -430,7 +368,9 @@ class LoadQueue extends XSModule
* Besides, load instructions in LoadUnit_S1 and S2 are also checked.
* Cycle 1: Redirect Generation
* There're three possible types of violations. Choose the oldest load.
* Set io.redirect according to the detected violation.
* Prepare redirect request according to the detected violation.
* Cycle 2: Redirect Fire
* Fire redirect request (if valid)
io.load_s1 := DontCare
def detectRollback(i: Int) = {
......@@ -530,18 +470,29 @@ class LoadQueue extends XSModule
val rollbackSelected = ParallelOperation(rollback, rollbackSel)
val lastCycleRedirect = RegNext(io.brqRedirect)
// S2: select rollback and generate rollback request
// Note that we use roqIdx - 1.U to flush the load instruction itself.
// Thus, here if last cycle's roqIdx equals to this cycle's roqIdx, it still triggers the redirect.
io.rollback.valid := rollbackSelected.valid &&
val rollbackGen = Wire(Valid(new Redirect))
val rollbackReg = Reg(Valid(new Redirect))
rollbackGen.valid := rollbackSelected.valid &&
(!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
!(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())
io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx
io.rollback.bits.level := RedirectLevel.flush
io.rollback.bits.interrupt := DontCare
io.rollback.bits.pc := DontCare
io.rollback.bits.target := rollbackSelected.bits.cf.pc
io.rollback.bits.brTag := rollbackSelected.bits.brTag
rollbackGen.bits.roqIdx := rollbackSelected.bits.roqIdx
rollbackGen.bits.level := RedirectLevel.flush
rollbackGen.bits.interrupt := DontCare
rollbackGen.bits.pc := DontCare
rollbackGen.bits.target := rollbackSelected.bits.cf.pc
rollbackGen.bits.brTag := rollbackSelected.bits.brTag
rollbackReg := rollbackGen
// S3: fire rollback request
io.rollback := rollbackReg
io.rollback.valid := rollbackReg.valid &&
(!lastCycleRedirect.valid || !isAfter(rollbackReg.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
!(lastCycleRedirect.valid && lastCycleRedirect.bits.isUnconditional())
when(io.rollback.valid) {
XSDebug("Mem rollback: pc %x roqidx %d\n", io.rollback.bits.pc, io.rollback.bits.roqIdx.asUInt)
......@@ -556,7 +507,7 @@ class LoadQueue extends XSModule
io.roqDeqPtr === uop(deqPtr).roqIdx &&
dataModule.io.uncache.raddr := deqPtr
dataModule.io.uncache.raddr := deqPtrExtNext.value
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
......@@ -623,7 +574,8 @@ class LoadQueue extends XSModule
val commitCount = PopCount(loadCommit)
deqPtrExt := deqPtrExt + commitCount
deqPtrExtNext := deqPtrExt + commitCount
deqPtrExt := deqPtrExtNext
val lastLastCycleRedirect = RegNext(lastCycleRedirect.valid)
val trueValidCounter = distanceBetween(enqPtrExt(0), deqPtrExt)
......@@ -15,7 +15,6 @@ class LQDataEntry extends XSBundle {
val paddr = UInt(PAddrBits.W)
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val exception = UInt(16.W) // TODO: opt size
val fwdMask = Vec(8, Bool())
......@@ -38,7 +37,7 @@ class PaddrModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
io.rdata(i) := data(RegNext(io.raddr(i)))
// below is the write ports (with priorities)
......@@ -82,7 +81,7 @@ class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
io.rdata(i) := data(RegNext(io.raddr(i)))
// below is the write ports (with priorities)
......@@ -138,7 +137,7 @@ class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSMod
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
io.rdata(i) := data(RegNext(io.raddr(i)))
// below is the write ports (with priorities)
......@@ -236,7 +235,6 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
// data module
val paddrModule = Module(new PaddrModule(size, numRead = 3, numWrite = 2))
val maskModule = Module(new MaskModule(size, numRead = 3, numWrite = 2))
val exceptionModule = Module(new AsyncDataModuleTemplate(UInt(16.W), size, numRead = 3, numWrite = 2))
val coredataModule = Module(new CoredataModule(size, numRead = 3, numWrite = 3))
// read data
......@@ -244,26 +242,22 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
(0 until wbNumRead).map(i => {
paddrModule.io.raddr(i) := io.wb.raddr(i)
maskModule.io.raddr(i) := io.wb.raddr(i)
exceptionModule.io.raddr(i) := io.wb.raddr(i)
coredataModule.io.raddr(i) := io.wb.raddr(i)
io.wb.rdata(i).paddr := paddrModule.io.rdata(i)
io.wb.rdata(i).mask := maskModule.io.rdata(i)
io.wb.rdata(i).data := coredataModule.io.rdata(i)
io.wb.rdata(i).exception := exceptionModule.io.rdata(i)
io.wb.rdata(i).fwdMask := DontCare
// read port wbNumRead
paddrModule.io.raddr(wbNumRead) := io.uncache.raddr
maskModule.io.raddr(wbNumRead) := io.uncache.raddr
exceptionModule.io.raddr(wbNumRead) := io.uncache.raddr
coredataModule.io.raddr(wbNumRead) := io.uncache.raddr
io.uncache.rdata.paddr := paddrModule.io.rdata(wbNumRead)
io.uncache.rdata.mask := maskModule.io.rdata(wbNumRead)
io.uncache.rdata.data := exceptionModule.io.rdata(wbNumRead)
io.uncache.rdata.exception := coredataModule.io.rdata(wbNumRead)
io.uncache.rdata.data := coredataModule.io.rdata(wbNumRead)
io.uncache.rdata.fwdMask := DontCare
// write data
......@@ -271,19 +265,16 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
(0 until wbNumWrite).map(i => {
paddrModule.io.wen(i) := false.B
maskModule.io.wen(i) := false.B
exceptionModule.io.wen(i) := false.B
coredataModule.io.wen(i) := false.B
coredataModule.io.fwdMaskWen(i) := false.B
coredataModule.io.paddrWen(i) := false.B
paddrModule.io.waddr(i) := io.wb.waddr(i)
maskModule.io.waddr(i) := io.wb.waddr(i)
exceptionModule.io.waddr(i) := io.wb.waddr(i)
coredataModule.io.waddr(i) := io.wb.waddr(i)
paddrModule.io.wdata(i) := io.wb.wdata(i).paddr
maskModule.io.wdata(i) := io.wb.wdata(i).mask
exceptionModule.io.wdata(i) := io.wb.wdata(i).exception
coredataModule.io.wdata(i) := io.wb.wdata(i).data
coredataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt
coredataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr
......@@ -291,7 +282,6 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
paddrModule.io.wen(i) := true.B
maskModule.io.wen(i) := true.B
exceptionModule.io.wen(i) := true.B
coredataModule.io.wen(i) := true.B
coredataModule.io.fwdMaskWen(i) := true.B
coredataModule.io.paddrWen(i) := true.B
......@@ -43,6 +43,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val roqDeqPtr = Input(new RoqPtr)
// val refill = Flipped(Valid(new DCacheLineReq ))
val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool())
// data modules
......@@ -52,8 +53,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
dataModule.io := DontCare
val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), StoreQueueSize, numRead = 1, numWrite = StorePipelineWidth))
vaddrModule.io := DontCare
val exceptionModule = Module(new AsyncDataModuleTemplate(UInt(16.W), StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth))
exceptionModule.io := DontCare
// state & misc
val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
......@@ -77,13 +76,21 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val headMask = UIntToMask(enqPtr, StoreQueueSize)
// Read dataModule
// deqPtr and deqPtr+1 entry will be read from dataModule
// deqPtrExtNext and deqPtrExtNext+1 entry will be read from dataModule
// if !sbuffer.fire(), read the same ptr
// if sbuffer.fire(), read next
val deqPtrExtNext = WireInit(Mux(io.sbuffer(1).fire(),
VecInit(deqPtrExt.map(_ + 2.U)),
Mux(io.sbuffer(0).fire() || io.mmioStout.fire(),
VecInit(deqPtrExt.map(_ + 1.U)),
val dataModuleRead = dataModule.io.rdata
for (i <- 0 until StorePipelineWidth) {
dataModule.io.raddr(i) := deqPtrExt(i).value
dataModule.io.raddr(i) := deqPtrExtNext(i).value
vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.sqIdx.value
exceptionModule.io.raddr(0) := deqPtr // read exception
* Enqueue at dispatch
......@@ -123,14 +130,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
for (i <- 0 until StorePipelineWidth) {
dataModule.io.wen(i) := false.B
vaddrModule.io.wen(i) := false.B
exceptionModule.io.wen(i) := false.B
when(io.storeIn(i).fire()) {
when (io.storeIn(i).fire()) {
val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
val hasException = io.storeIn(i).bits.uop.cf.exceptionVec.asUInt.orR
val hasWritebacked = !io.storeIn(i).bits.mmio || hasException
datavalid(stWbIndex) := hasWritebacked
writebacked(stWbIndex) := hasWritebacked
pending(stWbIndex) := !hasWritebacked // valid mmio require
datavalid(stWbIndex) := !io.storeIn(i).bits.mmio
writebacked(stWbIndex) := !io.storeIn(i).bits.mmio
pending(stWbIndex) := io.storeIn(i).bits.mmio
val storeWbData = Wire(new SQDataEntry)
storeWbData := DontCare
......@@ -145,21 +149,15 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
vaddrModule.io.wdata(i) := io.storeIn(i).bits.vaddr
vaddrModule.io.wen(i) := true.B
exceptionModule.io.waddr(i) := stWbIndex
exceptionModule.io.wdata(i) := io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
exceptionModule.io.wen(i) := true.B
mmio(stWbIndex) := io.storeIn(i).bits.mmio
XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x roll %x exc %x\n",
XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x\n",
......@@ -258,7 +256,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.mmioStout.valid := allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr)
io.mmioStout.bits.uop := uop(deqPtr)
io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
io.mmioStout.bits.uop.cf.exceptionVec := exceptionModule.io.rdata(0).asBools
io.mmioStout.bits.data := dataModuleRead(0).data // dataModuleRead.read(deqPtr)
io.mmioStout.bits.redirectValid := false.B
io.mmioStout.bits.redirect := DontCare
......@@ -288,9 +285,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// Committed stores will not be cancelled and can be sent to the lower level.
// Remove retired insts from sq and add retired stores to sbuffer.
for (i <- 0 until StorePipelineWidth) {
// We use RegNext to prepare data for sbuffer
val ptr = deqPtrExt(i).value
val ismmio = mmio(ptr)
io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !ismmio
// if !sbuffer.fire(), read the same ptr
// if sbuffer.fire(), read next
io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR
io.sbuffer(i).bits.addr := dataModuleRead(i).paddr
io.sbuffer(i).bits.data := dataModuleRead(i).data
......@@ -299,7 +298,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.sbuffer(i).bits.meta.tlb_miss := false.B
io.sbuffer(i).bits.meta.uop := DontCare
io.sbuffer(i).bits.meta.mmio := false.B
io.sbuffer(i).bits.meta.mask := dataModuleRead(i).mask
io.sbuffer(i).bits.meta.mask := io.sbuffer(i).bits.mask
when (io.sbuffer(i).fire()) {
allocated(ptr) := false.B
......@@ -349,13 +348,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
deqPtrExt := Mux(io.sbuffer(1).fire(),
VecInit(deqPtrExt.map(_ + 2.U)),
Mux(io.sbuffer(0).fire() || io.mmioStout.fire(),
VecInit(deqPtrExt.map(_ + 1.U)),
deqPtrExt := deqPtrExtNext
val lastLastCycleRedirect = RegNext(lastCycleRedirect)
val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U))
......@@ -373,6 +366,12 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// io.sqempty will be used by sbuffer
// We delay it for 1 cycle for better timing
// When the sbuffer needs to check whether sq is empty, the pipeline is blocked, so delaying io.sqempty
// by 1 cycle still guarantees that sq is empty in that cycle
io.sqempty := RegNext(enqPtrExt(0).value === deqPtrExt(0).value && enqPtrExt(0).flag === deqPtrExt(0).flag)
// debug info
XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt(0).flag, deqPtr)
......@@ -51,7 +51,7 @@ class StoreQueueData(size: Int, numRead: Int, numWrite: Int, numForward: Int) ex
// destorequeue read data
(0 until numRead).map(i => {
io.rdata(i) := data(io.raddr(i))
io.rdata(i) := data(RegNext(io.raddr(i)))
// DataModuleTemplate should not be used when there are any write conflicts
......@@ -25,9 +25,11 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
val s_invalid :: s_tlb :: s_flush_sbuffer_req :: s_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_finish :: Nil = Enum(7)
val state = RegInit(s_invalid)
val in = Reg(new ExuInput())
val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
val atom_override_xtval = RegInit(false.B)
// paddr after translation
val paddr = Reg(UInt())
val is_mmio = Reg(Bool())
// dcache response data
val resp_data = Reg(UInt())
val is_lrsc_valid = Reg(Bool())
......@@ -68,7 +70,6 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
io.tlbFeedback.bits.hit := true.B
io.tlbFeedback.bits.roqIdx := in.uop.roqIdx
// tlb translation, manipulating signals && deal with exception
when (state === s_tlb) {
// send req to dtlb
......@@ -78,7 +79,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
io.dtlb.req.bits.roqIdx := in.uop.roqIdx
io.dtlb.resp.ready := true.B
val is_lr = in.uop.ctrl.fuOpType === LSUOpType.lr_w || in.uop.ctrl.fuOpType === LSUOpType.lr_d
io.dtlb.req.bits.cmd := Mux(is_lr, TlbCmd.read, TlbCmd.write)
io.dtlb.req.bits.cmd := Mux(is_lr, TlbCmd.atom_read, TlbCmd.atom_write)
io.dtlb.req.bits.debug.pc := in.uop.cf.pc
when(io.dtlb.resp.fire && !io.dtlb.resp.bits.miss){
......@@ -89,10 +90,17 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
"b10".U -> (in.src1(1,0) === 0.U), //w
"b11".U -> (in.src1(2,0) === 0.U) //d
in.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
in.uop.cf.exceptionVec(storePageFault) := io.dtlb.resp.bits.excp.pf.st
in.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld
val exception = !addrAligned || io.dtlb.resp.bits.excp.pf.st || io.dtlb.resp.bits.excp.pf.ld
exceptionVec(storeAddrMisaligned) := !addrAligned
exceptionVec(storePageFault) := io.dtlb.resp.bits.excp.pf.st
exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld
exceptionVec(storeAccessFault) := io.dtlb.resp.bits.excp.af.st
exceptionVec(loadAccessFault) := io.dtlb.resp.bits.excp.af.ld
val exception = !addrAligned ||
io.dtlb.resp.bits.excp.pf.st ||
io.dtlb.resp.bits.excp.pf.ld ||
io.dtlb.resp.bits.excp.af.st ||
is_mmio := io.dtlb.resp.bits.mmio
when (exception) {
// check for exceptions
// if there are exceptions, no need to execute it
......@@ -208,12 +216,13 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
when (state === s_finish) {
io.out.valid := true.B
io.out.bits.uop := in.uop
io.out.bits.uop.cf.exceptionVec := exceptionVec
io.out.bits.uop.diffTestDebugLrScValid := is_lrsc_valid
io.out.bits.data := resp_data
io.out.bits.redirectValid := false.B
io.out.bits.redirect := DontCare
io.out.bits.brUpdate := DontCare
io.out.bits.debug.isMMIO := AddressSpace.isMMIO(paddr)
io.out.bits.debug.isMMIO := is_mmio
when (io.out.fire()) {
XSDebug("atomics writeback: pc %x data %x\n", io.out.bits.uop.cf.pc, io.dcache.resp.bits.data)
state := s_invalid
......@@ -91,9 +91,9 @@ class LoadUnit_S1 extends XSModule {
val s1_uop = io.in.bits.uop
val s1_paddr = io.dtlbResp.bits.paddr
val s1_exception = io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_exception = selectLoad(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr)
val s1_mmio = !s1_tlb_miss && io.dtlbResp.bits.mmio
val s1_mask = io.in.bits.mask
io.out.bits := io.in.bits // forwardXX field will be updated in s1
......@@ -124,6 +124,7 @@ class LoadUnit_S1 extends XSModule {
io.out.bits.mmio := s1_mmio && !s1_exception
io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.out.bits.uop.cf.exceptionVec(loadAccessFault) := io.dtlbResp.bits.excp.af.ld
io.in.ready := !io.in.valid || io.out.ready
......@@ -147,7 +148,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val s2_paddr = io.in.bits.paddr
val s2_tlb_miss = io.in.bits.tlbMiss
val s2_mmio = io.in.bits.mmio
val s2_exception = io.in.bits.uop.cf.exceptionVec.asUInt.orR
val s2_exception = selectLoad(io.in.bits.uop.cf.exceptionVec, false).asUInt.orR
val s2_cache_miss = io.dcacheResp.bits.miss
val s2_cache_replay = io.dcacheResp.bits.replay
......@@ -192,7 +193,9 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
// so we do not need to care about flush in load / store unit's out.valid
io.out.bits := io.in.bits
io.out.bits.data := rdataPartialLoad
io.out.bits.miss := s2_cache_miss && !fullForward
// when exception occurs, set it to not miss and let it write back to roq (via int port)
io.out.bits.miss := s2_cache_miss && !fullForward && !s2_exception
io.out.bits.uop.ctrl.fpWen := io.in.bits.uop.ctrl.fpWen && !s2_exception
io.out.bits.mmio := s2_mmio
io.in.ready := io.out.ready || !io.in.valid
......@@ -269,12 +272,14 @@ class LoadUnit extends XSModule with HasLoadHelper {
// Load queue will be updated at s2 for both hit/miss int/fp load
io.lsq.loadIn.valid := load_s2.io.out.valid
io.lsq.loadIn.bits := load_s2.io.out.bits
val s2Valid = load_s2.io.out.valid && (!load_s2.io.out.bits.miss || load_s2.io.out.bits.uop.cf.exceptionVec.asUInt.orR)
// write to rob and writeback bus
val s2_wb_valid = load_s2.io.out.valid && !load_s2.io.out.bits.miss
val refillFpLoad = io.lsq.ldout.bits.uop.ctrl.fpWen
// Int load, if hit, will be written back at s2
val intHitLoadOut = Wire(Valid(new ExuOutput))
intHitLoadOut.valid := s2Valid && !load_s2.io.out.bits.uop.ctrl.fpWen
intHitLoadOut.valid := s2_wb_valid && !load_s2.io.out.bits.uop.ctrl.fpWen
intHitLoadOut.bits.uop := load_s2.io.out.bits.uop
intHitLoadOut.bits.data := load_s2.io.out.bits.data
intHitLoadOut.bits.redirectValid := false.B
......@@ -288,10 +293,10 @@ class LoadUnit extends XSModule with HasLoadHelper {
io.ldout.bits := Mux(intHitLoadOut.valid, intHitLoadOut.bits, io.lsq.ldout.bits)
io.ldout.valid := intHitLoadOut.valid || io.lsq.ldout.valid && !refillFpLoad
// Fp load, if hit, will be sent to the recoder at s2, then it will be recoded & written back at s3
val fpHitLoadOut = Wire(Valid(new ExuOutput))
fpHitLoadOut.valid := s2Valid && load_s2.io.out.bits.uop.ctrl.fpWen
fpHitLoadOut.valid := s2_wb_valid && load_s2.io.out.bits.uop.ctrl.fpWen
fpHitLoadOut.bits := intHitLoadOut.bits
val fpLoadOut = Wire(Valid(new ExuOutput))
......@@ -85,11 +85,12 @@ class StoreUnit_S1 extends XSModule {
io.lsq.bits := io.in.bits
io.lsq.bits.paddr := s1_paddr
io.lsq.bits.miss := false.B
io.lsq.bits.mmio := AddressSpace.isMMIO(s1_paddr)
io.lsq.bits.mmio := io.dtlbResp.bits.mmio
io.lsq.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
io.lsq.bits.uop.cf.exceptionVec(storeAccessFault) := io.dtlbResp.bits.excp.af.st
// mmio inst with exception will be written back immediately
val hasException = io.out.bits.uop.cf.exceptionVec.asUInt.orR
val hasException = selectStore(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR
io.out.valid := io.in.valid && (!io.out.bits.mmio || hasException) && !s1_tlb_miss
io.out.bits := io.lsq.bits
......@@ -101,6 +102,18 @@ class StoreUnit_S1 extends XSModule {
class StoreUnit_S2 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
io.in.ready := true.B
io.out.bits := io.in.bits
io.out.valid := io.in.valid
class StoreUnit_S3 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val stout = DecoupledIO(new ExuOutput) // writeback store
......@@ -133,6 +146,7 @@ class StoreUnit extends XSModule {
val store_s0 = Module(new StoreUnit_S0)
val store_s1 = Module(new StoreUnit_S1)
val store_s2 = Module(new StoreUnit_S2)
val store_s3 = Module(new StoreUnit_S3)
store_s0.io.in <> io.stin
store_s0.io.dtlbReq <> io.dtlb.req
......@@ -145,7 +159,9 @@ class StoreUnit extends XSModule {
PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
store_s2.io.stout <> io.stout
PipelineConnect(store_s2.io.out, store_s3.io.in, true.B, store_s2.io.out.bits.uop.roqIdx.needFlush(io.redirect))
store_s3.io.stout <> io.stout
private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = {
......@@ -13,9 +13,9 @@ trait HasSbufferCst extends HasXSParameter {
def s_prepare = 2.U(2.W)
def s_inflight = 3.U(2.W)
val evictCycle = 8192
val evictCycle = 1 << 20
val countBits = 1 + log2Up(evictCycle)
val countBits = log2Up(evictCycle+1)
val SbufferIndexWidth: Int = log2Up(StoreBufferSize)
// paddr = tag + offset
......@@ -108,6 +108,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq))) //Todo: store logic only support Width == 2 now
val dcache = new DCacheLineIO
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val sqempty = Input(Bool())
val flush = new Bundle {
val valid = Input(Bool())
val empty = Output(Bool())
......@@ -291,7 +292,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
do_eviction := validCount >= 12.U
io.flush.empty := empty
io.flush.empty := empty && io.sqempty
lru.io.flush := sbuffer_state === x_drain_sbuffer && empty
......@@ -48,10 +48,11 @@ package object xiangshan {
def apply() = UInt(log2Up(num).W)
def isIntExu(fuType: UInt) = !fuType(3)
def isIntExu(fuType: UInt) = !fuType(3)
def isJumpExu(fuType: UInt) = fuType === jmp
def isFpExu(fuType: UInt) = fuType(3, 2) === "b10".U
def isMemExu(fuType: UInt) = fuType(3, 2) === "b11".U
def isLoadExu(fuType: UInt) = fuType === ldu || fuType===mou
def isLoadExu(fuType: UInt) = fuType === ldu || fuType === mou
def isStoreExu(fuType: UInt) = fuType === stu
val functionNameMap = Map(
......@@ -108,4 +109,44 @@ package object xiangshan {
def flushItself(level: UInt) = level(0)
def isException(level: UInt) = level(1) && level(0)
object ExceptionVec {
def apply() = Vec(16, Bool())
object PMAMode {
def R = "b1".U << 0 //readable
def W = "b1".U << 1 //writeable
def X = "b1".U << 2 //executable
def I = "b1".U << 3 //cacheable: icache
def D = "b1".U << 4 //cacheable: dcache
def S = "b1".U << 5 //enable speculative access
def A = "b1".U << 6 //enable atomic operation, A imply R & W
def C = "b1".U << 7 //if it is cacheable is configable
def Reserved = "b0".U
def apply() = UInt(7.W)
def read(mode: UInt) = mode(0)
def write(mode: UInt) = mode(1)
def execute(mode: UInt) = mode(2)
def icache(mode: UInt) = mode(3)
def dcache(mode: UInt) = mode(4)
def speculate(mode: UInt) = mode(5)
def atomic(mode: UInt) = mode(6)
def configable_cache(mode: UInt) = mode(7)
def strToMode(s: String) = {
var result = 0.U << 8
if (s.toUpperCase.indexOf("R") >= 0) result = result + R
if (s.toUpperCase.indexOf("W") >= 0) result = result + W
if (s.toUpperCase.indexOf("X") >= 0) result = result + X
if (s.toUpperCase.indexOf("I") >= 0) result = result + I
if (s.toUpperCase.indexOf("D") >= 0) result = result + D
if (s.toUpperCase.indexOf("S") >= 0) result = result + S
if (s.toUpperCase.indexOf("A") >= 0) result = result + A
if (s.toUpperCase.indexOf("C") >= 0) result = result + C
......@@ -4,6 +4,7 @@
#include "ram.h"
#include "compress.h"
// #define TLB_UNITTEST
#include "cosimulation.h"
......@@ -84,8 +85,8 @@ void addpageSv39() {
//pdde[2] = ((0x80000000&0xc0000000) >> 2) | 0xf;
for(int i = 0; i < PTENUM ;i++) {
pde[i] = ((PTEADDR(i)&0xfffff000)>>2) | 0x1;
//pde[i] = (((0x8000000+i*2*1024*1024)&0xffe00000)>>2) | 0xf;
// pde[i] = ((PTEADDR(i)&0xfffff000)>>2) | 0x1;
pde[i] = (((0x80000000+i*2*1024*1024)&0xffe00000)>>2) | 0xf;
for(int outidx = 0; outidx < PTENUM; outidx++ ) {
......@@ -94,6 +95,7 @@ void addpageSv39() {
printf("try to add identical tlb page to ram\n");
......@@ -117,6 +119,12 @@ void init_ram(const char *img) {
//new add
//new end
int ret;
if (isGzFile(img)) {
printf("Gzip file detected and loading image from extracted gz file\n");
......@@ -143,12 +151,6 @@ void init_ram(const char *img) {
//new add
//new end
#if !defined(DRAMSIM3_CONFIG) || !defined(DRAMSIM3_OUTDIR)
#error DRAMSIM3_CONFIG or DRAMSIM3_OUTDIR is not defined
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册