提交 ad9f5370 编写于 作者: A Allen

Merge branch 'master' of github.com:RISCVERS/XiangShan into measure_L1_miss_latency

......@@ -27,7 +27,7 @@ help:
$(TOP_V): $(SCALA_FILE)
mkdir -p $(@D)
mill XiangShan.test.runMain $(SIMTOP) -X verilog -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform $(SIM_ARGS)
mill XiangShan.test.runMain $(SIMTOP) -X verilog -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform --remove-assert $(SIM_ARGS)
# mill XiangShan.runMain top.$(TOP) -X verilog -td $(@D) --output-file $(@F) --infer-rw $(FPGATOP) --repl-seq-mem -c:$(FPGATOP):-o:$(@D)/$(@F).conf
# $(MEM_GEN) $(@D)/$(@F).conf >> $@
# sed -i -e 's/_\(aw\|ar\|w\|r\|b\)_\(\|bits_\)/_\1/g' $@
......@@ -139,6 +139,8 @@ endif
SEED ?= $(shell shuf -i 1-10000 -n 1)
VME_SOURCE ?= $(shell pwd)
VME_MODULE ?=
# log will only be printed when (B<=GTimer<=E) && (L < loglevel)
# use 'emu -h' to see more details
......@@ -165,6 +167,23 @@ emu: $(EMU)
ls build
$(EMU) -i $(IMAGE) $(EMU_FLAGS)
# extract verilog module from sim_top.v
# usage: make vme VME_MODULE=Roq
vme: $(SIM_TOP_V)
mill XiangShan.runMain utils.ExtractVerilogModules -m $(VME_MODULE)
# usage: make phy_evaluate VME_MODULE=Roq REMOTE=100
phy_evaluate: vme
scp -r ./build/extracted/* $(REMOTE):~/phy_evaluation/remote_run/rtl
ssh -tt $(REMOTE) 'cd ~/phy_evaluation/remote_run && $(MAKE) evaluate DESIGN_NAME=$(VME_MODULE)'
scp -r $(REMOTE):~/phy_evaluation/remote_run/rpts ./build
# usage: make phy_evaluate_atc VME_MODULE=Roq REMOTE=100
phy_evaluate_atc: vme
scp -r ./build/extracted/* $(REMOTE):~/phy_evaluation/remote_run/rtl
ssh -tt $(REMOTE) 'cd ~/phy_evaluation/remote_run && $(MAKE) evaluate_atc DESIGN_NAME=$(VME_MODULE)'
scp -r $(REMOTE):~/phy_evaluation/remote_run/rpts ./build
cache:
$(MAKE) emu IMAGE=Makefile
......
......@@ -37,10 +37,11 @@ class AXI4RAM
val split = beatBytes / 8
val bankByte = memByte / split
val offsetBits = log2Up(memByte)
val offsetMask = (1 << offsetBits) - 1
require(address.length >= 1)
val baseAddress = address(0).base
def index(addr: UInt) = ((addr & offsetMask.U) >> log2Ceil(beatBytes)).asUInt()
def index(addr: UInt) = ((addr - baseAddress.U)(offsetBits - 1, 0) >> log2Ceil(beatBytes)).asUInt()
def inRange(idx: UInt) = idx < (memByte / beatBytes).U
......@@ -53,7 +54,7 @@ class AXI4RAM
val mems = (0 until split).map {_ => Module(new RAMHelper(bankByte))}
mems.zipWithIndex map { case (mem, i) =>
mem.io.clk := clock
mem.io.en := !reset.asBool()
mem.io.en := !reset.asBool() && (state === s_rdata)
mem.io.rIdx := (rIdx << log2Up(split)) + i.U
mem.io.wIdx := (wIdx << log2Up(split)) + i.U
mem.io.wdata := in.w.bits.data((i + 1) * 64 - 1, i * 64)
......
......@@ -75,9 +75,9 @@ class AXI4SlaveModuleImp[T<:Data](outer: AXI4SlaveModule[T])
assert(in.ar.bits.burst === AXI4Parameters.BURST_INCR, "only support busrt ince!")
}
private val s_idle :: s_rdata :: s_wdata :: s_wresp :: Nil = Enum(4)
val s_idle :: s_rdata :: s_wdata :: s_wresp :: Nil = Enum(4)
private val state = RegInit(s_idle)
val state = RegInit(s_idle)
switch(state){
is(s_idle){
......
package utils
/*
https://github.com/Lingrui98/scalaTage/blob/vme/src/main/scala/getVerilogModules.scala
*/
import scala.io.Source
import java.io._
import scala.language.postfixOps
import sys.process._
import sys._
/** Splits a single-file Verilog netlist into one file per module.
  *
  * The input is scanned line by line: every `module X(` ... `endmodule` span becomes a
  * [[ModuleRecord]] holding the module text and the submodules it instantiates. Starting
  * from a chosen top module, the instantiation tree is then walked and every reachable
  * module is written to `<outPath>/<module>.v`.
  */
class VerilogModuleExtractor {
  // Module header line; captures the module name.
  val modulePattern = "module ([\\w]+)\\(".r.unanchored
  // First line of a submodule instantiation; captures (type, instance name).
  val subMoudlePattern = "([\\w]+) ([\\w]+) \\((?: //.*)*\\Z".r.unanchored
  val endMoudleIOPattern = "\\);".r.unanchored
  val endMoudlePattern = "endmodule".r.unanchored

  // (submodule type, submodule instance name)
  type SubMoudleRecord = Tuple2[String, String]
  // (module text, one line per element; instantiated submodules)
  type ModuleRecord = Tuple2[List[String], List[SubMoudleRecord]]
  // module name -> record
  type ModuleMap = Map[String, ModuleRecord]

  def getLines(s: scala.io.BufferedSource): Iterator[String] = s.getLines()

  /** Consumes `s` completely and returns a record for every module found in it.
    *
    * Lines outside any module are reported on stdout and skipped. If the input ends
    * before a module's `endmodule`, a warning is printed and the lines collected so far
    * are kept as that module's record.
    */
  def makeRecord(s: Iterator[String]): ModuleMap = {
    // Collects the rest of one module; `firstLine` is the already-consumed header.
    def processModule(firstLine: String, it: Iterator[String]): ModuleRecord = {
      @scala.annotation.tailrec
      def iter(cont: List[String], subm: List[SubMoudleRecord]): ModuleRecord =
        if (!it.hasNext) {
          // Truncated input: keep what was read instead of failing on it.next().
          println(s"warning: input ended before 'endmodule' of: $firstLine")
          (cont, subm)
        } else {
          val l = it.next()
          l match {
            case endMoudlePattern() => (l :: cont, subm)
            case subMoudlePattern(ty, name) => iter(l :: cont, (ty, name) :: subm)
            case _ => iter(l :: cont, subm)
          }
        }
      // Lines were prepended while scanning, so restore source order for the text.
      val (revContent, submodules) = iter(List(firstLine), List())
      (revContent.reverse, submodules)
    }

    @scala.annotation.tailrec
    def traverse(m: ModuleMap, it: Iterator[String]): ModuleMap =
      if (!it.hasNext) m
      else {
        val l = it.next()
        l match {
          case modulePattern(name) =>
            traverse(m + (name -> processModule(l, it)), it)
          case _ =>
            println(f"line $l is not a module definition")
            traverse(m, it)
        }
      }

    traverse(Map(), s)
  }

  /** Reads `file` and returns its module records; the file is closed before returning. */
  def makeRecordFromFile(file: String): ModuleMap = {
    val bufSrc = Source.fromFile(file)
    // makeRecord drains the iterator eagerly, so closing right after it is safe.
    try makeRecord(bufSrc.getLines())
    finally bufSrc.close()
  }

  /** Writes the text of `record` to `dir + name + ".v"` (dir is used as a plain prefix). */
  def writeModuleToFile(name: String, record: ModuleRecord, dir: String) = {
    val path = dir+name+".v"
    val writer = new PrintWriter(new File(path))
    try {
      println(f"Writing module $name%20s to $path")
      record._1.foreach(r => writer.write(r + "\n"))
    } finally writer.close()
  }

  // get module definition of specified name; throws if `name` is not in `m`
  def getModule(name: String, m: ModuleMap): ModuleRecord = {
    m(name)
  }

  /** Prints the submodule list and then the full text of `r` to stdout. */
  def showModuleRecord(r: ModuleRecord) = {
    val (content, submodules) = r
    submodules.foreach {
      case (t, n) => println(f"submoudle type: $t, submodule name: $n")
    }
    println("\nprinting module contents...")
    content.foreach(println(_))
  }

  // Depth-first walk that writes `name` and everything below it.
  // Returns the set of module types already scheduled, so that a module shared by
  // several subtrees is written (and reported missing) only once.
  private def visitModule(name: String, map: ModuleMap, outPath: String,
                          done: Set[String], top: Tuple2[String, Boolean]): Set[String] = {
    val (topName, isTop) = top
    map.get(name) match {
      case None =>
        val who = if (isTop) "chosen top" else s"submodule of ${topName},"
        println(s"$who module $name does not exist!")
        done
      case Some(r) =>
        if (isTop) println(s"\nProcessing top module $name")
        new File(outPath).mkdirs() // ensure the path exists
        writeModuleToFile(name, r, outPath)
        val subTypes = r._2.map(_._1).toSet
        // Ancestors are removed from the map handed to descendants.
        val remaining = map - name
        val childTop = (if (isTop) name else topName, false)
        (subTypes -- done).foldLeft(done ++ subTypes) { (acc, s) =>
          visitModule(s, remaining, outPath, acc, childTop)
        }
    }
  }

  /** Writes module `name` and all modules reachable from it into `outPath`.
    *
    * We first get records of all the modules and their submodules (`map`), then use
    * `name` as the root of a DFS over the instantiation tree.
    *
    * @param doneSet module types to skip because they are handled elsewhere
    * @param top     (name of the top module of this walk, whether `name` is that top)
    */
  def processFromModule(name: String, map: ModuleMap, outPath: String, doneSet: Set[String] = Set(), top: Tuple2[String, Boolean]): Unit = {
    visitModule(name, map, outPath, doneSet, top)
    ()
  }

  /** Today's date as `yyyyMMdd`. */
  def getDate: String = {
    val d = java.time.LocalDate.now
    d.toString.toCharArray.filterNot(_ == '-').mkString
  }

  /** Builds `<outDir>/<date>-<user>-<topModule>/`.
    * An empty `outDir` yields a path relative to the working directory.
    */
  def makePath(topModule: String, outDir: String , user: String = "glr"): String = {
    val base = if (outDir.isEmpty || outDir.endsWith("/")) outDir else outDir + "/"
    base + getDate + "-" + user + "-" + topModule + "/"
  }

  /** Extracts one top module; `mapp` lets callers reuse an already parsed source. */
  def extract(src: String, topModule: String, outDir: String, user: String, mapp: Option[ModuleMap]): Unit = {
    val useMap = mapp.getOrElse(makeRecordFromFile(src))
    val path = makePath(topModule, outDir, user)
    processFromModule(topModule, useMap, path, top=(topModule, true))
  }

  /** Extracts several top modules from `src`, parsing the source only once. */
  def extract(src: String, topModules: List[String], outDir: String, user: String): Unit = {
    // avoid repeat
    val mapp = makeRecordFromFile(src)
    topModules.foreach(n => extract(src, n, outDir, user, Some(mapp)))
  }
}
/** Command-line parsing for the Verilog module extractor. */
trait VMEArgParser {
  // option name -> value if given on the command line
  type OptionMap = Map[String, Option[Any]]

  val usage = """
Usage: sbt "run [OPTION...]"
-s, --source the verilog file generated by chisel, all in one file
default: $NOOP_HOME/build/XSSimTop.v
-h, --help print this help info
-o, --output the place you want to store your extracted verilog
default: $NOOP_HOME/build/extracted
-u, --usr your name, will be used to name the output folder
default: current user
-m, --modules the top modules you would like to extract verilog from
should always be the last argument
default: IFU
"""

  /** Parses `args` into an [[OptionMap]] with keys "source", "output", "usr", "modules".
    *
    * Options without a value and unknown switches are reported and skipped; other stray
    * arguments (including empty strings) are ignored. `-h`/`--help` prints the usage
    * text and exits the JVM.
    */
  def parse(args: List[String]) = {
    def nextOption(map: OptionMap, l: List[String]): OptionMap = {
      // startsWith is safe for "", unlike indexing the first character
      def isSwitch(s: String) = s.startsWith("-")
      l match {
        case Nil => map
        case ("--help" | "-h") :: _ =>
          println(usage)
          sys.exit()
        case ("--source" | "-s") :: file :: tail =>
          nextOption(map ++ Map("source" -> Some(file)), tail)
        case ("--output" | "-o") :: path :: tail =>
          nextOption(map ++ Map("output" -> Some(path)), tail)
        case ("--usr" | "-u") :: name :: tail =>
          nextOption(map ++ Map("usr" -> Some(name)), tail)
        // this should always be the last argument, since it is length variable:
        // everything after the switch is taken as a module name
        case ("--modules" | "-m") :: m :: tail =>
          map ++ Map("modules" -> Some(m :: tail))
        case s :: tail =>
          if (isSwitch(s)) println(s"unexpected argument $s")
          nextOption(map, tail)
      }
    }
    nextOption(Map("source" -> None, "output" -> None, "usr" -> None, "modules" -> None), args)
  }

  /** Parses `args` and fills in defaults.
    *
    * @return (source file, top modules, output directory, user name). The source and
    *         output defaults read the NOOP_HOME environment variable and the user
    *         default runs `whoami`; each default is evaluated only when its option is
    *         absent.
    */
  def wrapParams(args: Array[String]): (String, List[String], String, String) = {
    val argL = args.toList
    val paramMap = parse(argL)
    (paramMap("source").map(_.asInstanceOf[String]).getOrElse(env("NOOP_HOME")+"/build/XSSimTop.v"),
     paramMap("modules").map(_.asInstanceOf[List[String]]).getOrElse(List("IFU")),
     paramMap("output").map(_.asInstanceOf[String]).getOrElse(env("NOOP_HOME")+"/build/extracted/"),
     // .init drops the trailing newline of the command output
     paramMap("usr").map(_.asInstanceOf[String]).getOrElse("whoami".!!.init))
  }
}
/** Command-line entry point: parse the arguments, then extract the requested modules. */
object ExtractVerilogModules extends VMEArgParser {
  def main(args: Array[String]): Unit = {
    val extractor = new VerilogModuleExtractor()
    // (source file, top modules, output directory, user name)
    val params = wrapParams(args)
    extractor.extract(params._1, params._2, params._3, params._4)
  }
}
......@@ -52,7 +52,7 @@ case class XSCoreParameters
RenameWidth: Int = 6,
CommitWidth: Int = 6,
BrqSize: Int = 32,
IssQueSize: Int = 8,
IssQueSize: Int = 12,
NRPhyRegs: Int = 160,
NRIntReadPorts: Int = 14,
NRIntWritePorts: Int = 8,
......
......@@ -61,7 +61,7 @@ class ReservationStationCtrl
feedback: Boolean,
fixedDelay: Int,
replayDelay: Int = 10
) extends XSModule {
) extends XSModule with HasCircularQueuePtrHelper {
val iqSize = IssQueSize
val iqIdxWidth = log2Up(iqSize)
......@@ -95,7 +95,8 @@ class ReservationStationCtrl
val cntQueue = Reg(Vec(iqSize, UInt(log2Up(replayDelay).W)))
// rs queue part:
val tailPtr = RegInit(0.U((iqIdxWidth+1).W))
// val tailPtr = RegInit(0.U((iqIdxWidth+1).W))
val tailPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(iqSize)))
val idxQueue = RegInit(VecInit((0 until iqSize).map(_.U(iqIdxWidth.W))))
val readyQueue = VecInit(srcQueue.zip(validQueue).map{ case (a,b) => Cat(a).andR & b })
......@@ -133,7 +134,7 @@ class ReservationStationCtrl
Mux(notBlock, !selectedIdxRegOH(i), true.B)
)))
val (firstBubble, findBubble) = PriorityEncoderWithFlag(bubMask)
haveBubble := findBubble && (firstBubble < tailPtr)
haveBubble := findBubble && (firstBubble < tailPtr.asUInt)
val bubValid = haveBubble
val bubReg = RegNext(bubValid)
val bubIdxReg = RegNext(firstBubble - moveMask(firstBubble))
......@@ -204,7 +205,7 @@ class ReservationStationCtrl
// enq
val tailAfterRealDeq = tailPtr - (issFire && !needFeedback|| bubReg)
val isFull = tailAfterRealDeq.head(1).asBool() // tailPtr===qsize.U
val isFull = tailAfterRealDeq.flag // tailPtr===qsize.U
tailPtr := tailAfterRealDeq + io.enqCtrl.fire()
io.enqCtrl.ready := !isFull && !io.redirect.valid // TODO: check this redirect && need more optimization
......@@ -213,7 +214,7 @@ class ReservationStationCtrl
val srcTypeSeq = Seq(enqUop.ctrl.src1Type, enqUop.ctrl.src2Type, enqUop.ctrl.src3Type)
val srcStateSeq = Seq(enqUop.src1State, enqUop.src2State, enqUop.src3State)
val enqIdx_ctrl = tailAfterRealDeq.tail(1)
val enqIdx_ctrl = tailAfterRealDeq.value
val enqBpVec = io.data.srcUpdate(IssQueSize)
def stateCheck(src: UInt, srcType: UInt): Bool = {
......@@ -245,19 +246,19 @@ class ReservationStationCtrl
}
// other to Data
io.data.enqPtr := idxQueue(Mux(tailPtr.head(1).asBool, deqIdx, tailPtr.tail(1)))
io.data.enqPtr := idxQueue(Mux(tailPtr.flag, deqIdx, tailPtr.value))
io.data.deqPtr.valid := selValid
io.data.deqPtr.bits := idxQueue(selectedIdxWire)
io.data.enqCtrl.valid := io.enqCtrl.fire
io.data.enqCtrl.bits := io.enqCtrl.bits
// other io
io.numExist := tailPtr
io.numExist := Mux(tailPtr.flag, (iqSize-1).U, tailPtr.value) // NOTE: numExist is iqIdxWidth.W, maybe a bug
// assert
assert(RegNext(tailPtr <= iqSize.U))
assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
val print = !(tailPtr===0.U) || io.enqCtrl.valid
val print = !(tailPtr.asUInt===0.U) || io.enqCtrl.valid
XSDebug(print || true.B, p"In(${io.enqCtrl.valid} ${io.enqCtrl.ready}) Out(${issValid} ${io.data.fuReady})\n")
XSDebug(print , p"tailPtr:${tailPtr} tailPtrAdq:${tailAfterRealDeq} isFull:${isFull} " +
p"needFeed:${needFeedback} vQue:${Binary(VecInit(validQueue).asUInt)} rQue:${Binary(readyQueue.asUInt)}\n")
......
......@@ -56,7 +56,7 @@ class PermBundle(val hasV: Boolean = true) extends TlbBundle {
if (hasV) { val v = Bool() }
override def toPrintable: Printable = {
p"d:${d} a:${a} g:${g} u:${u} x:${x} w:${w} r:${r}"// +
p"d:${d} a:${a} g:${g} u:${u} x:${x} w:${w} r:${r}"// +
//(if(hasV) (p"v:${v}") else p"")
}
}
......@@ -114,6 +114,73 @@ class TlbEntry extends TlbBundle {
}
}
// One line of `num` TLB entries that share a single tag and page level.
// Per-entry state is the physical page number, the permission bits and a valid bit.
class TlbEntires(num: Int, tagLen: Int) extends TlbBundle {
// elaboration-time check that log2Up and log2Down agree for num
require(log2Up(num)==log2Down(num))
/* vpn can be divided into three parts */
// vpn: tagPart + addrPart
// number of vpn bits used to pick an entry inside the line
val cutLen = log2Up(num)
val tag = UInt(tagLen.W) // NOTE: high part of vpn
val level = UInt(log2Up(Level).W)
val ppns = Vec(num, UInt(ppnLen.W))
val perms = Vec(num, new PermBundle(hasV = false))
val vs = Vec(num, Bool())
// Derives the stored tag from a full vpn: picks one of three concatenations by level
// and keeps the low tagLen bits of the result.
// NOTE(review): `0.U(n)` with an Int argument is Chisel bit extraction, not a width
// specifier — confirm whether `0.U(n.W)` was intended here.
def tagClip(vpn: UInt, level: UInt) = { // full vpn => tagLen
Mux(level===0.U, Cat(vpn(vpnLen-1, vpnnLen*2+cutLen), 0.U(vpnnLen*2+cutLen)),
Mux(level===1.U, Cat(vpn(vpnLen-1, vpnnLen*1+cutLen), 0.U(vpnnLen*1+cutLen)),
Cat(vpn(vpnLen-1, vpnnLen*0+cutLen), 0.U(vpnnLen*0+cutLen))))(tagLen-1, 0)
}
// NOTE: get the index inside the line: cutLen bits of vpn, taken at an offset of
// vpnnLen*2, vpnnLen*1 or 0 for level 0, 1 or anything else
def idxClip(vpn: UInt, level: UInt) = {
Mux(level===0.U, vpn(vpnnLen*2+cutLen-1, vpnnLen*2),
Mux(level===1.U, vpn(vpnnLen*1+cutLen-1, vpnnLen*1),
vpn(vpnnLen*0+cutLen-1, vpnnLen*0)))
}
// Hit when the stored tag equals the clipped vpn and the selected entry is valid.
def hit(vpn: UInt) = {
(tag === tagClip(vpn, level)) && vs(idxClip(vpn, level))
}
// Builds a line from `data`, which is sliced into num XLEN-wide PTEs.
// Note the require checks data.getWidth / XLEN == num exactly, although its message
// speaks of a multiple.
def genEntries(data: UInt, level: UInt, vpn: UInt): TlbEntires = {
require((data.getWidth / XLEN) == num,
"input data length must be multiple of pte length")
assert(level=/=3.U, "level should not be 3")
val ts = Wire(new TlbEntires(num, tagLen))
ts.tag := tagClip(vpn, level)
ts.level := level
for (i <- 0 until num) {
val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)
ts.ppns(i) := pte.ppn
ts.perms(i):= pte.perm // this.perms has no v
ts.vs(i) := !pte.isPf(level) && pte.isLeaf() // legal and leaf, store to l2Tlb
}
ts
}
// Returns the entry selected by vpn as a single TlbEntry.
def get(vpn: UInt): TlbEntry = {
val t = Wire(new TlbEntry())
val idx = idxClip(vpn, level)
t.vpn := vpn // Note: Use input vpn, not vpn in TlbL2
t.ppn := ppns(idx)
t.level := level
t.perm := perms(idx)
t
}
override def cloneType: this.type = (new TlbEntires(num, tagLen)).asInstanceOf[this.type]
// Debug formatting; hard-coded for exactly four entries.
override def toPrintable: Printable = {
require(num == 4, "if num is not 4, please comment this toPrintable")
// NOTE: if num is not 4, please comment this toPrintable
p"tag:${Hexadecimal(tag)} level:${level} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))}" +
p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} " +
p"perms(0):${perms(0)} perms(1):${perms(1)} perms(2):${perms(2)} perms(3):${perms(3)} vs:${Binary(vs.asUInt)}"
}
}
object TlbCmd {
def read = "b00".U
def write = "b01".U
......@@ -388,7 +455,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// assert(req(i).bits.vaddr===resp(i).bits.paddr, "vaddr:0x%x paddr:0x%x hitVec:%x ", req(i).bits.vaddr, resp(i).bits.paddr, VecInit(hitVec(i)).asUInt)
// } // FIXME: remove me when tlb may be ok
// }
// assert((v&pf)===0.U, "v and pf can't be true at same time: v:0x%x pf:0x%x", v, pf)
}
......@@ -403,12 +470,12 @@ object TLB {
shouldBlock: Boolean
) = {
require(in.length == width)
val tlb = Module(new TLB(width, isDtlb))
tlb.io.sfence <> sfence
tlb.io.csr <> csr
if (!shouldBlock) { // dtlb
for (i <- 0 until width) {
tlb.io.requestor(i) <> in(i)
......
......@@ -11,6 +11,23 @@ import freechips.rocketchip.tilelink.{TLClientNode, TLMasterParameters, TLMaster
trait HasPtwConst extends HasTlbConst with MemoryOpConstants{
val PtwWidth = 2
val MemBandWidth = 256 // TODO: change to IO bandwidth param
val TlbL2LineSize = MemBandWidth/XLEN
val TlbL2LineNum = TlbL2EntrySize/TlbL2LineSize
val PtwL2LineSize = MemBandWidth/XLEN
val PtwL2LineNum = PtwL2EntrySize/PtwL2LineSize
val PtwL1TagLen = PAddrBits - log2Up(XLEN/8)
val PtwL2TagLen = PAddrBits - log2Up(XLEN/8) - log2Up(PtwL2EntrySize)
val TlbL2TagLen = vpnLen - log2Up(TlbL2EntrySize)
// Index used to address ptwl2: the bits of addr from
// log2Up(PtwL2LineSize)+log2Up(XLEN/8) up to log2Up(PtwL2EntrySize)-1+log2Up(XLEN/8),
// i.e. the field between the tag and the index inside a line.
def genPtwL2Idx(addr: UInt) = {
/* tagLen :: outSizeIdxLen :: insideIdxLen*/
addr(log2Up(PtwL2EntrySize)-1+log2Up(XLEN/8), log2Up(PtwL2LineSize)+log2Up(XLEN/8))
}
// Index used to address tlbl2: log2Up(TlbL2LineNum) bits of vpn, starting just above
// the low log2Up(TlbL2LineSize) bits.
def genTlbL2Idx(vpn: UInt) = {
vpn(log2Up(TlbL2LineNum)-1+log2Up(TlbL2LineSize), 0+log2Up(TlbL2LineSize))
}
def MakeAddr(ppn: UInt, off: UInt) = {
require(off.getWidth == 9)
......@@ -41,12 +58,19 @@ class PteBundle extends PtwBundle{
val v = Bool()
}
def isPf() = {
!perm.v || (!perm.r && perm.w)
def unaligned(level: UInt) = {
assert(level=/=3.U)
isLeaf() && !(level === 2.U ||
level === 1.U && ppn(vpnnLen-1, 0) === 0.U ||
level === 0.U && ppn(vpnnLen*2-1, 0) === 0.U)
}
def isPf(level: UInt) = {
!perm.v || (!perm.r && perm.w) || unaligned(level)
}
def isLeaf() = {
!isPf() && (perm.r || perm.x)
perm.r || perm.x || perm.w
}
override def toPrintable: Printable = {
......@@ -57,9 +81,7 @@ class PteBundle extends PtwBundle{
class PtwEntry(tagLen: Int) extends PtwBundle {
val tag = UInt(tagLen.W)
val ppn = UInt(ppnLen.W)
val perm = new PermBundle
// TODO: add superpage
def hit(addr: UInt) = {
require(addr.getWidth >= PAddrBits)
tag === addr(PAddrBits-1, PAddrBits-tagLen)
......@@ -68,21 +90,69 @@ class PtwEntry(tagLen: Int) extends PtwBundle {
def refill(addr: UInt, pte: UInt) {
tag := addr(PAddrBits-1, PAddrBits-tagLen)
ppn := pte.asTypeOf(pteBundle).ppn
perm := pte.asTypeOf(pteBundle).perm
}
def genPtwEntry(addr: UInt, pte: UInt) = {
val e = Wire(new PtwEntry(tagLen))
e.tag := addr(PAddrBits-1, PAddrBits-tagLen)
e.ppn := pte.asTypeOf(pteBundle).ppn
e.perm := pte.asTypeOf(pteBundle).perm
e
}
override def cloneType: this.type = (new PtwEntry(tagLen)).asInstanceOf[this.type]
override def toPrintable: Printable = {
p"tag:0x${Hexadecimal(tag)} ppn:0x${Hexadecimal(ppn)} perm:${perm}"
// p"tag:0x${Hexadecimal(tag)} ppn:0x${Hexadecimal(ppn)} perm:${perm}"
p"tag:0x${Hexadecimal(tag)} ppn:0x${Hexadecimal(ppn)}"
}
}
// One line of `num` page-table-walker cache entries that share a single tag.
// Each entry keeps a physical page number and a valid bit.
class PtwEntries(num: Int, tagLen: Int) extends PtwBundle {
// elaboration-time check that log2Up and log2Down agree for num
require(log2Up(num)==log2Down(num))
val tag = UInt(tagLen.W)
val ppns = Vec(num, UInt(ppnLen.W))
val vs = Vec(num, Bool())
// The tag is the top tagLen bits of a PAddrBits-wide address.
def tagClip(addr: UInt) = {
require(addr.getWidth==PAddrBits)
addr(PAddrBits-1, PAddrBits-tagLen)
}
// Hit when the stored tag equals the address tag and entry idx is valid.
def hit(idx: UInt, addr: UInt) = {
require(idx.getWidth == log2Up(num), s"PtwEntries.hit: error idx width idxWidth:${idx.getWidth} num:${num}")
(tag === tagClip(addr)) && vs(idx)
}
// Builds a line from `data`, which is sliced into num XLEN-wide PTEs.
// Note the require checks data.getWidth / XLEN == num exactly, although its message
// speaks of a multiple.
def genEntries(addr: UInt, data: UInt, level: UInt): PtwEntries = {
require((data.getWidth / XLEN) == num,
"input data length must be multiple of pte length")
val ps = Wire(new PtwEntries(num, tagLen))
ps.tag := tagClip(addr)
for (i <- 0 until num) {
val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)
ps.ppns(i) := pte.ppn
// an entry is valid only for a PTE that is neither a page fault nor a leaf
ps.vs(i) := !pte.isPf(level) && !pte.isLeaf()
}
ps
}
// Returns (valid bit, ppn) of entry idx.
def get(idx: UInt) = {
require(idx.getWidth == log2Up(num), s"PtwEntries.get: error idx width idxWidth:${idx.getWidth} num:${num}")
(vs(idx), ppns(idx))
}
override def cloneType: this.type = (new PtwEntries(num, tagLen)).asInstanceOf[this.type]
// Debug formatting; hard-coded for exactly four entries.
override def toPrintable: Printable = {
require(num == 4, "if num is not 4, please comment this toPrintable")
// NOTE: if num is not 4, please comment this toPrintable
p"tag:${Hexadecimal(tag)} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))}" +
p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} vs:${Binary(vs.asUInt)}"
}
}
......@@ -153,7 +223,6 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
val req = RegEnable(arb.io.out.bits, arb.io.out.fire())
val resp = VecInit(io.tlb.map(_.resp))
val valid = ValidHold(arb.io.out.fire(), resp(arbChosen).fire())
val validOneCycle = OneCycleValid(arb.io.out.fire())
arb.io.out.ready := !valid// || resp(arbChosen).fire()
......@@ -166,27 +235,21 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
// two level: l2-tlb-cache && pde/pte-cache
// l2-tlb-cache is ram-larger-edition tlb
// pde/pte-cache is cache of page-table, speeding up ptw
// may seperate valid bits to speed up sfence's flush
// Reg/Mem/SyncReadMem is not sure now
val tagLen1 = PAddrBits - log2Up(XLEN/8)
val tagLen2 = PAddrBits - log2Up(XLEN/8) - log2Up(PtwL2EntrySize)
// val tlbl2 = SyncReadMem(TlbL2EntrySize, new TlbEntry)
val tlbl2 = Module(new SRAMTemplate(new TlbEntry, set = TlbL2EntrySize))
val tlbv = RegInit(0.U(TlbL2EntrySize.W)) // valid
val tlbg = RegInit(0.U(TlbL2EntrySize.W)) // global
val ptwl1 = Reg(Vec(PtwL1EntrySize, new PtwEntry(tagLen = tagLen1)))
val tlbl2 = Module(new SRAMTemplate(new TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen), set = TlbL2LineNum)) // (total 256, one line is 4 => 64 lines)
val tlbv = RegInit(0.U(TlbL2LineNum.W)) // valid
val tlbg = Reg(UInt(TlbL2LineNum.W)) // global
val ptwl1 = Reg(Vec(PtwL1EntrySize, new PtwEntry(tagLen = PtwL1TagLen)))
val l1v = RegInit(0.U(PtwL1EntrySize.W)) // valid
val l1g = VecInit((ptwl1.map(_.perm.g))).asUInt
// val ptwl2 = SyncReadMem(PtwL2EntrySize, new PtwEntry(tagLen = tagLen2)) // NOTE: the Mem could be only single port(r&w)
val ptwl2 = Module(new SRAMTemplate(new PtwEntry(tagLen = tagLen2), set = PtwL2EntrySize))
val l2v = RegInit(0.U(PtwL2EntrySize.W)) // valid
val l2g = RegInit(0.U(PtwL2EntrySize.W)) // global
val l1g = Reg(UInt(PtwL1EntrySize.W))
val ptwl2 = Module(new SRAMTemplate(new PtwEntries(num = PtwL2LineSize, tagLen = PtwL2TagLen), set = PtwL2LineNum)) // (total 256, one line is 4 => 64 lines)
val l2v = RegInit(0.U(PtwL2LineNum.W)) // valid
val l2g = Reg(UInt(PtwL2LineNum.W)) // global
// mem alias
// val memRdata = mem.d.bits.data
val memRdata = Wire(UInt(XLEN.W))
val memPte = memRdata.asTypeOf(new PteBundle)
val memRdata = mem.d.bits.data
val memSelData = Wire(UInt(XLEN.W))
val memPte = memSelData.asTypeOf(new PteBundle)
val memPtes =(0 until TlbL2LineSize).map(i => memRdata((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle))
val memValid = mem.d.valid
val memRespReady = mem.d.ready
val memRespFire = mem.d.fire()
......@@ -205,26 +268,27 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
* tlbl2
*/
val (tlbHit, tlbHitData) = {
// tlbl2 is by addr
// TODO: optimize tlbl2'l2 tag len
assert(tlbl2.io.r.req.ready)
val ridx = genTlbL2Idx(req.vpn)
val vidx = RegEnable(tlbv(ridx), validOneCycle)
tlbl2.io.r.req.valid := validOneCycle
tlbl2.io.r.req.bits.apply(setIdx = req.vpn(log2Up(TlbL2EntrySize-1), 0))
tlbl2.io.r.req.bits.apply(setIdx = ridx)
val ramData = tlbl2.io.r.resp.data(0)
// val ramData = tlbl2.r(req.vpn(log2Up(TlbL2EntrySize)-1, 0), validOneCycle)
val vidx = RegEnable(tlbv(req.vpn(log2Up(TlbL2EntrySize)-1, 0)), validOneCycle)
(ramData.hit(req.vpn) && vidx, ramData) // TODO: optimize tag
// TODO: add exception and refill
XSDebug(tlbl2.io.r.req.valid, p"tlbl2 Read rIdx:${Hexadecimal(ridx)}\n")
XSDebug(RegNext(tlbl2.io.r.req.valid), p"tlbl2 RamData:${ramData}\n")
XSDebug(RegNext(tlbl2.io.r.req.valid), p"tlbl2 v:${vidx} hit:${ramData.hit(req.vpn)} tlbPte:${ramData.get(req.vpn)}\n")
(ramData.hit(req.vpn) && vidx, ramData.get(req.vpn))
}
/*
* ptwl1
*/
val l1addr = MakeAddr(satp.ppn, getVpnn(req.vpn, 2))
val (l1Hit, l1HitData) = { // TODO: add excp
// 16 terms may casue long latency, so divide it into 2 stage, like l2tlb
val (l1Hit, l1HitData) = {
val hitVecT = ptwl1.zipWithIndex.map{case (a,b) => a.hit(l1addr) && l1v(b) }
val hitVec = hitVecT.map(RegEnable(_, validOneCycle)) // TODO: could have useless init value
val hitVec = hitVecT.map(RegEnable(_, validOneCycle))
val hitData = ParallelMux(hitVec zip ptwl1)
val hit = ParallelOR(hitVec).asBool
(hit, hitData)
......@@ -236,17 +300,21 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
val l1MemBack = memRespFire && state===state_wait_resp && level===0.U
val l1Res = Mux(l1Hit, l1HitData.ppn, RegEnable(memPte.ppn, l1MemBack))
val l2addr = MakeAddr(l1Res, getVpnn(req.vpn, 1))
val (l2Hit, l2HitData) = { // TODO: add excp
val readRam = (l1Hit && level===0.U && state===state_req) || (memRespFire && state===state_wait_resp && level===0.U)
val ridx = l2addr(log2Up(PtwL2EntrySize)-1+log2Up(XLEN/8), log2Up(XLEN/8))
val (l2Hit, l2HitPPN) = {
val readRam = (!tlbHit && l1Hit && level===0.U && state===state_req) || (memRespFire && state===state_wait_resp && level===0.U)
val ridx = genPtwL2Idx(l2addr)
val idx = RegEnable(l2addr(log2Up(PtwL2LineSize)+log2Up(XLEN/8)-1, log2Up(XLEN/8)), readRam)
val vidx = RegEnable(l2v(ridx), readRam)
assert(ptwl2.io.r.req.ready)
ptwl2.io.r.req.valid := readRam
ptwl2.io.r.req.bits.apply(setIdx = ridx)
val ramData = ptwl2.io.r.resp.data(0)
// val ramData = ptwl2.read(ridx, readRam)
val vidx = RegEnable(l2v(ridx), readRam)
(ramData.hit(l2addr) && vidx, ramData) // TODO: optimize tag
XSDebug(ptwl2.io.r.req.valid, p"ptwl2 rIdx:${Hexadecimal(ridx)}\n")
XSDebug(RegNext(ptwl2.io.r.req.valid), p"ptwl2 RamData:${ramData}\n")
XSDebug(RegNext(ptwl2.io.r.req.valid), p"ptwl2 v:${vidx} hit:${ramData.hit(idx, l2addr)}\n")
(ramData.hit(idx, l2addr) && vidx, ramData.get(idx)._2) // TODO: optimize tag
}
/* ptwl3
......@@ -255,7 +323,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
* if l2-tlb does not hit, ptwl3 would not hit (mostly)
*/
val l2MemBack = memRespFire && state===state_wait_resp && level===1.U
val l2Res = Mux(l2Hit, l2HitData.ppn, RegEnable(memPte.ppn, l2MemBack))
val l2Res = Mux(l2Hit, l2HitPPN, RegEnable(memPte.ppn, l2MemBack))
val l3addr = MakeAddr(l2Res, getVpnn(req.vpn, 0))
/*
......@@ -280,7 +348,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
state := state_wait_ready
}
} .elsewhen (l1Hit && level===0.U || l2Hit && level===1.U) {
level := levelNext // TODO: consider superpage
level := levelNext
} .elsewhen (memReqReady && !sfenceLatch) {
state := state_wait_resp
}
......@@ -288,13 +356,13 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
is (state_wait_resp) {
when (memRespFire) {
when (memPte.isLeaf() || memPte.isPf()) {
when (memPte.isLeaf() || memPte.isPf(level)) {
when (resp(arbChosen).ready) {
state := state_idle
}.otherwise {
state := state_wait_ready
latch.entry := new TlbEntry().genTlbEntry(memRdata, level, req.vpn)
latch.pf := memPte.isPf()
latch.pf := memPte.isPf(level)
}
}.otherwise {
level := levelNext
......@@ -331,24 +399,27 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
lgSize = log2Up(l1BusDataWidth/8).U
)._2
mem.a.bits := pteRead
mem.a.valid := state === state_req &&
mem.a.valid := state === state_req &&
((level===0.U && !tlbHit && !l1Hit) ||
(level===1.U && !l2Hit) ||
(level===2.U)) && !sfenceLatch && !sfence.valid
mem.d.ready := state === state_wait_resp || sfenceLatch
val memAddrLatch = RegEnable(memAddr, mem.a.valid)
memRdata := (mem.d.bits.data >> (memAddrLatch(log2Up(l1BusDataWidth/8) - 1, log2Up(XLEN/8)) << log2Up(XLEN)))(XLEN - 1, 0)
memSelData := memRdata.asTypeOf(Vec(MemBandWidth/XLEN, UInt(XLEN.W)))(memAddrLatch(log2Up(l1BusDataWidth/8) - 1, log2Up(XLEN/8)))
/*
* resp
*/
val ptwFinish = (state===state_req && tlbHit && level===0.U) || ((memPte.isLeaf() || memPte.isPf() || (!memPte.isLeaf() && level===2.U)) && memRespFire && !sfenceLatch) || state===state_wait_ready
val ptwFinish = (state===state_req && tlbHit && level===0.U) ||
((memPte.isLeaf() || memPte.isPf(level) ||
(!memPte.isLeaf() && level===2.U)) && memRespFire && !sfenceLatch) ||
state===state_wait_ready
for(i <- 0 until PtwWidth) {
resp(i).valid := valid && arbChosen===i.U && ptwFinish // TODO: add resp valid logic
resp(i).bits.entry := Mux(tlbHit, tlbHitData,
Mux(state===state_wait_ready, latch.entry, new TlbEntry().genTlbEntry(memRdata, Mux(level===3.U, 2.U, level), req.vpn)))
resp(i).bits.pf := Mux(level===3.U || notFound, true.B, Mux(tlbHit, false.B, Mux(state===state_wait_ready, latch.pf, memPte.isPf())))
Mux(state===state_wait_ready, latch.entry, new TlbEntry().genTlbEntry(memSelData, Mux(level===3.U, 2.U, level), req.vpn)))
resp(i).bits.pf := Mux(level===3.U || notFound, true.B, Mux(tlbHit, false.B, Mux(state===state_wait_ready, latch.pf, memPte.isPf(level))))
// TODO: the pf must not be correct, check it
}
......@@ -360,32 +431,45 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
ptwl2.io.w.req.valid := false.B
tlbl2.io.w.req.valid := false.B
assert(!memRespFire || (state===state_wait_resp || sfenceLatch))
when (memRespFire && !memPte.isPf() && !sfenceLatch) {
when (memRespFire && !memPte.isPf(level) && !sfenceLatch) {
when (level===0.U && !memPte.isLeaf) {
val refillIdx = LFSR64()(log2Up(PtwL1EntrySize)-1,0) // TODO: may be LRU
ptwl1(refillIdx).refill(l1addr, memRdata)
ptwl1(refillIdx).refill(l1addr, memSelData)
l1v := l1v | UIntToOH(refillIdx)
l1g := (l1g & ~UIntToOH(refillIdx)) | Mux(memPte.perm.g, UIntToOH(refillIdx), 0.U)
}
when (level===1.U && !memPte.isLeaf) {
val l2addrStore = RegEnable(l2addr, memReqFire && state===state_req && level===1.U)
val refillIdx = getVpnn(req.vpn, 1)(log2Up(PtwL2EntrySize)-1, 0)
val refillIdx = genPtwL2Idx(l2addrStore) //getVpnn(req.vpn, 1)(log2Up(PtwL2EntrySize)-1, 0)
//TODO: check why the old refillIdx is right
assert(ptwl2.io.w.req.ready)
// ptwl2.io.w.req.valid := true.B
ptwl2.io.w.apply(valid = true.B, setIdx = refillIdx, data = new PtwEntry(tagLen2).genPtwEntry(l2addrStore, memRdata), waymask = -1.S.asUInt)
// ptwl2.write(refillIdx, new PtwEntry(tagLen2).genPtwEntry(l2addrStore, memRdata))
val ps = new PtwEntries(PtwL2LineSize, PtwL2TagLen).genEntries(l2addrStore, memRdata, level)
ptwl2.io.w.apply(
valid = true.B,
setIdx = refillIdx,
data = ps,
waymask = -1.S.asUInt
)
l2v := l2v | UIntToOH(refillIdx)
l2g := l2g | Mux(memPte.perm.g, UIntToOH(refillIdx), 0.U)
l2g := (l2g & ~UIntToOH(refillIdx)) | Mux(Cat(memPtes.map(_.perm.g)).andR, UIntToOH(refillIdx), 0.U)
XSDebug(p"ptwl2 RefillIdx:${Hexadecimal(refillIdx)} ps:${ps}\n")
}
when (memPte.isLeaf()) {
val refillIdx = getVpnn(req.vpn, 0)(log2Up(TlbL2EntrySize)-1, 0)
val refillIdx = genTlbL2Idx(req.vpn)//getVpnn(req.vpn, 0)(log2Up(TlbL2EntrySize)-1, 0)
//TODO: check why the old refillIdx is right
assert(tlbl2.io.w.req.ready)
// tlbl2.io.w.req.valid := true.B
tlbl2.io.w.apply(valid = true.B, setIdx = refillIdx, data = new TlbEntry().genTlbEntry(memRdata, level, req.vpn), waymask = -1.S.asUInt)
// tlbl2.write(refillIdx, new TlbEntry().genTlbEntry(memRdata, level, req.vpn))
val ts = new TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn)
tlbl2.io.w.apply(
valid = true.B,
setIdx = refillIdx,
data = ts,
waymask = -1.S.asUInt
)
tlbv := tlbv | UIntToOH(refillIdx)
tlbg := tlbg | Mux(memPte.perm.g, UIntToOH(refillIdx), 0.U)
tlbg := (tlbg & ~UIntToOH(refillIdx)) | Mux(Cat(memPtes.map(_.perm.g)).andR, UIntToOH(refillIdx), 0.U)
XSDebug(p"tlbl2 refillIdx:${Hexadecimal(refillIdx)} ts:${ts}\n")
}
}
......@@ -447,18 +531,25 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
XSDebug(false, validOneCycle, p"(v:${validOneCycle} r:${arb.io.out.ready}) vpn:0x${Hexadecimal(req.vpn)}\n")
XSDebug(resp(arbChosen).fire(), "**Ptw Resp to ")
PrintFlag(resp(arbChosen).fire(), arbChosen===0.U, "DTLB**:\n", "ITLB**\n")
XSDebug(resp(arbChosen).fire(), p"(v:${resp(arbChosen).valid} r:${resp(arbChosen).ready}) entry:${resp(arbChosen).bits.entry} pf:${resp(arbChosen).bits.pf}\n")
XSDebug(resp(arbChosen).fire(), p"(v:${resp(arbChosen).valid} r:${resp(arbChosen).ready})" +
p" entry:${resp(arbChosen).bits.entry} pf:${resp(arbChosen).bits.pf}\n")
XSDebug(sfence.valid, p"Sfence: sfence instr here ${sfence.bits}\n")
XSDebug(valid, p"CSR: ${csr}\n")
XSDebug(valid, p"vpn2:0x${Hexadecimal(getVpnn(req.vpn, 2))} vpn1:0x${Hexadecimal(getVpnn(req.vpn, 1))} vpn0:0x${Hexadecimal(getVpnn(req.vpn, 0))}\n")
XSDebug(valid, p"state:${state} level:${level} tlbHit:${tlbHit} l1addr:0x${Hexadecimal(l1addr)} l1Hit:${l1Hit} l2addr:0x${Hexadecimal(l2addr)} l2Hit:${l2Hit} l3addr:0x${Hexadecimal(l3addr)} memReq(v:${mem.a.valid} r:${mem.a.ready})\n")
XSDebug(valid, p"vpn2:0x${Hexadecimal(getVpnn(req.vpn, 2))} vpn1:0x${Hexadecimal(getVpnn(req.vpn, 1))}" +
p" vpn0:0x${Hexadecimal(getVpnn(req.vpn, 0))}\n")
XSDebug(valid, p"state:${state} level:${level} tlbHit:${tlbHit} l1addr:0x${Hexadecimal(l1addr)} l1Hit:${l1Hit}" +
p" l2addr:0x${Hexadecimal(l2addr)} l2Hit:${l2Hit} l3addr:0x${Hexadecimal(l3addr)} memReq(v:${mem.a.valid} r:${mem.a.ready})\n")
XSDebug(memReqFire, p"mem req fire addr:0x${Hexadecimal(memAddr)}\n")
XSDebug(memRespFire, p"mem resp fire rdata:0x${Hexadecimal(mem.d.bits.data)} Pte:${memPte}\n")
XSDebug(memRespFire, p"mem resp fire: \n")
for(i <- 0 until (MemBandWidth/XLEN)) {
XSDebug(memRespFire, p" ${i.U}: ${memPtes(i)} isPf:${memPtes(i).isPf(level)} isLeaf:${memPtes(i).isLeaf}\n")
}
XSDebug(sfenceLatch, p"ptw has a flushed req waiting for resp... state:${state} mem.a(${mem.a.valid} ${mem.a.ready}) d($memValid} ${memRespReady})\n")
XSDebug(sfenceLatch, p"ptw has a flushed req waiting for resp... " +
p"state:${state} mem.a(${mem.a.valid} ${mem.a.ready}) d($memValid} ${memRespReady})\n")
// TODO: add ptw perf cnt
}
......@@ -28,6 +28,11 @@ class LsqEntry extends XSBundle {
val fwdData = Vec(8, UInt(8.W))
}
// Byte-granular forwarding result: 8 bytes, each with its own valid flag.
// Used below as the element type of the forward-match vectors and as the
// result type driving io.forward(i).forwardMask / forwardData.
class FwdEntry extends XSBundle {
val mask = Vec(8, Bool()) // mask(k): byte k carries forwarded data
val data = Vec(8, UInt(8.W)) // data(k): the forwarded value of byte k
}
class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
......@@ -124,6 +129,8 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
// i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
// entry with larger index should have higher priority since its data is younger
// FIXME: old fwd logic for assertion, remove when rtl freeze
(0 until nchannel).map(i => {
val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
......@@ -152,10 +159,63 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
// merge forward lookup results
// forward2 is younger than forward1 and should have higher priority
val oldFwdResult = Wire(new FwdEntry)
(0 until XLEN / 8).map(k => {
io.forward(i).forwardMask(k) := forwardMask1(k) || forwardMask2(k)
io.forward(i).forwardData(k) := Mux(forwardMask2(k), forwardData2(k), forwardData1(k))
oldFwdResult.mask(k) := RegNext(forwardMask1(k) || forwardMask2(k))
oldFwdResult.data(k) := RegNext(Mux(forwardMask2(k), forwardData2(k), forwardData1(k)))
})
// parallel fwd logic
val paddrMatch = Wire(Vec(size, Bool()))
val matchResultVec = Wire(Vec(size * 2, new FwdEntry))
// Reduce a sequence of forward entries to a single entry via ParallelOperation.
// Per byte: the merged mask is the OR of both operands' mask bits, and the
// merged data is taken from the second operand when its mask bit is set,
// otherwise from the first operand.
def parallelFwd(xs: Seq[Data]): Data = {
  ParallelOperation(xs, (lhs: Data, rhs: Data) => {
    val first = lhs.asTypeOf(new FwdEntry)
    val second = rhs.asTypeOf(new FwdEntry)
    val merged = Wire(new FwdEntry)
    for (byteIdx <- 0 until 8) {
      // the second operand wins whenever it has a valid byte
      val takeSecond = second.mask(byteIdx)
      merged.mask(byteIdx) := first.mask(byteIdx) || takeSecond
      merged.data(byteIdx) := Mux(takeSecond, second.data(byteIdx), first.data(byteIdx))
    }
    merged
  })
}
for (j <- 0 until size) {
paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
}
for (j <- 0 until size) {
val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j))
val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j))
(0 until XLEN / 8).foreach(k => {
matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k)
matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
matchResultVec(size + j).mask(k) := needCheck1 && data(j).mask(k)
matchResultVec(size + j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
})
}
val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)
io.forward(i).forwardMask := parallelFwdResult.mask
io.forward(i).forwardData := parallelFwdResult.data
when(
oldFwdResult.mask.asUInt =/= parallelFwdResult.mask.asUInt
){
printf("%d: mask error: right: %b false %b\n", GTimer(), oldFwdResult.mask.asUInt, parallelFwdResult.mask.asUInt)
}
for (p <- 0 until 8) {
when(
oldFwdResult.data(p) =/= parallelFwdResult.data(p) && oldFwdResult.mask(p)
){
printf("%d: data "+p+" error: right: %x false %x\n", GTimer(), oldFwdResult.data(p), parallelFwdResult.data(p))
}
}
})
// data read
......
......@@ -51,7 +51,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
dataModule.io := DontCare
val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
val valid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
val commited = Reg(Vec(LoadQueueSize, Bool())) // presumably: inst has been committed — TODO confirm (previous comment was a copy of the one on `writebacked`)
val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
......@@ -87,7 +87,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
when(io.enq.req(i).valid) {
uop(index) := io.enq.req(i).bits
allocated(index) := true.B
valid(index) := false.B
datavalid(index) := false.B
writebacked(index) := false.B
commited(index) := false.B
miss(index) := false.B
......@@ -138,7 +138,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
)
}
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
valid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
allocated(loadWbIndex) := !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
......@@ -237,7 +237,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
dataModule.io.refill.wen(i) := false.B
when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
dataModule.io.refill.wen(i) := true.B
valid(i) := true.B
datavalid(i) := true.B
listening(i) := false.B
}
})
......@@ -245,7 +245,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
// writeback up to 2 missed load insts to CDB
// just randomly pick 2 missed load (data refilled), write them back to cdb
val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
allocated(i) && valid(i) && !writebacked(i)
allocated(i) && datavalid(i) && !writebacked(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelV= Wire(Vec(StorePipelineWidth, Bool()))
......@@ -387,7 +387,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (valid(j) || listening(j) || miss(j))
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
......@@ -500,7 +500,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
dataModule.io.uncache.wen := false.B
when(io.uncache.resp.fire()){
valid(deqPtr) := true.B
datavalid(deqPtr) := true.B
dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
dataModule.io.uncache.wen := true.B
// TODO: write back exception info
......@@ -530,7 +530,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
when(needCancel(i)) {
when(io.brqRedirect.bits.isReplay){
valid(i) := false.B
datavalid(i) := false.B
writebacked(i) := false.B
listening(i) := false.B
miss(i) := false.B
......@@ -564,7 +564,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && valid(i), "v")
PrintFlag(allocated(i) && datavalid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
PrintFlag(allocated(i) && commited(i), "c")
PrintFlag(allocated(i) && miss(i), "m")
......
......@@ -129,19 +129,6 @@ class LoadUnit_S1 extends XSModule {
io.out.bits.forwardMask := io.sbuffer.forwardMask
io.out.bits.forwardData := io.sbuffer.forwardData
// generate XLEN/8 Muxs
for (i <- 0 until XLEN / 8) {
when(io.lsq.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.lsq.forwardData(i)
}
}
XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
s1_uop.cf.pc,
io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt
)
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.bits.paddr := s1_paddr
......@@ -161,6 +148,7 @@ class LoadUnit_S2 extends XSModule {
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
})
val s2_uop = io.in.bits.uop
......@@ -173,10 +161,16 @@ class LoadUnit_S2 extends XSModule {
io.dcacheResp.ready := true.B
assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost")
val forwardMask = io.in.bits.forwardMask
val forwardData = io.in.bits.forwardData
val forwardMask = io.out.bits.forwardMask
val forwardData = io.out.bits.forwardData
val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U
XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
s2_uop.cf.pc,
io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt
)
// data merge
val rdata = VecInit((0 until XLEN / 8).map(j =>
Mux(forwardMask(j), forwardData(j), io.dcacheResp.bits.data(8*(j+1)-1, 8*j)))).asUInt
......@@ -213,9 +207,19 @@ class LoadUnit_S2 extends XSModule {
io.in.ready := io.out.ready || !io.in.valid
// merge forward result
io.lsq := DontCare
// generate XLEN/8 Muxs
for (i <- 0 until XLEN / 8) {
when(io.lsq.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.lsq.forwardData(i)
}
}
XSDebug(io.out.fire(), "[DCACHE LOAD RESP] pc %x rdata %x <- D$ %x + fwd %x(%b)\n",
s2_uop.cf.pc, rdataPartialLoad, io.dcacheResp.bits.data,
io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt
io.out.bits.forwardData.asUInt, io.out.bits.forwardMask.asUInt
)
}
......@@ -268,6 +272,9 @@ class LoadUnit extends XSModule {
load_s2.io.redirect <> io.redirect
load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq := DontCare
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
// PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B)
// load_s3.io.redirect <> io.redirect
......
......@@ -4,7 +4,7 @@
#include "common.h"
#include "ram.h"
#define RAMSIZE (256 * 1024 * 1024UL)
#define RAMSIZE (64 * 1024 * 1024 * 1024UL)
#ifdef WITH_DRAMSIM3
#include "cosimulation.h"
......@@ -24,12 +24,12 @@ void addpageSv39() {
//addr range: 0x0000000080000000 - 0x0000000088000000 for 128MB from 2GB - 2GB128MB
//the first layer: one entry for 1GB. (512GB in total by 512 entries). need only entry index 2 (pdde[2])
//the second layer: one entry for 2MB. (1GB in total by 512 entries). need the 0th-63rd entries
//the third layer: one entry for 4KB (2MB in total by 512 entries). need 64 with each one all
//the third layer: one entry for 4KB (2MB in total by 512 entries). need 64 with each one all
#define TOPSIZE (128 * 1024 * 1024)
#define PAGESIZE (4 * 1024) // 4KB = 2^12B
#define ENTRYNUM (PAGESIZE / 8) //512 2^9
#define PTEVOLUME (PAGESIZE * ENTRYNUM) // 2MB
#define PTENUM (RAMSIZE / PTEVOLUME) // 128MB / 2MB = 64
#define PTENUM (TOPSIZE / PTEVOLUME) // 128MB / 2MB = 64
#define PDDENUM 1
#define PDENUM 1
#define PDDEADDR (0x88000000 - (PAGESIZE * (PTENUM + 2))) //0x88000000 - 0x1000*66
......@@ -43,7 +43,7 @@ void addpageSv39() {
uint64_t pdde[ENTRYNUM];
uint64_t pde[ENTRYNUM];
uint64_t pte[PTENUM][ENTRYNUM];
// special addr for mmio 0x40000000 - 0x4fffffff
uint64_t pdemmio[ENTRYNUM];
uint64_t ptemmio[PTEMMIONUM][ENTRYNUM];
......@@ -71,13 +71,13 @@ void addpageSv39() {
for(int i = 0; i < PTEMMIONUM; i++) {
pdemmio[i] = (((PDDEADDR-PAGESIZE*(PTEMMIONUM+PDEMMIONUM-i)) & 0xfffff000) >> 2) | 0x1;
}
for(int outidx = 0; outidx < PTEMMIONUM; outidx++) {
for(int inidx = 0; inidx < ENTRYNUM; inidx++) {
ptemmio[outidx][inidx] = (((0x40000000 + outidx*PTEVOLUME + inidx*PAGESIZE) & 0xfffff000) >> 2) | 0xf;
}
}
//0x80000000 - 0x87ffffff
pdde[2] = ((PDEADDR & 0xfffff000) >> 2) | 0x1;
//pdde[2] = ((0x80000000&0xc0000000) >> 2) | 0xf;
......@@ -93,13 +93,13 @@ void addpageSv39() {
}
}
memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM+PTEDEVNUM)),ptedev,PAGESIZE*PTEDEVNUM);
memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM)),pdedev,PAGESIZE*PDEDEVNUM);
memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM)),ptemmio, PAGESIZE*PTEMMIONUM);
memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM)), pdemmio, PAGESIZE*PDEMMIONUM);
memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM)), pdde, PAGESIZE*PDDENUM);
memcpy((char *)ram+(RAMSIZE-PAGESIZE*(PTENUM+PDENUM)), pde, PAGESIZE*PDENUM);
memcpy((char *)ram+(RAMSIZE-PAGESIZE*PTENUM), pte, PAGESIZE*PTENUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM+PTEDEVNUM)),ptedev,PAGESIZE*PTEDEVNUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM)),pdedev,PAGESIZE*PDEDEVNUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM)),ptemmio, PAGESIZE*PTEMMIONUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM)), pdemmio, PAGESIZE*PDEMMIONUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM)), pdde, PAGESIZE*PDDENUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDENUM)), pde, PAGESIZE*PDENUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*PTENUM), pte, PAGESIZE*PTENUM);
}
#endif
......@@ -187,7 +187,7 @@ void init_ram(const char *img) {
ret = fread(ram, img_size, 1, fp);
assert(ret == 1);
fclose(fp);
fclose(fp);
}
#ifdef TLB_UNITTEST
......
......@@ -5,7 +5,7 @@
#include "VXSSimSoC.h"
#include <verilated_save.h>
class VerilatedSaveMem : public VerilatedSave {
class VerilatedSaveMem : public VerilatedSerialize {
const static long buf_size = 1024 * 1024 * 1024;
uint8_t *buf;
long size;
......
......@@ -85,7 +85,7 @@ class XSSimSoC(axiSim: Boolean)(implicit p: config.Parameters) extends LazyModul
else
LazyModule(new AXI4RAM(
dramRange,
memByte = 128 * 1024 * 1024,
memByte = 64L * 1024 * 1024 * 1024,
useBlackBox = true,
beatBytes = L3BusWidth / 8
)).node
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册