提交 ffd9d2f1 编写于 作者: Y Yinan Xu

Merge remote-tracking branch 'origin/master' into opt-brq

......@@ -27,7 +27,7 @@ help:
$(TOP_V): $(SCALA_FILE)
mkdir -p $(@D)
mill XiangShan.test.runMain $(SIMTOP) -X verilog -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform $(SIM_ARGS)
mill XiangShan.test.runMain $(SIMTOP) -X verilog -td $(@D) --full-stacktrace --output-file $(@F) --disable-all --fpga-platform --remove-assert $(SIM_ARGS)
# mill XiangShan.runMain top.$(TOP) -X verilog -td $(@D) --output-file $(@F) --infer-rw $(FPGATOP) --repl-seq-mem -c:$(FPGATOP):-o:$(@D)/$(@F).conf
# $(MEM_GEN) $(@D)/$(@F).conf >> $@
# sed -i -e 's/_\(aw\|ar\|w\|r\|b\)_\(\|bits_\)/_\1/g' $@
......@@ -139,6 +139,8 @@ endif
SEED ?= $(shell shuf -i 1-10000 -n 1)
VME_SOURCE ?= $(shell pwd)
VME_MODULE ?=
# log will only be printed when (B<=GTimer<=E) && (L < loglevel)
# use 'emu -h' to see more details
......@@ -165,6 +167,23 @@ emu: $(EMU)
ls build
$(EMU) -i $(IMAGE) $(EMU_FLAGS)
# extract verilog module from sim_top.v
# usage: make vme VME_MODULE=Roq
# None of these targets produce a file of the same name, so declare them phony.
.PHONY: vme phy_evaluate phy_evaluate_atc
vme: $(SIM_TOP_V)
	mill XiangShan.runMain utils.ExtractVerilogModules -m $(VME_MODULE)

# The remote evaluation targets need both a host and a design name. Check at
# parse time so a missing value is reported before the (long) vme build starts.
ifneq ($(filter phy_evaluate phy_evaluate_atc,$(MAKECMDGOALS)),)
ifeq ($(strip $(REMOTE)),)
$(error REMOTE is not set; usage: make phy_evaluate VME_MODULE=Roq REMOTE=100)
endif
ifeq ($(strip $(VME_MODULE)),)
$(error VME_MODULE is not set; usage: make phy_evaluate VME_MODULE=Roq REMOTE=100)
endif
endif

# usage: make phy_evaluate VME_MODULE=Roq REMOTE=100
# usage: make phy_evaluate_atc VME_MODULE=Roq REMOTE=100
# Uploads the extracted RTL, runs the matching goal on the remote host
# (phy_evaluate -> evaluate, phy_evaluate_atc -> evaluate_atc) and fetches the reports.
# The remote command deliberately uses a literal `make`: a recipe line containing
# the MAKE variable is executed even under `make -n`, and that variable expands
# to the path of the *local* make binary, which is meaningless on the remote side.
phy_evaluate phy_evaluate_atc: vme
	scp -r ./build/extracted/* $(REMOTE):~/phy_evaluation/remote_run/rtl
	ssh -tt $(REMOTE) 'cd ~/phy_evaluation/remote_run && make $(patsubst phy_%,%,$@) DESIGN_NAME=$(VME_MODULE)'
	scp -r $(REMOTE):~/phy_evaluation/remote_run/rpts ./build
cache:
$(MAKE) emu IMAGE=Makefile
......
package utils
/*
https://github.com/Lingrui98/scalaTage/blob/vme/src/main/scala/getVerilogModules.scala
*/
import scala.io.Source
import java.io._
import scala.language.postfixOps
import sys.process._
import sys._
// Splits a single-file Verilog netlist into per-module records and writes a chosen
// module, together with every module it transitively instantiates, to separate
// `<module>.v` files.
class VerilogModuleExtractor {
  // captures the module name from a header line such as "module Foo("
  val modulePattern = "module ([\\w]+)\\(".r.unanchored
  // captures (type, instance name) from an instantiation line such as "Foo foo ( // comment"
  val subMoudlePattern = "([\\w]+) ([\\w]+) \\((?: //.*)*\\Z".r.unanchored
  val endMoudleIOPattern = "\\);".r.unanchored
  val endMoudlePattern = "endmodule".r.unanchored

  // (submodule type, submodule instance name)
  type SubMoudleRecord = Tuple2[String, String]
  // (lines of the module in file order, submodules it instantiates)
  type ModuleRecord = Tuple2[List[String], List[SubMoudleRecord]]
  // module name -> record
  type ModuleMap = Map[String, ModuleRecord]

  def getLines(s: scala.io.BufferedSource): Iterator[String] = s.getLines()

  // Consumes the whole iterator and returns one record per `module ... endmodule` span.
  // Lines outside any module are reported on stdout and skipped.
  def makeRecord(s: Iterator[String]): ModuleMap = {
    // Called right after the header line of a module has been read; collects lines
    // up to and including the line that contains "endmodule".
    def processModule(firstLine: String, it: Iterator[String]): ModuleRecord = {
      @scala.annotation.tailrec
      def iter(cont: List[String], subm: List[SubMoudleRecord]): ModuleRecord =
        if (!it.hasNext) {
          // Truncated input: keep what was collected instead of failing with
          // NoSuchElementException from it.next().
          Console.err.println(s"warning: input ended before 'endmodule' of: $firstLine")
          (cont, subm)
        } else {
          val l = it.next()
          l match {
            case endMoudlePattern() => (l :: cont, subm)
            case subMoudlePattern(ty, name) => iter(l :: cont, (ty, name) :: subm)
            case _ => iter(l :: cont, subm)
          }
        }
      // content was accumulated by prepending, so restore file order;
      // the submodule list is returned in the order it was accumulated
      val (reversedContent, subs) = iter(List(firstLine), Nil)
      (reversedContent.reverse, subs)
    }

    @scala.annotation.tailrec
    def traverse(m: ModuleMap, it: Iterator[String]): ModuleMap =
      if (!it.hasNext) m
      else {
        val l = it.next()
        l match {
          case modulePattern(name) =>
            traverse(m + (name -> processModule(l, it)), it)
          case _ =>
            println(s"line $l is not a module definition")
            traverse(m, it)
        }
      }

    traverse(Map(), s)
  }

  // Reads `file` and builds its module map. makeRecord consumes the iterator eagerly,
  // so the source can be closed as soon as it returns.
  def makeRecordFromFile(file: String): ModuleMap = {
    val bufSrc = Source.fromFile(file)
    try makeRecord(bufSrc.getLines())
    finally bufSrc.close()
  }

  // Writes the content lines of `record` to `dir + name + ".v"`; `dir` is used as a
  // plain string prefix, so it must already end with a path separator.
  def writeModuleToFile(name: String, record: ModuleRecord, dir: String) = {
    val path = dir + name + ".v"
    val writer = new PrintWriter(new File(path))
    println(f"Writing module $name%20s to $path")
    // close the file even if a write fails part-way
    try record._1.foreach(line => writer.write(line + "\n"))
    finally writer.close()
  }

  // get module definition of specified name; throws NoSuchElementException if absent
  def getModule(name: String, m: ModuleMap): ModuleRecord = {
    m(name)
  }

  // Debug helper: prints the submodule list followed by the module's lines.
  def showModuleRecord(r: ModuleRecord) = {
    val (content, submodules) = r
    submodules.foreach {
      case (t, n) => println(f"submoudle type: $t, submodule name: $n")
    }
    println("\nprinting module contents...")
    content.foreach(println(_))
  }

  // We first get records of all the modules and their submodule records,
  // then choose a module as the root node and traverse its submodules depth-first.
  //
  // name    : module to start from
  // map     : all known modules
  // outPath : output directory prefix (must end with a path separator)
  // doneSet : submodule types that must not be visited
  // top     : (name of the chosen top module, whether `name` itself is that top)
  //
  // A single visited set is shared across the whole traversal so that a module
  // instantiated from several places is written exactly once.
  def processFromModule(name: String, map: ModuleMap, outPath: String, doneSet: Set[String] = Set(), top: Tuple2[String, Boolean]): Unit = {
    val (topName, isTop) = top
    // module reported as the owner when a submodule cannot be found
    val rootName = if (isTop) name else topName
    val visited = scala.collection.mutable.Set[String]()
    visited ++= doneSet
    visited += name

    def visit(modName: String, atTop: Boolean): Unit = map.get(modName) match {
      case None =>
        val owner = if (atTop) "chosen top" else s"submodule of ${rootName},"
        println(s"$owner module $modName does not exist!")
      case Some(record) =>
        if (atTop) println(s"\nProcessing top module $modName")
        new File(outPath).mkdirs() // ensure the path exists
        writeModuleToFile(modName, record, outPath)
        // DFS over the distinct submodule types that have not been handled yet
        record._2.map(_._1).distinct.foreach { ty =>
          if (visited.add(ty)) visit(ty, atTop = false)
        }
    }

    visit(name, isTop)
  }

  // Today's date as yyyyMMdd.
  def getDate: String = {
    java.time.LocalDate.now.toString.replace("-", "")
  }

  // Builds "<outDir>/<date>-<user>-<topModule>/". An empty outDir yields a path
  // relative to the working directory instead of throwing on outDir.last.
  def makePath(topModule: String, outDir: String , user: String = "glr"): String = {
    val base = if (outDir.isEmpty || outDir.endsWith("/")) outDir else outDir + "/"
    base + getDate + "-" + user + "-" + topModule + "/"
  }

  // Extracts one top module; `mapp` lets the caller reuse an already parsed module map.
  def extract(src: String, topModule: String, outDir: String, user: String, mapp: Option[ModuleMap]): Unit = {
    val useMap = mapp.getOrElse(makeRecordFromFile(src))
    val path = makePath(topModule, outDir, user)
    processFromModule(topModule, useMap, path, top = (topModule, true))
  }

  // Extracts several top modules, parsing the source file only once.
  def extract(src: String, topModules: List[String], outDir: String, user: String): Unit = {
    val mapp = makeRecordFromFile(src)
    topModules.foreach(n => extract(src, n, outDir, user, Some(mapp)))
  }
}
// Command-line parsing for the Verilog module extractor.
trait VMEArgParser {
  // option name -> value, None when the option was not given
  type OptionMap = Map[String, Option[Any]]
  val usage = """
Usage: sbt "run [OPTION...]"
-s, --source the verilog file generated by chisel, all in one file
default: $NOOP_HOME/build/XSSimTop.v
-h, --help print this help info
-o, --output the place you want to store your extracted verilog
default: $NOOP_HOME/build/extracted
-u, --usr your name, will be used to name the output folder
default: current user
-m, --modules the top modules you would like to extract verilog from
should always be the last argument
default: IFU
"""

  // Parses the argument list into an OptionMap with the keys "source", "output",
  // "usr" (each Some(String)) and "modules" (Some(List[String])).
  // --help prints the usage text and exits the JVM.
  def parse(args: List[String]): OptionMap = {
    // startsWith is safe for empty arguments, unlike indexing the first character
    def isSwitch(s: String) = s.startsWith("-")

    @scala.annotation.tailrec
    def nextOption(map: OptionMap, l: List[String]): OptionMap = l match {
      case Nil => map
      case ("--help" | "-h") :: _ =>
        println(usage)
        sys.exit()
      case ("--source" | "-s") :: file :: tail =>
        nextOption(map + ("source" -> Some(file)), tail)
      case ("--output" | "-o") :: path :: tail =>
        nextOption(map + ("output" -> Some(path)), tail)
      case ("--usr" | "-u") :: name :: tail =>
        nextOption(map + ("usr" -> Some(name)), tail)
      // this should always be the last argument, since it is length variable:
      // everything after it is taken as a module name
      case ("--modules" | "-m") :: m :: tail =>
        map + ("modules" -> Some(m :: tail))
      // anything else (including a switch that is missing its value) is skipped
      case s :: tail =>
        if (isSwitch(s)) println(s"unexpected argument $s")
        nextOption(map, tail)
    }

    nextOption(Map("source" -> None, "output" -> None, "usr" -> None, "modules" -> None), args)
  }

  // Returns (source file, top modules, output directory, user name), filling in the
  // defaults for options that were not given. NOOP_HOME and `whoami` are consulted
  // only when the corresponding default is actually needed.
  def wrapParams(args: Array[String]): (String, List[String], String, String) = {
    val paramMap = parse(args.toList)
    def noopHome: String = sys.env.getOrElse("NOOP_HOME",
      sys.error("NOOP_HOME is not set; export it or pass --source and --output explicitly"))
    val source = paramMap("source").map(_.asInstanceOf[String]).getOrElse(noopHome + "/build/XSSimTop.v")
    val modules = paramMap("modules").map(_.asInstanceOf[List[String]]).getOrElse(List("IFU"))
    val output = paramMap("output").map(_.asInstanceOf[String]).getOrElse(noopHome + "/build/extracted/")
    // trim removes the trailing line terminator whatever its length
    val user = paramMap("usr").map(_.asInstanceOf[String]).getOrElse("whoami".!!.trim)
    (source, modules, output, user)
  }
}
// Command-line entry point: resolves the options (with defaults) and extracts
// each requested top module from the source file.
object ExtractVerilogModules extends VMEArgParser {
  def main(args: Array[String]): Unit = {
    val extractor = new VerilogModuleExtractor
    wrapParams(args) match {
      case (src, tops, outDir, user) => extractor.extract(src, tops, outDir, user)
    }
  }
}
......@@ -28,6 +28,11 @@ class LsqEntry extends XSBundle {
val fwdData = Vec(8, UInt(8.W))
}
// Byte-granular forward lookup result for one 8-byte word:
// mask(k) is set when byte k is supplied by forwarding, data(k) is that byte's value.
class FwdEntry extends XSBundle {
val mask = Vec(8, Bool())
val data = Vec(8, UInt(8.W))
}
class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
......@@ -124,6 +129,8 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
// i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
// entry with larger index should have higher priority since its data is younger
// FIXME: old fwd logic for assertion, remove when rtl freeze
(0 until nchannel).map(i => {
val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
......@@ -152,10 +159,63 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
// merge forward lookup results
// forward2 is younger than forward1 and should have higher priority
val oldFwdResult = Wire(new FwdEntry)
(0 until XLEN / 8).map(k => {
io.forward(i).forwardMask(k) := forwardMask1(k) || forwardMask2(k)
io.forward(i).forwardData(k) := Mux(forwardMask2(k), forwardData2(k), forwardData1(k))
oldFwdResult.mask(k) := RegNext(forwardMask1(k) || forwardMask2(k))
oldFwdResult.data(k) := RegNext(Mux(forwardMask2(k), forwardData2(k), forwardData1(k)))
})
// parallel fwd logic
val paddrMatch = Wire(Vec(size, Bool()))
val matchResultVec = Wire(Vec(size * 2, new FwdEntry))
def parallelFwd(xs: Seq[Data]): Data = {
ParallelOperation(xs, (a: Data, b: Data) => {
val l = a.asTypeOf(new FwdEntry)
val r = b.asTypeOf(new FwdEntry)
val res = Wire(new FwdEntry)
(0 until 8).map(p => {
res.mask(p) := l.mask(p) || r.mask(p)
res.data(p) := Mux(r.mask(p), r.data(p), l.data(p))
})
res
})
}
for (j <- 0 until size) {
paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
}
for (j <- 0 until size) {
val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j))
val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j))
(0 until XLEN / 8).foreach(k => {
matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k)
matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
matchResultVec(size + j).mask(k) := needCheck1 && data(j).mask(k)
matchResultVec(size + j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
})
}
val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)
io.forward(i).forwardMask := parallelFwdResult.mask
io.forward(i).forwardData := parallelFwdResult.data
when(
oldFwdResult.mask.asUInt =/= parallelFwdResult.mask.asUInt
){
printf("%d: mask error: right: %b false %b\n", GTimer(), oldFwdResult.mask.asUInt, parallelFwdResult.mask.asUInt)
}
for (p <- 0 until 8) {
when(
oldFwdResult.data(p) =/= parallelFwdResult.data(p) && oldFwdResult.mask(p)
){
printf("%d: data "+p+" error: right: %x false %x\n", GTimer(), oldFwdResult.data(p), parallelFwdResult.data(p))
}
}
})
// data read
......
......@@ -51,7 +51,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
dataModule.io := DontCare
val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
val valid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
val commited = Reg(Vec(LoadQueueSize, Bool())) // inst has been writebacked to CDB
val miss = Reg(Vec(LoadQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
......@@ -87,7 +87,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
when(io.enq.req(i).valid) {
uop(index) := io.enq.req(i).bits
allocated(index) := true.B
valid(index) := false.B
datavalid(index) := false.B
writebacked(index) := false.B
commited(index) := false.B
miss(index) := false.B
......@@ -138,7 +138,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
)
}
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
valid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
allocated(loadWbIndex) := !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
......@@ -237,7 +237,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
dataModule.io.refill.wen(i) := false.B
when(allocated(i) && listening(i) && blockMatch && io.dcache.resp.fire()) {
dataModule.io.refill.wen(i) := true.B
valid(i) := true.B
datavalid(i) := true.B
listening(i) := false.B
}
})
......@@ -245,7 +245,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
// writeback up to 2 missed load insts to CDB
// just randomly pick 2 missed load (data refilled), write them back to cdb
val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
allocated(i) && valid(i) && !writebacked(i)
allocated(i) && datavalid(i) && !writebacked(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelV= Wire(Vec(StorePipelineWidth, Bool()))
......@@ -387,7 +387,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (valid(j) || listening(j) || miss(j))
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
......@@ -500,7 +500,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
dataModule.io.uncache.wen := false.B
when(io.uncache.resp.fire()){
valid(deqPtr) := true.B
datavalid(deqPtr) := true.B
dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
dataModule.io.uncache.wen := true.B
// TODO: write back exception info
......@@ -530,7 +530,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
when(needCancel(i)) {
when(io.brqRedirect.bits.isReplay){
valid(i) := false.B
datavalid(i) := false.B
writebacked(i) := false.B
listening(i) := false.B
miss(i) := false.B
......@@ -564,7 +564,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && valid(i), "v")
PrintFlag(allocated(i) && datavalid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
PrintFlag(allocated(i) && commited(i), "c")
PrintFlag(allocated(i) && miss(i), "m")
......
......@@ -129,19 +129,6 @@ class LoadUnit_S1 extends XSModule {
io.out.bits.forwardMask := io.sbuffer.forwardMask
io.out.bits.forwardData := io.sbuffer.forwardData
// generate XLEN/8 Muxs
for (i <- 0 until XLEN / 8) {
when(io.lsq.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.lsq.forwardData(i)
}
}
XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
s1_uop.cf.pc,
io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt
)
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.bits.paddr := s1_paddr
......@@ -161,6 +148,7 @@ class LoadUnit_S2 extends XSModule {
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
})
val s2_uop = io.in.bits.uop
......@@ -173,10 +161,16 @@ class LoadUnit_S2 extends XSModule {
io.dcacheResp.ready := true.B
assert(!(io.in.valid && !io.dcacheResp.valid), "DCache response got lost")
val forwardMask = io.in.bits.forwardMask
val forwardData = io.in.bits.forwardData
val forwardMask = io.out.bits.forwardMask
val forwardData = io.out.bits.forwardData
val fullForward = (~forwardMask.asUInt & s2_mask) === 0.U
XSDebug(io.out.fire(), "[FWD LOAD RESP] pc %x fwd %x(%b) + %x(%b)\n",
s2_uop.cf.pc,
io.lsq.forwardData.asUInt, io.lsq.forwardMask.asUInt,
io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt
)
// data merge
val rdata = VecInit((0 until XLEN / 8).map(j =>
Mux(forwardMask(j), forwardData(j), io.dcacheResp.bits.data(8*(j+1)-1, 8*j)))).asUInt
......@@ -213,9 +207,19 @@ class LoadUnit_S2 extends XSModule {
io.in.ready := io.out.ready || !io.in.valid
// merge forward result
io.lsq := DontCare
// generate XLEN/8 Muxs
for (i <- 0 until XLEN / 8) {
when(io.lsq.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.lsq.forwardData(i)
}
}
XSDebug(io.out.fire(), "[DCACHE LOAD RESP] pc %x rdata %x <- D$ %x + fwd %x(%b)\n",
s2_uop.cf.pc, rdataPartialLoad, io.dcacheResp.bits.data,
io.in.bits.forwardData.asUInt, io.in.bits.forwardMask.asUInt
io.out.bits.forwardData.asUInt, io.out.bits.forwardMask.asUInt
)
}
......@@ -268,6 +272,9 @@ class LoadUnit extends XSModule {
load_s2.io.redirect <> io.redirect
load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq := DontCare
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
// PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B)
// load_s3.io.redirect <> io.redirect
......
......@@ -5,7 +5,7 @@
#include "VXSSimSoC.h"
#include <verilated_save.h>
class VerilatedSaveMem : public VerilatedSave {
class VerilatedSaveMem : public VerilatedSerialize {
const static long buf_size = 1024 * 1024 * 1024;
uint8_t *buf;
long size;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册