提交 36067372 编写于 作者: Y Yinan Xu

Merge remote-tracking branch 'origin/master' into opt-brq

......@@ -24,7 +24,7 @@ class FetchPacket extends XSBundle {
// val pc = UInt(VAddrBits.W)
val pc = Vec(PredictWidth, UInt(VAddrBits.W))
val pnpc = Vec(PredictWidth, UInt(VAddrBits.W))
val brInfo = Vec(PredictWidth, new BranchInfo)
val bpuMeta = Vec(PredictWidth, new BpuMeta)
val pd = Vec(PredictWidth, new PreDecodeInfo)
val ipf = Bool()
val acf = Bool()
......@@ -116,7 +116,7 @@ class BranchPrediction extends XSBundle with HasIFUConst {
def hasNotTakenBrs = Mux(taken, ParallelPriorityMux(realTakens, sawNotTakenBr), ParallelORR(brNotTakens))
}
class BranchInfo extends XSBundle with HasBPUParameter {
class BpuMeta extends XSBundle with HasBPUParameter {
val ubtbWriteWay = UInt(log2Up(UBtbWays).W)
val ubtbHits = Bool()
val btbWriteWay = UInt(log2Up(BtbWays).W)
......@@ -155,20 +155,22 @@ class Predecode extends XSBundle with HasIFUConst {
val pd = Vec(FetchWidth*2, (new PreDecodeInfo))
}
class BranchUpdateInfo extends XSBundle {
class CfiUpdateInfo extends XSBundle {
// from backend
val pc = UInt(VAddrBits.W)
val pnpc = UInt(VAddrBits.W)
val fetchIdx = UInt(log2Up(FetchWidth*2).W)
// frontend -> backend -> frontend
val pd = new PreDecodeInfo
val bpuMeta = new BpuMeta
// need pipeline update
val target = UInt(VAddrBits.W)
val brTarget = UInt(VAddrBits.W)
val taken = Bool()
val fetchIdx = UInt(log2Up(FetchWidth*2).W)
val isMisPred = Bool()
val brTag = new BrqPtr
// frontend -> backend -> frontend
val pd = new PreDecodeInfo
val brInfo = new BranchInfo
val isReplay = Bool()
}
// Dequeue DecodeWidth insts from Ibuffer
......@@ -177,7 +179,7 @@ class CtrlFlow extends XSBundle {
val pc = UInt(VAddrBits.W)
val exceptionVec = Vec(16, Bool())
val intrVec = Vec(12, Bool())
val brUpdate = new BranchUpdateInfo
val brUpdate = new CfiUpdateInfo
val crossPageIPFFix = Bool()
}
......@@ -274,7 +276,7 @@ class ExuOutput extends XSBundle {
val fflags = new Fflags
val redirectValid = Bool()
val redirect = new Redirect
val brUpdate = new BranchUpdateInfo
val brUpdate = new CfiUpdateInfo
val debug = new DebugBundle
}
......@@ -321,8 +323,8 @@ class FrontendToBackendIO extends XSBundle {
val cfVec = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
// from backend
val redirect = Flipped(ValidIO(UInt(VAddrBits.W)))
val outOfOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
val inOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
// val cfiUpdateInfo = Flipped(ValidIO(new CfiUpdateInfo))
val cfiUpdateInfo = Flipped(ValidIO(new CfiUpdateInfo))
}
class TlbCsrBundle extends XSBundle {
......
......@@ -78,10 +78,10 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
val redirectValid = roq.io.redirect.valid || brq.io.redirect.valid || io.fromLsBlock.replay.valid
val redirect = Mux(roq.io.redirect.valid, roq.io.redirect.bits, redirectArb)
io.frontend.redirect.valid := redirectValid
io.frontend.redirect.bits := Mux(roq.io.redirect.valid, roq.io.redirect.bits.target, redirectArb.target)
io.frontend.outOfOrderBrInfo <> brq.io.outOfOrderBrInfo
io.frontend.inOrderBrInfo <> brq.io.inOrderBrInfo
io.frontend.redirect.valid := RegNext(redirectValid)
io.frontend.redirect.bits := RegNext(Mux(roq.io.redirect.valid, roq.io.redirect.bits.target, redirectArb.target))
// io.frontend.cfiUpdateInfo <> brq.io.cfiInfo
io.frontend.cfiUpdateInfo <> brq.io.cfiInfo
decode.io.in <> io.frontend.cfVec
decode.io.toBrq <> brq.io.enqReqs
......@@ -95,8 +95,9 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
brq.io.exuRedirect <> io.fromIntBlock.exuRedirect
// pipeline between decode and dispatch
val lastCycleRedirect = RegNext(redirectValid)
for (i <- 0 until RenameWidth) {
PipelineConnect(decode.io.out(i), rename.io.in(i), rename.io.in(i).ready, redirectValid)
PipelineConnect(decode.io.out(i), rename.io.in(i), rename.io.in(i).ready, redirectValid || lastCycleRedirect)
}
rename.io.redirect.valid <> redirectValid
......
......@@ -51,11 +51,9 @@ class BrqIO extends XSBundle{
val out = ValidIO(new ExuOutput)
// misprediction, flush pipeline
val redirect = Output(Valid(new Redirect))
val outOfOrderBrInfo = ValidIO(new BranchUpdateInfo)
val cfiInfo = ValidIO(new CfiUpdateInfo)
// commit cnt of branch instr
val bcommit = Input(UInt(BrTagWidth.W))
// in order dequeue to train bpd
val inOrderBrInfo = ValidIO(new BranchUpdateInfo)
}
class Brq extends XSModule with HasCircularQueuePtrHelper {
......@@ -63,7 +61,6 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
class BrqEntry extends Bundle {
val ptrFlag = Bool()
val npc = UInt(VAddrBits.W)
val exuOut = new ExuOutput
}
......@@ -91,15 +88,13 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val deqValid = stateQueue(headIdx).isCommit && brCommitCnt=/=0.U
val commitValid = stateQueue(commitIdx).isWb
val commitEntry = brQueue(commitIdx)
val commitIsMisPred = commitEntry.exuOut.redirect.isMisPred
val commitIsMisPred = commitEntry.exuOut.brUpdate.isMisPred
brCommitCnt := brCommitCnt + io.bcommit - deqValid
XSDebug(p"brCommitCnt:$brCommitCnt\n")
assert(brCommitCnt+io.bcommit >= deqValid)
io.inOrderBrInfo.valid := commitValid
io.inOrderBrInfo.bits := commitEntry.exuOut.brUpdate
XSDebug(io.inOrderBrInfo.valid, "inOrderValid: pc=%x\n", io.inOrderBrInfo.bits.pc)
XSDebug(io.cfiInfo.valid, "inOrderValid: pc=%x\n", io.cfiInfo.bits.pc)
XSDebug(p"headIdx:$headIdx commitIdx:$commitIdx\n")
XSDebug(p"headPtr:$headPtr tailPtr:$tailPtr\n")
......@@ -128,16 +123,22 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
// !io.roqRedirect.valid &&
// !io.redirect.bits.roqIdx.needFlush(io.memRedirect)
io.redirect.valid := commitValid && commitIsMisPred
io.redirect.bits := commitEntry.exuOut.redirect
io.redirect.bits.brTag := BrqPtr(commitEntry.ptrFlag, commitIdx)
io.out.valid := commitValid
io.out.bits := commitEntry.exuOut
io.outOfOrderBrInfo.valid := commitValid
io.outOfOrderBrInfo.bits := commitEntry.exuOut.brUpdate
when (commitValid) {
redirectPtr := redirectPtr + 1.U
}
val brTagRead = RegNext(Mux(io.memRedirect.bits.isReplay, io.memRedirect.bits.brTag - 1.U, io.memRedirect.bits.brTag))
io.cfiInfo.valid := RegNext(io.memRedirect.valid || commitValid)
io.cfiInfo.bits := brQueue(brTagRead.value).exuOut.brUpdate
io.cfiInfo.bits.brTag := brTagRead
io.cfiInfo.bits.isReplay := RegNext(io.memRedirect.bits.isReplay)
XSInfo(io.out.valid,
p"commit branch to roq, mispred:${io.redirect.valid} pc=${Hexadecimal(io.out.bits.uop.cf.pc)}\n"
)
......@@ -150,9 +151,13 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val idx = brTag.value
io.enqReqs(i).ready := validEntries <= (BrqSize - (i + 1)).U
io.brTags(i) := brTag
when(io.enqReqs(i).fire()){
brQueue(idx).npc := io.enqReqs(i).bits.cf.brUpdate.pnpc
when (io.enqReqs(i).fire()) {
brQueue(idx).ptrFlag := brTag.flag
brQueue(idx).exuOut.brUpdate.pc := io.enqReqs(i).bits.cf.brUpdate.pc
brQueue(idx).exuOut.brUpdate.pnpc := io.enqReqs(i).bits.cf.brUpdate.pnpc
brQueue(idx).exuOut.brUpdate.fetchIdx := io.enqReqs(i).bits.cf.brUpdate.fetchIdx
brQueue(idx).exuOut.brUpdate.pd := io.enqReqs(i).bits.cf.brUpdate.pd
brQueue(idx).exuOut.brUpdate.bpuMeta := io.enqReqs(i).bits.cf.brUpdate.bpuMeta
stateQueue(idx) := s_idle
}
}
......@@ -161,20 +166,29 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
// exu write back
for(exuWb <- io.exuRedirect){
when(exuWb.valid){
when (exuWb.valid) {
val wbIdx = exuWb.bits.redirect.brTag.value
XSInfo(
p"exu write back: brTag:${exuWb.bits.redirect.brTag}" +
p" pc=${Hexadecimal(exuWb.bits.uop.cf.pc)} pnpc=${Hexadecimal(brQueue(wbIdx).npc)} target=${Hexadecimal(exuWb.bits.redirect.target)}\n"
p" pc=${Hexadecimal(exuWb.bits.uop.cf.pc)} " +
p"pnpc=${Hexadecimal(brQueue(wbIdx).exuOut.brUpdate.pnpc)} " +
p"target=${Hexadecimal(exuWb.bits.redirect.target)}\n"
)
when(stateQueue(wbIdx).isIdle){
when (stateQueue(wbIdx).isIdle) {
stateQueue(wbIdx) := s_wb
}
val exuOut = WireInit(exuWb.bits)
val isMisPred = brQueue(wbIdx).npc =/= exuWb.bits.redirect.target
exuOut.redirect.isMisPred := isMisPred
exuOut.brUpdate.isMisPred := isMisPred
brQueue(wbIdx).exuOut := exuOut
val isMisPred = brQueue(wbIdx).exuOut.brUpdate.pnpc =/= exuWb.bits.redirect.target
// only writeback necessary information
brQueue(wbIdx).exuOut.uop := exuWb.bits.uop
brQueue(wbIdx).exuOut.data := exuWb.bits.data
brQueue(wbIdx).exuOut.fflags := exuWb.bits.fflags
brQueue(wbIdx).exuOut.redirectValid := exuWb.bits.redirectValid
brQueue(wbIdx).exuOut.redirect := exuWb.bits.redirect
brQueue(wbIdx).exuOut.debug := exuWb.bits.debug
brQueue(wbIdx).exuOut.brUpdate.target := exuWb.bits.brUpdate.target
brQueue(wbIdx).exuOut.brUpdate.brTarget := exuWb.bits.brUpdate.brTarget
brQueue(wbIdx).exuOut.brUpdate.taken := exuWb.bits.brUpdate.taken
brQueue(wbIdx).exuOut.brUpdate.isMisPred := isMisPred
}
}
......
......@@ -11,7 +11,7 @@ import xiangshan.backend.decode.isa._
trait HasRedirectOut { this: RawModule =>
val redirectOutValid = IO(Output(Bool()))
val redirectOut = IO(Output(new Redirect))
val brUpdate = IO(Output(new BranchUpdateInfo))
val brUpdate = IO(Output(new CfiUpdateInfo))
}
class Jump extends FunctionUnit with HasRedirectOut {
......@@ -44,7 +44,7 @@ class Jump extends FunctionUnit with HasRedirectOut {
brUpdate := uop.cf.brUpdate
brUpdate.pc := uop.cf.pc
brUpdate.target := target
brUpdate.brTarget := target // DontCare
brUpdate.brTarget := target
brUpdate.taken := true.B
// Output
......
......@@ -170,7 +170,7 @@ class ReservationStationCtrl
// redirect and feedback
for (i <- 0 until iqSize) {
val cnt = cntQueue(idxQueue(i))
when (!(deqIdx === i.U && deqValid)) {
if (i != 0) { // TODO: combine the two case
val nextIdx = i.U - moveMask(i-1)
when (stateQueue(i)===s_replay) {
......@@ -194,6 +194,7 @@ class ReservationStationCtrl
}
when (redHitVec(i)) { stateQueue(nextIdx) := s_idle }
}}
}
}
// output
......@@ -454,4 +455,4 @@ class ReservationStationData
p"${Binary(io.ctrl.srcUpdate(i).asUInt)}|${uop(i).pdest}:${uop(i).ctrl.rfWen}:" +
p"${uop(i).ctrl.fpWen}|${uop(i).roqIdx} |${Hexadecimal(uop(i).cf.pc)}\n")
}
}
\ No newline at end of file
}
......@@ -117,7 +117,7 @@ class TlbEntry extends TlbBundle {
class TlbEntires(num: Int, tagLen: Int) extends TlbBundle {
require(log2Up(num)==log2Down(num))
/* vpn can be divided into three parts */
// vpn: tagPart + addrPart
// vpn: tagPart(17bit) + addrPart(8bit) + cutLenPart(2bit)
val cutLen = log2Up(num)
val tag = UInt(tagLen.W) // NOTE: high part of vpn
......@@ -127,9 +127,10 @@ class TlbEntires(num: Int, tagLen: Int) extends TlbBundle {
val vs = Vec(num, Bool())
def tagClip(vpn: UInt, level: UInt) = { // full vpn => tagLen
Mux(level===0.U, Cat(vpn(vpnLen-1, vpnnLen*2+cutLen), 0.U(vpnnLen*2+cutLen)),
Mux(level===1.U, Cat(vpn(vpnLen-1, vpnnLen*1+cutLen), 0.U(vpnnLen*1+cutLen)),
Cat(vpn(vpnLen-1, vpnnLen*0+cutLen), 0.U(vpnnLen*0+cutLen))))(tagLen-1, 0)
val tmp = Mux(level===0.U, Cat(vpn(vpnLen-1, vpnnLen*2+cutLen), 0.U(vpnnLen*2)),
Mux(level===1.U, Cat(vpn(vpnLen-1, vpnnLen*1+cutLen), 0.U(vpnnLen*1)),
Cat(vpn(vpnLen-1, vpnnLen*0+cutLen), 0.U(vpnnLen*0))))
tmp(tmp.getWidth-1, tmp.getWidth-tagLen)
}
// NOTE: get insize idx
......@@ -527,4 +528,4 @@ object TLB {
tlb.io.ptw
}
}
\ No newline at end of file
}
......@@ -115,7 +115,7 @@ abstract class BasePredictor extends XSModule
val pc = Flipped(ValidIO(UInt(VAddrBits.W)))
val hist = Input(UInt(HistoryLength.W))
val inMask = Input(UInt(PredictWidth.W))
val update = Flipped(ValidIO(new BranchUpdateInfoWithHist))
val update = Flipped(ValidIO(new CfiUpdateInfo))
val outFire = Input(Bool())
}
......@@ -129,7 +129,7 @@ class BPUStageIO extends XSBundle {
val mask = UInt(PredictWidth.W)
val resp = new PredictorResponse
// val target = UInt(VAddrBits.W)
val brInfo = Vec(PredictWidth, new BranchInfo)
val brInfo = Vec(PredictWidth, new BpuMeta)
// val saveHalfRVI = Bool()
}
......@@ -251,7 +251,7 @@ class BPUStage3 extends BPUStage {
val predecode = Input(new Predecode)
val realMask = Input(UInt(PredictWidth.W))
val prevHalf = Input(new PrevHalfInstr)
val recover = Flipped(ValidIO(new BranchUpdateInfo))
val recover = Flipped(ValidIO(new CfiUpdateInfo))
}
val s3IO = IO(new S3IO)
// TAGE has its own pipelines and the
......@@ -315,9 +315,9 @@ class BPUStage3 extends BPUStage {
ras.io.recover := s3IO.recover
for(i <- 0 until PredictWidth){
io.out.brInfo(i).rasSp := ras.io.branchInfo.rasSp
io.out.brInfo(i).rasTopCtr := ras.io.branchInfo.rasTopCtr
io.out.brInfo(i).rasToqAddr := ras.io.branchInfo.rasToqAddr
io.out.brInfo(i).rasSp := ras.io.meta.rasSp
io.out.brInfo(i).rasTopCtr := ras.io.meta.rasTopCtr
io.out.brInfo(i).rasToqAddr := ras.io.meta.rasToqAddr
}
takens := VecInit((0 until PredictWidth).map(i => {
((brTakens(i) || jalrs(i)) && btbHits(i)) ||
......@@ -401,25 +401,25 @@ class BPUReq extends XSBundle {
// val histPtr = UInt(log2Up(ExtHistoryLength).W) // only for debug
}
class BranchUpdateInfoWithHist extends XSBundle {
val ui = new BranchUpdateInfo
val hist = UInt(HistoryLength.W)
}
// class CfiUpdateInfoWithHist extends XSBundle {
// val ui = new CfiUpdateInfo
// val hist = UInt(HistoryLength.W)
// }
object BranchUpdateInfoWithHist {
def apply (brInfo: BranchUpdateInfo, hist: UInt) = {
val b = Wire(new BranchUpdateInfoWithHist)
b.ui <> brInfo
b.hist := hist
b
}
}
// object CfiUpdateInfoWithHist {
// def apply (brInfo: CfiUpdateInfo, hist: UInt) = {
// val b = Wire(new CfiUpdateInfoWithHist)
// b.ui <> brInfo
// b.hist := hist
// b
// }
// }
abstract class BaseBPU extends XSModule with BranchPredictorComponents with HasBPUParameter{
val io = IO(new Bundle() {
// from backend
val inOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfoWithHist))
val outOfOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfoWithHist))
val cfiUpdateInfo = Flipped(ValidIO(new CfiUpdateInfo))
// val cfiUpdateInfo = Flipped(ValidIO(new CfiUpdateInfoWithHist))
// from ifu, frontend redirect
val flush = Input(Vec(3, Bool()))
// from if1
......@@ -432,13 +432,13 @@ abstract class BaseBPU extends XSModule with BranchPredictorComponents with HasB
val realMask = Input(UInt(PredictWidth.W))
val prevHalf = Input(new PrevHalfInstr)
// to if4, some bpu info used for updating
val branchInfo = Output(Vec(PredictWidth, new BranchInfo))
val bpuMeta = Output(Vec(PredictWidth, new BpuMeta))
})
def npc(pc: UInt, instCount: UInt) = pc + (instCount << 1.U)
preds.map(_.io.update <> io.outOfOrderBrInfo)
tage.io.update <> io.inOrderBrInfo
preds.map(_.io.update <> io.cfiUpdateInfo)
// tage.io.update <> io.cfiUpdateInfo
val s1 = Module(new BPUStage1)
val s2 = Module(new BPUStage2)
......@@ -469,12 +469,12 @@ abstract class BaseBPU extends XSModule with BranchPredictorComponents with HasB
io.out(1) <> s2.io.pred
io.out(2) <> s3.io.pred
io.branchInfo := s3.io.out.brInfo
io.bpuMeta := s3.io.out.brInfo
if (BPUDebug) {
XSDebug(io.inFire(3), "branchInfo sent!\n")
XSDebug(io.inFire(3), "bpuMeta sent!\n")
for (i <- 0 until PredictWidth) {
val b = io.branchInfo(i)
val b = io.bpuMeta(i)
XSDebug(io.inFire(3), "brInfo(%d): ubtbWrWay:%d, ubtbHit:%d, btbWrWay:%d, btbHitJal:%d, bimCtr:%d, fetchIdx:%d\n",
i.U, b.ubtbWriteWay, b.ubtbHits, b.btbWriteWay, b.btbHitJal, b.bimCtr, b.fetchIdx)
val t = b.tageMeta
......@@ -492,7 +492,7 @@ class FakeBPU extends BaseBPU {
i <> DontCare
i.takens := 0.U
})
io.branchInfo <> DontCare
io.bpuMeta <> DontCare
}
@chiselName
class BPU extends BaseBPU {
......@@ -500,7 +500,7 @@ class BPU extends BaseBPU {
//**********************Stage 1****************************//
val s1_resp_in = Wire(new PredictorResponse)
val s1_brInfo_in = Wire(Vec(PredictWidth, new BranchInfo))
val s1_brInfo_in = Wire(Vec(PredictWidth, new BpuMeta))
s1_resp_in.tage := DontCare
s1_resp_in.loop := DontCare
......@@ -518,8 +518,8 @@ class BPU extends BaseBPU {
// Wrap ubtb response into resp_in and brInfo_in
s1_resp_in.ubtb <> ubtb.io.out
for (i <- 0 until PredictWidth) {
s1_brInfo_in(i).ubtbWriteWay := ubtb.io.uBTBBranchInfo.writeWay(i)
s1_brInfo_in(i).ubtbHits := ubtb.io.uBTBBranchInfo.hits(i)
s1_brInfo_in(i).ubtbWriteWay := ubtb.io.uBTBMeta.writeWay(i)
s1_brInfo_in(i).ubtbHits := ubtb.io.uBTBMeta.hits(i)
}
btb.io.flush := io.flush(0) // TODO: fix this
......@@ -598,8 +598,8 @@ class BPU extends BaseBPU {
s3.s3IO.prevHalf := io.prevHalf
s3.s3IO.recover.valid <> io.inOrderBrInfo.valid
s3.s3IO.recover.bits <> io.inOrderBrInfo.bits.ui
s3.s3IO.recover.valid <> io.cfiUpdateInfo.valid
s3.s3IO.recover.bits <> io.cfiUpdateInfo.bits
if (BPUDebug) {
if (debug_verbose) {
......@@ -615,11 +615,11 @@ class BPU extends BaseBPU {
if (EnableCFICommitLog) {
val buValid = io.inOrderBrInfo.valid
val buinfo = io.inOrderBrInfo.bits.ui
val buValid = io.cfiUpdateInfo.valid
val buinfo = io.cfiUpdateInfo.bits
val pd = buinfo.pd
val tage_cycle = buinfo.brInfo.debug_tage_cycle
XSDebug(buValid, p"cfi_update: isBr(${pd.isBr}) pc(${Hexadecimal(buinfo.pc)}) taken(${buinfo.taken}) mispred(${buinfo.isMisPred}) cycle($tage_cycle) hist(${Hexadecimal(io.inOrderBrInfo.bits.hist)})\n")
val tage_cycle = buinfo.bpuMeta.debug_tage_cycle
XSDebug(buValid, p"cfi_update: isBr(${pd.isBr}) pc(${Hexadecimal(buinfo.pc)}) taken(${buinfo.taken}) mispred(${buinfo.isMisPred}) cycle($tage_cycle) hist(${Hexadecimal(buinfo.bpuMeta.predHist.asUInt)})\n")
}
}
......
......@@ -80,7 +80,7 @@ class BIM extends BasePredictor with BimParams {
io.meta.ctrs(b) := ctr
}
val u = io.update.bits.ui
val u = io.update.bits
val updateBank = bimAddr.getBank(u.pc)
val updateRow = bimAddr.getBankIdx(u.pc)
......@@ -96,12 +96,12 @@ class BIM extends BasePredictor with BimParams {
val wrbypass_hit = wrbypass_hits.reduce(_||_)
val wrbypass_hit_idx = PriorityEncoder(wrbypass_hits)
val oldCtr = Mux(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank), wrbypass_ctrs(wrbypass_hit_idx)(updateBank), u.brInfo.bimCtr)
val oldCtr = Mux(wrbypass_hit && wrbypass_ctr_valids(wrbypass_hit_idx)(updateBank), wrbypass_ctrs(wrbypass_hit_idx)(updateBank), u.bpuMeta.bimCtr)
val newTaken = u.taken
val newCtr = satUpdate(oldCtr, 2, newTaken)
// val oldSaturated = newCtr === oldCtr
val needToUpdate = io.update.valid && u.pd.isBr
val needToUpdate = io.update.valid && u.pd.isBr && !u.isReplay
when (reset.asBool) { wrbypass_ctr_valids.foreach(_.foreach(_ := false.B))}
......@@ -126,9 +126,9 @@ class BIM extends BasePredictor with BimParams {
if (BPUDebug && debug) {
XSDebug(doing_reset, "Reseting...\n")
XSDebug("[update] v=%d pc=%x pnpc=%x tgt=%x brTgt=%x\n", io.update.valid, u.pc, u.pnpc, u.target, u.brTarget)
XSDebug("[update] v=%d pc=%x pnpc=%x tgt=%x", io.update.valid, u.pc, u.pnpc, u.target)
XSDebug("[update] taken=%d isMisPred=%d", u.taken, u.isMisPred)
XSDebug(false, true.B, p"brTag=${u.brTag} pd.isBr=${u.pd.isBr} brInfo.bimCtr=${Binary(u.brInfo.bimCtr)}\n")
XSDebug(false, true.B, p"brTag=${u.brTag} pd.isBr=${u.pd.isBr} brInfo.bimCtr=${Binary(u.bpuMeta.bimCtr)}\n")
XSDebug("needToUpdate=%d updateBank=%x updateRow=%x newCtr=%b oldCtr=%b\n", needToUpdate, updateBank, updateRow, newCtr, oldCtr)
XSDebug("[wrbypass] hit=%d hits=%b\n", wrbypass_hit, wrbypass_hits.asUInt)
}
......
......@@ -27,8 +27,8 @@ class Frontend extends XSModule {
//backend
ifu.io.redirect <> io.backend.redirect
ifu.io.inOrderBrInfo <> io.backend.inOrderBrInfo
ifu.io.outOfOrderBrInfo <> io.backend.outOfOrderBrInfo
ifu.io.cfiUpdateInfo <> io.backend.cfiUpdateInfo
// ifu.io.cfiUpdateInfo <> io.backend.cfiUpdateInfo
//icache
ifu.io.icacheResp <> icache.io.resp
icache.io.req <> ifu.io.icacheReq
......
......@@ -70,8 +70,8 @@ class IFUIO extends XSBundle
{
val fetchPacket = DecoupledIO(new FetchPacket)
val redirect = Flipped(ValidIO(UInt(VAddrBits.W)))
val outOfOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
val inOrderBrInfo = Flipped(ValidIO(new BranchUpdateInfo))
// val cfiUpdateInfo = Flipped(ValidIO(new CfiUpdateInfo))
val cfiUpdateInfo = Flipped(ValidIO(new CfiUpdateInfo))
val icacheReq = DecoupledIO(new ICacheReq)
val icacheResp = Flipped(DecoupledIO(new ICacheResp))
val icacheFlush = Output(UInt(2.W))
......@@ -355,11 +355,11 @@ class IFU extends XSModule with HasIFUConst
when (io.outOfOrderBrInfo.valid && io.outOfOrderBrInfo.bits.isMisPred) {
val b = io.outOfOrderBrInfo.bits
val oldGh = b.brInfo.hist
val sawNTBr = b.brInfo.sawNotTakenBranch
val cfiUpdate = io.cfiUpdateInfo
when (cfiUpdate.valid && (cfiUpdate.bits.isMisPred || cfiUpdate.bits.isReplay)) {
val b = cfiUpdate.bits
val oldGh = b.bpuMeta.hist
val sawNTBr = b.bpuMeta.sawNotTakenBranch
val isBr = b.pd.isBr
val taken = b.taken
val updatedGh = oldGh.update(sawNTBr, isBr && taken)
......@@ -398,11 +398,7 @@ class IFU extends XSModule with HasIFUConst
io.icacheFlush := Cat(if3_flush, if2_flush)
val inOrderBrHist = io.inOrderBrInfo.bits.brInfo.predHist
bpu.io.inOrderBrInfo.valid := io.inOrderBrInfo.valid
bpu.io.inOrderBrInfo.bits := BranchUpdateInfoWithHist(io.inOrderBrInfo.bits, inOrderBrHist.asUInt)
bpu.io.outOfOrderBrInfo.valid := io.outOfOrderBrInfo.valid
bpu.io.outOfOrderBrInfo.bits := BranchUpdateInfoWithHist(io.outOfOrderBrInfo.bits, inOrderBrHist.asUInt) // Dont care about hist
bpu.io.cfiUpdateInfo <> io.cfiUpdateInfo
// bpu.io.flush := Cat(if4_flush, if3_flush, if2_flush)
bpu.io.flush := VecInit(if2_flush, if3_flush, if4_flush)
......@@ -465,9 +461,9 @@ class IFU extends XSModule with HasIFUConst
when (if4_bp.taken) {
fetchPacketWire.pnpc(if4_bp.jmpIdx) := if4_bp.target
}
fetchPacketWire.brInfo := bpu.io.branchInfo
(0 until PredictWidth).foreach(i => fetchPacketWire.brInfo(i).hist := final_gh)
(0 until PredictWidth).foreach(i => fetchPacketWire.brInfo(i).predHist := if4_predHist.asTypeOf(new GlobalHistory))
fetchPacketWire.bpuMeta := bpu.io.bpuMeta
(0 until PredictWidth).foreach(i => fetchPacketWire.bpuMeta(i).hist := final_gh)
(0 until PredictWidth).foreach(i => fetchPacketWire.bpuMeta(i).predHist := if4_predHist.asTypeOf(new GlobalHistory))
fetchPacketWire.pd := if4_pd.pd
fetchPacketWire.ipf := if4_ipf
fetchPacketWire.acf := if4_acf
......
......@@ -20,7 +20,7 @@ class Ibuffer extends XSModule {
val inst = UInt(32.W)
val pc = UInt(VAddrBits.W)
val pnpc = UInt(VAddrBits.W)
val brInfo = new BranchInfo
val brInfo = new BpuMeta
val pd = new PreDecodeInfo
val ipf = Bool()
val acf = Bool()
......@@ -63,7 +63,7 @@ class Ibuffer extends XSModule {
inWire.inst := io.in.bits.instrs(i)
inWire.pc := io.in.bits.pc(i)
inWire.pnpc := io.in.bits.pnpc(i)
inWire.brInfo := io.in.bits.brInfo(i)
inWire.brInfo := io.in.bits.bpuMeta(i)
inWire.pd := io.in.bits.pd(i)
inWire.ipf := io.in.bits.ipf
inWire.acf := io.in.bits.acf
......@@ -98,7 +98,7 @@ class Ibuffer extends XSModule {
io.out(i).bits.brUpdate.pc := outWire.pc
io.out(i).bits.brUpdate.pnpc := outWire.pnpc
io.out(i).bits.brUpdate.pd := outWire.pd
io.out(i).bits.brUpdate.brInfo := outWire.brInfo
io.out(i).bits.brUpdate.bpuMeta := outWire.brInfo
io.out(i).bits.crossPageIPFFix := outWire.crossPageIPFFix
}
head_ptr := head_ptr + io.out.map(_.fire).fold(0.U(log2Up(DecodeWidth).W))(_+_)
......
......@@ -309,13 +309,13 @@ class LoopPredictor extends BasePredictor with LTBParams {
ltbs(i).io.req.idx := Mux(isInNextRow(i), baseRow + 1.U, baseRow)
ltbs(i).io.req.tag := realTags(i)
// ltbs(i).io.outMask := outMask(i)
ltbs(i).io.update.valid := i.U === ltbAddr.getBank(io.update.bits.ui.pc) && io.update.valid && io.update.bits.ui.pd.isBr
ltbs(i).io.update.bits.misPred := io.update.bits.ui.isMisPred
ltbs(i).io.update.bits.pc := io.update.bits.ui.pc
ltbs(i).io.update.bits.meta := io.update.bits.ui.brInfo.specCnt
ltbs(i).io.update.bits.taken := io.update.bits.ui.taken
ltbs(i).io.update.bits.brTag := io.update.bits.ui.brTag
ltbs(i).io.repair := i.U =/= ltbAddr.getBank(io.update.bits.ui.pc) && io.update.valid && io.update.bits.ui.isMisPred
ltbs(i).io.update.valid := i.U === ltbAddr.getBank(io.update.bits.pc) && io.update.valid && io.update.bits.pd.isBr
ltbs(i).io.update.bits.misPred := io.update.bits.isMisPred
ltbs(i).io.update.bits.pc := io.update.bits.pc
ltbs(i).io.update.bits.meta := io.update.bits.bpuMeta.specCnt
ltbs(i).io.update.bits.taken := io.update.bits.taken
ltbs(i).io.update.bits.brTag := io.update.bits.brTag
ltbs(i).io.repair := i.U =/= ltbAddr.getBank(io.update.bits.pc) && io.update.valid && io.update.bits.isMisPred
}
val ltbResps = VecInit((0 until PredictWidth).map(i => ltbs(i).io.resp))
......
......@@ -28,9 +28,9 @@ class RAS extends BasePredictor
val callIdx = Flipped(ValidIO(UInt(log2Ceil(PredictWidth).W)))
val isRVC = Input(Bool())
val isLastHalfRVI = Input(Bool())
val recover = Flipped(ValidIO(new BranchUpdateInfo))
val recover = Flipped(ValidIO(new CfiUpdateInfo))
val out = ValidIO(new RASResp)
val branchInfo = Output(new RASBranchInfo)
val meta = Output(new RASBranchInfo)
}
class RASEntry() extends XSBundle {
......@@ -187,7 +187,7 @@ class RAS extends BasePredictor
// TODO: back-up stack for ras
// use checkpoint to recover RAS
val copy_valid = io.recover.valid && io.recover.bits.isMisPred
val copy_valid = io.recover.valid && (io.recover.bits.isMisPred || io.recover.bits.isReplay)
val copy_next = RegNext(copy_valid)
spec_ras.copy_valid := copy_next
spec_ras.copy_in_mem := commit_ras.copy_out_mem
......@@ -197,9 +197,9 @@ class RAS extends BasePredictor
commit_ras.copy_in_sp := DontCare
//no need to pass the ras branchInfo
io.branchInfo.rasSp := DontCare
io.branchInfo.rasTopCtr := DontCare
io.branchInfo.rasToqAddr := DontCare
io.meta.rasSp := DontCare
io.meta.rasTopCtr := DontCare
io.meta.rasToqAddr := DontCare
if (BPUDebug && debug) {
val spec_debug = spec.debugIO
......
......@@ -448,12 +448,12 @@ class Tage extends BaseTage {
val debug_hist_s2 = RegEnable(io.hist, enable=io.pc.valid)
val debug_hist_s3 = RegEnable(debug_hist_s2, enable=io.s3Fire)
val u = io.update.bits.ui
val updateValid = io.update.valid
val updateHist = io.update.bits.hist
val u = io.update.bits
val updateValid = io.update.valid && !io.update.bits.isReplay
val updateHist = u.bpuMeta.predHist.asUInt
val updateIsBr = u.pd.isBr
val updateMeta = u.brInfo.tageMeta
val updateMeta = u.bpuMeta.tageMeta
val updateMisPred = u.isMisPred && updateIsBr
val updateMask = WireInit(0.U.asTypeOf(Vec(TageNTables, Vec(TageBanks, Bool()))))
......@@ -475,7 +475,7 @@ class Tage extends BaseTage {
scUpdateTaken := DontCare
scUpdateOldCtrs := DontCare
val updateSCMeta = u.brInfo.tageMeta.scMeta
val updateSCMeta = u.bpuMeta.tageMeta.scMeta
val updateTageMisPred = updateMeta.taken =/= u.taken && updateIsBr
val updateBank = u.pc(log2Ceil(TageBanks), 1)
......@@ -641,7 +641,7 @@ class Tage extends BaseTage {
// use fetch pc instead of instruction pc
tables(i).io.update.pc := u.pc
tables(i).io.update.hist := updateHist
tables(i).io.update.fetchIdx := u.brInfo.fetchIdx
tables(i).io.update.fetchIdx := u.bpuMeta.fetchIdx
}
for (i <- 0 until SCNTables) {
......@@ -651,14 +651,14 @@ class Tage extends BaseTage {
scTables(i).io.update.oldCtr := scUpdateOldCtrs(i)
scTables(i).io.update.pc := u.pc
scTables(i).io.update.hist := updateHist
scTables(i).io.update.fetchIdx := u.brInfo.fetchIdx
scTables(i).io.update.fetchIdx := u.bpuMeta.fetchIdx
}
if (BPUDebug && debug) {
val m = updateMeta
val bri = u.brInfo
val bri = u.bpuMeta
XSDebug(io.pc.valid, "req: pc=0x%x, hist=%x\n", io.pc.bits, io.hist)
XSDebug(io.s3Fire, "s3Fire:%d, resp: pc=%x, hist=%x\n", io.s3Fire, debug_pc_s2, debug_hist_s2)
XSDebug(RegNext(io.s3Fire), "s3FireOnLastCycle: resp: pc=%x, hist=%x, hits=%b, takens=%b\n",
......
......@@ -43,12 +43,12 @@ class MicroBTB extends BasePredictor
class MicroBTBIO extends DefaultBasePredictorIO
{
val out = Output(new MicroBTBResp) //
val uBTBBranchInfo = Output(new MicroBTBBranchInfo)
val uBTBMeta = Output(new MicroBTBBranchInfo)
}
override val debug = true
override val io = IO(new MicroBTBIO)
io.uBTBBranchInfo <> out_ubtb_br_info
io.uBTBMeta <> out_ubtb_br_info
def getTag(pc: UInt) = (pc >> untaggedBits)(tagSize-1, 0)
def getBank(pc: UInt) = pc(log2Ceil(PredictWidth) ,1)
......@@ -251,13 +251,13 @@ class MicroBTB extends BasePredictor
//uBTB update
//backend should send fetch pc to update
val u = io.update.bits.ui
val u = io.update.bits
val update_br_pc = u.pc
val update_br_idx = u.fetchIdx
val update_br_offset = (update_br_idx << 1).asUInt()
val update_fetch_pc = update_br_pc - update_br_offset
val update_write_way = u.brInfo.ubtbWriteWay
val update_hits = u.brInfo.ubtbHits
val update_write_way = u.bpuMeta.ubtbWriteWay
val update_hits = u.bpuMeta.ubtbHits
val update_taken = u.taken
val update_bank = getBank(update_br_pc)
......@@ -268,9 +268,9 @@ class MicroBTB extends BasePredictor
val update_is_BR_or_JAL = (u.pd.brType === BrType.branch) || (u.pd.brType === BrType.jal)
val jalFirstEncountered = !u.isMisPred && !u.brInfo.btbHitJal && (u.pd.brType === BrType.jal)
val entry_write_valid = io.update.valid && (u.isMisPred || jalFirstEncountered)//io.update.valid //&& update_is_BR_or_JAL
val meta_write_valid = io.update.valid && (u.isMisPred || jalFirstEncountered)//io.update.valid //&& update_is_BR_or_JAL
val jalFirstEncountered = !u.isMisPred && !u.bpuMeta.btbHitJal && (u.pd.brType === BrType.jal)
val entry_write_valid = io.update.valid && (u.isMisPred || jalFirstEncountered) && !u.isReplay //io.update.valid //&& update_is_BR_or_JAL
val meta_write_valid = io.update.valid && (u.isMisPred || jalFirstEncountered) && !u.isReplay//io.update.valid //&& update_is_BR_or_JAL
//write btb target when miss prediction
// when(entry_write_valid)
// {
......
......@@ -468,7 +468,8 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
io.rollback := DontCare
// Note that we use roqIdx - 1.U to flush the load instruction itself.
// Thus, even if last cycle's roqIdx equals this cycle's roqIdx, it still triggers the redirect.
io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx))
io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) &&
!(lastCycleRedirect.valid && (lastCycleRedirect.bits.isFlushPipe || lastCycleRedirect.bits.isException))
io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U
io.rollback.bits.isReplay := true.B
......
......@@ -40,7 +40,7 @@ long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size)
// assert(bytes_write % sizeof(long) == 0);
}
printf("Write %lu bytes from gz stream in total\n", curr_size);
// printf("Write %lu bytes from gz stream in total\n", curr_size);
delete [] temp_page;
......@@ -65,7 +65,7 @@ long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_ty
// Only loads from RAM need the buf_size divisibility check
if (load_type == LOAD_RAM && (buf_size % chunk_size) != 0) {
printf("RAMSIZE must be divisible by chunk_size\n");
printf("buf_size must be divisible by chunk_size\n");
assert(0);
}
......@@ -88,10 +88,10 @@ long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_ty
}
if(gzread(compressed_mem, temp_page, chunk_size) > 0) {
printf("File size is larger than RAMSIZE!\n");
printf("File size is larger than buf_size!\n");
assert(0);
}
printf("Read %lu bytes from gz stream in total\n", curr_size);
// printf("Read %lu bytes from gz stream in total\n", curr_size);
delete [] temp_page;
......@@ -101,3 +101,26 @@ long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_ty
}
return curr_size;
}
// Copy `n` bytes from `src` to `dest`, skipping zero words.
//
// The leading part of the buffer is processed in 64-bit words: a word is
// stored to `dest` only when the source word is non-zero, so destination
// words that line up with zero source words are left untouched. The
// remaining `n % sizeof(uint64_t)` bytes are copied byte by byte
// unconditionally (zero bytes included).
//
// NOTE(review): `dest` is declared `const void*` but is written through after
// the cast below; the signature is kept as-is to match the header declaration.
// NOTE(review): both buffers are accessed as uint64_t, so callers presumably
// pass suitably aligned memory, and skipping zero words is presumably only
// equivalent to a plain copy when `dest` starts out zero-filled -- confirm
// against the call sites.
void nonzero_large_memcpy(const void* __restrict dest, const void* __restrict src, size_t n) {
  const size_t word_count = n / sizeof(uint64_t);
  const size_t tail_bytes = n % sizeof(uint64_t);

  uint64_t *dst_words = (uint64_t *)dest;
  uint64_t *src_words = (uint64_t *)src;

  // Word-granular pass: only non-zero source words are written.
  for (size_t i = 0; i < word_count; i++) {
    const uint64_t word = src_words[i];
    if (word != 0) {
      dst_words[i] = word;
    }
  }

  // Byte-granular pass for whatever does not fill a whole word.
  uint8_t *dst_tail = (uint8_t *)(dst_words + word_count);
  uint8_t *src_tail = (uint8_t *)(src_words + word_count);
  for (size_t i = 0; i < tail_bytes; i++) {
    dst_tail[i] = src_tail[i];
  }
}
......@@ -15,4 +15,6 @@ int isGzFile(const char *filename);
long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size);
long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_type);
void nonzero_large_memcpy(const void* __restrict dest, const void* __restrict src, size_t n);
#endif
......@@ -4,6 +4,7 @@
#include <getopt.h>
#include "ram.h"
#include "zlib.h"
#include "compress.h"
void* get_ram_start();
long get_ram_size();
......@@ -14,15 +15,16 @@ void set_nemu_this_pc(uint64_t pc);
// Print the emulator's command-line usage text to stdout.
// `file` is substituted into the "Usage:" line as the program name; the
// remaining lines list each supported option with a one-line description.
static inline void print_help(const char *file) {
printf("Usage: %s [OPTION...]\n", file);
printf("\n");
printf(" -s, --seed=NUM use this seed\n");
printf(" -C, --max-cycles=NUM execute at most NUM cycles\n");
printf(" -I, --max-instr=NUM execute at most NUM instructions\n");
printf(" -i, --image=FILE run with this image file\n");
printf(" -b, --log-begin=NUM display log from NUM th cycle\n");
printf(" -e, --log-end=NUM stop display log at NUM th cycle\n");
printf(" -s, --seed=NUM use this seed\n");
printf(" -C, --max-cycles=NUM execute at most NUM cycles\n");
printf(" -I, --max-instr=NUM execute at most NUM instructions\n");
printf(" -i, --image=FILE run with this image file\n");
printf(" -b, --log-begin=NUM display log from NUM th cycle\n");
printf(" -e, --log-end=NUM stop display log at NUM th cycle\n");
printf(" --load-snapshot=PATH load snapshot from PATH\n");
printf(" --dump-wave dump waveform when log is enabled\n");
printf(" -h, --help print program help info\n");
printf(" --no-snapshot disable saving snapshots\n");
printf(" --dump-wave dump waveform when log is enabled\n");
printf(" -h, --help print program help info\n");
printf("\n");
}
......@@ -32,6 +34,7 @@ inline EmuArgs parse_args(int argc, const char *argv[]) {
const struct option long_options[] = {
{ "load-snapshot", 1, NULL, 0 },
{ "dump-wave", 0, NULL, 0 },
{ "no-snapshot", 0, NULL, 0 },
{ "seed", 1, NULL, 's' },
{ "max-cycles", 1, NULL, 'C' },
{ "max-instr", 1, NULL, 'I' },
......@@ -50,6 +53,7 @@ inline EmuArgs parse_args(int argc, const char *argv[]) {
switch (long_index) {
case 0: args.snapshot_path = optarg; continue;
case 1: args.enable_waveform = true; continue;
case 2: args.enable_snapshot = false; continue;
}
// fall through
default:
......@@ -128,9 +132,12 @@ Emulator::~Emulator() {
ram_finish();
#ifdef VM_SAVABLE
snapshot_slot[0].save();
snapshot_slot[1].save();
printf("Please remove unused snapshots manually\n");
if (args.enable_snapshot && trapCode != STATE_GOODTRAP && trapCode != STATE_LIMIT_EXCEEDED) {
printf("Saving snapshots to file system. Please wait.\n");
snapshot_slot[0].save();
snapshot_slot[1].save();
printf("Please remove unused snapshots manually\n");
}
#endif
}
......@@ -317,8 +324,8 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
}
#ifdef VM_SAVABLE
static int snapshot_count = 0;
if (trapCode != STATE_GOODTRAP && t - lasttime_snapshot > 1000 * SNAPSHOT_INTERVAL) {
// save snapshot every 10s
if (args.enable_snapshot && trapCode != STATE_GOODTRAP && t - lasttime_snapshot > 6000 * SNAPSHOT_INTERVAL) {
// save snapshot every 60s
time_t now = time(NULL);
snapshot_save(snapshot_filename(now));
lasttime_snapshot = t;
......@@ -443,7 +450,7 @@ void Emulator::snapshot_save(const char *filename) {
char *buf = new char[size];
ref_difftest_memcpy_from_ref(buf, 0x80000000, size);
stream.unbuf_write(buf, size);
delete buf;
delete [] buf;
struct SyncState sync_mastate;
ref_difftest_get_mastatus(&sync_mastate);
......@@ -484,7 +491,7 @@ void Emulator::snapshot_load(const char *filename) {
char *buf = new char[size];
stream.read(buf, size);
ref_difftest_memcpy_from_dut(0x80000000, buf, size);
delete buf;
delete [] buf;
struct SyncState sync_mastate;
stream.read(&sync_mastate, sizeof(struct SyncState));
......
......@@ -14,6 +14,7 @@ struct EmuArgs {
const char *image;
const char *snapshot_path;
bool enable_waveform;
bool enable_snapshot;
EmuArgs() {
seed = 0;
......@@ -24,6 +25,7 @@ struct EmuArgs {
snapshot_path = NULL;
image = NULL;
enable_waveform = false;
enable_snapshot = true;
}
};
......
......@@ -4,7 +4,6 @@
#include "ram.h"
#include "compress.h"
#define RAMSIZE (256 * 1024 * 1024UL)
#ifdef WITH_DRAMSIM3
#include "cosimulation.h"
......@@ -16,7 +15,7 @@ static long img_size = 0;
void* get_img_start() { return &ram[0]; }
long get_img_size() { return img_size; }
void* get_ram_start() { return &ram[0]; }
long get_ram_size() { return RAMSIZE; }
long get_ram_size() { return EMU_RAM_SIZE; }
#ifdef TLB_UNITTEST
void addpageSv39() {
......@@ -109,17 +108,17 @@ void init_ram(const char *img) {
printf("The image is %s\n", img);
// initialize memory using Linux mmap
printf("Using simulated %luMB RAM\n", RAMSIZE / (1024 * 1024));
ram = (uint64_t *)mmap(NULL, RAMSIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
printf("Using simulated %luMB RAM\n", EMU_RAM_SIZE / (1024 * 1024));
ram = (uint64_t *)mmap(NULL, EMU_RAM_SIZE, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
if (ram == (uint64_t *)MAP_FAILED) {
printf("Cound not mmap 0x%lx bytes\n", RAMSIZE);
printf("Cound not mmap 0x%lx bytes\n", EMU_RAM_SIZE);
assert(0);
}
int ret;
if (isGzFile(img)) {
printf("Gzip file detected and loading image from extracted gz file\n");
img_size = readFromGz(ram, img, RAMSIZE, LOAD_RAM);
img_size = readFromGz(ram, img, EMU_RAM_SIZE, LOAD_RAM);
assert(img_size >= 0);
}
else {
......@@ -131,8 +130,8 @@ void init_ram(const char *img) {
fseek(fp, 0, SEEK_END);
img_size = ftell(fp);
if (img_size > RAMSIZE) {
img_size = RAMSIZE;
if (img_size > EMU_RAM_SIZE) {
img_size = EMU_RAM_SIZE;
}
fseek(fp, 0, SEEK_SET);
......@@ -159,24 +158,24 @@ void init_ram(const char *img) {
}
// Release the simulated RAM: unmap the `ram` region with munmap and, when the
// build defines WITH_DRAMSIM3, also call dramsim3_finish().
void ram_finish() {
munmap(ram, RAMSIZE);
munmap(ram, EMU_RAM_SIZE);
#ifdef WITH_DRAMSIM3
dramsim3_finish();
#endif
}
// Read one 64-bit word from the simulated RAM.
//   en   - read enable; when zero the function returns 0 without reading ram.
//   rIdx - index in 64-bit words (not a byte address).
// An enabled read whose index is past the end of RAM is wrapped modulo the
// RAM size in words instead of being treated as an error.
extern "C" uint64_t ram_read_helper(uint8_t en, uint64_t rIdx) {
if (en && rIdx >= RAMSIZE / sizeof(uint64_t)) {
rIdx %= RAMSIZE / sizeof(uint64_t);
if (en && rIdx >= EMU_RAM_SIZE / sizeof(uint64_t)) {
rIdx %= EMU_RAM_SIZE / sizeof(uint64_t);
}
return (en) ? ram[rIdx] : 0;
}
extern "C" void ram_write_helper(uint64_t wIdx, uint64_t wdata, uint64_t wmask, uint8_t wen) {
if (wen) {
if (wIdx >= RAMSIZE / sizeof(uint64_t)) {
if (wIdx >= EMU_RAM_SIZE / sizeof(uint64_t)) {
printf("ERROR: ram wIdx = 0x%lx out of bound!\n", wIdx);
assert(wIdx < RAMSIZE / sizeof(uint64_t));
assert(wIdx < EMU_RAM_SIZE / sizeof(uint64_t));
}
ram[wIdx] = (ram[wIdx] & ~wmask) | (wdata & wmask);
}
......@@ -201,7 +200,7 @@ struct dramsim3_meta {
};
void axi_read_data(const axi_ar_channel &ar, dramsim3_meta *meta) {
uint64_t address = ar.addr % RAMSIZE;
uint64_t address = ar.addr % EMU_RAM_SIZE;
uint64_t beatsize = 1 << ar.size;
uint8_t beatlen = ar.len + 1;
uint64_t transaction_size = beatsize * beatlen;
......@@ -350,7 +349,7 @@ void dramsim3_helper(axi_channel &axi) {
axi.b.id = meta->id;
// assert(axi.b.ready == 1);
for (int i = 0; i < meta->len; i++) {
uint64_t address = wait_resp_b->req->address % RAMSIZE;
uint64_t address = wait_resp_b->req->address % EMU_RAM_SIZE;
ram[address / sizeof(uint64_t) + i] = meta->data[i];
}
// printf("axi b channel fired\n");
......
......@@ -3,6 +3,8 @@
#include "common.h"
#define EMU_RAM_SIZE (256 * 1024 * 1024UL)
void init_ram(const char *img);
void ram_finish();
......
......@@ -15,20 +15,23 @@ void VerilatedSaveMem::save() {
if (size == 0) return;
trailer();
flush();
if(size <= (512 * 1024 * 1024UL)){
FILE *fp = fopen(m_filename.c_str(), "w");
auto saved_filename = m_filename;
if (size <= (512 * 1024 * 1024UL)) {
FILE *fp = fopen(saved_filename.c_str(), "w");
assert(fp != NULL);
fwrite(buf, size, 1, fp);
fclose(fp);
} else {
timeval s, e;
gettimeofday(&s, NULL);
snapshot_compressToFile(buf, (m_filename + ".gz").c_str(), size);
gettimeofday(&e, NULL);
printf("Compress cost time (msec.usec): %lf\n", calcTime(s, e));
}
else {
saved_filename = saved_filename + ".gz";
// timeval s, e;
// gettimeofday(&s, NULL);
snapshot_compressToFile(buf, saved_filename.c_str(), size);
// gettimeofday(&e, NULL);
// printf("Compress cost time (msec.usec): %lf\n", calcTime(s, e));
}
size = 0;
printf("save snapshot to %s...\n", m_filename.c_str());
printf("save snapshot to %s...\n", saved_filename.c_str());
}
void VerilatedRestoreMem::fill() {
......@@ -79,7 +82,7 @@ void VerilatedRestoreMem::open(const char* filename) {
gettimeofday(&s, NULL);
size = readFromGz(buf, filename, buf_size, LOAD_SNAPSHOT);
gettimeofday(&e, NULL);
printf("Uncompress cost time (msec.usec): %lf\n", calcTime(s, e));
// printf("Uncompress cost time (msec.usec): %lf\n", calcTime(s, e));
assert(size > 0);
} else {
FILE *fp = fopen(filename, "r");
......
......@@ -5,33 +5,40 @@
#include "VXSSimSoC.h"
#include <verilated_save.h>
#include <sys/mman.h>
#include "compress.h"
#include "ram.h"
#define SNAPSHOT_SIZE (3 * 16 * 1024 * 1024 * 1024UL)
#define SNAPSHOT_SIZE (3UL * EMU_RAM_SIZE)
class VerilatedSaveMem : public VerilatedSerialize {
const static long buf_size = SNAPSHOT_SIZE;
uint8_t *buf;
uint8_t *buf = NULL;
long size;
public:
VerilatedSaveMem() {
buf = (uint8_t*)mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
if (buf == (uint8_t *)MAP_FAILED) {
printf("Cound not mmap 0x%lx bytes\n", SNAPSHOT_SIZE);
assert(0);
}
buf = NULL;
size = 0;
}
~VerilatedSaveMem() { }
void init(const char *filename) {
if (buf != NULL) {
munmap(buf, SNAPSHOT_SIZE);
buf = NULL;
}
buf = (uint8_t*)mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
if (buf == (uint8_t *)MAP_FAILED) {
printf("Cound not mmap 0x%lx bytes\n", SNAPSHOT_SIZE);
assert(0);
}
size = 0;
m_filename = filename;
header();
}
void unbuf_write(const void* __restrict datap, size_t size) VL_MT_UNSAFE_ONE {
memcpy(buf + this->size, datap, size);
nonzero_large_memcpy(buf + this->size, datap, size);
this->size += size;
}
......
......@@ -17,14 +17,14 @@ with HasPartialDecoupledDriver {
test(new MicroBTB) { c =>
// Drive one update request into the MicroBTB under test: assert update.valid
// and poke pc, target, taken, fetchIdx, the mispredict flag (isMiss), and the
// uBTB write way / hit metadata. The branch type is always BrType.branch.
def genUpdateReq(pc: Long,target: Long,taken: Boolean,fetchIdx: Int,isMiss: Boolean,write_way: Int,hit: Boolean) = {
c.io.update.valid.poke(true.B)
c.io.update.bits.ui.pc.poke(pc.U)
c.io.update.bits.ui.target.poke(target.U)
c.io.update.bits.ui.taken.poke(taken.B)
c.io.update.bits.ui.fetchIdx.poke(fetchIdx.U)
c.io.update.bits.ui.isMisPred.poke(isMiss.B)
c.io.update.bits.ui.brInfo.ubtbWriteWay.poke(write_way.U)
c.io.update.bits.ui.brInfo.ubtbHits.poke(hit.B)
c.io.update.bits.ui.pd.brType.poke(BrType.branch)
c.io.update.bits.pc.poke(pc.U)
c.io.update.bits.target.poke(target.U)
c.io.update.bits.taken.poke(taken.B)
c.io.update.bits.fetchIdx.poke(fetchIdx.U)
c.io.update.bits.isMisPred.poke(isMiss.B)
c.io.update.bits.bpuMeta.ubtbWriteWay.poke(write_way.U)
c.io.update.bits.bpuMeta.ubtbHits.poke(hit.B)
c.io.update.bits.pd.brType.poke(BrType.branch)
}
def genReadReq(fetchpc: Long){
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册