Commit a1f32e2f authored by: Lingrui98

Merge branch 'debian-gogogo' of https://github.com/RISCVERS/XiangShan into debian-gogogo

......@@ -49,3 +49,6 @@ jobs:
    - name: Run microbench
      run: |
        make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run 2> /dev/null
    - name: Run coremark
      run: |
        make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run 2> /dev/null
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import sys
import re

if __name__ == "__main__":
    assert len(sys.argv) == 3, "Expect input_file and output_file"
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    # compile the patterns once, outside the per-line loop
    ifdef = re.compile(r'`ifdef')
    ifndef = re.compile(r'`ifndef')
    endif = re.compile(r'`endif')
    synthesis = re.compile(r'`ifndef SYNTHESIS')
    line_coverage = re.compile(r'^\s*([%]?\d+)\s+if')
    lines = []
    line_count = 0
    synthesis_nest_level = 0
    with open(input_file) as f:
        for line in f:
            line_count += 1
            ifdef_match = ifdef.search(line)
            ifndef_match = ifndef.search(line)
            endif_match = endif.search(line)
            synthesis_match = synthesis.search(line)
            line_coverage_match = line_coverage.search(line)
            # enter synthesis block
            if synthesis_match:
                assert synthesis_nest_level == 0, "Should not nest SYNTHESIS macro"
                synthesis_nest_level = 1
            if ifdef_match or (ifndef_match and not synthesis_match):
                synthesis_nest_level += 1
            if endif_match:
                synthesis_nest_level -= 1
            assert synthesis_nest_level >= 0, "Macro nest level should be >= 0"
            # blank out line coverage results inside the synthesis block
            if synthesis_nest_level > 0 and line_coverage_match:
                coverage_stat = line_coverage_match.group(1)
                line = line.replace(coverage_stat, " " * len(coverage_stat))
            lines.append(line)
    with open(output_file, "w") as f:
        for line in lines:
            f.write(line)
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
import sys
import re

if __name__ == "__main__":
    assert len(sys.argv) == 2, "Expect input_file"
    input_file = sys.argv[1]
    covered = 0
    not_covered = 0
    # compile the patterns once, outside the per-line loop
    covered_pattern = re.compile(r'^\s*(\d+)\s+if')
    not_covered_pattern = re.compile(r'^\s*(%0+)\s+if')
    with open(input_file) as f:
        for line in f:
            covered_match = covered_pattern.search(line)
            not_covered_match = not_covered_pattern.search(line)
            assert not (covered_match and not_covered_match)
            if covered_match:
                covered += 1
            if not_covered_match:
                not_covered += 1
    print("cover: %d not_cover: %d coverage: %f" %
          (covered, not_covered, float(covered) / (covered + not_covered)))
......@@ -114,5 +114,5 @@ object GenMask {
}
object UIntToMask {
def apply(ptr: UInt) = UIntToOH(ptr) - 1.U
def apply(ptr: UInt, length: Integer) = UIntToOH(ptr)(length - 1, 0) - 1.U
}
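For reference, a minimal chisel3 sketch (not part of this commit) of what the two-argument variant computes; truncating to length bits before the subtraction is what makes it safe for wrapped queue pointers like the Brq head index further down.

// Hypothetical standalone illustration of UIntToMask's lower-mask behavior.
// For ptr = 2, length = 8: UIntToOH(2) = 0b00000100, so the mask is 0b00000011,
// i.e. all entries strictly below ptr are set.
import chisel3._
import chisel3.util._
object UIntToMaskExample {
  def lowerMask(ptr: UInt, length: Int): UInt = UIntToOH(ptr)(length - 1, 0) - 1.U
}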
......@@ -2,15 +2,18 @@ package xiangshan
import chisel3._
import chisel3.util._
import xiangshan.backend.SelImm
import xiangshan.backend.brq.BrqPtr
import xiangshan.backend.fu.fpu.Fflags
import xiangshan.backend.rename.FreeListPtr
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.decode.XDecode
import xiangshan.mem.{LqPtr, SqPtr}
import xiangshan.frontend.PreDecodeInfo
import xiangshan.frontend.HasBPUParameter
import xiangshan.frontend.HasTageParameter
import xiangshan.frontend.HasIFUConst
import xiangshan.frontend.GlobalHistory
import utils._
import scala.math.max
......@@ -24,6 +27,7 @@ class FetchPacket extends XSBundle {
val brInfo = Vec(PredictWidth, new BranchInfo)
val pd = Vec(PredictWidth, new PreDecodeInfo)
val ipf = Bool()
val acf = Bool()
val crossPageIPFFix = Bool()
val predTaken = Bool()
}
......@@ -99,16 +103,17 @@ class BranchPrediction extends XSBundle with HasIFUConst {
def brNotTakens = ~realTakens & realBrMask
def sawNotTakenBr = VecInit((0 until PredictWidth).map(i =>
(if (i == 0) false.B else brNotTakens(i-1,0).orR)))
def hasNotTakenBrs = (brNotTakens & LowerMaskFromLowest(realTakens)).orR
// def hasNotTakenBrs = (brNotTakens & LowerMaskFromLowest(realTakens)).orR
def unmaskedJmpIdx = PriorityEncoder(takens)
def saveHalfRVI = (firstBankHasHalfRVI && (unmaskedJmpIdx === (bankWidth-1).U || !(takens.orR))) ||
(lastBankHasHalfRVI && unmaskedJmpIdx === (PredictWidth-1).U)
(lastBankHasHalfRVI && unmaskedJmpIdx === (PredictWidth-1).U)
// could get PredictWidth-1 when only the first bank is valid
def jmpIdx = PriorityEncoder(realTakens)
// only used when taken
def target = targets(jmpIdx)
def taken = realTakens.orR
def takenOnBr = taken && realBrMask(jmpIdx)
def hasNotTakenBrs = Mux(taken, sawNotTakenBr(jmpIdx), brNotTakens.orR)
}
class BranchInfo extends XSBundle with HasBPUParameter {
......@@ -117,27 +122,28 @@ class BranchInfo extends XSBundle with HasBPUParameter {
val btbWriteWay = UInt(log2Up(BtbWays).W)
val btbHitJal = Bool()
val bimCtr = UInt(2.W)
val histPtr = UInt(log2Up(ExtHistoryLength).W)
val predHistPtr = UInt(log2Up(ExtHistoryLength).W)
val tageMeta = new TageMeta
val rasSp = UInt(log2Up(RasSize).W)
val rasTopCtr = UInt(8.W)
val rasToqAddr = UInt(VAddrBits.W)
val fetchIdx = UInt(log2Up(PredictWidth).W)
val specCnt = UInt(10.W)
// for global history
val hist = new GlobalHistory
val predHist = new GlobalHistory
val sawNotTakenBranch = Bool()
val debug_ubtb_cycle = if (EnableBPUTimeRecord) UInt(64.W) else UInt(0.W)
val debug_btb_cycle = if (EnableBPUTimeRecord) UInt(64.W) else UInt(0.W)
val debug_tage_cycle = if (EnableBPUTimeRecord) UInt(64.W) else UInt(0.W)
def apply(histPtr: UInt, tageMeta: TageMeta, rasSp: UInt, rasTopCtr: UInt) = {
this.histPtr := histPtr
this.tageMeta := tageMeta
this.rasSp := rasSp
this.rasTopCtr := rasTopCtr
this.asUInt
}
// def apply(histPtr: UInt, tageMeta: TageMeta, rasSp: UInt, rasTopCtr: UInt) = {
// this.histPtr := histPtr
// this.tageMeta := tageMeta
// this.rasSp := rasSp
// this.rasTopCtr := rasTopCtr
// this.asUInt
// }
def size = 0.U.asTypeOf(this).getWidth
def fromUInt(x: UInt) = x.asTypeOf(this)
}
......@@ -189,8 +195,19 @@ class CtrlSignals extends XSBundle {
val blockBackward = Bool() // block backward
val flushPipe = Bool() // This inst will flush all the pipe when commit, like exception but can commit
val isRVF = Bool()
val selImm = SelImm()
val imm = UInt(XLEN.W)
val commitType = CommitType()
def decode(inst: UInt, table: Iterable[(BitPat, List[BitPat])]) = {
val decoder = freechips.rocketchip.rocket.DecodeLogic(inst, XDecode.decodeDefault, table)
val signals =
Seq(src1Type, src2Type, src3Type, fuType, fuOpType, rfWen, fpWen,
isXSTrap, noSpecExec, blockBackward, flushPipe, isRVF, selImm)
signals zip decoder map { case(s, d) => s := d }
commitType := DontCare
this
}
}
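As an aside, a sketch of how the new decode method would be invoked (the call site below is illustrative; XDecode.table is assumed to be the decode table paired with XDecode.decodeDefault above):

// Hypothetical call site: wire a CtrlSignals and let decode fill its control fields.
val cs = Wire(new CtrlSignals)
cs.decode(io.in.bits.instr, XDecode.table) // drives src types, fuType, rfWen/fpWen, selImm, ...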
class CfCtrl extends XSBundle {
......@@ -285,9 +302,13 @@ class CSRSpecialIO extends XSBundle {
// val mcommit = Input(UInt(3.W))
//}
class RoqCommit extends XSBundle {
val uop = new MicroOp
val isWalk = Bool()
class RoqCommitIO extends XSBundle {
val isWalk = Output(Bool())
val valid = Vec(CommitWidth, Output(Bool()))
val uop = Vec(CommitWidth, Output(new MicroOp))
def hasWalkInstr = isWalk && valid.asUInt.orR
def hasCommitInstr = !isWalk && valid.asUInt.orR
}
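An illustrative consumer of the new bundle (hypothetical code, not from this commit): the helpers fold the per-slot valid bits so callers no longer scan a Vec of ValidIO.

// Hypothetical use of RoqCommitIO on the receiving side.
val commitCnt = Mux(io.commits.hasCommitInstr, PopCount(io.commits.valid), 0.U)
val walkCnt   = Mux(io.commits.hasWalkInstr,   PopCount(io.commits.valid), 0.U)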
class TlbFeedback extends XSBundle {
......
......@@ -63,9 +63,9 @@ case class XSCoreParameters
RoqSize: Int = 192,
dpParams: DispatchParameters = DispatchParameters(
DqEnqWidth = 4,
IntDqSize = 128,
FpDqSize = 128,
LsDqSize = 96,
IntDqSize = 24,
FpDqSize = 24,
LsDqSize = 24,
IntDqDeqWidth = 4,
FpDqDeqWidth = 4,
LsDqDeqWidth = 4
......@@ -316,20 +316,16 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
val uncache = outer.uncache.module
val l1pluscache = outer.l1pluscache.module
val ptw = outer.ptw.module
val icache = Module(new ICache)
frontend.io.backend <> ctrlBlock.io.frontend
frontend.io.icacheResp <> icache.io.resp
frontend.io.icacheToTlb <> icache.io.tlb
icache.io.req <> frontend.io.icacheReq
icache.io.flush <> frontend.io.icacheFlush
frontend.io.sfence <> integerBlock.io.fenceio.sfence
frontend.io.tlbCsr <> integerBlock.io.csrio.tlb
icache.io.mem_acquire <> l1pluscache.io.req
l1pluscache.io.resp <> icache.io.mem_grant
l1pluscache.io.flush := icache.io.l1plusflush
icache.io.fencei := integerBlock.io.fenceio.fencei
frontend.io.icacheMemAcq <> l1pluscache.io.req
l1pluscache.io.resp <> frontend.io.icacheMemGrant
l1pluscache.io.flush := frontend.io.l1plusFlush
frontend.io.fencei := integerBlock.io.fenceio.fencei
ctrlBlock.io.fromIntBlock <> integerBlock.io.toCtrlBlock
ctrlBlock.io.fromFpBlock <> floatBlock.io.toCtrlBlock
......
......@@ -4,7 +4,7 @@ import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.decode.{DecodeBuffer, DecodeStage}
import xiangshan.backend.decode.DecodeStage
import xiangshan.backend.rename.{Rename, BusyTable}
import xiangshan.backend.brq.Brq
import xiangshan.backend.dispatch.Dispatch
......@@ -53,14 +53,13 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
val exception = ValidIO(new MicroOp)
val isInterrupt = Output(Bool())
// to mem block
val commits = Vec(CommitWidth, ValidIO(new RoqCommit))
val commits = new RoqCommitIO
val roqDeqPtr = Output(new RoqPtr)
}
})
val decode = Module(new DecodeStage)
val brq = Module(new Brq)
val decBuf = Module(new DecodeBuffer)
val rename = Module(new Rename)
val dispatch = Module(new Dispatch)
val intBusyTable = Module(new BusyTable(NRIntReadPorts, NRIntWritePorts))
......@@ -87,7 +86,6 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
decode.io.in <> io.frontend.cfVec
decode.io.toBrq <> brq.io.enqReqs
decode.io.brTags <> brq.io.brTags
decode.io.out <> decBuf.io.in
brq.io.roqRedirect <> roq.io.redirect
brq.io.memRedirect.valid := brq.io.redirect.valid || io.fromLsBlock.replay.valid
......@@ -96,10 +94,10 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
brq.io.enqReqs <> decode.io.toBrq
brq.io.exuRedirect <> io.fromIntBlock.exuRedirect
decBuf.io.isWalking := roq.io.commits(0).valid && roq.io.commits(0).bits.isWalk
decBuf.io.redirect.valid <> redirectValid
decBuf.io.redirect.bits <> redirect
decBuf.io.out <> rename.io.in
// pipeline between decode and dispatch
for (i <- 0 until RenameWidth) {
PipelineConnect(decode.io.out(i), rename.io.in(i), rename.io.in(i).ready, redirectValid)
}
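The DecodeBuffer is gone; decode and rename are now joined by a plain registered handshake. A minimal sketch of what a PipelineConnect-style helper does (the real utils.PipelineConnect may differ in details):

// Hypothetical one-entry pipeline stage between two DecoupledIO ports.
def pipelineConnectSketch[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T],
                                     rightOutFire: Bool, isFlush: Bool): Unit = {
  val valid = RegInit(false.B)
  when (rightOutFire)              { valid := false.B } // downstream consumed the entry
  when (left.valid && right.ready) { valid := true.B }  // upstream refills it
  when (isFlush)                   { valid := false.B } // a redirect kills the in-flight uop
  left.ready  := right.ready
  right.bits  := RegEnable(left.bits, left.valid && right.ready)
  right.valid := valid
}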
rename.io.redirect.valid <> redirectValid
rename.io.redirect.bits <> redirect
......
......@@ -53,7 +53,7 @@ class MemBlock
val lsqio = new Bundle {
val exceptionAddr = new ExceptionAddrIO // to csr
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit))) // to lsq
val commits = Flipped(new RoqCommitIO) // to lsq
val roqDeqPtr = Input(new RoqPtr) // to lsq
}
})
......@@ -229,49 +229,67 @@ class MemBlock
assert(!(fenceFlush && atomicsFlush))
sbuffer.io.flush.valid := fenceFlush || atomicsFlush
// TODO: make 0/1 configurable
// AtomicsUnit
// AtomicsUnit will override other control signals,
// AtomicsUnit: AtomicsUnit will override other control signals,
// as atomics insts (LR/SC/AMO) will block the pipeline
val st0_atomics = reservationStations(2).io.deq.valid && reservationStations(2).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(3).io.deq.valid && reservationStations(3).io.deq.bits.uop.ctrl.fuType === FuType.mou
// amo should always go through store issue queue 0
assert(!st1_atomics)
val s_normal :: s_atomics_0 :: s_atomics_1 :: Nil = Enum(3)
val state = RegInit(s_normal)
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
val atomic_rs0 = exuParameters.LduCnt + 0
val atomic_rs1 = exuParameters.LduCnt + 1
val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType === FuType.mou
// dispatch 0 takes priority
atomicsUnit.io.in.valid := st0_atomics
atomicsUnit.io.in.bits := reservationStations(2).io.deq.bits
when (st0_atomics) {
reservationStations(0).io.deq.ready := atomicsUnit.io.in.ready
reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
}
when(atomicsUnit.io.dtlb.req.valid) {
dtlb.io.requestor(0) <> atomicsUnit.io.dtlb
// take load unit 0's tlb port
// make sure not to disturb loadUnit
assert(!loadUnits(0).io.dtlb.req.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
state := s_atomics_0
assert(!st1_atomics)
}
when (st1_atomics) {
reservationStations(atomic_rs1).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(1).io.stin.valid := false.B
when(atomicsUnit.io.tlbFeedback.valid) {
assert(!storeUnits(0).io.tlbFeedback.valid)
atomicsUnit.io.tlbFeedback <> reservationStations(exuParameters.LduCnt + 0).io.feedback
state := s_atomics_1
assert(!st0_atomics)
}
when (atomicsUnit.io.out.valid) {
assert(state === s_atomics_0 || state === s_atomics_1)
state := s_normal
}
atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
atomicsUnit.io.dcache <> io.dcache.atomics
atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
// for atomicsUnit, it uses loadUnit(0)'s TLB port
when (state === s_atomics_0 || state === s_atomics_1) {
atomicsUnit.io.dtlb <> dtlb.io.requestor(0)
when(atomicsUnit.io.out.valid){
// take load unit 0's write back port
assert(!loadUnits(0).io.ldout.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
loadUnits(0).io.ldout.ready := false.B
// make sure there are no in-flight uops in the load unit
assert(!loadUnits(0).io.dtlb.req.valid)
assert(!loadUnits(0).io.ldout.valid)
}
when (state === s_atomics_0) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs0).io.feedback
assert(!storeUnits(0).io.tlbFeedback.valid)
}
when (state === s_atomics_1) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs1).io.feedback
assert(!storeUnits(1).io.tlbFeedback.valid)
}
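Summarizing the wiring above: the atomics path is now a small three-state machine that parks in s_atomics_0/1 while an LR/SC/AMO from the matching reservation station owns loadUnit(0)'s TLB port and write-back port. Roughly (a condensed, hypothetical restatement, not new logic):

switch (state) {
  is (s_normal)    { when (st0_atomics) { state := s_atomics_0 }
                     when (st1_atomics) { state := s_atomics_1 } }
  is (s_atomics_0) { when (atomicsUnit.io.out.valid) { state := s_normal } }
  is (s_atomics_1) { when (atomicsUnit.io.out.valid) { state := s_normal } }
}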
lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx
......
......@@ -90,16 +90,13 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
/*
example: headIdx = 2
headIdxOH = 00000100
headIdxMaskHI = 11111100
headIdxMaskHi = 11111100
headIdxMaskLo = 00000011
skipMask = 00111101
commitIdxHi = 6
commitIdxLo = 0
commitIdx = 6
*/
val headIdxOH = UIntToOH(headIdx)
val headIdxMaskLo = headIdxOH - 1.U
val headIdxMaskLo = UIntToMask(headIdx, BrqSize)
val headIdxMaskHi = ~headIdxMaskLo
val commitIdxHi = PriorityEncoder((~skipMask).asUInt() & headIdxMaskHi)
......
package xiangshan.backend.decode
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
class DecodeBuffer extends XSModule {
val io = IO(new Bundle() {
val isWalking = Input(Bool())
val redirect = Flipped(ValidIO(new Redirect))
val in = Vec(DecodeWidth, Flipped(DecoupledIO(new CfCtrl)))
val out = Vec(RenameWidth, DecoupledIO(new CfCtrl))
})
require(DecodeWidth == RenameWidth)
val validVec = RegInit(VecInit(Seq.fill(DecodeWidth)(false.B)))
val leftCanIn = ParallelAND(
validVec.zip(io.out.map(_.fire())).map({
case (v, fire) =>
!v || fire
})
)
val flush = io.redirect.valid// && !io.redirect.bits.isReplay
for( i <- 0 until RenameWidth){
when(io.out(i).fire()){
validVec(i) := false.B
}
when(io.in(i).fire()){
validVec(i) := true.B
}
when(flush){
validVec(i) := false.B
}
val r = RegEnable(io.in(i).bits, io.in(i).fire())
io.in(i).ready := leftCanIn
io.out(i).bits <> r
io.out(i).valid := validVec(i) && !flush && !io.isWalking
}
for(in <- io.in){
XSInfo(p"in v:${in.valid} r:${in.ready} pc=${Hexadecimal(in.bits.cf.pc)}\n")
}
for(out <- io.out){
XSInfo(p"out v:${out.valid} r:${out.ready} pc=${Hexadecimal(out.bits.cf.pc)}\n")
}
}
package xiangshan.backend.decode
import chisel3._
import chisel3.util.BitPat
import xiangshan.{FuType, HasXSParameter}
import xiangshan.backend._
import xiangshan.backend.decode.isa._
......@@ -32,15 +33,6 @@ trait HasInstrType {
).map(_===instrType).reduce(_||_)
}
object SrcType {
def reg = "b00".U
def pc = "b01".U
def imm = "b01".U
def fp = "b10".U
def apply() = UInt(2.W)
}
object FuOpType {
def apply() = UInt(6.W)
}
......
......@@ -19,7 +19,7 @@ class DecodeStage extends XSModule {
// to DecBuffer
val out = Vec(DecodeWidth, DecoupledIO(new CfCtrl))
})
val decoders = Seq.fill(DecodeWidth)(Module(new Decoder))
val decoders = Seq.fill(DecodeWidth)(Module(new DecodeUnit))
val decoderToBrq = Wire(Vec(DecodeWidth, new CfCtrl)) // without brTag and brMask
val decoderToDecBuffer = Wire(Vec(DecodeWidth, new CfCtrl)) // with brTag and brMask
......@@ -32,18 +32,18 @@ class DecodeStage extends XSModule {
// Second, assert out(i).valid iff in(i).valid and instruction is valid (not implemented) and toBrq(i).ready
for (i <- 0 until DecodeWidth) {
decoders(i).io.in <> io.in(i).bits
decoderToBrq(i) := decoders(i).io.out // CfCtrl without brTag and brMask
decoders(i).io.enq.ctrl_flow <> io.in(i).bits
decoderToBrq(i) := decoders(i).io.deq.cf_ctrl // CfCtrl without brTag and brMask
decoderToBrq(i).brTag := DontCare
io.toBrq(i).bits := decoderToBrq(i)
decoderToDecBuffer(i) := decoders(i).io.out
decoderToDecBuffer(i) := decoders(i).io.deq.cf_ctrl
decoderToDecBuffer(i).brTag := io.brTags(i)
io.out(i).bits := decoderToDecBuffer(i)
val isMret = decoders(i).io.out.cf.instr === BitPat("b001100000010_00000_000_00000_1110011")
val isSret = decoders(i).io.out.cf.instr === BitPat("b000100000010_00000_000_00000_1110011")
val thisBrqValid = !decoders(i).io.out.cf.brUpdate.pd.notCFI || isMret || isSret
val isMret = decoders(i).io.deq.cf_ctrl.cf.instr === BitPat("b001100000010_00000_000_00000_1110011")
val isSret = decoders(i).io.deq.cf_ctrl.cf.instr === BitPat("b000100000010_00000_000_00000_1110011")
val thisBrqValid = !decoders(i).io.deq.cf_ctrl.cf.brUpdate.pd.notCFI || isMret || isSret
io.in(i).ready := io.out(i).ready && io.toBrq(i).ready
io.out(i).valid := io.in(i).valid && io.toBrq(i).ready
io.toBrq(i).valid := io.in(i).valid && thisBrqValid && io.out(i).ready
......
......@@ -3,7 +3,6 @@ package xiangshan.backend.dispatch
import chisel3._
import chisel3.util._
import utils._
import xiangshan.backend.decode.SrcType
import xiangshan._
import xiangshan.backend.roq.RoqPtr
......@@ -29,10 +28,10 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int) extends XSModule with H
// head: first valid entry (dispatched entry)
val headPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(size)))
val headPtrMask = UIntToMask(headPtr.value)
val headPtrMask = UIntToMask(headPtr.value, size)
// tail: first invalid entry (free entry)
val tailPtr = RegInit(0.U.asTypeOf(new CircularQueuePtr(size)))
val tailPtrMask = UIntToMask(tailPtr.value)
val tailPtrMask = UIntToMask(tailPtr.value, size)
// TODO: make ptr a vector to reduce latency?
// deq: starting from head ptr
......
......@@ -164,4 +164,17 @@ package object backend {
def apply() = UInt(2.W)
}
object SelImm {
def IMM_X = "b111".U
def IMM_S = "b000".U
def IMM_SB = "b001".U
def IMM_U = "b010".U
def IMM_UJ = "b011".U
def IMM_I = "b100".U
def IMM_Z = "b101".U
def INVALID_INSTR = "b110".U
def apply() = UInt(3.W)
}
}
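SelImm tags each instruction with its RISC-V immediate format so the immediate can be re-expanded later from the raw bits. A hypothetical sketch of an extractor keyed on these codes (only three formats shown; widths assume a 32-bit instruction sign-extended to 64 bits):

// Illustrative immediate selection keyed by SelImm; not this commit's decoder.
def selectImm(inst: UInt, selImm: UInt): UInt = {
  val immI = Cat(Fill(52, inst(31)), inst(31, 20))              // I-type
  val immS = Cat(Fill(52, inst(31)), inst(31, 25), inst(11, 7)) // S-type
  val immU = Cat(Fill(32, inst(31)), inst(31, 12), 0.U(12.W))   // U-type
  MuxLookup(selImm, 0.U, Seq(
    SelImm.IMM_I -> immI,
    SelImm.IMM_S -> immS,
    SelImm.IMM_U -> immU
  ))
}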
......@@ -3,7 +3,7 @@ package xiangshan.backend.rename
import chisel3._
import chisel3.util._
import xiangshan._
import utils.{CircularQueuePtr, HasCircularQueuePtrHelper, XSDebug}
import utils._
import xiangshan.backend.brq.BrqPtr
trait HasFreeListConsts extends HasXSParameter {
......@@ -36,13 +36,19 @@ class FreeList extends XSModule with HasFreeListConsts with HasCircularQueuePtrH
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
// alloc new phy regs
val allocReqs = Input(Vec(RenameWidth, Bool()))
val pdests = Output(Vec(RenameWidth, UInt(PhyRegIdxWidth.W)))
val canAlloc = Output(Vec(RenameWidth, Bool()))
val req = new Bundle {
// need to alloc (not actually do the allocation)
val allocReqs = Vec(RenameWidth, Input(Bool()))
// response pdest according to alloc
val pdests = Vec(RenameWidth, Output(UInt(PhyRegIdxWidth.W)))
// freelist can alloc
val canAlloc = Output(Bool())
// actually do the allocation
val doAlloc = Input(Bool())
}
// do checkpoints
val cpReqs = Vec(RenameWidth, Flipped(ValidIO(new BrqPtr)))
// val cpReqs = Vec(RenameWidth, Flipped(ValidIO(new BrqPtr)))
val walk = Flipped(ValidIO(UInt(log2Up(RenameWidth).W)))
// dealloc phy regs
......@@ -75,33 +81,32 @@ class FreeList extends XSModule with HasFreeListConsts with HasCircularQueuePtrH
// number of free regs in freelist
val freeRegs = Wire(UInt())
// use RegNext for better timing
val hasEnoughRegs = RegNext(freeRegs >= RenameWidth.U, true.B)
io.req.canAlloc := RegNext(freeRegs >= RenameWidth.U)
XSDebug(p"free regs: $freeRegs\n")
val newHeadPtrs = ((0 until RenameWidth) map {i =>
if(i == 0) headPtr else headPtr + PopCount(io.allocReqs.take(i))
}) :+ (headPtr + PopCount(io.allocReqs))
val allocatePtrs = (0 until RenameWidth).map(i => headPtr + i.U)
val allocatePdests = VecInit(allocatePtrs.map(ptr => freeList(ptr.value)))
for(i <- 0 until RenameWidth){
val ptr = newHeadPtrs(i)
val idx = ptr.value
io.canAlloc(i) := hasEnoughRegs
io.pdests(i) := freeList(idx)
when(io.cpReqs(i).valid){
checkPoints(io.cpReqs(i).bits.value) := newHeadPtrs(i+1)
XSDebug(p"do checkPt at BrqIdx=${io.cpReqs(i).bits.value} ${newHeadPtrs(i+1)}\n")
}
XSDebug(p"req:${io.allocReqs(i)} canAlloc:$hasEnoughRegs pdest:${io.pdests(i)}\n")
io.req.pdests(i) := allocatePdests(/*if (i == 0) 0.U else */PopCount(io.req.allocReqs.take(i)))
// when(io.cpReqs(i).valid){
// checkPoints(io.cpReqs(i).bits.value) := newHeadPtrs(i+1)
// XSDebug(p"do checkPt at BrqIdx=${io.cpReqs(i).bits.value} ${newHeadPtrs(i+1)}\n")
// }
XSDebug(p"req:${io.req.allocReqs(i)} canAlloc:${io.req.canAlloc} pdest:${io.req.pdests(i)}\n")
}
val headPtrNext = Mux(hasEnoughRegs, newHeadPtrs.last, headPtr)
val headPtrAllocate = headPtr + PopCount(io.req.allocReqs)
val headPtrNext = Mux(io.req.canAlloc && io.req.doAlloc, headPtrAllocate, headPtr)
freeRegs := distanceBetween(tailPtr, headPtrNext)
// when mispredict or exception happens, reset headPtr to tailPtr (freelist is full).
val resetHeadPtr = io.redirect.valid && (io.redirect.bits.isException || io.redirect.bits.isFlushPipe)
headPtr := Mux(resetHeadPtr,
val resetHeadPtr = io.redirect.bits.isException || io.redirect.bits.isFlushPipe
// priority: (1) exception and flushPipe; (2) walking; (3) mis-prediction; (4) normal dequeue
headPtr := Mux(io.redirect.valid && resetHeadPtr,
FreeListPtr(!tailPtrNext.flag, tailPtrNext.value),
Mux(io.walk.valid, headPtr - io.walk.bits, headPtrNext)
Mux(io.walk.valid,
headPtr - io.walk.bits,
Mux(io.redirect.valid, headPtr, headPtrNext))
)
XSDebug(p"head:$headPtr tail:$tailPtr\n")
......@@ -109,10 +114,10 @@ class FreeList extends XSModule with HasFreeListConsts with HasCircularQueuePtrH
XSDebug(io.redirect.valid, p"redirect: brqIdx=${io.redirect.bits.brTag.value}\n")
val enableFreelistCheck = false
if(env.EnableDebug && enableFreelistCheck){
for( i <- 0 until FL_SIZE){
for(j <- i+1 until FL_SIZE){
assert(freeList(i) != freeList(j), s"Found same entry in freelist! (i=$i j=$j)")
if (enableFreelistCheck) {
for (i <- 0 until FL_SIZE) {
for (j <- i+1 until FL_SIZE) {
XSError(freeList(i) === freeList(j), s"Found same entry in freelist! (i=$i j=$j)")
}
}
}
......
......@@ -15,7 +15,7 @@ class RenameBypassInfo extends XSBundle {
class Rename extends XSModule {
val io = IO(new Bundle() {
val redirect = Flipped(ValidIO(new Redirect))
val roqCommits = Vec(CommitWidth, Flipped(ValidIO(new RoqCommit)))
val roqCommits = Flipped(new RoqCommitIO)
// from decode buffer
val in = Vec(RenameWidth, Flipped(DecoupledIO(new CfCtrl)))
// to dispatch1
......@@ -47,18 +47,21 @@ class Rename extends XSModule {
fpFreeList.redirect := io.redirect
intFreeList.redirect := io.redirect
val flush = io.redirect.valid && (io.redirect.bits.isException || io.redirect.bits.isFlushPipe) // TODO: need check by JiaWei
fpRat.flush := flush
intRat.flush := flush
fpRat.redirect := io.redirect
intRat.redirect := io.redirect
fpRat.walkWen := io.roqCommits.isWalk
intRat.walkWen := io.roqCommits.isWalk
def needDestReg[T <: CfCtrl](fp: Boolean, x: T): Bool = {
{if(fp) x.ctrl.fpWen else x.ctrl.rfWen && (x.ctrl.ldest =/= 0.U)}
}
val walkValid = Cat(io.roqCommits.map(_.valid)).orR && io.roqCommits(0).bits.isWalk
fpFreeList.walk.valid := walkValid
intFreeList.walk.valid := walkValid
fpFreeList.walk.bits := PopCount(io.roqCommits.map(c => c.valid && needDestReg(true, c.bits.uop)))
intFreeList.walk.bits := PopCount(io.roqCommits.map(c => c.valid && needDestReg(false, c.bits.uop)))
fpFreeList.walk.valid := io.roqCommits.isWalk
intFreeList.walk.valid := io.roqCommits.isWalk
fpFreeList.walk.bits := PopCount((0 until CommitWidth).map(i => io.roqCommits.valid(i) && needDestReg(true, io.roqCommits.uop(i))))
intFreeList.walk.bits := PopCount((0 until CommitWidth).map(i => io.roqCommits.valid(i) && needDestReg(false, io.roqCommits.uop(i))))
// walk has higher priority than allocation and thus we don't use isWalk here
fpFreeList.req.doAlloc := intFreeList.req.canAlloc && io.out(0).ready
intFreeList.req.doAlloc := fpFreeList.req.canAlloc && io.out(0).ready
val uops = Wire(Vec(RenameWidth, new MicroOp))
......@@ -76,10 +79,8 @@ class Rename extends XSModule {
val needFpDest = Wire(Vec(RenameWidth, Bool()))
val needIntDest = Wire(Vec(RenameWidth, Bool()))
var lastReady = WireInit(io.out(0).ready)
// debug assert
val outRdy = Cat(io.out.map(_.ready))
assert(outRdy===0.U || outRdy.andR())
val hasValid = Cat(io.in.map(_.valid)).orR
val canOut = io.out(0).ready && fpFreeList.req.canAlloc && intFreeList.req.canAlloc && !io.roqCommits.isWalk
for(i <- 0 until RenameWidth) {
uops(i).cf := io.in(i).bits.cf
uops(i).ctrl := io.in(i).bits.ctrl
......@@ -90,38 +91,26 @@ class Rename extends XSModule {
// alloc a new phy reg
needFpDest(i) := inValid && needDestReg(fp = true, io.in(i).bits)
needIntDest(i) := inValid && needDestReg(fp = false, io.in(i).bits)
fpFreeList.allocReqs(i) := needFpDest(i) && lastReady
intFreeList.allocReqs(i) := needIntDest(i) && lastReady
val fpCanAlloc = fpFreeList.canAlloc(i)
val intCanAlloc = intFreeList.canAlloc(i)
val this_can_alloc = Mux(
needIntDest(i),
intCanAlloc,
Mux(
needFpDest(i),
fpCanAlloc,
true.B
)
)
io.in(i).ready := lastReady && this_can_alloc
fpFreeList.req.allocReqs(i) := needFpDest(i)
intFreeList.req.allocReqs(i) := needIntDest(i)
// do checkpoints when a branch inst come
for(fl <- Seq(fpFreeList, intFreeList)){
fl.cpReqs(i).valid := inValid
fl.cpReqs(i).bits := io.in(i).bits.brTag
}
io.in(i).ready := !hasValid || canOut
lastReady = io.in(i).ready
// do checkpoints when a branch inst come
// for(fl <- Seq(fpFreeList, intFreeList)){
// fl.cpReqs(i).valid := inValid
// fl.cpReqs(i).bits := io.in(i).bits.brTag
// }
uops(i).pdest := Mux(needIntDest(i),
intFreeList.pdests(i),
intFreeList.req.pdests(i),
Mux(
uops(i).ctrl.ldest===0.U && uops(i).ctrl.rfWen,
0.U, fpFreeList.pdests(i)
0.U, fpFreeList.req.pdests(i)
)
)
io.out(i).valid := io.in(i).fire()
io.out(i).valid := io.in(i).valid && intFreeList.req.canAlloc && fpFreeList.req.canAlloc && !io.roqCommits.isWalk
io.out(i).bits := uops(i)
// write rename table
......@@ -129,23 +118,23 @@ class Rename extends XSModule {
val rat = if(fp) fpRat else intRat
val freeList = if(fp) fpFreeList else intFreeList
// speculative inst write
val specWen = freeList.allocReqs(i) && freeList.canAlloc(i)
val specWen = freeList.req.allocReqs(i) && freeList.req.canAlloc && freeList.req.doAlloc && !io.roqCommits.isWalk
// walk back write
val commitDestValid = io.roqCommits(i).valid && needDestReg(fp, io.roqCommits(i).bits.uop)
val walkWen = commitDestValid && io.roqCommits(i).bits.isWalk
val commitDestValid = io.roqCommits.valid(i) && needDestReg(fp, io.roqCommits.uop(i))
val walkWen = commitDestValid && io.roqCommits.isWalk
rat.specWritePorts(i).wen := specWen || walkWen
rat.specWritePorts(i).addr := Mux(specWen, uops(i).ctrl.ldest, io.roqCommits(i).bits.uop.ctrl.ldest)
rat.specWritePorts(i).wdata := Mux(specWen, freeList.pdests(i), io.roqCommits(i).bits.uop.old_pdest)
rat.specWritePorts(i).addr := Mux(specWen, uops(i).ctrl.ldest, io.roqCommits.uop(i).ctrl.ldest)
rat.specWritePorts(i).wdata := Mux(specWen, freeList.req.pdests(i), io.roqCommits.uop(i).old_pdest)
XSInfo(walkWen,
{if(fp) p"fp" else p"int "} + p"walk: pc:${Hexadecimal(io.roqCommits(i).bits.uop.cf.pc)}" +
{if(fp) p"fp" else p"int "} + p"walk: pc:${Hexadecimal(io.roqCommits.uop(i).cf.pc)}" +
p" ldest:${rat.specWritePorts(i).addr} old_pdest:${rat.specWritePorts(i).wdata}\n"
)
rat.archWritePorts(i).wen := commitDestValid && !io.roqCommits(i).bits.isWalk
rat.archWritePorts(i).addr := io.roqCommits(i).bits.uop.ctrl.ldest
rat.archWritePorts(i).wdata := io.roqCommits(i).bits.uop.pdest
rat.archWritePorts(i).wen := commitDestValid && !io.roqCommits.isWalk
rat.archWritePorts(i).addr := io.roqCommits.uop(i).ctrl.ldest
rat.archWritePorts(i).wdata := io.roqCommits.uop(i).pdest
XSInfo(rat.archWritePorts(i).wen,
{if(fp) p"fp" else p"int "} + p" rat arch: ldest:${rat.archWritePorts(i).addr}" +
......@@ -153,7 +142,7 @@ class Rename extends XSModule {
)
freeList.deallocReqs(i) := rat.archWritePorts(i).wen
freeList.deallocPregs(i) := io.roqCommits(i).bits.uop.old_pdest
freeList.deallocPregs(i) := io.roqCommits.uop(i).old_pdest
}
......
......@@ -17,7 +17,8 @@ class RatWritePort extends XSBundle {
class RenameTable(float: Boolean) extends XSModule {
val io = IO(new Bundle() {
val flush = Input(Bool())
val redirect = Flipped(ValidIO(new Redirect))
val walkWen = Input(Bool())
val readPorts = Vec({if(float) 4 else 3} * RenameWidth, new RatReadPort)
val specWritePorts = Vec(RenameWidth, new RatWritePort)
val archWritePorts = Vec(CommitWidth, new RatWritePort)
......@@ -29,8 +30,12 @@ class RenameTable(float: Boolean) extends XSModule {
// arch state rename table
val arch_table = RegInit(VecInit(Seq.tabulate(32)(i => i.U(PhyRegIdxWidth.W))))
// When redirect happens (mis-prediction), don't update the rename table
// However, when a mis-prediction and a walk happen at the same time, the rename table needs to be updated
for(w <- io.specWritePorts){
when(w.wen){ spec_table(w.addr) := w.wdata }
when(w.wen && (!io.redirect.valid || io.walkWen)) {
spec_table(w.addr) := w.wdata
}
}
for((r, i) <- io.readPorts.zipWithIndex){
......@@ -44,9 +49,11 @@ class RenameTable(float: Boolean) extends XSModule {
when(w.wen){ arch_table(w.addr) := w.wdata }
}
when(io.flush){
val flush = io.redirect.valid && (io.redirect.bits.isException || io.redirect.bits.isFlushPipe)
when (flush) {
spec_table := arch_table
for(w <- io.archWritePorts) {
// spec table needs to be updated when flushPipe
for (w <- io.archWritePorts) {
when(w.wen){ spec_table(w.addr) := w.wdata }
}
}
......
......@@ -53,7 +53,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val exception = Output(new MicroOp)
// exu + brq
val exeWbResults = Vec(numWbPorts, Flipped(ValidIO(new ExuOutput)))
val commits = Vec(CommitWidth, Valid(new RoqCommit))
val commits = new RoqCommitIO
val bcommit = Output(UInt(BrTagWidth.W))
val roqDeqPtr = Output(new RoqPtr)
val csr = new RoqCSRIO
......@@ -127,15 +127,15 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val hasBlockBackward = RegInit(false.B)
val hasNoSpecExec = RegInit(false.B)
// When blockBackward instruction leaves Roq (commit or walk), hasBlockBackward should be set to false.B
val blockBackwardLeave = Cat(io.commits.map(c => c.valid && c.bits.uop.ctrl.blockBackward)).orR || io.redirect.valid
when(blockBackwardLeave){ hasBlockBackward:= false.B }
val blockBackwardLeave = Cat((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.uop(i).ctrl.blockBackward)).orR
when(blockBackwardLeave || io.redirect.valid) { hasBlockBackward:= false.B }
// When noSpecExec instruction commits (it should not be walked except when it has not entered Roq),
// hasNoSpecExec should be set to false.B
val noSpecExecCommit = Cat(io.commits.map(c => c.valid && !c.bits.isWalk && c.bits.uop.ctrl.noSpecExec)).orR || io.redirect.valid
when(noSpecExecCommit){ hasNoSpecExec:= false.B }
val noSpecExecCommit = !io.commits.isWalk && Cat((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.uop(i).ctrl.noSpecExec)).orR
when(noSpecExecCommit || io.redirect.valid) { hasNoSpecExec:= false.B }
// Assert that noSpecExec is never walked, since it's the only instruction in the Roq.
// An extra walk should be fine since noSpecExec has not entered the Roq.
val walkNoSpecExec = Cat(io.commits.map(c => c.valid && c.bits.isWalk && c.bits.uop.ctrl.noSpecExec)).orR
val walkNoSpecExec = io.commits.isWalk && Cat((0 until CommitWidth).map(i => io.commits.valid(i) && io.commits.uop(i).ctrl.noSpecExec)).orR
XSError(state =/= s_extrawalk && walkNoSpecExec, "noSpecExec should not walk\n")
val validDispatch = io.enq.req.map(_.valid)
......@@ -171,7 +171,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// Writeback
val firedWriteback = io.exeWbResults.map(_.fire())
XSInfo(PopCount(firedWriteback) > 0.U, "writebacked %d insts\n", PopCount(firedWriteback))
for(i <- 0 until numWbPorts){
for(i <- 0 until numWbPorts) {
when(io.exeWbResults(i).fire()){
val wbIdxExt = io.exeWbResults(i).bits.uop.roqIdx
val wbIdx = wbIdxExt.value
......@@ -186,7 +186,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val debug_Uop = microOp(wbIdx)
XSInfo(true.B,
p"writebacked pc 0x${Hexadecimal(debug_Uop.cf.pc)} wen ${debug_Uop.ctrl.rfWen} " +
p"data 0x${Hexadecimal(io.exeWbResults(i).bits.data)} ldst ${debug_Uop.ctrl.ldest} pdst ${debug_Uop.ctrl.ldest} " +
p"data 0x${Hexadecimal(io.exeWbResults(i).bits.data)} ldst ${debug_Uop.ctrl.ldest} pdst ${debug_Uop.pdest} " +
p"skip ${io.exeWbResults(i).bits.debug.isMMIO} roqIdx: ${wbIdxExt}\n"
)
}
......@@ -234,27 +234,31 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// wiring to csr
val fflags = WireInit(0.U.asTypeOf(new Fflags))
val dirty_fs = WireInit(false.B)
for(i <- 0 until CommitWidth){
io.commits(i) := DontCare
io.commits.isWalk := state =/= s_idle
for (i <- 0 until CommitWidth) {
io.commits.valid(i) := false.B
io.commits.uop(i) := DontCare
switch(state){
is(s_idle){
val commitIdx = deqPtr + i.U
val commitUop = microOp(commitIdx)
val hasException = Cat(commitUop.cf.exceptionVec).orR() || intrEnable
val canCommit = if(i!=0) (io.commits(i-1).valid && !io.commits(i-1).bits.uop.ctrl.flushPipe) else true.B
val canCommit = if(i!=0) (io.commits.valid(i-1) && !io.commits.uop(i-1).ctrl.flushPipe) else true.B
val v = valid(commitIdx)
val w = writebacked(commitIdx)
io.commits(i).valid := v && w && canCommit && !hasException
io.commits(i).bits.uop := commitUop
io.commits.valid(i) := v && w && canCommit && !hasException
io.commits.uop(i) := commitUop
storeCommitVec(i) := io.commits(i).valid &&
storeCommitVec(i) := io.commits.valid(i) &&
commitUop.ctrl.commitType === CommitType.STORE
cfiCommitVec(i) := io.commits(i).valid &&
cfiCommitVec(i) := io.commits.valid(i) &&
!commitUop.cf.brUpdate.pd.notCFI
val commitFflags = exuFflags(commitIdx)
when(io.commits(i).valid){
when(io.commits.valid(i)){
when(commitFflags.asUInt.orR()){
// update fflags
fflags := exuFflags(commitIdx)
......@@ -265,7 +269,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
}
}
XSInfo(io.commits(i).valid,
XSInfo(io.commits.valid(i),
"retired pc %x wen %d ldest %d pdest %x old_pdest %x data %x fflags: %b\n",
commitUop.cf.pc,
commitUop.ctrl.rfWen,
......@@ -275,7 +279,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
debug_exuData(commitIdx),
exuFflags(commitIdx).asUInt
)
XSInfo(io.commits(i).valid && debug_exuDebug(commitIdx).isMMIO,
XSInfo(io.commits.valid(i) && debug_exuDebug(commitIdx).isMMIO,
"difftest skiped pc0x%x\n",
commitUop.cf.pc
)
......@@ -285,12 +289,12 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val idx = walkPtrVec(i).value
val v = valid(idx)
val walkUop = microOp(idx)
io.commits(i).valid := v && shouldWalkVec(i)
io.commits(i).bits.uop := walkUop
io.commits.valid(i) := v && shouldWalkVec(i)
io.commits.uop(i) := walkUop
when(shouldWalkVec(i)){
v := false.B
}
XSInfo(io.commits(i).valid && shouldWalkVec(i), "walked pc %x wen %d ldst %d data %x\n",
XSInfo(io.commits.valid(i) && shouldWalkVec(i), "walked pc %x wen %d ldst %d data %x\n",
walkUop.cf.pc,
walkUop.ctrl.rfWen,
walkUop.ctrl.ldest,
......@@ -301,23 +305,22 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
is(s_extrawalk){
val idx = RenameWidth-i-1
val walkUop = extraSpaceForMPR(idx)
io.commits(i).valid := usedSpaceForMPR(idx)
io.commits(i).bits.uop := walkUop
io.commits.valid(i) := usedSpaceForMPR(idx)
io.commits.uop(i) := walkUop
state := s_walk
XSInfo(io.commits(i).valid, "use extra space walked pc %x wen %d ldst %d\n",
XSInfo(io.commits.valid(i), "use extra space walked pc %x wen %d ldst %d\n",
walkUop.cf.pc,
walkUop.ctrl.rfWen,
walkUop.ctrl.ldest
)
}
}
io.commits(i).bits.isWalk := state =/= s_idle
}
io.csr.fflags := fflags
io.csr.dirty_fs := dirty_fs
val validCommit = io.commits.map(_.valid)
val validCommit = io.commits.valid
val commitCnt = PopCount(validCommit)
when(state===s_walk) {
// exit walk state when all roq entries are committed
......@@ -362,12 +365,13 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// when exception occurs, cancels all
when (io.redirect.valid) { // TODO: need check for flushPipe
state := s_idle
enqPtrExt := 0.U.asTypeOf(new RoqPtr)
deqPtrExt := 0.U.asTypeOf(new RoqPtr)
}
// instvalid field
// write
// enqueue logic writes 6 valid
for (i <- 0 until RenameWidth) {
......@@ -379,7 +383,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
for(i <- 0 until CommitWidth){
switch(state){
is(s_idle){
when(io.commits(i).valid){valid(deqPtrExtPlus(i)) := false.B}
when(io.commits.valid(i)){valid(deqPtrExtPlus(i)) := false.B}
}
is(s_walk){
val idx = walkPtrVec(i).value
......@@ -390,7 +394,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
}
}
// read
// read
// enqueue logic reads 6 valid
// dequeue/walk logic reads 6 valid, dequeue and walk will not happen at the same time
// rollback reads all valid? is it necessary?
......@@ -412,15 +416,15 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
writebacked(enqPtrValPlus(i)) := false.B
}
}
// writeback logic set numWbPorts writebacked to true
for(i <- 0 until numWbPorts){
// writeback logic set numWbPorts writebacked to true
for(i <- 0 until numWbPorts) {
when(io.exeWbResults(i).fire()){
val wbIdxExt = io.exeWbResults(i).bits.uop.roqIdx
val wbIdx = wbIdxExt.value
writebacked(wbIdx) := true.B
}
}
// rollback: write all
// rollback: write all
// when rollback, reset writebacked entry to valid
// when(io.memRedirect.valid) { // TODO: opt timing
// for (i <- 0 until RoqSize) {
......@@ -447,8 +451,8 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// all flagBkup will be used
// exuFflags
// write: writeback logic set numWbPorts exuFflags
for(i <- 0 until numWbPorts){
// write: writeback logic set numWbPorts exuFflags
for(i <- 0 until numWbPorts) {
when(io.exeWbResults(i).fire()){
val wbIdxExt = io.exeWbResults(i).bits.uop.roqIdx
val wbIdx = wbIdxExt.value
......@@ -468,7 +472,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
}
XSDebug(false, true.B, "\n")
for(i <- 0 until RoqSize){
for(i <- 0 until RoqSize) {
if(i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x ", microOp(i).cf.pc)
XSDebug(false, !valid(i), "- ")
......@@ -476,14 +480,14 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
XSDebug(false, valid(i) && !writebacked(i), "v ")
if(i % 4 == 3) XSDebug(false, true.B, "\n")
}
val id = roqDebugId()
val difftestIntrNO = WireInit(0.U(XLEN.W))
val difftestCause = WireInit(0.U(XLEN.W))
ExcitingUtils.addSink(difftestIntrNO, s"difftestIntrNOfromCSR$id")
ExcitingUtils.addSink(difftestCause, s"difftestCausefromCSR$id")
if(!env.FPGAPlatform){
if(!env.FPGAPlatform) {
//difftest signals
val firstValidCommit = deqPtr + PriorityMux(validCommit, VecInit(List.tabulate(CommitWidth)(_.U)))
......@@ -499,29 +503,29 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
for(i <- 0 until CommitWidth){
// io.commits(i).valid
val idx = deqPtr+i.U
val uop = io.commits(i).bits.uop
val uop = io.commits.uop(i)
val DifftestSkipSC = false
if(!DifftestSkipSC){
skip(i) := debug_exuDebug(idx).isMMIO && io.commits(i).valid
skip(i) := debug_exuDebug(idx).isMMIO && io.commits.valid(i)
}else{
skip(i) := (
debug_exuDebug(idx).isMMIO ||
debug_exuDebug(idx).isMMIO ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w
) && io.commits(i).valid
) && io.commits.valid(i)
}
wen(i) := io.commits(i).valid && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U
wen(i) := io.commits.valid(i) && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U
wdata(i) := debug_exuData(idx)
wdst(i) := uop.ctrl.ldest
diffTestDebugLrScValid(i) := uop.diffTestDebugLrScValid
wpc(i) := SignExt(uop.cf.pc, XLEN)
trapVec(i) := io.commits(i).valid && (state===s_idle) && uop.ctrl.isXSTrap
trapVec(i) := io.commits.valid(i) && (state===s_idle) && uop.ctrl.isXSTrap
isRVC(i) := uop.cf.brUpdate.pd.isRVC
}
val scFailed = !diffTestDebugLrScValid(0) &&
io.commits(0).bits.uop.ctrl.fuType === FuType.mou &&
(io.commits(0).bits.uop.ctrl.fuOpType === LSUOpType.sc_d || io.commits(0).bits.uop.ctrl.fuOpType === LSUOpType.sc_w)
val scFailed = !diffTestDebugLrScValid(0) &&
io.commits.uop(0).ctrl.fuType === FuType.mou &&
(io.commits.uop(0).ctrl.fuOpType === LSUOpType.sc_d || io.commits.uop(0).ctrl.fuOpType === LSUOpType.sc_w)
val instrCnt = RegInit(0.U(64.W))
instrCnt := instrCnt + retireCounter
......
......@@ -273,65 +273,105 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val entry = Reg(Vec(TlbEntrySize, new TlbEntry))
val g = VecInit(entry.map(_.perm.g)).asUInt // TODO: need check if reverse is needed
val entryHitVec = widthMapSeq{i => VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/))) }
val hitVec = widthMapSeq{ i => (v.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
val pfHitVec = widthMapSeq{ i => (pf.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
val pfArray = widthMap{ i => ParallelOR(pfHitVec(i)).asBool && valid(i) && vmEnable }
val hit = widthMap{ i => ParallelOR(hitVec(i)).asBool && valid(i) && vmEnable && ~pfArray(i) }
val miss = widthMap{ i => !hit(i) && valid(i) && vmEnable && ~pfArray(i) }
val hitppn = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.ppn)) }
val hitPerm = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.perm)) }
val hitLevel= widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.level)) }
val multiHit = {
val hitSum = widthMap{ i => PopCount(hitVec(i)) }
val pfHitSum = widthMap{ i => PopCount(pfHitVec(i)) }
ParallelOR(widthMap{ i => !(hitSum(i)===0.U || hitSum(i)===1.U) || !(pfHitSum(i)===0.U || pfHitSum(i)===1.U)})
/**
* PTW refill
*/
val refill = ptw.resp.fire()
val randIdx = LFSR64()(log2Up(TlbEntrySize)-1,0)
val priorIdx = PriorityEncoder(~(v|pf))
val tlbfull = ParallelAND((v|pf).asBools)
val refillIdx = Mux(tlbfull, randIdx, priorIdx)
val refillIdxOH = UIntToOH(refillIdx)
when (refill) {
v := Mux(ptw.resp.bits.pf, v & ~refillIdxOH, v | refillIdxOH)
entry(refillIdx) := ptw.resp.bits.entry
XSDebug(p"Refill: idx:${refillIdx} entry:${ptw.resp.bits.entry}\n")
}
// resp // TODO: A/D has not been handled yet
for(i <- 0 until Width) {
val paddr = LookupTreeDefault(hitLevel(i), Cat(hitppn(i), reqAddr(i).off), List(
0.U -> Cat(hitppn(i)(ppnLen - 1, 2*vpnnLen), reqAddr(i).vpn(2*vpnnLen - 1, 0), reqAddr(i).off),
1.U -> Cat(hitppn(i)(ppnLen - 1, vpnnLen), reqAddr(i).vpn(vpnnLen - 1, 0), reqAddr(i).off),
2.U -> Cat(hitppn(i), reqAddr(i).off)
/**
* L1 TLB read
*/
val tlb_read_mask = Mux(refill, refillIdxOH, 0.U(TlbEntrySize.W))
def TLBRead(i: Int) = {
val entryHitVec = (
if (isDtlb)
VecInit((tlb_read_mask.asBools zip entry).map{ case (r, e) => !r && e.hit(reqAddr(i).vpn/*, satp.asid*/)})
else
VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
)
val reqAddrReg = if (isDtlb) RegNext(reqAddr(i)) else reqAddr(i)
val cmdReg = if (isDtlb) RegNext(cmd(i)) else cmd(i)
val validReg = if (isDtlb) RegNext(valid(i)) else valid(i)
val entryHitVecReg = if (isDtlb) RegNext(entryHitVec) else entryHitVec
val hitVec = (v.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfHitVec = (pf.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfArray = ParallelOR(pfHitVec).asBool && validReg && vmEnable
val hit = ParallelOR(hitVec).asBool && validReg && vmEnable && ~pfArray
val miss = !hit && validReg && vmEnable && ~pfArray
val hitppn = ParallelMux(hitVec zip entry.map(_.ppn))
val hitPerm = ParallelMux(hitVec zip entry.map(_.perm))
val hitLevel= ParallelMux(hitVec zip entry.map(_.level))
val multiHit = {
val hitSum = PopCount(hitVec)
val pfHitSum = PopCount(pfHitVec)
!(hitSum===0.U || hitSum===1.U) || !(pfHitSum===0.U || pfHitSum===1.U)
}
// resp // TODO: A/D has not been handled yet
val paddr = LookupTreeDefault(hitLevel, Cat(hitppn, reqAddrReg.off), List(
0.U -> Cat(hitppn(ppnLen - 1, 2*vpnnLen), reqAddrReg.vpn(2*vpnnLen - 1, 0), reqAddrReg.off),
1.U -> Cat(hitppn(ppnLen - 1, vpnnLen), reqAddrReg.vpn(vpnnLen - 1, 0), reqAddrReg.off),
2.U -> Cat(hitppn, reqAddrReg.off)
))
val vaddr = SignExt(req(i).bits.vaddr, PAddrBits)
req(i).ready := resp(i).ready
resp(i).valid := valid(i)
resp(i).bits.paddr := Mux(vmEnable, paddr, SignExt(req(i).bits.vaddr, PAddrBits))
resp(i).bits.miss := miss(i)
resp(i).valid := validReg
resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr)
resp(i).bits.miss := miss
val perm = hitPerm(i) // NOTE: given the excp, the out module choose one to use?
val update = false.B && hit(i) && (!hitPerm(i).a || !hitPerm(i).d && TlbCmd.isWrite(cmd(i))) // update A/D through exception
val perm = hitPerm // NOTE: given the excp, the out module choose one to use?
val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception
val modeCheck = !(mode === ModeU && !perm.u || mode === ModeS && perm.u && (!priv.sum || ifecth))
val ldPf = (pfArray(i) && TlbCmd.isRead(cmd(i)) && true.B /*!isAMO*/) || hit(i) && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmd(i)) && true.B/*!isAMO*/) // TODO: handle isAMO
val stPf = (pfArray(i) && TlbCmd.isWrite(cmd(i)) || false.B /*isAMO*/ ) || hit(i) && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmd(i)) || false.B/*TODO isAMO. */)
val instrPf = (pfArray(i) && TlbCmd.isExec(cmd(i))) || hit(i) && !(modeCheck && perm.x) && TlbCmd.isExec(cmd(i))
val ldPf = (pfArray && TlbCmd.isRead(cmdReg) && true.B /*!isAMO*/) || hit && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmdReg) && true.B/*!isAMO*/) // TODO: handle isAMO
val stPf = (pfArray && TlbCmd.isWrite(cmdReg) || false.B /*isAMO*/ ) || hit && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmdReg) || false.B/*TODO isAMO. */)
val instrPf = (pfArray && TlbCmd.isExec(cmdReg)) || hit && !(modeCheck && perm.x) && TlbCmd.isExec(cmdReg)
resp(i).bits.excp.pf.ld := ldPf || update
resp(i).bits.excp.pf.st := stPf || update
resp(i).bits.excp.pf.instr := instrPf || update
(hit, miss, pfHitVec, multiHit)
}
val readResult = (0 until Width).map(TLBRead(_))
val hitVec = readResult.map(res => res._1)
val missVec = readResult.map(res => res._2)
val pfHitVecVec = readResult.map(res => res._3)
val multiHitVec = readResult.map(res => res._4)
val hasMissReq = Cat(missVec).orR
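Note the recurring `if (isDtlb) RegNext(x) else x` above: the DTLB read path is pipelined one extra cycle (so hit checks run on registered request signals and can mask out the entry being refilled this cycle), while the ITLB stays combinational. A one-line helper capturing the pattern (hypothetical, not in the commit):

// Hypothetical shorthand for the DTLB-only register stage used throughout TLBRead.
def dtlbStage[T <: Data](x: T): T = if (isDtlb) RegNext(x) else x
// e.g. val cmdReg = dtlbStage(cmd(i)); val validReg = dtlbStage(valid(i))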
// ptw
val state_idle :: state_wait :: Nil = Enum(2)
val state = RegInit(state_idle)
ptw <> DontCare // TODO: need check it
ptw.req.valid := ParallelOR(miss).asBool && state===state_idle && !sfence.valid
ptw.req.valid := hasMissReq && state===state_idle && !sfence.valid
ptw.resp.ready := state===state_wait
// val ptwReqSeq = Wire(Seq.fill(Width)(new comBundle()))
val ptwReqSeq = Seq.fill(Width)(Wire(new comBundle()))
for (i <- 0 until Width) {
ptwReqSeq(i).valid := valid(i) && miss(i)
ptwReqSeq(i).roqIdx := req(i).bits.roqIdx
ptwReqSeq(i).bits.vpn := reqAddr(i).vpn
ptwReqSeq(i).valid := ((if (isDtlb) RegNext(valid(i)) else valid(i)) && missVec(i))
ptwReqSeq(i).roqIdx := (if (isDtlb) RegNext(req(i).bits.roqIdx) else req(i).bits.roqIdx)
ptwReqSeq(i).bits.vpn := (if (isDtlb) RegNext(reqAddr(i).vpn) else reqAddr(i).vpn)
}
ptw.req.bits := Compare(ptwReqSeq).bits
switch (state) {
is (state_idle) {
when (ParallelOR(miss).asBool && ptw.req.fire()) {
when (hasMissReq && ptw.req.fire()) {
state := state_wait
}
assert(!ptw.resp.valid)
......@@ -345,28 +385,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
}
// reset pf when pf hit
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVec(i)).asUInt, 0.U) })
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVecVec(i)).asUInt, 0.U) })
val pfHitRefill = ParallelOR(pfHitReset.asBools)
// refill
val refill = ptw.resp.fire()
val randIdx = LFSR64()(log2Up(TlbEntrySize)-1,0)
val priorIdx = PriorityEncoder(~(v|pf))
val tlbfull = ParallelAND((v|pf).asBools)
val refillIdx = Mux(tlbfull, randIdx, priorIdx)
val re2OH = UIntToOH(refillIdx)
when (refill) {
v := Mux(ptw.resp.bits.pf, v & ~re2OH, v | re2OH)
entry(refillIdx) := ptw.resp.bits.entry
XSDebug(p"Refill: idx:${refillIdx} entry:${ptw.resp.bits.entry}\n")
}
// pf update
when (refill) {
when (pfHitRefill) {
pf := Mux(ptw.resp.bits.pf, pf | re2OH, pf & ~re2OH) & ~pfHitReset
pf := Mux(ptw.resp.bits.pf, pf | refillIdxOH, pf & ~refillIdxOH) & ~pfHitReset
} .otherwise {
pf := Mux(ptw.resp.bits.pf, pf | re2OH, pf & ~re2OH)
pf := Mux(ptw.resp.bits.pf, pf | refillIdxOH, pf & ~refillIdxOH)
}
} .otherwise {
when (pfHitRefill) {
......@@ -374,7 +401,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
}
}
when (PopCount(pf) > 10.U) { // when too much pf, just clear
pf := Mux(refill && ptw.resp.bits.pf, re2OH, 0.U)
pf := Mux(refill && ptw.resp.bits.pf, refillIdxOH, 0.U)
}
// sfence (flush)
......@@ -409,15 +436,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
ExcitingUtils.addSource(valid(1)/* && vmEnable*/, "perfCntDtlbReqCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/, "perfCntDtlbReqCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/, "perfCntDtlbReqCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && miss(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && miss(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && miss(3), "perfCntDtlbMissCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && missVec(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && missVec(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && missVec(3), "perfCntDtlbMissCnt3", Perf)
}
if (!env.FPGAPlatform && !isDtlb) {
ExcitingUtils.addSource(valid(0)/* && vmEnable*/, "perfCntItlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntItlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntItlbMissCnt0", Perf)
}
// Log
......@@ -428,7 +455,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
XSDebug(sfence.valid, p"Sfence: ${sfence}\n")
XSDebug(ParallelOR(valid)|| ptw.resp.valid, p"CSR: ${csr}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hit).asUInt)} miss:${Binary(VecInit(miss).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n")
XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n")
......@@ -437,7 +464,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// assert((hit(i)&pfArray(i))===false.B, "hit(%d):%d pfArray(%d):%d v:0x%x pf:0x%x", i.U, hit(i), i.U, pfArray(i), v, pf)
// }
// for(i <- 0 until Width) {
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVec:0x${Hexadecimal(VecInit(pfHitVec(i)).asUInt)}\n")
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVecVec:0x${Hexadecimal(VecInit(pfHitVecVec(i)).asUInt)}\n")
// }
// for(i <- 0 until TlbEntrySize) {
// XSDebug(multiHit, p"entry(${i.U}): v:${v(i)} ${entry(i)}\n")
......
......@@ -30,6 +30,9 @@ case class ICacheParameters(
trait HasICacheParameters extends HasL1CacheParameters {
val cacheParams = icacheParameters
//TODO: temp set
def accessBorder = 0x80000000L
// the width of inner CPU data interface
def cacheID = 0
// RVC instruction length
......@@ -38,7 +41,7 @@ trait HasICacheParameters extends HasL1CacheParameters {
// icache Queue
val groupAlign = log2Up(cacheParams.blockBytes)
def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
//ECC encoding
def encRowBits = cacheParams.dataCode.width(rowBits)
def encTagBits = cacheParams.tagCode.width(tagBits)
......@@ -88,6 +91,7 @@ class ICacheResp extends ICacheBundle
val data = UInt((FetchWidth * 32).W)
val mask = UInt(PredictWidth.W)
val ipf = Bool()
val acf = Bool()
}
......@@ -174,7 +178,7 @@ class ICacheMetaArray extends ICachArray
val metaArray = Module(new SRAMTemplate(UInt(encTagBits.W), set=nSets, way=nWays, shouldReset = true))
//read
//read
metaArray.io.r.req.valid := io.read.valid
io.read.ready := metaArray.io.r.req.ready
io.write.ready := DontCare
......@@ -202,7 +206,7 @@ class ICacheDataArray extends ICachArray
val dataArray = List.fill(blockWords){ Module(new SRAMTemplate(UInt(encRowBits.W), set=nSets, way = nWays))}
//read
//read
//do ECC decoding after way choose
for(b <- 0 until blockWords){
dataArray(b).io.r.req.valid := io.read.valid
......@@ -221,8 +225,8 @@ class ICacheDataArray extends ICachArray
for(b <- 0 until blockWords){
dataArray(b).io.w.req.valid := io.write.valid
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
data=write_data_encoded(b),
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
data=write_data_encoded(b),
waymask=write.waymask)
}
......@@ -269,7 +273,7 @@ class ICache extends ICacheModule
val metaArray = Module(new ICacheMetaArray)
val dataArray = Module(new ICacheDataArray)
// 256-bit valid
val validArray = RegInit(0.U((nSets * nWays).W))
val validArray = RegInit(0.U((nSets * nWays).W))
//----------------------------
// Stage 1
......@@ -279,9 +283,10 @@ class ICache extends ICacheModule
s1_req_mask := io.req.bits.mask
s2_ready := WireInit(false.B)
s1_fire := s1_valid && (s2_ready || io.flush(0))
// SRAM(Meta and Data) read request
val s1_idx = get_idx(s1_req_pc)
metaArray.io.read.valid := s1_valid
metaArray.io.read.bits :=s1_idx
dataArray.io.read.valid := s1_valid
......@@ -289,8 +294,8 @@ class ICache extends ICacheModule
XSDebug("[Stage 1] v : r : f (%d %d %d) request pc: 0x%x mask: %b\n",s1_valid,s2_ready,s1_fire,s1_req_pc,s1_req_mask)
XSDebug("[Stage 1] index: %d\n",s1_idx)
//----------------------------
// Stage 2
//----------------------------
......@@ -298,11 +303,16 @@ class ICache extends ICacheModule
val s2_tlb_resp = WireInit(io.tlb.resp.bits)
val s2_tag = get_tag(s2_tlb_resp.paddr)
val s2_hit = WireInit(false.B)
val s2_access_fault = WireInit(false.B)
s2_fire := s2_valid && s3_ready && !io.flush(0) && io.tlb.resp.fire()
when(io.flush(0)) {s2_valid := s1_fire}
.elsewhen(s1_fire) { s2_valid := s1_valid}
.elsewhen(s2_fire) { s2_valid := false.B}
//physical address < 0x80000000
//TODO: May have bugs
s2_access_fault := (s2_tlb_resp.paddr < accessBorder.U) && s2_valid
// SRAM(Meta and Data) read response
val metas = metaArray.io.readResp
val datas =RegEnable(next=dataArray.io.readResp, enable=s2_fire)
......@@ -315,19 +325,19 @@ class ICache extends ICacheModule
val invalidVec = ~validMeta
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = PriorityMask(invalidVec)
val waymask = Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))
s2_hit := ParallelOR(hitVec) || s2_tlb_resp.excp.pf.instr
s2_hit := ParallelOR(hitVec) || s2_tlb_resp.excp.pf.instr || s2_access_fault
s2_ready := s2_fire || !s2_valid || io.flush(0)
XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b\n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask)
XSDebug("[Stage 2] v : r : f (%d %d %d) pc: 0x%x mask: %b acf:%d\n",s2_valid,s3_ready,s2_fire,s2_req_pc,s2_req_mask,s2_access_fault)
XSDebug(p"[Stage 2] tlb req: v ${io.tlb.req.valid} r ${io.tlb.req.ready} ${io.tlb.req.bits}\n")
XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
XSDebug("[Stage 2] tag: %x hit:%d\n",s2_tag,s2_hit)
XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
//----------------------------
// Stage 3
//----------------------------
......@@ -338,18 +348,19 @@ class ICache extends ICacheModule
val s3_wayMask = RegEnable(next=waymask,init=0.U,enable=s2_fire)
val s3_miss = s3_valid && !s3_hit
val s3_idx = get_idx(s3_req_pc)
val s3_access_fault = RegEnable(s2_access_fault,init=false.B,enable=s2_fire)
when(io.flush(1)) { s3_valid := false.B }
.elsewhen(s2_fire) { s3_valid := s2_valid }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
val refillDataReg = Reg(Vec(refillCycles,UInt(beatBits.W)))
// icache hit
// data ECC decoding
// simply cut the hit cacheline
val dataHitWay = VecInit(s3_data.map(b => Mux1H(s3_wayMask,b).asUInt))
val outPacket = Wire(UInt((FetchWidth * 32).W))
val dataHitWayDecoded = VecInit(
(0 until blockWords).map{r =>
val row = dataHitWay.asTypeOf(Vec(blockWords,UInt(encRowBits.W)))(r)
val decodedRow = cacheParams.dataCode.decode(row)
assert(!(s3_valid && s3_hit && decodedRow.uncorrectable))
......@@ -357,7 +368,7 @@ class ICache extends ICacheModule
}
)
outPacket := cutHelper(dataHitWay,s3_req_pc(5,1).asUInt,s3_req_mask.asUInt)
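// cutHelper is defined elsewhere in this file; as a hedged pure-Scala model
// (slot indexing assumed, not taken from this commit), it selects the
// FetchWidth*32-bit fetch packet out of the hit cacheline, starting at the
// 16-bit-slot offset given by s3_req_pc(5,1):
def cutModel(line: Seq[Int], pcSlot: Int, slots: Int): Seq[Int] =
  (0 until slots).map(i => line((pcSlot + i) % line.length))
// e.g. cutModel(0 until 32, 30, 8) == Seq(30, 31, 0, 1, 2, 3, 4, 5)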
//ICache MissQueue
val icacheMissQueue = Module(new IcacheMissQueue)
val blocking = RegInit(false.B)
......@@ -383,9 +394,9 @@ class ICache extends ICacheModule
//refill write
val metaWriteReq = icacheMissQueue.io.meta_write.bits
icacheMissQueue.io.meta_write.ready := true.B
metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag,
idx=metaWriteReq.meta_write_idx,
waymask=metaWriteReq.meta_write_waymask)
val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool())))
......@@ -397,7 +408,7 @@ class ICache extends ICacheModule
//data
icacheMissQueue.io.refill.ready := true.B
val refillReq = icacheMissQueue.io.refill.bits
dataArray.io.write.valid := icacheMissQueue.io.refill.valid
dataArray.io.write.bits.apply(data=refillReq.refill_data,
idx=refillReq.refill_idx,
waymask=refillReq.refill_waymask)
......@@ -411,7 +422,7 @@ class ICache extends ICacheModule
s3_ready := ((io.resp.fire() || !s3_valid) && !blocking) || (blocking && icacheMissQueue.io.resp.fire())
//TODO: coherence
XSDebug("[Stage 3] valid:%d pc: 0x%x mask: %b ipf:%d\n",s3_valid,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr)
XSDebug("[Stage 3] valid:%d pc: 0x%x mask: %b ipf:%d acf:%d \n",s3_valid,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr,s3_access_fault)
XSDebug("[Stage 3] hit:%d miss:%d waymask:%x blocking:%d\n",s3_hit,s3_miss,s3_wayMask.asUInt,blocking)
XSDebug("[Stage 3] tag: %x idx: %d\n",s3_tag,get_idx(s3_req_pc))
XSDebug(p"[Stage 3] tlb resp: ${s3_tlb_resp}\n")
......@@ -429,13 +440,14 @@ class ICache extends ICacheModule
//----------------------------
//icache request
io.req.ready := metaArray.io.read.ready && dataArray.io.read.ready && s2_ready
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || icacheMissQueue.io.resp.valid)
io.resp.bits.data := Mux((s3_valid && s3_hit),outPacket,refillDataOut)
io.resp.bits.mask := s3_req_mask
io.resp.bits.pc := s3_req_pc
io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr
io.resp.bits.acf := s3_access_fault
//to itlb
io.tlb.resp.ready := s3_ready
......@@ -444,7 +456,7 @@ class ICache extends ICacheModule
io.tlb.req.bits.cmd := TlbCmd.exec
io.tlb.req.bits.roqIdx := DontCare
io.tlb.req.bits.debug.pc := s2_req_pc
//To L1 plus
io.mem_acquire <> icacheMissQueue.io.mem_acquire
icacheMissQueue.io.mem_grant <> io.mem_grant
......
......@@ -128,7 +128,7 @@ class LoadPipe extends DCacheModule
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
val resp = Wire(ValidIO(new DCacheWordResp))
......
......@@ -11,7 +11,7 @@ import chisel3.experimental.chiselName
trait HasBPUParameter extends HasXSParameter {
val BPUDebug = true
val EnableCFICommitLog = true
val EnbaleCFIPredLog = false
val EnbaleCFIPredLog = true
val EnableBPUTimeRecord = EnableCFICommitLog || EnbaleCFIPredLog
}
......@@ -144,7 +144,7 @@ abstract class BPUStage extends XSModule with HasBPUParameter with HasIFUConst {
val outFire = Input(Bool())
val debug_hist = Input(UInt((if (BPUDebug) (HistoryLength) else 0).W))
val debug_histPtr = Input(UInt((if (BPUDebug) (ExtHistoryLength) else 0).W))
// val debug_histPtr = Input(UInt((if (BPUDebug) (ExtHistoryLength) else 0).W))
}
val io = IO(new DefaultIO)
......@@ -230,7 +230,7 @@ class BPUStage2 extends BPUStage {
val bimResp = inLatch.resp.bim
takens := VecInit((0 until PredictWidth).map(i => btbResp.hits(i) && (btbResp.types(i) === BTBtype.B && bimResp.ctrs(i)(1) || btbResp.types(i) =/= BTBtype.B)))
targets := btbResp.targets
brMask := VecInit(btbResp.types.map(_ === BTBtype.B))
brMask := VecInit((0 until PredictWidth).map(i => btbResp.types(i) === BTBtype.B && btbResp.hits(i)))
jalMask := DontCare
firstBankHasHalfRVI := Mux(lastBankHasInst, false.B, btbResp.hits(bankWidth-1) && !btbResp.isRVC(bankWidth-1) && inLatch.mask(bankWidth-1))
......@@ -343,6 +343,7 @@ class BPUStage3 extends BPUStage {
}
when (lastBankHasHalfRVI && btbResp.types(PredictWidth-1) === BTBtype.B && btbHits(PredictWidth-1)) {
takens(PredictWidth-1) := brPred(PredictWidth-1) && !loopRes(PredictWidth-1)
}
// targets would be lost as well, since they come from the btb
......@@ -367,11 +368,14 @@ class BPUStage3 extends BPUStage {
XSDebug(io.inFire && s3IO.predecode.mask(i), "predecode(%d): brType:%d, br:%d, jal:%d, jalr:%d, call:%d, ret:%d, RVC:%d, excType:%d\n",
i.U, p.brType, p.isBr, p.isJal, p.isJalr, p.isCall, p.isRet, p.isRVC, p.excType)
}
XSDebug(p"brs:${Binary(brs)} jals:${Binary(jals)} jalrs:${Binary(jalrs)} calls:${Binary(calls)} rets:${Binary(rets)} rvcs:${Binary(RVCs)}\n")
XSDebug(p"callIdx:${callIdx} retIdx:${retIdx}\n")
XSDebug(p"brPred:${Binary(brPred)} loopRes:${Binary(loopRes)} prevHalfTaken:${prevHalfTaken} brTakens:${Binary(brTakens)}\n")
}
if (EnbaleCFIPredLog) {
val out = io.out
XSDebug(io.outFire, p"cfi_pred: fetchpc(${Hexadecimal(out.pc)}) mask(${out.mask}) brmask(${brMask.asUInt}) hist(${Hexadecimal(io.debug_hist)}) histPtr(${io.debug_histPtr})\n")
XSDebug(io.outFire, p"cfi_pred: fetchpc(${Hexadecimal(out.pc)}) mask(${out.mask}) brmask(${brMask.asUInt}) hist(${Hexadecimal(io.debug_hist)})\n")
}
if (EnableBPUTimeRecord) {
......@@ -394,7 +398,7 @@ class BPUReq extends XSBundle {
val pc = UInt(VAddrBits.W)
val hist = UInt(HistoryLength.W)
val inMask = UInt(PredictWidth.W)
val histPtr = UInt(log2Up(ExtHistoryLength).W) // only for debug
// val histPtr = UInt(log2Up(ExtHistoryLength).W) // only for debug
}
class BranchUpdateInfoWithHist extends XSBundle {
......@@ -559,14 +563,6 @@ class BPU extends BaseBPU {
s2.io.debug_hist := s2_hist
s3.io.debug_hist := s3_hist
val s1_histPtr = RegEnable(io.in.histPtr, enable=s1_fire)
val s2_histPtr = RegEnable(s1_histPtr, enable=s2_fire)
val s3_histPtr = RegEnable(s2_histPtr, enable=s3_fire)
s1.io.debug_histPtr := s1_histPtr
s2.io.debug_histPtr := s2_histPtr
s3.io.debug_histPtr := s3_histPtr
//**********************Stage 2****************************//
tage.io.flush := io.flush(1) // TODO: fix this
tage.io.pc.valid := s2_fire
......
......@@ -35,8 +35,8 @@ class BIM extends BasePredictor with BimParams {
val bimAddr = new TableAddr(log2Up(BimSize), BimBanks)
val bankAlignedPC = bankAligned(io.pc.bits)
val pcLatch = RegEnable(bankAlignedPC, io.pc.valid)
val if1_bankAlignedPC = bankAligned(io.pc.bits)
val if2_pc = RegEnable(if1_bankAlignedPC, io.pc.valid)
val bim = List.fill(BimBanks) {
Module(new SRAMTemplate(UInt(2.W), set = nRows, shouldReset = false, holdRead = true))
......@@ -48,34 +48,34 @@ class BIM extends BasePredictor with BimParams {
when (resetRow === (nRows-1).U) { doing_reset := false.B }
// this bank means cache bank
val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
val if1_startsAtOddBank = bankInGroup(if1_bankAlignedPC)(0)
val realMask = Mux(startsAtOddBank,
val if1_realMask = Mux(if1_startsAtOddBank,
Cat(io.inMask(bankWidth-1,0), io.inMask(PredictWidth-1, bankWidth)),
io.inMask)
val isInNextRow = VecInit((0 until BimBanks).map(i => Mux(startsAtOddBank, (i < bankWidth).B, false.B)))
val if1_isInNextRow = VecInit((0 until BimBanks).map(i => Mux(if1_startsAtOddBank, (i < bankWidth).B, false.B)))
val baseRow = bimAddr.getBankIdx(bankAlignedPC)
val if1_baseRow = bimAddr.getBankIdx(if1_bankAlignedPC)
val realRow = VecInit((0 until BimBanks).map(b => Mux(isInNextRow(b), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow)))
val if1_realRow = VecInit((0 until BimBanks).map(b => Mux(if1_isInNextRow(b), (if1_baseRow+1.U)(log2Up(nRows)-1, 0), if1_baseRow)))
val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid)))
val if2_realRow = VecInit(if1_realRow.map(RegEnable(_, enable=io.pc.valid)))
for (b <- 0 until BimBanks) {
bim(b).io.r.req.valid := realMask(b) && io.pc.valid
bim(b).io.r.req.bits.setIdx := realRow(b)
bim(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
bim(b).io.r.req.bits.setIdx := if1_realRow(b)
}
val bimRead = VecInit(bim.map(_.io.r.resp.data(0)))
val if2_bimRead = VecInit(bim.map(_.io.r.resp.data(0)))
val startsAtOddBankLatch = bankInGroup(pcLatch)(0)
val if2_startsAtOddBank = bankInGroup(if2_pc)(0)
for (b <- 0 until BimBanks) {
val realBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
val ctr = bimRead(realBank)
val realBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if2_startsAtOddBank, (b-bankWidth).U, b.U))
val ctr = if2_bimRead(realBank)
io.resp.ctrs(b) := ctr
io.meta.ctrs(b) := ctr
}
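// A hedged sketch (not from this commit) of the 2-bit saturating counters
// these BIM banks hold; the read path above predicts taken when ctr(1) is
// set. The update rule is the standard bimodal one, shown illustratively:
import chisel3._
object BimCtrNext {
  // 0,1 predict not-taken; 2,3 predict taken; saturate at both ends
  def apply(ctr: UInt, taken: Bool): UInt =
    Mux(taken,
      Mux(ctr === 3.U, 3.U, ctr + 1.U),
      Mux(ctr === 0.U, 0.U, ctr - 1.U))
}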
......
......@@ -72,9 +72,9 @@ class BTB extends BasePredictor with BTBParams{
override val io = IO(new BTBIO)
val btbAddr = new TableAddr(log2Up(BtbSize/BtbWays), BtbBanks)
val bankAlignedPC = bankAligned(io.pc.bits)
val if1_bankAlignedPC = bankAligned(io.pc.bits)
val pcLatch = RegEnable(bankAlignedPC, io.pc.valid)
val if2_pc = RegEnable(if1_bankAlignedPC, io.pc.valid)
val data = List.fill(BtbWays) {
List.fill(BtbBanks) {
......@@ -91,61 +91,61 @@ class BTB extends BasePredictor with BTBParams{
// BTB read requests
// this bank means cache bank
val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
val if1_startsAtOddBank = bankInGroup(if1_bankAlignedPC)(0)
val baseBank = btbAddr.getBank(bankAlignedPC)
val if1_baseBank = btbAddr.getBank(if1_bankAlignedPC)
val realMask = Mux(startsAtOddBank,
val if1_realMask = Mux(if1_startsAtOddBank,
Cat(io.inMask(bankWidth-1,0), io.inMask(PredictWidth-1, bankWidth)),
io.inMask)
val realMaskLatch = RegEnable(realMask, io.pc.valid)
val if2_realMask = RegEnable(if1_realMask, io.pc.valid)
val isInNextRow = VecInit((0 until BtbBanks).map(i => Mux(startsAtOddBank, (i < bankWidth).B, false.B)))
val if1_isInNextRow = VecInit((0 until BtbBanks).map(i => Mux(if1_startsAtOddBank, (i < bankWidth).B, false.B)))
val baseRow = btbAddr.getBankIdx(bankAlignedPC)
val if1_baseRow = btbAddr.getBankIdx(if1_bankAlignedPC)
val nextRowStartsUp = baseRow.andR
val if1_nextRowStartsUp = if1_baseRow.andR
val realRow = VecInit((0 until BtbBanks).map(b => Mux(isInNextRow(b), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow)))
val if1_realRow = VecInit((0 until BtbBanks).map(b => Mux(if1_isInNextRow(b), (if1_baseRow+1.U)(log2Up(nRows)-1, 0), if1_baseRow)))
val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid)))
val if2_realRow = VecInit(if1_realRow.map(RegEnable(_, enable=io.pc.valid)))
for (w <- 0 until BtbWays) {
for (b <- 0 until BtbBanks) {
meta(w)(b).io.r.req.valid := realMask(b) && io.pc.valid
meta(w)(b).io.r.req.bits.setIdx := realRow(b)
data(w)(b).io.r.req.valid := realMask(b) && io.pc.valid
data(w)(b).io.r.req.bits.setIdx := realRow(b)
meta(w)(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
meta(w)(b).io.r.req.bits.setIdx := if1_realRow(b)
data(w)(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
data(w)(b).io.r.req.bits.setIdx := if1_realRow(b)
}
}
for (b <- 0 to 1) {
edata(b).io.r.req.valid := io.pc.valid
val row = if (b == 0) { Mux(startsAtOddBank, realRow(bankWidth), realRow(0)) }
else { Mux(startsAtOddBank, realRow(0), realRow(bankWidth))}
val row = if (b == 0) { Mux(if1_startsAtOddBank, if1_realRow(bankWidth), if1_realRow(0)) }
else { Mux(if1_startsAtOddBank, if1_realRow(0), if1_realRow(bankWidth))}
edata(b).io.r.req.bits.setIdx := row
}
// Entries read from SRAM
val metaRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => meta(w)(b).io.r.resp.data(0)))))
val dataRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => data(w)(b).io.r.resp.data(0)))))
val edataRead = VecInit((0 to 1).map(i => edata(i).io.r.resp.data(0)))
val if2_metaRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => meta(w)(b).io.r.resp.data(0)))))
val if2_dataRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => data(w)(b).io.r.resp.data(0)))))
val if2_edataRead = VecInit((0 to 1).map(i => edata(i).io.r.resp.data(0)))
val baseBankLatch = btbAddr.getBank(pcLatch)
val startsAtOddBankLatch = bankInGroup(pcLatch)(0)
val baseTag = btbAddr.getTag(pcLatch)
val if2_baseBank = btbAddr.getBank(if2_pc)
val if2_startsAtOddBank = bankInGroup(if2_pc)(0)
val if2_baseTag = btbAddr.getTag(if2_pc)
val tagIncremented = VecInit((0 until BtbBanks).map(b => RegEnable(isInNextRow(b.U) && nextRowStartsUp, io.pc.valid)))
val realTags = VecInit((0 until BtbBanks).map(b => Mux(tagIncremented(b), baseTag + 1.U, baseTag)))
val if2_tagIncremented = VecInit((0 until BtbBanks).map(b => RegEnable(if1_isInNextRow(b.U) && if1_nextRowStartsUp, io.pc.valid)))
val if2_realTags = VecInit((0 until BtbBanks).map(b => Mux(if2_tagIncremented(b), if2_baseTag + 1.U, if2_baseTag)))
val totalHits = VecInit((0 until BtbBanks).map( b =>
val if2_totalHits = VecInit((0 until BtbBanks).map( b =>
VecInit((0 until BtbWays).map( w =>
// This should correspond to the real mask from last valid cycle!
metaRead(w)(b).tag === realTags(b) && metaRead(w)(b).valid && realMaskLatch(b)
if2_metaRead(w)(b).tag === if2_realTags(b) && if2_metaRead(w)(b).valid && if2_realMask(b)
))
))
val bankHits = VecInit(totalHits.map(_.reduce(_||_)))
val bankHitWays = VecInit(totalHits.map(PriorityEncoder(_)))
val if2_bankHits = VecInit(if2_totalHits.map(_.reduce(_||_)))
val if2_bankHitWays = VecInit(if2_totalHits.map(PriorityEncoder(_)))
def allocWay(valids: UInt, meta_tags: UInt, req_tag: UInt) = {
......@@ -159,7 +159,7 @@ class BTB extends BasePredictor with BTBParams{
val chunks = (0 until nChunks).map( i =>
tags(min((i+1)*l, tags.getWidth)-1, i*l)
)
w := Mux(valid, chunks.reduce(_^_), (if (randomAlloc) {LFSR64()(log2Up(BtbWays)-1,0)} else {PriorityEncoder(~valids)}))
w := Mux(valid, if (randomAlloc) {LFSR64()(log2Up(BtbWays)-1,0)} else {chunks.reduce(_^_)}, PriorityEncoder(~valids))
w
} else {
val w = WireInit(0.U)
......@@ -167,30 +167,30 @@ class BTB extends BasePredictor with BTBParams{
}
}
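// The one-line change above fixes the allocation policy. A hedged pure-Scala
// model of the corrected behaviour (illustrative only): prefer any
// still-invalid way; only when the whole set is valid fall back to a random
// (LFSR) or tag-hash victim.
def allocWayModel(valids: Seq[Boolean], victim: Int): Int =
  if (valids.forall(identity)) victim          // set full: random/hashed choice
  else valids.indexWhere(v => !v)              // otherwise: first invalid way
// e.g. allocWayModel(Seq(true, false, true, true), victim = 2) == 1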
val allocWays = VecInit((0 until BtbBanks).map(b =>
allocWay(VecInit(metaRead.map(w => w(b).valid)).asUInt,
VecInit(metaRead.map(w => w(b).tag)).asUInt,
realTags(b))))
allocWay(VecInit(if2_metaRead.map(w => w(b).valid)).asUInt,
VecInit(if2_metaRead.map(w => w(b).tag)).asUInt,
if2_realTags(b))))
val writeWay = VecInit((0 until BtbBanks).map(
b => Mux(bankHits(b), bankHitWays(b), allocWays(b))
b => Mux(if2_bankHits(b), if2_bankHitWays(b), allocWays(b))
))
for (b <- 0 until BtbBanks) {
val realBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
val meta_entry = metaRead(bankHitWays(realBank))(realBank)
val data_entry = dataRead(bankHitWays(realBank))(realBank)
val edataBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, 1.U, 0.U)
else Mux(startsAtOddBankLatch, 0.U, 1.U))
val realBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if2_startsAtOddBank, (b-bankWidth).U, b.U))
val meta_entry = if2_metaRead(if2_bankHitWays(realBank))(realBank)
val data_entry = if2_dataRead(if2_bankHitWays(realBank))(realBank)
val edataBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, 1.U, 0.U)
else Mux(if2_startsAtOddBank, 0.U, 1.U))
// Use real pc to calculate the target
io.resp.targets(b) := Mux(data_entry.extended, edataRead(edataBank), (pcLatch.asSInt + (b << 1).S + data_entry.offset).asUInt)
io.resp.hits(b) := bankHits(realBank)
io.resp.targets(b) := Mux(data_entry.extended, if2_edataRead(edataBank), (if2_pc.asSInt + (b << 1).S + data_entry.offset).asUInt)
io.resp.hits(b) := if2_bankHits(realBank)
io.resp.types(b) := meta_entry.btbType
io.resp.isRVC(b) := meta_entry.isRVC
io.meta.writeWay(b) := writeWay(realBank)
io.meta.hitJal(b) := bankHits(realBank) && meta_entry.btbType === BTBtype.J
io.meta.hitJal(b) := if2_bankHits(realBank) && meta_entry.btbType === BTBtype.J
}
def pdInfoToBTBtype(pd: PreDecodeInfo) = {
......@@ -244,35 +244,35 @@ class BTB extends BasePredictor with BTBParams{
XSDebug("isInNextRow: ")
(0 until BtbBanks).foreach(i => {
XSDebug(false, true.B, "%d ", isInNextRow(i))
XSDebug(false, true.B, "%d ", if1_isInNextRow(i))
if (i == BtbBanks-1) { XSDebug(false, true.B, "\n") }
})
val validLatch = RegNext(io.pc.valid)
XSDebug(io.pc.valid, "read: pc=0x%x, baseBank=%d, realMask=%b\n", bankAlignedPC, baseBank, realMask)
XSDebug(io.pc.valid, "read: pc=0x%x, baseBank=%d, realMask=%b\n", if1_bankAlignedPC, if1_baseBank, if1_realMask)
XSDebug(validLatch, "read_resp: pc=0x%x, readIdx=%d-------------------------------\n",
pcLatch, btbAddr.getIdx(pcLatch))
if2_pc, btbAddr.getIdx(if2_pc))
if (debug_verbose) {
for (i <- 0 until BtbBanks){
for (j <- 0 until BtbWays) {
XSDebug(validLatch, "read_resp[w=%d][b=%d][r=%d] is valid(%d) mask(%d), tag=0x%x, offset=0x%x, type=%d, isExtend=%d, isRVC=%d\n",
j.U, i.U, realRowLatch(i), metaRead(j)(i).valid, realMaskLatch(i), metaRead(j)(i).tag, dataRead(j)(i).offset, metaRead(j)(i).btbType, dataRead(j)(i).extended, metaRead(j)(i).isRVC)
j.U, i.U, if2_realRow(i), if2_metaRead(j)(i).valid, if2_realMask(i), if2_metaRead(j)(i).tag, if2_dataRead(j)(i).offset, if2_metaRead(j)(i).btbType, if2_dataRead(j)(i).extended, if2_metaRead(j)(i).isRVC)
}
}
}
// e.g., baseBank == 5 => (5, 6, ..., 15, 0, 1, 2, 3, 4)
val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (baseBankLatch +& b.U)(log2Up(BtbBanks)-1,0)))
val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (if2_baseBank +& b.U)(log2Up(BtbBanks)-1,0)))
for (i <- 0 until BtbBanks) {
val idx = bankIdxInOrder(i)
XSDebug(validLatch && bankHits(bankIdxInOrder(i)), "resp(%d): bank(%d) hits, tgt=%x, isRVC=%d, type=%d\n",
XSDebug(validLatch && if2_bankHits(bankIdxInOrder(i)), "resp(%d): bank(%d) hits, tgt=%x, isRVC=%d, type=%d\n",
i.U, idx, io.resp.targets(i), io.resp.isRVC(i), io.resp.types(i))
}
XSDebug(updateValid, "update_req: cycle=%d, pc=0x%x, target=0x%x, misPred=%d, offset=%x, extended=%d, way=%d, bank=%d, row=0x%x\n",
u.brInfo.debug_btb_cycle, u.pc, new_target, u.isMisPred, new_offset, new_extended, updateWay, updateBankIdx, updateRow)
for (i <- 0 until BtbBanks) {
// Conflict when not hit and allocating a valid entry
val conflict = metaRead(allocWays(i))(i).valid && !bankHits(i)
val conflict = if2_metaRead(allocWays(i))(i).valid && !if2_bankHits(i)
XSDebug(conflict, "bank(%d) is trying to allocate a valid way(%d)\n", i.U, allocWays(i))
// There is another circumstance when a branch is on its way to update while another
// branch chose the same way to update; then, after the first branch is written in,
......
......@@ -9,10 +9,10 @@ import xiangshan.cache._
class Frontend extends XSModule {
val io = IO(new Bundle() {
val icacheReq = DecoupledIO(new ICacheReq)
val icacheResp = Flipped(DecoupledIO(new ICacheResp))
val icacheFlush = Output(UInt(2.W))
val icacheToTlb = Flipped(new BlockTlbRequestIO)
val icacheMemAcq = DecoupledIO(new L1plusCacheReq)
val icacheMemGrant = Flipped(DecoupledIO(new L1plusCacheResp))
val l1plusFlush = Output(Bool())
val fencei = Input(Bool())
val ptw = new TlbPtwIO
val backend = new FrontendToBackendIO
val sfence = Input(new SfenceBundle)
......@@ -21,6 +21,7 @@ class Frontend extends XSModule {
val ifu = Module(new IFU)
val ibuffer = Module(new Ibuffer)
val icache = Module(new ICache)
val needFlush = io.backend.redirect.valid
......@@ -29,12 +30,16 @@ class Frontend extends XSModule {
ifu.io.inOrderBrInfo <> io.backend.inOrderBrInfo
ifu.io.outOfOrderBrInfo <> io.backend.outOfOrderBrInfo
//icache
io.icacheReq <> ifu.io.icacheReq
io.icacheFlush <> ifu.io.icacheFlush
ifu.io.icacheResp <> io.icacheResp
ifu.io.icacheResp <> icache.io.resp
icache.io.req <> ifu.io.icacheReq
icache.io.flush <> ifu.io.icacheFlush
icache.io.fencei := io.fencei
io.l1plusFlush := icache.io.l1plusflush
io.icacheMemAcq <> icache.io.mem_acquire
icache.io.mem_grant <> io.icacheMemGrant
//itlb to ptw
io.ptw <> TLB(
in = Seq(io.icacheToTlb),
in = Seq(icache.io.tlb),
sfence = io.sfence,
csr = io.tlbCsr,
width = 1,
......
......@@ -23,6 +23,7 @@ class Ibuffer extends XSModule {
val brInfo = new BranchInfo
val pd = new PreDecodeInfo
val ipf = Bool()
val acf = Bool()
val crossPageIPFFix = Bool()
}
......@@ -65,6 +66,7 @@ class Ibuffer extends XSModule {
inWire.brInfo := io.in.bits.brInfo(i)
inWire.pd := io.in.bits.pd(i)
inWire.ipf := io.in.bits.ipf
inWire.acf := io.in.bits.acf
inWire.crossPageIPFFix := io.in.bits.crossPageIPFFix
ibuf(enq_idx) := inWire
......@@ -90,6 +92,7 @@ class Ibuffer extends XSModule {
// io.out(i).bits.exceptionVec := Mux(outWire.ipf, UIntToOH(instrPageFault.U), 0.U)
io.out(i).bits.exceptionVec := 0.U.asTypeOf(Vec(16, Bool()))
io.out(i).bits.exceptionVec(instrPageFault) := outWire.ipf
io.out(i).bits.exceptionVec(instrAccessFault) := outWire.acf
// io.out(i).bits.brUpdate := outWire.brInfo
io.out(i).bits.brUpdate := DontCare
io.out(i).bits.brUpdate.pc := outWire.pc
......
......@@ -50,6 +50,7 @@ class RAS extends BasePredictor
}
override val io = IO(new RASIO)
override val debug = true
@chiselName
class RASStack(val rasSize: Int) extends XSModule {
......@@ -66,6 +67,11 @@ class RAS extends BasePredictor
val copy_out_mem = Output(Vec(rasSize, rasEntry()))
val copy_out_sp = Output(UInt(log2Up(rasSize).W))
})
val debugIO = IO(new Bundle{
val write_entry = Output(rasEntry())
val alloc_new = Output(Bool())
val sp = Output(UInt(log2Up(rasSize).W))
})
@chiselName
class Stack(val size: Int) extends XSModule {
val io = IO(new Bundle {
......@@ -98,9 +104,13 @@ class RAS extends BasePredictor
val alloc_new = io.new_addr =/= top_addr
stack.wen := io.push_valid || io.pop_valid && top_ctr =/= 1.U
stack.wIdx := Mux(io.pop_valid && top_ctr =/= 1.U, sp - 1.U, Mux(alloc_new, sp, sp - 1.U))
stack.wdata := Mux(io.pop_valid && top_ctr =/= 1.U,
RASEntry(top_addr, top_ctr - 1.U),
Mux(alloc_new, RASEntry(io.new_addr, 1.U), RASEntry(top_addr, top_ctr + 1.U)))
val write_addr = Mux(io.pop_valid && top_ctr =/= 1.U, top_addr, io.new_addr)
val write_ctr = Mux(io.pop_valid && top_ctr =/= 1.U, top_ctr - 1.U, Mux(alloc_new, 1.U, top_ctr + 1.U))
val write_entry = RASEntry(write_addr, write_ctr)
stack.wdata := write_entry
debugIO.write_entry := write_entry
debugIO.alloc_new := alloc_new
debugIO.sp := sp
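// An illustrative pure-Scala model (not from this commit) of the write-entry
// selection above. The RAS compresses repeated pushes of one return address
// into a counter: a pop decrements the top counter (the entry only truly
// pops once ctr reaches 1), and a push either bumps the counter or, when the
// address differs (alloc_new), starts a fresh entry.
case class RasEntryModel(retAddr: Long, ctr: Int)
def nextTop(top: RasEntryModel, newAddr: Long, popValid: Boolean): RasEntryModel =
  if (popValid && top.ctr != 1) top.copy(ctr = top.ctr - 1)
  else if (newAddr != top.retAddr) RasEntryModel(newAddr, 1)   // alloc_new
  else top.copy(ctr = top.ctr + 1)                             // recursive call site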
when (io.push_valid && alloc_new) {
sp := sp + 1.U
......@@ -138,7 +148,9 @@ class RAS extends BasePredictor
// val commit_ras = Reg(Vec(RasSize, rasEntry()))
// val commit_sp = RegInit(0.U(log2Up(RasSize).W))
val spec_ras = Module(new RASStack(RasSize)).io
val spec = Module(new RASStack(RasSize))
val spec_ras = spec.io
val spec_push = WireInit(false.B)
val spec_pop = WireInit(false.B)
......@@ -153,7 +165,8 @@ class RAS extends BasePredictor
spec_push := !spec_is_full && io.callIdx.valid && io.pc.valid
spec_pop := !spec_is_empty && io.is_ret && io.pc.valid
val commit_ras = Module(new RASStack(RasSize)).io
val commit = Module(new RASStack(RasSize))
val commit_ras = commit.io
val commit_push = WireInit(false.B)
val commit_pop = WireInit(false.B)
......@@ -179,7 +192,7 @@ class RAS extends BasePredictor
spec_ras.copy_valid := copy_next
spec_ras.copy_in_mem := commit_ras.copy_out_mem
spec_ras.copy_in_sp := commit_ras.copy_out_sp
commit_ras.copy_valid := DontCare
commit_ras.copy_valid := false.B
commit_ras.copy_in_mem := DontCare
commit_ras.copy_in_sp := DontCare
......@@ -189,26 +202,28 @@ class RAS extends BasePredictor
io.branchInfo.rasToqAddr := DontCare
if (BPUDebug && debug) {
// XSDebug("----------------RAS(spec)----------------\n")
// XSDebug(" index addr ctr \n")
// for(i <- 0 until RasSize){
// XSDebug(" (%d) 0x%x %d",i.U,spec_ras(i).retAddr,spec_ras(i).ctr)
// when(i.U === spec_sp){XSDebug(false,true.B," <----sp")}
// XSDebug(false,true.B,"\n")
// }
// XSDebug("----------------RAS(commit)----------------\n")
// XSDebug(" index addr ctr \n")
// for(i <- 0 until RasSize){
// XSDebug(" (%d) 0x%x %d",i.U,commit_ras(i).retAddr,commit_ras(i).ctr)
// when(i.U === commit_sp){XSDebug(false,true.B," <----sp")}
// XSDebug(false,true.B,"\n")
// }
// XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_ras_write.retAddr,spec_ras_write.ctr,sepc_alloc_new,spec_sp.asUInt)
// XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
// XSDebug(commit_push, "(commit_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",commit_ras_write.retAddr,commit_ras_write.ctr,sepc_alloc_new,commit_sp.asUInt)
// XSDebug(commit_pop, "(commit_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
// XSDebug("copyValid:%d copyNext:%d \n",copy_valid,copy_next)
val spec_debug = spec.debugIO
val commit_debug = commit.debugIO
XSDebug("----------------RAS(spec)----------------\n")
XSDebug(" index addr ctr \n")
for(i <- 0 until RasSize){
XSDebug(" (%d) 0x%x %d",i.U,spec_ras.copy_out_mem(i).retAddr,spec_ras.copy_out_mem(i).ctr)
when(i.U === spec_ras.copy_out_sp){XSDebug(false,true.B," <----sp")}
XSDebug(false,true.B,"\n")
}
XSDebug("----------------RAS(commit)----------------\n")
XSDebug(" index addr ctr \n")
for(i <- 0 until RasSize){
XSDebug(" (%d) 0x%x %d",i.U,commit_ras.copy_out_mem(i).retAddr,commit_ras.copy_out_mem(i).ctr)
when(i.U === commit_ras.copy_out_sp){XSDebug(false,true.B," <----sp")}
XSDebug(false,true.B,"\n")
}
XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_new_addr,spec_debug.write_entry.ctr,spec_debug.alloc_new,spec_debug.sp.asUInt)
XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
XSDebug(commit_push, "(commit_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",commit_new_addr,commit_debug.write_entry.ctr,commit_debug.alloc_new,commit_debug.sp.asUInt)
XSDebug(commit_pop, "(commit_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target)
XSDebug("copyValid:%d copyNext:%d \n",copy_valid,copy_next)
}
......
......@@ -121,26 +121,26 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val tageEntrySz = 1 + tagLen + TageCtrBits
val bankAlignedPC = bankAligned(io.req.bits.pc)
val if2_bankAlignedPC = bankAligned(io.req.bits.pc)
// this bank means cache bank
val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
val if2_startsAtOddBank = bankInGroup(if2_bankAlignedPC)(0)
// use real address to index
// val unhashed_idxes = VecInit((0 until TageBanks).map(b => ((io.req.bits.pc >> 1.U) + b.U) >> log2Up(TageBanks).U))
val unhashed_idx = Wire(Vec(2, UInt((log2Ceil(nRows)+tagLen).W)))
val if2_unhashed_idx = Wire(Vec(2, UInt((log2Ceil(nRows)+tagLen).W)))
// the first bank idx always corresponds to pc
unhashed_idx(0) := io.req.bits.pc >> (1+log2Ceil(TageBanks))
if2_unhashed_idx(0) := io.req.bits.pc >> (1+log2Ceil(TageBanks))
// when pc is at odd bank, the second bank is at the next idx
unhashed_idx(1) := unhashed_idx(0) + startsAtOddBank
if2_unhashed_idx(1) := if2_unhashed_idx(0) + if2_startsAtOddBank
// val idxes_and_tags = (0 until TageBanks).map(b => compute_tag_and_hash(unhashed_idxes(b.U), io.req.bits.hist))
// val (idx, tag) = compute_tag_and_hash(unhashed_idx, io.req.bits.hist)
val idxes_and_tags = unhashed_idx.map(compute_tag_and_hash(_, io.req.bits.hist))
// val idxes = VecInit(idxes_and_tags.map(_._1))
// val tags = VecInit(idxes_and_tags.map(_._2))
// val idxes_and_tags = (0 until TageBanks).map(b => compute_tag_and_hash(if2_unhashed_idxes(b.U), io.req.bits.hist))
// val (idx, tag) = compute_tag_and_hash(if2_unhashed_idx, io.req.bits.hist)
val if2_idxes_and_tags = if2_unhashed_idx.map(compute_tag_and_hash(_, io.req.bits.hist))
// val idxes = VecInit(if2_idxes_and_tags.map(_._1))
// val tags = VecInit(if2_idxes_and_tags.map(_._2))
val idxes_latch = RegEnable(VecInit(idxes_and_tags.map(_._1)), io.req.valid)
val tags_latch = RegEnable(VecInit(idxes_and_tags.map(_._2)), io.req.valid)
// and_tags_latch = RegEnable(idxes_and_tags, enable=io.req.valid)
val if3_idxes = RegEnable(VecInit(if2_idxes_and_tags.map(_._1)), io.req.valid)
val if3_tags = RegEnable(VecInit(if2_idxes_and_tags.map(_._2)), io.req.valid)
// and_if3_tags = RegEnable(if2_idxes_and_tags, enable=io.req.valid)
// val idxLatch = RegEnable(idx, enable=io.req.valid)
// val tagLatch = RegEnable(tag, enable=io.req.valid)
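// compute_tag_and_hash is defined elsewhere in this file. As a hedged model
// of the classic TAGE indexing it is assumed to implement (illustrative, not
// from this commit), the histLen-bit global history is folded down to the
// index/tag width by xor-ing fixed-size chunks, then xor-ed with pc bits:
def foldHistory(hist: BigInt, histLen: Int, width: Int): BigInt =
  (0 until histLen by width)
    .map(i => (hist >> i) & ((BigInt(1) << width) - 1))
    .foldLeft(BigInt(0))(_ ^ _)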
......@@ -175,59 +175,59 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val lo_us = List.fill(TageBanks)(Module(new HL_Bank(nRows)))
val table = List.fill(TageBanks)(Module(new SRAMTemplate(new TageEntry, set=nRows, shouldReset=false, holdRead=true, singlePort=false)))
val hi_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val lo_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val table_r = WireInit(0.U.asTypeOf(Vec(TageBanks, new TageEntry)))
val if3_hi_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val if3_lo_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
val if3_table_r = WireInit(0.U.asTypeOf(Vec(TageBanks, new TageEntry)))
val baseBank = io.req.bits.pc(log2Up(TageBanks), 1)
val baseBankLatch = RegEnable(baseBank, enable=io.req.valid)
val if2_baseBank = io.req.bits.pc(log2Up(TageBanks), 1)
val if3_baseBank = RegEnable(if2_baseBank, enable=io.req.valid)
val bankIdxInOrder = VecInit((0 until TageBanks).map(b => (baseBankLatch +& b.U)(log2Up(TageBanks)-1, 0)))
val if3_bankIdxInOrder = VecInit((0 until TageBanks).map(b => (if3_baseBank +& b.U)(log2Up(TageBanks)-1, 0)))
val realMask = Mux(startsAtOddBank,
val if2_realMask = Mux(if2_startsAtOddBank,
Cat(io.req.bits.mask(bankWidth-1,0), io.req.bits.mask(PredictWidth-1, bankWidth)),
io.req.bits.mask)
val maskLatch = RegEnable(realMask, enable=io.req.valid)
val if3_realMask = RegEnable(if2_realMask, enable=io.req.valid)
(0 until TageBanks).map(
b => {
val idxes = VecInit(idxes_and_tags.map(_._1))
val idx = (if (b < bankWidth) Mux(startsAtOddBank, idxes(1), idxes(0))
else Mux(startsAtOddBank, idxes(0), idxes(1)))
hi_us(b).io.r.req.valid := io.req.valid && realMask(b)
val idxes = VecInit(if2_idxes_and_tags.map(_._1))
val idx = (if (b < bankWidth) Mux(if2_startsAtOddBank, idxes(1), idxes(0))
else Mux(if2_startsAtOddBank, idxes(0), idxes(1)))
hi_us(b).io.r.req.valid := io.req.valid && if2_realMask(b)
hi_us(b).io.r.req.bits.setIdx := idx
lo_us(b).io.r.req.valid := io.req.valid && realMask(b)
lo_us(b).io.r.req.valid := io.req.valid && if2_realMask(b)
lo_us(b).io.r.req.bits.setIdx := idx
table(b).reset := reset.asBool
table(b).io.r.req.valid := io.req.valid && realMask(b)
table(b).io.r.req.valid := io.req.valid && if2_realMask(b)
table(b).io.r.req.bits.setIdx := idx
hi_us_r(b) := hi_us(b).io.r.resp.data
lo_us_r(b) := lo_us(b).io.r.resp.data
table_r(b) := table(b).io.r.resp.data(0)
if3_hi_us_r(b) := hi_us(b).io.r.resp.data
if3_lo_us_r(b) := lo_us(b).io.r.resp.data
if3_table_r(b) := table(b).io.r.resp.data(0)
}
)
val startsAtOddBankLatch = RegEnable(startsAtOddBank, io.req.valid)
val if3_startsAtOddBank = RegEnable(if2_startsAtOddBank, io.req.valid)
val req_rhits = VecInit((0 until TageBanks).map(b => {
val tag = (if (b < bankWidth) Mux(startsAtOddBank, tags_latch(1), tags_latch(0))
else Mux(startsAtOddBank, tags_latch(0), tags_latch(1)))
val bank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
table_r(bank).valid && table_r(bank).tag === tag
val if3_req_rhits = VecInit((0 until TageBanks).map(b => {
val tag = (if (b < bankWidth) Mux(if3_startsAtOddBank, if3_tags(1), if3_tags(0))
else Mux(if3_startsAtOddBank, if3_tags(0), if3_tags(1)))
val bank = (if (b < bankWidth) Mux(if3_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if3_startsAtOddBank, (b-bankWidth).U, b.U))
if3_table_r(bank).valid && if3_table_r(bank).tag === tag
}))
(0 until TageBanks).map(b => {
val bank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
io.resp(b).valid := req_rhits(b) && maskLatch(b)
io.resp(b).bits.ctr := table_r(bank).ctr
io.resp(b).bits.u := Cat(hi_us_r(bank),lo_us_r(bank))
val bank = (if (b < bankWidth) Mux(if3_startsAtOddBank, (b+bankWidth).U, b.U)
else Mux(if3_startsAtOddBank, (b-bankWidth).U, b.U))
io.resp(b).valid := if3_req_rhits(b) && if3_realMask(b)
io.resp(b).bits.ctr := if3_table_r(bank).ctr
io.resp(b).bits.u := Cat(if3_hi_us_r(bank),if3_lo_us_r(bank))
})
......@@ -292,7 +292,7 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
// when (RegNext(wrbypass_rhit)) {
// for (b <- 0 until TageBanks) {
// when (RegNext(wrbypass_rctr_hits(b.U + baseBank))) {
// io.resp(b).bits.ctr := rhit_ctrs(bankIdxInOrder(b))
// io.resp(b).bits.ctr := rhit_ctrs(if3_bankIdxInOrder(b))
// }
// }
// }
......@@ -335,17 +335,17 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
val u = io.update
val b = PriorityEncoder(u.mask)
val ub = PriorityEncoder(u.uMask)
val idx = idxes_and_tags.map(_._1)
val tag = idxes_and_tags.map(_._2)
val idx = if2_idxes_and_tags.map(_._1)
val tag = if2_idxes_and_tags.map(_._2)
XSDebug(io.req.valid, "tableReq: pc=0x%x, hist=%x, idx=(%d,%d), tag=(%x,%x), baseBank=%d, mask=%b, realMask=%b\n",
io.req.bits.pc, io.req.bits.hist, idx(0), idx(1), tag(0), tag(1), baseBank, io.req.bits.mask, realMask)
io.req.bits.pc, io.req.bits.hist, idx(0), idx(1), tag(0), tag(1), if2_baseBank, io.req.bits.mask, if2_realMask)
for (i <- 0 until TageBanks) {
XSDebug(RegNext(io.req.valid) && req_rhits(i), "TageTableResp[%d]: idx=(%d,%d), hit:%d, ctr:%d, u:%d\n",
i.U, idxes_latch(0), idxes_latch(1), req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
XSDebug(RegNext(io.req.valid) && if3_req_rhits(i), "TageTableResp[%d]: idx=(%d,%d), hit:%d, ctr:%d, u:%d\n",
i.U, if3_idxes(0), if3_idxes(1), if3_req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
}
XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", req_rhits.asUInt, maskLatch)
XSDebug(RegNext(io.req.valid) && !req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", if3_req_rhits.asUInt, if3_realMask)
XSDebug(RegNext(io.req.valid) && !if3_req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
XSDebug(io.update.mask.reduce(_||_), "update Table: pc:%x, fetchIdx:%d, hist:%x, bank:%d, taken:%d, alloc:%d, oldCtr:%d\n",
u.pc, u.fetchIdx, u.hist, b, u.taken(b), u.alloc(b), u.oldCtr(b))
......@@ -435,12 +435,12 @@ class Tage extends BaseTage {
override val debug = true
// Keep the table responses to process in s3
val resps = VecInit(tables.map(t => RegEnable(t.io.resp, enable=io.s3Fire)))
val scResps = VecInit(scTables.map(t => RegEnable(t.io.resp, enable=io.s3Fire)))
val if4_resps = RegEnable(VecInit(tables.map(t => t.io.resp)), enable=io.s3Fire)
val if4_scResps = RegEnable(VecInit(scTables.map(t => t.io.resp)), enable=io.s3Fire)
// val flushLatch = RegNext(io.flush)
val s2_bim = RegEnable(io.bim, enable=io.pc.valid) // actually it is s2Fire
val s3_bim = RegEnable(s2_bim, enable=io.s3Fire)
val if3_bim = RegEnable(io.bim, enable=io.pc.valid) // actually it is s2Fire
val if4_bim = RegEnable(if3_bim, enable=io.s3Fire)
val debug_pc_s2 = RegEnable(io.pc.bits, enable=io.pc.valid)
val debug_pc_s3 = RegEnable(debug_pc_s2, enable=io.s3Fire)
......@@ -482,37 +482,37 @@ class Tage extends BaseTage {
// access tag tables and output meta info
for (w <- 0 until TageBanks) {
val tageTaken = WireInit(s3_bim.ctrs(w)(1).asBool)
var altPred = s3_bim.ctrs(w)(1)
val finalAltPred = WireInit(s3_bim.ctrs(w)(1))
var provided = false.B
var provider = 0.U
io.resp.takens(w) := s3_bim.ctrs(w)(1)
val if4_tageTaken = WireInit(if4_bim.ctrs(w)(1).asBool)
var if4_altPred = if4_bim.ctrs(w)(1)
val if4_finalAltPred = WireInit(if4_bim.ctrs(w)(1))
var if4_provided = false.B
var if4_provider = 0.U
io.resp.takens(w) := if4_bim.ctrs(w)(1)
for (i <- 0 until TageNTables) {
val hit = resps(i)(w).valid
val ctr = resps(i)(w).bits.ctr
val hit = if4_resps(i)(w).valid
val ctr = if4_resps(i)(w).bits.ctr
when (hit) {
io.resp.takens(w) := Mux(ctr === 3.U || ctr === 4.U, altPred, ctr(2)) // Use altpred on weak taken
tageTaken := Mux(ctr === 3.U || ctr === 4.U, altPred, ctr(2))
finalAltPred := altPred
io.resp.takens(w) := Mux(ctr === 3.U || ctr === 4.U, if4_altPred, ctr(2)) // Use altpred on weak taken
if4_tageTaken := Mux(ctr === 3.U || ctr === 4.U, if4_altPred, ctr(2))
if4_finalAltPred := if4_altPred
}
provided = provided || hit // Once hit then provide
provider = Mux(hit, i.U, provider) // Use the last hit as provider
altPred = Mux(hit, ctr(2), altPred) // Save current pred as potential altpred
if4_provided = if4_provided || hit // Once hit then provide
if4_provider = Mux(hit, i.U, if4_provider) // Use the last hit as provider
if4_altPred = Mux(hit, ctr(2), if4_altPred) // Save current pred as potential altpred
}
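// The loop above builds a priority chain with Scala vars — each Mux layer is
// unrolled at elaboration, so the last hitting table wins as provider and
// the previous prediction survives as the alternate. A hedged, standalone
// sketch of the pattern (names illustrative, not from this commit):
import chisel3._
def foldProvider(hits: Seq[Bool], ctrs: Seq[UInt], basePred: Bool): (Bool, UInt, Bool) = {
  var provided = false.B
  var provider = 0.U
  var altPred  = basePred
  var taken    = basePred
  for (i <- hits.indices) {
    val weak = ctrs(i) === 3.U || ctrs(i) === 4.U  // weak counters defer to altpred
    taken    = Mux(hits(i), Mux(weak, altPred, ctrs(i)(2)), taken)
    altPred  = Mux(hits(i), ctrs(i)(2), altPred)
    provider = Mux(hits(i), i.U, provider)
    provided = provided || hits(i)
  }
  (provided, provider, taken)
}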
io.resp.hits(w) := provided
io.meta(w).provider.valid := provided
io.meta(w).provider.bits := provider
io.meta(w).altDiffers := finalAltPred =/= io.resp.takens(w)
io.meta(w).providerU := resps(provider)(w).bits.u
io.meta(w).providerCtr := resps(provider)(w).bits.ctr
io.meta(w).taken := tageTaken
io.resp.hits(w) := if4_provided
io.meta(w).provider.valid := if4_provided
io.meta(w).provider.bits := if4_provider
io.meta(w).altDiffers := if4_finalAltPred =/= io.resp.takens(w)
io.meta(w).providerU := if4_resps(if4_provider)(w).bits.u
io.meta(w).providerCtr := if4_resps(if4_provider)(w).bits.ctr
io.meta(w).taken := if4_tageTaken
// Create a mask for tables which did not hit our query, contain useless entries,
// and use a longer history than the provider
val allocatableSlots = (VecInit(resps.map(r => !r(w).valid && r(w).bits.u === 0.U)).asUInt &
~(LowerMask(UIntToOH(provider), TageNTables) & Fill(TageNTables, provided.asUInt))
val allocatableSlots = (VecInit(if4_resps.map(r => !r(w).valid && r(w).bits.u === 0.U)).asUInt &
~(LowerMask(UIntToOH(if4_provider), TageNTables) & Fill(TageNTables, if4_provided.asUInt))
)
val allocLFSR = LFSR64()(TageNTables - 1, 0)
val firstEntry = PriorityEncoder(allocatableSlots)
......@@ -525,12 +525,12 @@ class Tage extends BaseTage {
scMeta := DontCare
val scTableSums = VecInit(
(0 to 1) map { i => {
// val providerCtr = resps(provider)(w).bits.ctr.zext()
// val providerCtr = if4_resps(if4_provider)(w).bits.ctr.zext()
// val pvdrCtrCentered = (((providerCtr - 4.S) << 1) + 1.S) << 3
// sum += pvdrCtrCentered
if (EnableSC) {
(0 until SCNTables) map { j =>
scTables(j).getCenteredValue(scResps(j)(w).ctr(i))
scTables(j).getCenteredValue(if4_scResps(j)(w).ctr(i))
} reduce (_+_) // TODO: rewrite with adder tree
}
else 0.S
......@@ -539,21 +539,21 @@ class Tage extends BaseTage {
)
if (EnableSC) {
scMeta.tageTaken := tageTaken
scMeta.scUsed := provided
scMeta.scPred := tageTaken
scMeta.tageTaken := if4_tageTaken
scMeta.scUsed := if4_provided
scMeta.scPred := if4_tageTaken
scMeta.sumAbs := 0.U
when (provided) {
val providerCtr = resps(provider)(w).bits.ctr.zext()
when (if4_provided) {
val providerCtr = if4_resps(if4_provider)(w).bits.ctr.zext()
val pvdrCtrCentered = ((((providerCtr - 4.S) << 1).asSInt + 1.S) << 3).asSInt
val totalSum = scTableSums(tageTaken.asUInt) + pvdrCtrCentered
val totalSum = scTableSums(if4_tageTaken.asUInt) + pvdrCtrCentered
val sumAbs = totalSum.abs().asUInt
val sumBelowThreshold = totalSum.abs.asUInt < useThreshold
val scPred = totalSum >= 0.S
scMeta.sumAbs := sumAbs
scMeta.ctrs := VecInit(scResps.map(r => r(w).ctr(tageTaken.asUInt)))
scMeta.ctrs := VecInit(if4_scResps.map(r => r(w).ctr(if4_tageTaken.asUInt)))
for (i <- 0 until SCNTables) {
XSDebug(RegNext(io.s3Fire), p"SCTable(${i.U})(${w.U}): ctr:(${scResps(i)(w).ctr(0)},${scResps(i)(w).ctr(1)})\n")
XSDebug(RegNext(io.s3Fire), p"SCTable(${i.U})(${w.U}): ctr:(${if4_scResps(i)(w).ctr(0)},${if4_scResps(i)(w).ctr(1)})\n")
}
XSDebug(RegNext(io.s3Fire), p"SC(${w.U}): pvdCtr(${providerCtr}), pvdCentred(${pvdrCtrCentered}), totalSum(${totalSum}), abs(${sumAbs}) useThres(${useThreshold}), scPred(${scPred})\n")
// Use prediction from Statistical Corrector
......@@ -664,10 +664,10 @@ class Tage extends BaseTage {
XSDebug(RegNext(io.s3Fire), "s3FireOnLastCycle: resp: pc=%x, hist=%x, hits=%b, takens=%b\n",
debug_pc_s3, debug_hist_s3, io.resp.hits.asUInt, io.resp.takens.asUInt)
for (i <- 0 until TageNTables) {
XSDebug(RegNext(io.s3Fire), "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b\n", i.U, VecInit(resps(i).map(_.valid)).asUInt, Cat(resps(i).map(_.bits.ctr)), Cat(resps(i).map(_.bits.u)))
XSDebug(RegNext(io.s3Fire), "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b\n", i.U, VecInit(if4_resps(i).map(_.valid)).asUInt, Cat(if4_resps(i).map(_.bits.ctr)), Cat(if4_resps(i).map(_.bits.u)))
}
XSDebug(io.update.valid, "update: pc=%x, fetchpc=%x, cycle=%d, hist=%x, taken:%d, misPred:%d, histPtr:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n",
u.pc, u.pc - (bri.fetchIdx << 1.U), bri.debug_tage_cycle, updateHist, u.taken, u.isMisPred, bri.histPtr, bri.bimCtr, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits)
XSDebug(io.update.valid, "update: pc=%x, fetchpc=%x, cycle=%d, hist=%x, taken:%d, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n",
u.pc, u.pc - (bri.fetchIdx << 1.U), bri.debug_tage_cycle, updateHist, u.taken, u.isMisPred, bri.bimCtr, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits)
XSDebug(io.update.valid && updateIsBr, p"update: sc: ${updateSCMeta}\n")
XSDebug(true.B, p"scThres: use(${useThreshold}), update(${updateThreshold})\n")
}
......
......@@ -244,7 +244,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
val commits = Flipped(new RoqCommitIO)
val rollback = Output(Valid(new Redirect))
val dcache = new DCacheLineIO
val uncache = new DCacheWordIO
......
......@@ -37,7 +37,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // FIXME: Valid() only
val ldout = Vec(2, DecoupledIO(new ExuOutput)) // writeback load
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
val commits = Flipped(new RoqCommitIO)
val rollback = Output(Valid(new Redirect)) // replay now starts from load instead of store
val dcache = new DCacheLineIO
val uncache = new DCacheWordIO
......@@ -67,12 +67,12 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val isFull = enqPtr === deqPtr && !sameFlag
val allowIn = !isFull
val loadCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.LOAD)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.lqIdx.value)
val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.LOAD)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).lqIdx.value)
val tailMask = (((1.U((LoadQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0)
val headMask = (((1.U((LoadQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0)
val enqDeqMask1 = tailMask ^ headMask
val deqMask = UIntToMask(deqPtr, LoadQueueSize)
val enqMask = UIntToMask(enqPtr, LoadQueueSize)
val enqDeqMask1 = deqMask ^ enqMask
val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
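// An illustrative pure-Scala model (not from this commit) of the occupancy
// mask above. UIntToMask(ptr, n) sets bits [ptr-1:0]; xor-ing the deq and
// enq masks marks the entries between the two pointers, and the result is
// inverted when the enq pointer has wrapped past deq (flags differ).
def uintToMask(ptr: Int, n: Int): Int = (1 << ptr) - 1
def occupancy(deq: Int, enq: Int, sameFlag: Boolean, n: Int): Int = {
  val x = uintToMask(deq, n) ^ uintToMask(enq, n)
  if (sameFlag) x else ~x & ((1 << n) - 1)
}
// e.g. n = 8: occupancy(2, 5, true, 8)  == 0x1c  (entries 2..4 in use)
//             occupancy(5, 2, false, 8) == 0xe3  (entries 5..7 and 0..1 in use)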
// Enqueue at dispatch
......@@ -172,7 +172,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
allocated(i) && miss(i) && !inflight
})
val missRefillSel = getFirstOne(missRefillSelVec, tailMask)
val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
io.dcache.req.valid := missRefillSelVec.asUInt.orR
io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
......@@ -307,7 +307,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
// allocatedMask: deqPtr can advance to the next 1-bit
val allocatedMask = VecInit((0 until LoadQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
// find the first one starting from deqPtr
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, deqPtrExt.flag)
val nextTail1 = getFirstOneWithFlag(allocatedMask, deqMask, deqPtrExt.flag)
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, enqPtrExt)
deqPtrExt := nextTail
......@@ -319,9 +319,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
}
})
// rollback check
val rollback = Wire(Vec(StorePipelineWidth, Valid(new Redirect)))
def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
val length = mask.length
val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
......@@ -372,91 +369,88 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
})
// store backward query and rollback
// val needCheck = Seq.fill(8)(WireInit(true.B))
(0 until StorePipelineWidth).foreach(i => {
rollback(i) := DontCare
when(io.storeIn(i).valid) {
val startIndex = io.storeIn(i).bits.uop.lqIdx.value
val lqIdxMask = ((1.U((LoadQueueSize + 1).W) << startIndex).asUInt - 1.U)(LoadQueueSize - 1, 0)
val xorMask = lqIdxMask ^ headMask
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if a load already in the lq needs to be rolled back
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
}))
val lqViolation = lqViolationVec.asUInt().orR()
val lqViolationIndex = getFirstOne(lqViolationVec, lqIdxMask)
val lqViolationUop = uop(lqViolationIndex)
XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")
// when l/s writeback to roq together, check if rollback is needed
val wbViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
io.loadIn(j).valid &&
isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
}))
val wbViolation = wbViolationVec.asUInt().orR()
val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop))
XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")
// check if rollback is needed for load in l1
val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => {
io.forward(j).valid && // L4 valid
isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.forward(j).mask).orR
}))
val l1Violation = l1ViolationVec.asUInt().orR()
val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop))
XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")
val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)
rollback(i).valid := Cat(rollbackValidVec).orR
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val oneAfterZero = mask(1)(0)
val rollbackUop = Mux(oneAfterZero && mask(2)(0),
rollbackUopVec(0),
Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))
rollback(i).bits.roqIdx := rollbackUop.roqIdx - 1.U
rollback(i).bits.isReplay := true.B
rollback(i).bits.isMisPred := false.B
rollback(i).bits.isException := false.B
rollback(i).bits.isFlushPipe := false.B
rollback(i).bits.target := rollbackUop.cf.pc
rollback(i).bits.brTag := rollbackUop.brTag
XSDebug(
l1Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
)
XSDebug(
lqViolation,
"need rollback (ld wb before store) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
)
XSDebug(
wbViolation,
"need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
)
}.otherwise {
rollback(i).valid := false.B
}
})
def detectRollback(i: Int) = {
val startIndex = io.storeIn(i).bits.uop.lqIdx.value
val lqIdxMask = UIntToMask(startIndex, LoadQueueSize)
val xorMask = lqIdxMask ^ enqMask
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if a load already in the lq needs to be rolled back
val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j))
// TODO: update refilled data
val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(violationVec).orR() && entryNeedCheck
})))
val lqViolation = lqViolationVec.asUInt().orR()
val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
val lqViolationUop = uop(lqViolationIndex)
// lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag
// lqViolationUop.lqIdx.value := lqViolationIndex
XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n")
// when l/s writeback to roq together, check if rollback is needed
val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
io.loadIn(j).valid &&
isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR
})))
val wbViolation = wbViolationVec.asUInt().orR()
val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop))))
XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n")
// check if rollback is needed for load in l1
val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => {
io.forward(j).valid && // L1 valid
isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) &&
(io.storeIn(i).bits.mask & io.forward(j).mask).orR
})))
val l1Violation = l1ViolationVec.asUInt().orR()
val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.forward.map(_.uop))))
XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n")
val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation)
val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop)
val mask = getAfterMask(rollbackValidVec, rollbackUopVec)
val oneAfterZero = mask(1)(0)
val rollbackUop = Mux(oneAfterZero && mask(2)(0),
rollbackUopVec(0),
Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2)))
XSDebug(
l1Violation,
"need rollback (l4 load) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt
)
XSDebug(
lqViolation,
"need rollback (ld wb before store) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt
)
XSDebug(
wbViolation,
"need rollback (ld/st wb together) pc %x roqidx %d target %x\n",
io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt
)
(RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop)
}
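// Relative to the deleted block above, detectRollback also pipelines the
// wide CAM-style compares with RegNext and qualifies them with
// RegNext(io.storeIn(i).valid) one cycle later. A hedged, generic sketch of
// that timing pattern (illustrative only, not from this commit):
import chisel3._
def pipelinedCheck(reqValid: Bool, hitsNow: Vec[Bool]): Bool = {
  val hitsReg = RegNext(hitsNow)             // register the wide comparison result
  RegNext(reqValid) && hitsReg.asUInt.orR    // qualify it in the next cycle
}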
def rollbackSel(a: Valid[Redirect], b: Valid[Redirect]): ValidIO[Redirect] = {
// rollback check
val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
for (i <- 0 until StorePipelineWidth) {
val detectedRollback = detectRollback(i)
rollback(i).valid := detectedRollback._1
rollback(i).bits := detectedRollback._2
}
def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = {
Mux(
a.valid,
Mux(
......@@ -468,17 +462,31 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
)
}
io.rollback := ParallelOperation(rollback, rollbackSel)
val rollbackSelected = ParallelOperation(rollback, rollbackSel)
val lastCycleRedirect = RegNext(io.brqRedirect)
io.rollback := DontCare
// Note that we use roqIdx - 1.U to flush the load instruction itself.
// Thus, if last cycle's roqIdx equals this cycle's roqIdx, it still triggers the redirect.
io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx))
io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U
io.rollback.bits.isReplay := true.B
io.rollback.bits.isMisPred := false.B
io.rollback.bits.isException := false.B
io.rollback.bits.isFlushPipe := false.B
io.rollback.bits.target := rollbackSelected.bits.cf.pc
io.rollback.bits.brTag := rollbackSelected.bits.brTag
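// A hedged model (illustrative only) of the filter above. Since the redirect
// targets roqIdx - 1 (so the offending load itself replays), any rollback
// that is not strictly younger than last cycle's redirect is already covered
// by it and must be suppressed — hence !isAfter(...) rather than a plain
// less-than on the circular roqIdx.
def needNewRollback(curValid: Boolean, curAge: Int,
                    lastValid: Boolean, lastAge: Int): Boolean =
  curValid && (!lastValid || !(curAge > lastAge))  // ages model circular isAfter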
// Memory mapped IO / other uncached operations
// setup misc mem access req
// mask / paddr / data can be read from lq.data
val commitType = io.commits(0).bits.uop.ctrl.commitType
val commitType = io.commits.uop(0).ctrl.commitType
io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
commitType === CommitType.LOAD &&
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits(0).bits.isWalk
!io.commits.isWalk
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
......@@ -496,7 +504,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
io.uncache.resp.ready := true.B
when(io.uncache.req.fire()){
when (io.uncache.req.fire()) {
pending(deqPtr) := false.B
}
......
......@@ -34,23 +34,23 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
val commits = Flipped(new RoqCommitIO)
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
// val refill = Flipped(Valid(new DCacheLineReq ))
val exceptionAddr = new ExceptionAddrIO
})
val uop = Reg(Vec(StoreQueueSize, new MicroOp))
// val data = Reg(Vec(StoreQueueSize, new LsqEntry))
val dataModule = Module(new LSQueueData(StoreQueueSize, StorePipelineWidth))
dataModule.io := DontCare
val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
val datavalid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // non-mmio data is valid
val writebacked = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // inst has been writebacked to CDB
val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been committed by roq
val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq
val enqPtrExt = RegInit(0.U.asTypeOf(new SqPtr))
val deqPtrExt = RegInit(0.U.asTypeOf(new SqPtr))
val enqPtr = enqPtrExt.value
......@@ -59,12 +59,12 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val isEmpty = enqPtr === deqPtr && sameFlag
val isFull = enqPtr === deqPtr && !sameFlag
val allowIn = !isFull
val storeCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.STORE)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.sqIdx.value)
val tailMask = (((1.U((StoreQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0)
val headMask = (((1.U((StoreQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0)
val storeCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.STORE)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).sqIdx.value)
val tailMask = UIntToMask(deqPtr, StoreQueueSize)
val headMask = UIntToMask(enqPtr, StoreQueueSize)
val enqDeqMask1 = tailMask ^ headMask
val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
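// e.g. with StoreQueueSize = 8 and deqPtr = 3, tailMask = 0b00000111: ones
// mark entries strictly below the pointer; XORing head and tail masks then
// gives the occupied region, inverted when the pointer flags differ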
......@@ -94,7 +94,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
enqPtrExt := enqPtrExt + PopCount(firedDispatch)
XSInfo("dispatched %d insts to sq\n", PopCount(firedDispatch))
}
// writeback store
(0 until StorePipelineWidth).map(i => {
dataModule.io.wb(i).wen := false.B
......@@ -163,7 +163,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// TODO: do not select according to seq, just select 2 valid bits randomly
val firstSelVec = valid
val notFirstVec = Wire(Vec(valid.length, Bool()))
(0 until valid.length).map(i =>
notFirstVec(i) := (if (i != 0) { valid(i - 1) || notFirstVec(i - 1) } else { false.B })
)
val secondSelVec = VecInit((0 until valid.length).map(i => valid(i) && notFirstVec(i)))
......@@ -228,8 +228,8 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
val differentFlag = deqPtrExt.flag =/= io.forward(i).sqIdx.flag
val forwardMask = ((1.U((StoreQueueSize + 1).W)) << io.forward(i).sqIdx.value).asUInt - 1.U
val forwardMask = UIntToMask(io.forward(i).sqIdx.value, StoreQueueSize)
val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B)))
for (j <- 0 until StoreQueueSize) {
storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked
}
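// needForward1/needForward2 (used below) are derived in an elided hunk; a
// plausible sketch, assuming the tailMask/forwardMask definitions above:
//   needForward1 = Mux(differentFlag, ~tailMask, tailMask ^ forwardMask) & storeWritebackedVec.asUInt
//   needForward2 = Mux(differentFlag, forwardMask, 0.U) & storeWritebackedVec.asUInt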
......@@ -241,7 +241,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// do real fwd query
dataModule.io.forwardQuery(
channel = i,
paddr = io.forward(i).paddr,
needForward1 = needForward1,
needForward2 = needForward2
)
......@@ -279,20 +279,20 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
})
// Memory mapped IO / other uncached operations
// setup misc mem access req
// mask / paddr / data can be read from sq.data
val commitType = io.commits(0).bits.uop.ctrl.commitType
val commitType = io.commits.uop(0).ctrl.commitType
io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
commitType === CommitType.STORE &&
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits(0).bits.isWalk
!io.commits.isWalk
io.uncache.req.bits.cmd := MemoryOpConstants.M_XWR
io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.id := DontCare // TODO: // FIXME
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(deqPtr).paddr
......@@ -301,18 +301,18 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
when(io.uncache.req.fire()){
pending(deqPtr) := false.B
}
when(io.uncache.resp.fire()){
datavalid(deqPtr) := true.B // will be writeback to CDB in the next cycle
// TODO: write back exception info
}
when(io.uncache.req.fire()){
XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
uop(deqPtr).cf.pc,
......
......@@ -64,7 +64,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
// we send feedback right after we receive the request
// also, we always treat amo as tlb hit
// since we will continue polling tlb all by ourselves
io.tlbFeedback.valid := RegNext(io.in.fire())
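// the new code feeds back two cycles after the request arrives, presumably
// to match the reservation station's feedback timing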
io.tlbFeedback.valid := RegNext(RegNext(io.in.valid))
io.tlbFeedback.bits.hit := true.B
io.tlbFeedback.bits.roqIdx := in.uop.roqIdx
......
......@@ -21,35 +21,23 @@ class LoadUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcacheReq = DecoupledIO(new DCacheLoadReq)
})
val s0_uop = io.in.bits.uop
val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
val s0_paddr = io.dtlbResp.bits.paddr
val s0_tlb_miss = io.dtlbResp.bits.miss
val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))
// query DTLB
io.dtlbReq.valid := io.out.valid
io.dtlbReq.valid := io.in.valid
io.dtlbReq.bits.vaddr := s0_vaddr
io.dtlbReq.bits.cmd := TlbCmd.read
io.dtlbReq.bits.roqIdx := s0_uop.roqIdx
io.dtlbReq.bits.debug.pc := s0_uop.cf.pc
io.dtlbResp.ready := io.out.ready // TODO: check it: io.out.fire()?
// feedback tlb result to RS
// Note: can be moved to s1
io.tlbFeedback.valid := io.out.valid
io.tlbFeedback.bits.hit := !s0_tlb_miss
io.tlbFeedback.bits.roqIdx := s0_uop.roqIdx
// query DCache
io.dcacheReq.valid := io.in.valid && !s0_uop.roqIdx.needFlush(io.redirect)
io.dcacheReq.valid := io.in.valid
io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD
io.dcacheReq.bits.addr := s0_vaddr
io.dcacheReq.bits.mask := s0_mask
......@@ -72,21 +60,18 @@ class LoadUnit_S0 extends XSModule {
"b11".U -> (s0_vaddr(2, 0) === 0.U) //d
))
io.out.valid := io.dcacheReq.fire() && // dcache may not accept load request
!io.in.bits.uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && io.dcacheReq.ready
io.out.bits := DontCare
io.out.bits.vaddr := s0_vaddr
io.out.bits.paddr := s0_paddr
io.out.bits.tlbMiss := io.dtlbResp.bits.miss
io.out.bits.mask := s0_mask
io.out.bits.uop := s0_uop
io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.in.ready := io.out.fire()
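// ready when the stage holds no valid request, or when both the next stage
// and the dcache can accept it (a blocked dcache stalls the load pipeline)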
io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready)
XSDebug(io.dcacheReq.fire(), "[DCACHE LOAD REQ] pc %x vaddr %x paddr will be %x\n",
s0_uop.cf.pc, s0_vaddr, s0_paddr
XSDebug(io.dcacheReq.fire(),
p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, vaddr ${Hexadecimal(s0_vaddr)}\n"
)
}
......@@ -97,20 +82,28 @@ class LoadUnit_S1 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val s1_paddr = Output(UInt(PAddrBits.W))
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcachePAddr = Output(UInt(PAddrBits.W))
val sbuffer = new LoadForwardQueryIO
val lsq = new LoadForwardQueryIO
})
val s1_uop = io.in.bits.uop
val s1_paddr = io.in.bits.paddr
val s1_tlb_miss = io.in.bits.tlbMiss
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_mask = io.in.bits.mask
io.out.bits := io.in.bits // forwardXX field will be updated in s1
io.s1_paddr := s1_paddr
io.dtlbResp.ready := true.B
// feedback tlb result to RS
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx
io.dcachePAddr := s1_paddr
// load forward query datapath
io.sbuffer.valid := io.in.valid
......@@ -127,15 +120,13 @@ class LoadUnit_S1 extends XSModule {
io.lsq.mask := s1_mask
io.lsq.pc := s1_uop.cf.pc // FIXME: remove it
io.out.bits.forwardMask := io.sbuffer.forwardMask
io.out.bits.forwardData := io.sbuffer.forwardData
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && !s1_tlb_miss
io.out.bits.paddr := s1_paddr
io.out.bits.mmio := s1_mmio
io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.in.ready := io.out.ready || !io.in.valid
io.in.ready := !io.in.valid || io.out.ready
}
......@@ -146,9 +137,9 @@ class LoadUnit_S2 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
val sbuffer = new LoadForwardQueryIO
})
val s2_uop = io.in.bits.uop
......@@ -197,7 +188,7 @@ class LoadUnit_S2 extends XSModule {
// TODO: ECC check
io.out.valid := io.in.valid // && !s2_uop.needFlush(io.redirect) will cause comb. loop
io.out.valid := io.in.valid
// Inst will be canceled in store queue / lsq,
// so we do not need to care about flush in load / store unit's out.valid
io.out.bits := io.in.bits
......@@ -208,10 +199,16 @@ class LoadUnit_S2 extends XSModule {
io.in.ready := io.out.ready || !io.in.valid
// merge forward result
// lsq has higher priority than sbuffer
io.lsq := DontCare
io.sbuffer := DontCare
// generate XLEN/8 Muxes
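// both when blocks below may fire for the same byte; the last connection
// wins in Chisel, so the lsq block overrides the sbuffer one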
for (i <- 0 until XLEN / 8) {
when(io.lsq.forwardMask(i)) {
when (io.sbuffer.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.sbuffer.forwardData(i)
}
when (io.lsq.forwardMask(i)) {
io.out.bits.forwardMask(i) := true.B
io.out.bits.forwardData(i) := io.lsq.forwardData(i)
}
......@@ -224,18 +221,6 @@ class LoadUnit_S2 extends XSModule {
}
// class LoadUnit_S3 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
class LoadUnit extends XSModule {
val io = IO(new Bundle() {
val ldin = Flipped(Decoupled(new ExuInput))
......@@ -251,33 +236,27 @@ class LoadUnit extends XSModule {
val load_s0 = Module(new LoadUnit_S0)
val load_s1 = Module(new LoadUnit_S1)
val load_s2 = Module(new LoadUnit_S2)
// val load_s3 = Module(new LoadUnit_S3)
load_s0.io.in <> io.ldin
load_s0.io.redirect <> io.redirect
load_s0.io.dtlbReq <> io.dtlb.req
load_s0.io.dtlbResp <> io.dtlb.resp
load_s0.io.dcacheReq <> io.dcache.req
load_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, false.B)
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
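// flush is now applied at the pipeline-register boundary, so the stages
// themselves no longer take io.redirect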
io.dcache.s1_paddr := load_s1.io.out.bits.paddr
load_s1.io.redirect <> io.redirect
load_s1.io.dtlbResp <> io.dtlb.resp
load_s1.io.tlbFeedback <> io.tlbFeedback
io.dcache.s1_paddr <> load_s1.io.dcachePAddr
io.dcache.s1_kill := DontCare // FIXME
io.sbuffer <> load_s1.io.sbuffer
io.lsq.forward <> load_s1.io.lsq
load_s1.io.sbuffer <> io.sbuffer
load_s1.io.lsq <> io.lsq.forward
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, false.B)
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
load_s2.io.redirect <> io.redirect
load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq := DontCare
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
// PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B)
// load_s3.io.redirect <> io.redirect
load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData
load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask
XSDebug(load_s0.io.out.valid,
p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
......
......@@ -12,10 +12,7 @@ class StoreUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
})
// send req to dtlb
......@@ -26,16 +23,15 @@ class StoreUnit_S0 extends XSModule {
io.dtlbReq.bits.cmd := TlbCmd.write
io.dtlbReq.bits.roqIdx := io.in.bits.uop.roqIdx
io.dtlbReq.bits.debug.pc := io.in.bits.uop.cf.pc
io.dtlbResp.ready := true.B // TODO: why does dtlbResp need a ready?
io.out.bits := DontCare
io.out.bits.vaddr := saddr
io.out.bits.paddr := io.dtlbResp.bits.paddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := io.dtlbResp.bits.miss
io.out.bits.miss := DontCare
io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.valid := io.in.valid && !io.dtlbResp.bits.miss && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
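// the TLB response is now consumed in S1, so S0 no longer gates out.valid
// on the translation result (see StoreUnit_S1 below)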
io.out.valid := io.in.valid
io.in.ready := io.out.ready
// exception check
......@@ -46,18 +42,7 @@ class StoreUnit_S0 extends XSModule {
"b11".U -> (io.out.bits.vaddr(2,0) === 0.U) //d
))
io.out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
// Send TLB feedback to store issue queue
// TODO: should be moved to S1
io.tlbFeedback.valid := RegNext(io.in.valid && io.out.ready)
io.tlbFeedback.bits.hit := RegNext(!io.out.bits.miss)
io.tlbFeedback.bits.roqIdx := RegNext(io.out.bits.uop.roqIdx)
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
}
// Load Pipeline Stage 1
......@@ -67,30 +52,41 @@ class StoreUnit_S1 extends XSModule {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
// val fp_out = Decoupled(new LsPipelineBundle)
val stout = DecoupledIO(new ExuOutput) // writeback store
val redirect = Flipped(ValidIO(new Redirect))
val lsq = ValidIO(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
})
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
// writeback to LSQ
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
io.in.ready := true.B
io.out.bits := io.in.bits
io.out.bits.miss := false.B
io.out.bits.mmio := AddressSpace.isMMIO(io.in.bits.paddr)
io.out.valid := io.in.fire() // TODO: && ! FP
io.stout.bits.uop := io.in.bits.uop
// io.stout.bits.uop.cf.exceptionVec := // TODO: update according to TLB result
io.stout.bits.data := DontCare
io.stout.bits.redirectValid := false.B
io.stout.bits.redirect := DontCare
io.stout.bits.brUpdate := DontCare
io.stout.bits.debug.isMMIO := io.out.bits.mmio
io.stout.bits.fflags := DontCare
io.dtlbResp.ready := true.B // TODO: why does dtlbResp need a ready?
// Send TLB feedback to store issue queue
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.roqIdx := io.in.bits.uop.roqIdx
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
io.lsq.valid := io.in.valid && !s1_tlb_miss // TODO: && ! FP
io.lsq.bits := io.in.bits
io.lsq.bits.paddr := s1_paddr
io.lsq.bits.miss := false.B
io.lsq.bits.mmio := AddressSpace.isMMIO(s1_paddr)
io.lsq.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
// mmio inst with exception will be writebacked immediately
val hasException = io.out.bits.uop.cf.exceptionVec.asUInt.orR
io.stout.valid := io.in.fire() && (!io.out.bits.mmio || hasException) // mmio inst will be writebacked immediately
io.out.valid := io.in.valid && (!io.out.bits.mmio || hasException) && !s1_tlb_miss
io.out.bits := io.lsq.bits
// if fp
// io.fp_out.valid := ...
......@@ -98,17 +94,24 @@ class StoreUnit_S1 extends XSModule {
}
// class StoreUnit_S2 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
class StoreUnit_S2 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val stout = DecoupledIO(new ExuOutput) // writeback store
})
io.in.ready := true.B
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
io.stout.valid := io.in.valid
io.stout.bits.uop := io.in.bits.uop
io.stout.bits.data := DontCare
io.stout.bits.redirectValid := false.B
io.stout.bits.redirect := DontCare
io.stout.bits.brUpdate := DontCare
io.stout.bits.debug.isMMIO := io.in.bits.mmio
io.stout.bits.fflags := DontCare
}
class StoreUnit extends XSModule {
val io = IO(new Bundle() {
......@@ -122,25 +125,21 @@ class StoreUnit extends XSModule {
val store_s0 = Module(new StoreUnit_S0)
val store_s1 = Module(new StoreUnit_S1)
// val store_s2 = Module(new StoreUnit_S2)
val store_s2 = Module(new StoreUnit_S2)
store_s0.io.in <> io.stin
store_s0.io.redirect <> io.redirect
store_s0.io.dtlbReq <> io.dtlb.req
store_s0.io.dtlbResp <> io.dtlb.resp
store_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, false.B)
// PipelineConnect(store_s1.io.fp_out, store_s2.io.in, true.B, false.B)
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, store_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
store_s1.io.redirect <> io.redirect
store_s1.io.stout <> io.stout
// send result to sq
io.lsq.valid := store_s1.io.out.valid
io.lsq.bits := store_s1.io.out.bits
store_s1.io.lsq <> io.lsq // send result to sq
store_s1.io.dtlbResp <> io.dtlb.resp
store_s1.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
store_s2.io.stout <> io.stout
store_s1.io.out.ready := true.B
private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = {
XSDebug(cond,
p"$name" + p" pc ${Hexadecimal(pipeline.uop.cf.pc)} " +
......@@ -154,4 +153,4 @@ class StoreUnit extends XSModule {
printPipeLine(store_s0.io.out.bits, store_s0.io.out.valid, "S0")
printPipeLine(store_s1.io.out.bits, store_s1.io.out.valid, "S1")
}
\ No newline at end of file
}
......@@ -104,7 +104,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def isOneOf(key: UInt, seq: Seq[UInt]): Bool =
if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR()
def witdhMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
def maskData(mask: UInt, data: UInt): UInt = {
......@@ -160,7 +160,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req))
val mergeMask = witdhMap(i =>
val mergeMask = widthMap(i =>
req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag
)
val canMerge = Cat(mergeMask).orR()
......@@ -184,7 +184,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
state_new.zip(mem_new)
}
val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer.read(i.U)))
val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer(i)))
val initialSbuffer = stateVec.zip(bufferRead)
val updatedSbuffer = io.in.zipWithIndex.foldLeft[Seq[SbufferEntry]](initialSbuffer)(enqSbuffer)
val updatedState = updatedSbuffer.map(_._1)
......@@ -205,8 +205,8 @@ class NewSbuffer extends XSModule with HasSbufferCst {
XSDebug(req.fire(),
p"accept req [$i]: " +
p"addr:${Hexadecimal(req.bits.addr)} " +
p"mask:${Binary(req.bits.mask)} " +
p"data:${Hexadecimal(req.bits.data)}\n"
p"mask:${Binary(req.bits.mask)} " +
p"data:${Hexadecimal(req.bits.data)}\n"
)
XSDebug(req.valid && !req.ready,
p"req [$i] blocked by sbuffer\n"
......@@ -257,7 +257,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def noSameBlockInflight(idx: UInt): Bool = {
val tag = updatedSbufferLine(idx).tag
!Cat(witdhMap(i => {
!Cat(widthMap(i => {
// stateVec(idx) itself must not be s_inflight*
isOneOf(stateVec(i), Seq(s_inflight_req, s_inflight_resp)) &&
tag===updatedSbufferLine(i).tag
......@@ -316,62 +316,42 @@ class NewSbuffer extends XSModule with HasSbufferCst {
// ---------------------- Load Data Forward ---------------------
// (buff, do_forward)
// pass 'do_forward' here to avoid duplicated tag compare
type ForwardBuf = (SbufferLine, Bool)
def forwardQuery(forward: LoadForwardQueryIO, buff: ForwardBuf): LoadForwardQueryIO = {
val bufLine = buff._1
val do_forward = buff._2
val forwardWire = WireInit(forward)
val forwardMask = forwardWire.forwardMask
val forwardData = forwardWire.forwardData
val dataVec = VecInit((0 until CacheLineBytes).map(i =>
bufLine.data(i*8+7, i*8)
))
when(do_forward){
(0 until DataBytes).map(i => {
val lineOffset = Cat(getWordOffset(forward.paddr), i.U(3.W))
when(bufLine.mask(lineOffset) && forward.mask(i)){
forwardMask(i) := true.B
forwardData(i) := dataVec(lineOffset)
}
})
}
forwardWire
}
for((forward, i) <- io.forward.zipWithIndex){
val tag_matches = witdhMap(i => bufferRead(i).tag===getTag(forward.paddr))
val valid_tag_matches = witdhMap(i => tag_matches(i) && stateVec(i)===s_valid)
val inflight_tag_matches = witdhMap(i =>
for ((forward, i) <- io.forward.zipWithIndex) {
val tag_matches = widthMap(i => bufferRead(i).tag===getTag(forward.paddr))
val valid_tag_matches = widthMap(i => tag_matches(i) && stateVec(i)===s_valid)
val inflight_tag_matches = widthMap(i =>
tag_matches(i) && (stateVec(i)===s_inflight_req || stateVec(i)===s_inflight_resp)
)
val (valid_forward_idx, valid_tag_match) = PriorityEncoderWithFlag(valid_tag_matches)
val (inflight_forwad_idx, inflight_tag_match) = PriorityEncoderWithFlag(inflight_tag_matches)
val line_offset_mask = UIntToOH(getWordOffset(forward.paddr))
val valid_line = bufferRead(valid_forward_idx)
val inflight_line = bufferRead(inflight_forwad_idx)
val valid_tag_match_reg = valid_tag_matches.map(RegNext(_))
val inflight_tag_match_reg = inflight_tag_matches.map(RegNext(_))
val line_offset_reg = RegNext(line_offset_mask)
val initialForward = WireInit(forward)
initialForward.forwardMask := 0.U.asTypeOf(Vec(DataBytes, Bool()))
initialForward.forwardData := DontCare
val selectedValidLine = Mux1H(valid_tag_match_reg, bufferRead)
val selectedValidMask = Mux1H(line_offset_reg, selectedValidLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val selectedValidData = Mux1H(line_offset_reg, selectedValidLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
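// Mux1H assumes its select vector is one-hot: it ORs the selected lanes
// instead of building a priority chain, keeping this byte select cheap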
val forwardResult = Seq(
(inflight_line, inflight_tag_match),
(valid_line, valid_tag_match)
).foldLeft(initialForward)(forwardQuery)
val selectedInflightLine = Mux1H(inflight_tag_match_reg, bufferRead)
val selectedInflightMask = Mux1H(line_offset_reg, selectedInflightLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))
val selectedInflightData = Mux1H(line_offset_reg, selectedInflightLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
forward.forwardMask := forwardResult.forwardMask
forward.forwardData := forwardResult.forwardData
for (j <- 0 until DataBytes) {
forward.forwardMask(j) := false.B
forward.forwardData(j) := DontCare
XSDebug(inflight_tag_match,
p"inflight tag match: forward [$i] <> buf[$inflight_forwad_idx]\n"
)
XSDebug(valid_tag_match,
p"valid tag match: forward [$i] <> buf[$valid_forward_idx]\n"
)
XSDebug(inflight_tag_match || valid_tag_match,
// valid entries have higher priority than inflight entries
when (selectedInflightMask(j)) {
forward.forwardMask(j) := true.B
forward.forwardData(j) := selectedInflightData(j)
}
when (selectedValidMask(j)) {
forward.forwardMask(j) := true.B
forward.forwardData(j) := selectedValidData(j)
}
}
XSDebug(Cat(inflight_tag_matches).orR || Cat(valid_tag_matches).orR,
p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n"
)
}
......
......@@ -8,6 +8,8 @@ package object xiangshan {
def imm = "b01".U
def fp = "b10".U
def DC = imm // Don't Care
def isReg(srcType: UInt) = srcType===reg
def isPc(srcType: UInt) = srcType===pc
def isImm(srcType: UInt) = srcType===imm
......
(This diff is collapsed.)
#ifndef COMPRESS_H
#define COMPRESS_H
#include "common.h"
#include <zlib.h>
#include <sys/time.h>
#define LOAD_SNAPSHOT 0
#define LOAD_RAM 1
double calcTime(timeval s, timeval e);
int isGzFile(const char *filename);
long snapshot_compressToFile(uint8_t *ptr, const char *filename, long buf_size);
long readFromGz(void* ptr, const char *file_name, long buf_size, uint8_t load_type);
#endif
......@@ -3,6 +3,7 @@
#include "difftest.h"
#include <getopt.h>
#include "ram.h"
#include "zlib.h"
void* get_ram_start();
long get_ram_size();
......@@ -78,7 +79,6 @@ Emulator::Emulator(int argc, const char *argv[]):
cycles(0), hascommit(0), trapCode(STATE_RUNNING)
{
args = parse_args(argc, argv);
printf("Emu compiled at %s, %s UTC\n", __DATE__, __TIME__);
// srand
srand(args.seed);
......@@ -193,7 +193,11 @@ inline void Emulator::single_cycle() {
#ifdef WITH_DRAMSIM3
axi_channel axi;
axi_copy_from_dut_ptr(dut_ptr, axi);
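// DRAMsim3 models memory from address 0 while the SoC maps DRAM at
// 0x80000000, so rebase the AXI addresses around the helper call (assumption)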
axi.aw.addr -= 0x80000000UL;
axi.ar.addr -= 0x80000000UL;
dramsim3_helper(axi);
axi.aw.addr += 0x80000000UL;
axi.ar.addr += 0x80000000UL;
axi_set_dut_ptr(dut_ptr, axi);
#endif
......@@ -321,6 +325,7 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
}
if (Verilated::gotFinish()) {
difftest_display(dut_ptr->io_difftest_priviledgeMode);
eprintf("The simulation stopped. There might be some assertion failed.\n");
trapCode = STATE_ABORT;
}
......@@ -430,7 +435,7 @@ void Emulator::snapshot_save(const char *filename) {
}
void Emulator::snapshot_load(const char *filename) {
VerilatedRestore stream;
VerilatedRestoreMem stream;
stream.open(filename);
stream >> *dut_ptr;
......
(This diff is collapsed.)
(This diff is collapsed.)
(This diff is collapsed.)