提交 fdd71723 编写于 作者: J jinyue110

Merge branch 'master' into icache-uncache

icache: add not bus-width aligned MMIO req support

IFU: add mmio aligned function
......@@ -39,7 +39,7 @@ jobs:
echo "AM_HOME=/home/ci-runner/xsenv/nexus-am" >> $GITHUB_ENV
- name: Build EMU
run:
make ./build/emu SIM_ARGS=--disable-all NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j220
make ./build/emu SIM_ARGS=--disable-log NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME B=0 E=0 -j220
- name: Run cputest
run: |
CPU_TEST_DIR=$AM_HOME/tests/cputest
......@@ -49,7 +49,7 @@ jobs:
do
t=${test%.c}
echo $t
make -C $CPU_TEST_DIR ALL=$t ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run 2>/dev/null | grep "HIT GOOD TRAP"
make -C $CPU_TEST_DIR ALL=$t ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run B=0 E=0 | grep "HIT GOOD TRAP"
if [[ $? != 0 ]];
then
echo $t fail
......@@ -59,10 +59,10 @@ jobs:
exit $ret
- name: Run riscv-tests
run: |
make -C $RVTEST_HOME/isa/ SUITES+=rv64ui SUITES+=rv64um SUITES+=rv64ua SUITES+=rv64uf SUITES+=rv64ud NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME noop_run 2> /dev/null
make -C $RVTEST_HOME/isa/ SUITES+=rv64ui SUITES+=rv64um SUITES+=rv64ua SUITES+=rv64uf SUITES+=rv64ud NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME noop_run B=0 E=0
- name: Run microbench
run: |
make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run 2> /dev/null
make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run B=0 E=0
- name: Run coremark
run: |
make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run 2> /dev/null
make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run B=0 E=0
Subproject commit ab2a8e8afd162b601d9f749e6e6af452cccc03a7
Subproject commit ca387163b32f20406d443bdab34bc034d5281b51
......@@ -23,7 +23,9 @@ int main(int argc, char* argv[]){
fd = tryLock(argv[1]);
if(fd > 0){
getlogin_r(user, BUF_SIZE);
write(fd, user, strlen(user));
int len = strlen(user);
user[len] = '\0';
write(fd, user, len+1);
break;
} else {
// someone is holding the lock...
......
......@@ -11,6 +11,7 @@ object XSLogLevel extends Enumeration {
val ALL = Value(0, "ALL ")
val DEBUG = Value("DEBUG")
val INFO = Value("INFO ")
val PERF = Value("PERF ")
val WARN = Value("WARN ")
val ERROR = Value("ERROR")
val OFF = Value("OFF ")
......@@ -24,7 +25,9 @@ object XSLog {
{
val logEnable = WireInit(false.B)
val logTimestamp = WireInit(0.U(64.W))
if(Parameters.get.envParameters.EnableDebug){
val enableDebug = Parameters.get.envParameters.EnableDebug && debugLevel != XSLogLevel.PERF
val enablePerf = Parameters.get.envParameters.EnablePerfDebug && debugLevel == XSLogLevel.PERF
if (enableDebug || enablePerf) {
ExcitingUtils.addSink(logEnable, "DISPLAY_LOG_ENABLE")
ExcitingUtils.addSink(logTimestamp, "logTimestamp")
when (cond && logEnable) {
......@@ -98,3 +101,42 @@ object XSInfo extends LogHelper(XSLogLevel.INFO)
object XSWarn extends LogHelper(XSLogLevel.WARN)
object XSError extends LogHelper(XSLogLevel.ERROR)
object XSPerf {
def apply(perfName: String, perfCnt: UInt)(implicit name: String) = {
val reset = true
val print_per_cycle = false
val print_gap_bits = 15
val counter = RegInit(0.U(64.W))
val next_counter = WireInit(0.U(64.W))
val logTimestamp = WireInit(0.U(64.W))
val enableDebug = Parameters.get.envParameters.EnableDebug
val logEnable = WireInit(false.B)
if (enableDebug) {
ExcitingUtils.addSink(logEnable, "DISPLAY_LOG_ENABLE")
if(!print_per_cycle) {
ExcitingUtils.addSink(logTimestamp, "logTimestamp")
next_counter := counter + perfCnt
when(logEnable && logTimestamp(print_gap_bits-1, 0) === 0.U) { // TODO: Need print when program exit?
if(reset) {
next_counter := perfCnt
XSLog(XSLogLevel.PERF)(true, true.B, p"$perfName, $counter\n")
}else{
XSLog(XSLogLevel.PERF)(true, true.B, p"$perfName, $next_counter\n")
}
}
counter := next_counter
}else{
when(logEnable) {
XSLog(XSLogLevel.PERF)(true, true.B, p"$perfName, $perfCnt\n")
}
}
}
}
}
......@@ -142,6 +142,8 @@ class BpuMeta extends XSBundle with HasBPUParameter {
val debug_btb_cycle = if (EnableBPUTimeRecord) UInt(64.W) else UInt(0.W)
val debug_tage_cycle = if (EnableBPUTimeRecord) UInt(64.W) else UInt(0.W)
val predictor = if (BPUDebug) UInt(log2Up(4).W) else UInt(0.W) // Mark which component this prediction comes from {ubtb, btb, tage, loopPredictor}
// def apply(histPtr: UInt, tageMeta: TageMeta, rasSp: UInt, rasTopCtr: UInt) = {
// this.histPtr := histPtr
// this.tageMeta := tageMeta
......@@ -160,7 +162,7 @@ class Predecode extends XSBundle with HasIFUConst {
val pd = Vec(PredictWidth, (new PreDecodeInfo))
}
class CfiUpdateInfo extends XSBundle {
class CfiUpdateInfo extends XSBundle with HasBPUParameter {
// from backend
val pc = UInt(VAddrBits.W)
val pnpc = UInt(VAddrBits.W)
......@@ -182,7 +184,7 @@ class CfiUpdateInfo extends XSBundle {
class CtrlFlow extends XSBundle {
val instr = UInt(32.W)
val pc = UInt(VAddrBits.W)
val exceptionVec = Vec(16, Bool())
val exceptionVec = ExceptionVec()
val intrVec = Vec(12, Bool())
val brUpdate = new CfiUpdateInfo
val crossPageIPFFix = Bool()
......@@ -241,6 +243,16 @@ class CfCtrl extends XSBundle {
val brTag = new BrqPtr
}
class PerfDebugInfo extends XSBundle {
// val fetchTime = UInt(64.W)
val renameTime = UInt(64.W)
val dispatchTime = UInt(64.W)
val issueTime = UInt(64.W)
val writebackTime = UInt(64.W)
// val commitTime = UInt(64.W)
}
// Separate LSQ
class LSIdx extends XSBundle {
val lqIdx = new LqPtr
val sqIdx = new SqPtr
......@@ -254,6 +266,7 @@ class MicroOp extends CfCtrl {
val lqIdx = new LqPtr
val sqIdx = new SqPtr
val diffTestDebugLrScValid = Bool()
val debugInfo = new PerfDebugInfo
}
class Redirect extends XSBundle {
......
......@@ -89,7 +89,9 @@ case class XSCoreParameters
StoreBufferSize: Int = 16,
RefillSize: Int = 512,
TlbEntrySize: Int = 32,
TlbSPEntrySize: Int = 4,
TlbL2EntrySize: Int = 256, // or 512
TlbL2SPEntrySize: Int = 16,
PtwL1EntrySize: Int = 16,
PtwL2EntrySize: Int = 256,
NumPerfCounters: Int = 16,
......@@ -165,7 +167,9 @@ trait HasXSParameter {
val RefillSize = core.RefillSize
val DTLBWidth = core.LoadPipelineWidth + core.StorePipelineWidth
val TlbEntrySize = core.TlbEntrySize
val TlbSPEntrySize = core.TlbSPEntrySize
val TlbL2EntrySize = core.TlbL2EntrySize
val TlbL2SPEntrySize = core.TlbL2SPEntrySize
val PtwL1EntrySize = core.PtwL1EntrySize
val PtwL2EntrySize = core.PtwL2EntrySize
val NumPerfCounters = core.NumPerfCounters
......@@ -183,32 +187,6 @@ trait HasXSParameter {
nMissEntries = 8
)
// icache prefetcher
val l1plusPrefetcherParameters = L1plusPrefetcherParameters(
enable = false,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
ageWidth = 4,
blockBytes = l1plusCacheParameters.blockBytes,
reallocStreamOnMissInstantly = true
)
)
// dcache prefetcher
val l2PrefetcherParameters = L2PrefetcherParameters(
enable = true,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
ageWidth = 4,
blockBytes = L2BlockSize,
reallocStreamOnMissInstantly = true
)
)
val dcacheParameters = DCacheParameters(
tagECC = Some("secded"),
dataECC = Some("secded"),
......@@ -240,6 +218,34 @@ trait HasXSParameter {
// on chip network configurations
val L3BusWidth = 256
// icache prefetcher
val l1plusPrefetcherParameters = L1plusPrefetcherParameters(
enable = true,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 2,
streamSize = 4,
ageWidth = 4,
blockBytes = l1plusCacheParameters.blockBytes,
reallocStreamOnMissInstantly = true,
cacheName = "icache"
)
)
// dcache prefetcher
val l2PrefetcherParameters = L2PrefetcherParameters(
enable = true,
_type = "stream",
streamParams = StreamPrefetchParameters(
streamCnt = 4,
streamSize = 4,
ageWidth = 4,
blockBytes = L2BlockSize,
reallocStreamOnMissInstantly = true,
cacheName = "dcache"
)
)
}
trait HasXSLog { this: RawModule =>
......@@ -271,7 +277,8 @@ abstract class XSBundle extends Bundle
case class EnviromentParameters
(
FPGAPlatform: Boolean = true,
EnableDebug: Boolean = false
EnableDebug: Boolean = false,
EnablePerfDebug: Boolean = false
)
// object AddressSpace extends HasXSParameter {
......
......@@ -237,4 +237,4 @@ class IntegerBlock
rf.addr := wb.bits.uop.pdest
rf.data := wb.bits.data
}
}
\ No newline at end of file
}
......@@ -250,6 +250,7 @@ class MemBlockImp
// LSQ to store buffer
lsq.io.sbuffer <> sbuffer.io.in
lsq.io.sqempty <> sbuffer.io.sqempty
// Sbuffer
sbuffer.io.dcache <> dcache.io.lsu.store
......
......@@ -5,6 +5,7 @@ import chisel3.util._
import xiangshan._
import utils._
import chisel3.ExcitingUtils._
import xiangshan.backend.JumpOpType
import xiangshan.backend.decode.ImmUnion
......@@ -75,7 +76,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val exuOut = new ExuOutput
}
val s_idle :: s_wb :: Nil = Enum(2)
val s_idle :: s_wb :: s_auipc_wb :: Nil = Enum(3)
class DecodeEnqBrqData extends Bundle {
val cfiUpdateInfo = new CfiUpdateInfo
......@@ -107,7 +108,9 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
/**
* write back
*/
val wbValid = stateQueue(writebackIdx) === s_wb
val wbState = stateQueue(writebackIdx)
val wbValid = wbState === s_wb
val wbIsAuipc = wbState === s_auipc_wb
val wbEntry = Wire(new ExuOutput)
val wbIsMisPred = wbEntry.redirect.target =/= wbEntry.brUpdate.pnpc
......@@ -115,9 +118,9 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
io.redirectOut.bits := wbEntry.redirect
io.redirectOut.bits.brTag := BrqPtr(ptrFlagVec(writebackIdx), writebackIdx)
io.out.valid := wbValid
io.out.valid := wbValid || wbIsAuipc
io.out.bits := wbEntry
when (wbValid) {
when (io.out.valid) {
stateQueue(writebackIdx) := s_idle
writebackPtr_next := writebackPtr + 1.U
}
......@@ -164,7 +167,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
/**
* exu write back
*/
for (exuWb <- io.exuRedirectWb) {
for ((exuWb, i) <- io.exuRedirectWb.zipWithIndex) {
when (exuWb.valid) {
val wbIdx = exuWb.bits.redirect.brTag.value
XSInfo(
......@@ -174,8 +177,14 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
p"target=${Hexadecimal(exuWb.bits.redirect.target)}\n"
)
assert(stateQueue(wbIdx) === s_idle)
stateQueue(wbIdx) := s_wb
if(i == 0){ // jump
stateQueue(wbIdx) := Mux(JumpOpType.jumpOpisAuipc(exuWb.bits.uop.ctrl.fuOpType),
s_auipc_wb,
s_wb
)
} else { // alu
stateQueue(wbIdx) := s_wb
}
}
}
......@@ -225,6 +234,7 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
mergeData.brUpdate.target := redirectTarget
mergeData.brUpdate.brTarget := redirectTarget
mergeData.brUpdate.taken := wb.brUpdate.taken
mergeData.brUpdate.bpuMeta.predictor:= wb.brUpdate.bpuMeta.predictor
mergeData
}
......@@ -305,6 +315,20 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val mbpRRight = predRight && isRType
val mbpRWrong = predWrong && isRType
val predictor = io.cfiInfo.bits.bpuMeta.predictor
val ubtbRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 0.U
val ubtbWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 0.U
val btbRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 1.U
val btbWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 1.U
val tageRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 2.U
val tageWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 2.U
val loopRight = !io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 3.U
val loopWrong = io.cfiInfo.bits.isMisPred && !io.cfiInfo.bits.isReplay && predictor === 3.U
if(!env.FPGAPlatform){
ExcitingUtils.addSource(mbpInstr, "perfCntCondBpInstr", Perf)
ExcitingUtils.addSource(mbpRight, "perfCntCondBpRight", Perf)
......@@ -317,5 +341,28 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
ExcitingUtils.addSource(mbpIWrong, "perfCntCondBpIWrong", Perf)
ExcitingUtils.addSource(mbpRRight, "perfCntCondBpRRight", Perf)
ExcitingUtils.addSource(mbpRWrong, "perfCntCondBpRWrong", Perf)
ExcitingUtils.addSource(ubtbRight, "perfCntubtbRight", Perf)
ExcitingUtils.addSource(ubtbWrong, "perfCntubtbWrong", Perf)
ExcitingUtils.addSource(btbRight, "perfCntbtbRight", Perf)
ExcitingUtils.addSource(btbWrong, "perfCntbtbWrong", Perf)
ExcitingUtils.addSource(tageRight, "perfCnttageRight", Perf)
ExcitingUtils.addSource(tageWrong, "perfCnttageWrong", Perf)
ExcitingUtils.addSource(loopRight, "perfCntloopRight", Perf)
ExcitingUtils.addSource(loopWrong, "perfCntloopWrong", Perf)
}
val utilization = Mux(headPtr.flag === tailPtr.flag, tailPtr.value - headPtr.value, BrqSize.U + tailPtr.value - headPtr.value)
XSPerf("utilization", utilization)
XSPerf("mbpInstr", PopCount(mbpInstr))
XSPerf("mbpRight", PopCount(mbpRight))
XSPerf("mbpWrong", PopCount(mbpWrong))
XSPerf("mbpBRight", PopCount(mbpBRight))
XSPerf("mbpBWrong", PopCount(mbpBWrong))
XSPerf("mbpJRight", PopCount(mbpJRight))
XSPerf("mbpJWrong", PopCount(mbpJWrong))
XSPerf("mbpIRight", PopCount(mbpIRight))
XSPerf("mbpIWrong", PopCount(mbpIWrong))
XSPerf("mbpRRight", PopCount(mbpRRight))
XSPerf("mbpRWrong", PopCount(mbpRWrong))
}
......@@ -5,6 +5,7 @@ import chisel3.util._
import xiangshan._
import xiangshan.backend.brq.BrqEnqIO
import utils._
import xiangshan.backend.decode.Instructions.{AUIPC, MRET, SRET}
class DecodeStage extends XSModule {
val io = IO(new Bundle() {
......@@ -31,12 +32,14 @@ class DecodeStage extends XSModule {
for (i <- 0 until DecodeWidth) {
decoders(i).io.enq.ctrl_flow <> io.in(i).bits
val isMret = io.in(i).bits.instr === BitPat("b001100000010_00000_000_00000_1110011")
val isSret = io.in(i).bits.instr === BitPat("b000100000010_00000_000_00000_1110011")
val thisBrqValid = !io.in(i).bits.brUpdate.pd.notCFI || isMret || isSret
val isMret = io.in(i).bits.instr === MRET
val isSret = io.in(i).bits.instr === SRET
val isAuiPc = io.in(i).bits.instr === AUIPC
val thisBrqValid = !io.in(i).bits.brUpdate.pd.notCFI || isMret || isSret || isAuiPc
io.enqBrq.needAlloc(i) := thisBrqValid
io.enqBrq.req(i).valid := io.in(i).valid && thisBrqValid && io.out(i).ready
io.enqBrq.req(i).bits := decoders(i).io.deq.cf_ctrl.cf
io.enqBrq.req(i).bits := io.in(i).bits
io.enqBrq.req(i).bits.instr := decoders(i).io.deq.cf_ctrl.cf.instr
io.out(i).valid := io.in(i).valid && io.enqBrq.req(i).ready
io.out(i).bits := decoders(i).io.deq.cf_ctrl
......
......@@ -135,7 +135,7 @@ object XDecode extends DecodeConstants {
REMW -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.div, MDUOpType.remw, Y, N, N, N, N, N, N, SelImm.IMM_X),
REMUW -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.div, MDUOpType.remuw, Y, N, N, N, N, N, N, SelImm.IMM_X),
AUIPC -> List(SrcType.pc, SrcType.imm, SrcType.DC, FuType.alu, ALUOpType.add, Y, N, N, N, N, N, N, SelImm.IMM_U),
AUIPC -> List(SrcType.pc , SrcType.imm, SrcType.DC, FuType.jmp, JumpOpType.auipc, Y, N, N, N, N, N, N, SelImm.IMM_U),
JAL -> List(SrcType.pc , SrcType.imm, SrcType.DC, FuType.jmp, JumpOpType.jal, Y, N, N, N, N, N, N, SelImm.IMM_UJ),
JALR -> List(SrcType.reg, SrcType.imm, SrcType.DC, FuType.jmp, JumpOpType.jalr, Y, N, N, N, N, N, N, SelImm.IMM_I),
BEQ -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.alu, ALUOpType.beq, N, N, N, N, N, N, N, SelImm.IMM_SB),
......@@ -458,10 +458,8 @@ class DecodeUnit extends XSModule with DecodeUnitConstants {
cs.ldest := Mux(cs.fpWen || cs.rfWen, ctrl_flow.instr(RD_MSB,RD_LSB), 0.U)
// fill in exception vector
cf_ctrl.cf.exceptionVec.map(_ := false.B)
cf_ctrl.cf.exceptionVec := io.enq.ctrl_flow.exceptionVec
cf_ctrl.cf.exceptionVec(illegalInstr) := cs.selImm === SelImm.INVALID_INSTR
cf_ctrl.cf.exceptionVec(instrPageFault) := io.enq.ctrl_flow.exceptionVec(instrPageFault)
cf_ctrl.cf.exceptionVec(instrAccessFault) := io.enq.ctrl_flow.exceptionVec(instrAccessFault)
// fix frflags
// fflags zero csrrs rd csr
......
......@@ -27,7 +27,7 @@ class FPDecoder extends XSModule{
FCVT_S_WU-> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_L -> List(N,s,s,Y,Y,Y,N,N,Y),
FCVT_S_LU-> List(N,s,s,Y,Y,Y,N,N,Y),
FMV_X_W -> List(N,s,X,N,N,N,N,N,N),
FMV_X_W -> List(N,d,X,N,N,N,N,N,N),
FCLASS_S -> List(N,s,X,N,N,N,N,N,N),
FCVT_W_S -> List(N,s,X,N,Y,N,N,N,Y),
FCVT_WU_S-> List(N,s,X,N,Y,N,N,N,Y),
......
......@@ -4,13 +4,14 @@ import chisel3._
import chisel3.util._
import chisel3.ExcitingUtils._
import xiangshan._
import utils.{XSDebug, XSError, XSInfo}
import utils._
import xiangshan.backend.roq.{RoqPtr, RoqEnqIO}
import xiangshan.backend.rename.RenameBypassInfo
import xiangshan.mem.LsqEnqIO
import xiangshan.backend.fu.HasExceptionNO
// read rob and enqueue
class Dispatch1 extends XSModule {
class Dispatch1 extends XSModule with HasExceptionNO {
val io = IO(new Bundle() {
// from rename
val fromRename = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
......@@ -45,7 +46,10 @@ class Dispatch1 extends XSModule {
*/
// valid bits for different dispatch queues
val isInt = VecInit(io.fromRename.map(req => FuType.isIntExu(req.bits.ctrl.fuType)))
val isBranch = VecInit(io.fromRename.map(req => !req.bits.cf.brUpdate.pd.notCFI))
val isBranch = VecInit(io.fromRename.map(req =>
// cover auipc (a fake branch)
!req.bits.cf.brUpdate.pd.notCFI || FuType.isJumpExu(req.bits.ctrl.fuType)
))
val isFp = VecInit(io.fromRename.map(req => FuType.isFpExu (req.bits.ctrl.fuType)))
val isLs = VecInit(io.fromRename.map(req => FuType.isMemExu(req.bits.ctrl.fuType)))
val isStore = VecInit(io.fromRename.map(req => FuType.isStoreExu(req.bits.ctrl.fuType)))
......@@ -113,6 +117,7 @@ class Dispatch1 extends XSModule {
// thisIsBlocked: this instruction is blocked by itself (based on noSpecExec)
// nextCanOut: next instructions can out (based on blockBackward)
// notBlockedByPrevious: previous instructions can enqueue
val hasException = VecInit(io.fromRename.map(r => selectFrontend(r.bits.cf.exceptionVec).asUInt.orR))
val thisIsBlocked = VecInit((0 until RenameWidth).map(i => {
// for i > 0, when Roq is empty but dispatch1 have valid instructions to enqueue, it's blocked
if (i > 0) isNoSpecExec(i) && (!io.enqRoq.isEmpty || Cat(io.fromRename.take(i).map(_.valid)).orR)
......@@ -153,17 +158,17 @@ class Dispatch1 extends XSModule {
// We use notBlockedByPrevious here.
io.toIntDq.needAlloc(i) := io.fromRename(i).valid && isInt(i)
io.toIntDq.req(i).bits := updatedUop(i)
io.toIntDq.req(i).valid := io.fromRename(i).valid && isInt(i) && thisCanActualOut(i) &&
io.toIntDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isInt(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toFpDq.canAccept && io.toLsDq.canAccept
io.toFpDq.needAlloc(i) := io.fromRename(i).valid && isFp(i)
io.toFpDq.req(i).bits := updatedUop(i)
io.toFpDq.req(i).valid := io.fromRename(i).valid && isFp(i) && thisCanActualOut(i) &&
io.toFpDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isFp(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toIntDq.canAccept && io.toLsDq.canAccept
io.toLsDq.needAlloc(i) := io.fromRename(i).valid && isLs(i)
io.toLsDq.req(i).bits := updatedUop(i)
io.toLsDq.req(i).valid := io.fromRename(i).valid && isLs(i) && thisCanActualOut(i) &&
io.toLsDq.req(i).valid := io.fromRename(i).valid && !hasException(i) && isLs(i) && thisCanActualOut(i) &&
io.enqLsq.canAccept && io.enqRoq.canAccept && io.toIntDq.canAccept && io.toFpDq.canAccept
XSDebug(io.toIntDq.req(i).valid, p"pc 0x${Hexadecimal(io.toIntDq.req(i).bits.cf.pc)} int index $i\n")
......@@ -193,4 +198,7 @@ class Dispatch1 extends XSModule {
PopCount(io.toFpDq.req.map(_.valid && io.toFpDq.canAccept)) +
PopCount(io.toLsDq.req.map(_.valid && io.toLsDq.canAccept))
XSError(enqFireCnt > renameFireCnt, "enqFireCnt should not be greater than renameFireCnt\n")
XSPerf("utilization", PopCount(io.fromRename.map(_.valid)))
XSPerf("waitInstr", PopCount((0 until RenameWidth).map(i => io.fromRename(i).valid && !io.recv(i))))
}
......@@ -125,4 +125,7 @@ class Dispatch2Fp extends XSModule {
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)}), " +
// p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n")
// }
XSPerf("utilization", PopCount(io.fromDq.map(_.valid)))
}
......@@ -88,6 +88,7 @@ class Dispatch2Int extends XSModule {
val src2Ready = VecInit((0 until 4).map(i => io.regRdy(i * 2 + 1)))
enq.bits.src1State := src1Ready(readPortIndex(i))
enq.bits.src2State := src2Ready(readPortIndex(i))
enq.bits.src3State := DontCare
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State}) " +
......@@ -134,4 +135,7 @@ class Dispatch2Int extends XSModule {
// p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
XSPerf("utilization", PopCount(io.fromDq.map(_.valid)))
}
......@@ -83,6 +83,7 @@ class Dispatch2Ls extends XSModule {
enq.bits.src2State := Mux(io.fromDq(indexVec(i)).bits.ctrl.src2Type === SrcType.fp,
io.fpRegRdy(i - exuParameters.LduCnt), io.intRegRdy(readPort(i) + 1))
}
enq.bits.src3State := DontCare
XSInfo(enq.fire(), p"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} " +
p"srcState(${enq.bits.src1State} ${enq.bits.src2State}) " +
......@@ -133,4 +134,8 @@ class Dispatch2Ls extends XSModule {
// p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
XSPerf("utilization", PopCount(io.fromDq.map(_.valid)))
XSPerf("waitInstr", PopCount(io.fromDq.map(r => r.valid && !r.ready)))
}
......@@ -204,4 +204,5 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int) extends XSModule with H
XSDebug(false, true.B, "\n")
XSError(isAfter(headPtr(0), tailPtr(0)), p"assert greaterOrEqualThan(tailPtr: ${tailPtr(0)}, headPtr: ${headPtr(0)}) failed\n")
XSPerf("utilization", PopCount(stateEntries.map(_ =/= s_invalid)))
}
......@@ -120,13 +120,21 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
def writebackArb(in: Seq[DecoupledIO[FuOutput]], out: DecoupledIO[ExuOutput]): Arbiter[FuOutput] = {
if (needArbiter) {
val arb = Module(new Arbiter(new FuOutput(in.head.bits.len), in.size))
arb.io.in <> in
arb.io.out.ready := out.ready
out.bits.data := arb.io.out.bits.data
out.bits.uop := arb.io.out.bits.uop
out.valid := arb.io.out.valid
arb
if(in.size == 1){
in.head.ready := out.ready
out.bits.data := in.head.bits.data
out.bits.uop := in.head.bits.uop
out.valid := in.head.valid
null
} else {
val arb = Module(new Arbiter(new FuOutput(in.head.bits.len), in.size))
arb.io.in <> in
arb.io.out.ready := out.ready
out.bits.data := arb.io.out.bits.data
out.bits.uop := arb.io.out.bits.uop
out.valid := arb.io.out.valid
arb
}
} else {
in.foreach(_.ready := out.ready)
val sel = Mux1H(in.map(x => x.valid -> x))
......
......@@ -14,8 +14,8 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
val toFpUnits = Seq(f2f, fdivSqrt)
val toIntUnits = Seq(f2i)
assert(fpArb.io.in.length == toFpUnits.size)
assert(intArb.io.in.length == toIntUnits.size)
assert(toFpUnits.size == 1 || fpArb.io.in.length == toFpUnits.size)
assert(toIntUnits.size == 1 || intArb.io.in.length == toIntUnits.size)
val input = io.fromFp
val isRVF = input.bits.uop.ctrl.isRVF
......
......@@ -56,15 +56,20 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
mulReq.size
)
val arbiters = for(i <- mulReq.indices) yield {
val other = arbReq(i).getOrElse(Seq())
val arb = Module(new Arbiter(new ExuOutput, 1+other.size))
arb.io.in <> mulReq(i) +: other
for(i <- mulReq.indices) {
val out = io.out(directConnect.size + i)
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
arb
val other = arbReq(i).getOrElse(Seq())
if(other.isEmpty){
out.valid := mulReq(i).valid
out.bits := mulReq(i).bits
mulReq(i).ready := true.B
} else {
val arb = Module(new Arbiter(new ExuOutput, 1+other.size))
arb.io.in <> mulReq(i) +: other
out.valid := arb.io.out.valid
out.bits := arb.io.out.bits
arb.io.out.ready := true.B
}
}
if(portUsed < numOut){
......@@ -78,10 +83,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
}
for(i <- mulReq.indices){
sb.append(s"[ ${cfgs(io.in.indexOf(mulReq(i))).name} ")
val useArb = arbReq(i).nonEmpty
for(req <- arbReq(i).getOrElse(Nil)){
sb.append(s"${cfgs(io.in.indexOf(req)).name} ")
}
sb.append(s"] -> arb -> out #${directConnect.size + i}\n")
sb.append(s"] -> ${if(useArb) "arb ->" else ""} out #${directConnect.size + i}\n")
}
println(sb)
......
......@@ -47,6 +47,62 @@ trait HasExceptionNO {
storeAddrMisaligned,
loadAddrMisaligned
)
val frontendSet = List(
// instrAddrMisaligned,
instrAccessFault,
illegalInstr,
instrPageFault
)
val csrSet = List(
illegalInstr,
breakPoint,
ecallU,
ecallS,
ecallM
)
val loadUnitSet = List(
loadAddrMisaligned,
loadAccessFault,
loadPageFault
)
val storeUnitSet = List(
storeAddrMisaligned,
storeAccessFault,
storePageFault
)
val atomicsUnitSet = (loadUnitSet ++ storeUnitSet).distinct
val allPossibleSet = (frontendSet ++ csrSet ++ loadUnitSet ++ storeUnitSet).distinct
def partialSelect(vec: Vec[Bool], select: Seq[Int], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] = {
if (dontCareBits) {
val new_vec = Wire(ExceptionVec())
new_vec := DontCare
select.map(i => new_vec(i) := vec(i))
return new_vec
}
else if (falseBits) {
val new_vec = Wire(ExceptionVec())
new_vec.map(_ := false.B)
select.map(i => new_vec(i) := vec(i))
return new_vec
}
else {
val new_vec = Wire(Vec(select.length, Bool()))
select.zipWithIndex.map{ case(s, i) => new_vec(i) := vec(s) }
return new_vec
}
}
def selectFrontend(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, frontendSet, dontCareBits, falseBits)
def selectCSR(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, csrSet, dontCareBits, falseBits)
def selectLoad(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, loadUnitSet, dontCareBits, falseBits)
def selectStore(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, storeUnitSet, dontCareBits, falseBits)
def selectAtomics(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, atomicsUnitSet, dontCareBits, falseBits)
def selectAll(vec: Vec[Bool], dontCareBits: Boolean = true, falseBits: Boolean = false): Vec[Bool] =
partialSelect(vec, allPossibleSet, dontCareBits, falseBits)
}
class FpuCsrIO extends XSBundle {
......@@ -166,7 +222,7 @@ class CSR extends FunctionUnit with HasCSRConst
if (HasFPU) { extList = extList ++ List('f', 'd') }
val misaInitVal = getMisaMxl(2) | extList.foldLeft(0.U)((sum, i) => sum | getMisaExt(i)) //"h8000000000141105".U
val misa = RegInit(UInt(XLEN.W), misaInitVal)
// MXL = 2 | 0 | EXT = b 00 0000 0100 0001 0001 0000 0101
// (XLEN-1, XLEN-2) | |(25, 0) ZY XWVU TSRQ PONM LKJI HGFE DCBA
......@@ -176,7 +232,7 @@ class CSR extends FunctionUnit with HasCSRConst
val mhartNo = hartId()
val mhartid = RegInit(UInt(XLEN.W), mhartNo.asUInt) // the hardware thread running the code
val mstatus = RegInit(UInt(XLEN.W), "h00001800".U) // another option: "h8000c0100".U
// mstatus Value Table
// | sd |
// | pad1 |
......@@ -196,7 +252,7 @@ class CSR extends FunctionUnit with HasCSRConst
// | spp | 0 |
// | pie | 0000 | pie.h is used as UBE
// | ie | 0000 | uie hardlinked to 0, as N ext is not implemented
val mstatusStruct = mstatus.asTypeOf(new MstatusStruct)
def mstatusUpdateSideEffect(mstatus: UInt): UInt = {
val mstatusOld = WireInit(mstatus.asTypeOf(new MstatusStruct))
......@@ -318,11 +374,11 @@ class CSR extends FunctionUnit with HasCSRConst
// Emu perfcnt
val hasEmuPerfCnt = !env.FPGAPlatform
val nrEmuPerfCnts = if (hasEmuPerfCnt) 0x80 else 0x3
val emuPerfCnts = List.fill(nrEmuPerfCnts)(RegInit(0.U(XLEN.W)))
val emuPerfCntCond = List.fill(nrEmuPerfCnts)(WireInit(false.B))
(emuPerfCnts zip emuPerfCntCond).map { case (c, e) => when (e) { c := c + 1.U } }
val emuPerfCntsLoMapping = (0 until nrEmuPerfCnts).map(i => MaskedRegMap(0x1000 + i, emuPerfCnts(i)))
val emuPerfCntsHiMapping = (0 until nrEmuPerfCnts).map(i => MaskedRegMap(0x1080 + i, emuPerfCnts(i)(63, 32)))
println(s"CSR: hasEmuPerfCnt:${hasEmuPerfCnt}")
......@@ -336,7 +392,7 @@ class CSR extends FunctionUnit with HasCSRConst
mcycle := mcycle + 1.U
val minstret = RegInit(0.U(XLEN.W))
minstret := minstret + RegNext(csrio.perf.retiredInstr)
// CSR reg map
val basicPrivMapping = Map(
......@@ -424,11 +480,11 @@ class CSR extends FunctionUnit with HasCSRConst
val mapping = basicPrivMapping ++
perfCntMapping ++
pmpMapping ++
emuPerfCntsLoMapping ++
pmpMapping ++
emuPerfCntsLoMapping ++
(if (XLEN == 32) emuPerfCntsHiMapping else Nil) ++
(if (HasFPU) fcsrMapping else Nil)
val addr = src2(11, 0)
val csri = src2(16, 12)
val rdata = Wire(UInt(XLEN.W))
......@@ -580,6 +636,17 @@ class CSR extends FunctionUnit with HasCSRConst
io.in.ready := true.B
io.out.valid := valid
val csrExceptionVec = WireInit(cfIn.exceptionVec)
csrExceptionVec(breakPoint) := io.in.valid && isEbreak
csrExceptionVec(ecallM) := priviledgeMode === ModeM && io.in.valid && isEcall
csrExceptionVec(ecallS) := priviledgeMode === ModeS && io.in.valid && isEcall
csrExceptionVec(ecallU) := priviledgeMode === ModeU && io.in.valid && isEcall
// Trigger an illegal instr exception when:
// * unimplemented csr is being read/written
// * csr access is illegal
csrExceptionVec(illegalInstr) := (isIllegalAddr || isIllegalAccess) && wen
cfOut.exceptionVec := csrExceptionVec
/**
* Exception and Intr
*/
......@@ -613,25 +680,7 @@ class CSR extends FunctionUnit with HasCSRConst
val hasLoadAccessFault = csrio.exception.bits.cf.exceptionVec(loadAccessFault) && raiseException
val hasStoreAccessFault = csrio.exception.bits.cf.exceptionVec(storeAccessFault) && raiseException
val csrExceptionVec = Wire(Vec(16, Bool()))
csrExceptionVec.map(_ := false.B)
csrExceptionVec(breakPoint) := io.in.valid && isEbreak
csrExceptionVec(ecallM) := priviledgeMode === ModeM && io.in.valid && isEcall
csrExceptionVec(ecallS) := priviledgeMode === ModeS && io.in.valid && isEcall
csrExceptionVec(ecallU) := priviledgeMode === ModeU && io.in.valid && isEcall
// Trigger an illegal instr exception when:
// * unimplemented csr is being read/written
// * csr access is illegal
csrExceptionVec(illegalInstr) := (isIllegalAddr || isIllegalAccess) && wen
csrExceptionVec(loadPageFault) := hasLoadPageFault
csrExceptionVec(storePageFault) := hasStorePageFault
csrExceptionVec(loadAccessFault) := hasLoadAccessFault
csrExceptionVec(storeAccessFault) := hasStoreAccessFault
val iduExceptionVec = cfIn.exceptionVec
val exceptionVec = csrExceptionVec.asUInt() | iduExceptionVec.asUInt()
cfOut.exceptionVec.zipWithIndex.map{case (e, i) => e := exceptionVec(i) }
val raiseExceptionVec = csrio.exception.bits.cf.exceptionVec.asUInt()
val raiseExceptionVec = csrio.exception.bits.cf.exceptionVec
val exceptionNO = ExcPriority.foldRight(0.U)((i: Int, sum: UInt) => Mux(raiseExceptionVec(i), i.U, sum))
val causeNO = (raiseIntr << (XLEN-1)).asUInt() | Mux(raiseIntr, intrNO, exceptionNO)
......@@ -743,27 +792,36 @@ class CSR extends FunctionUnit with HasCSRConst
"PtwL2TlbHit" -> (0x1027, "perfCntPtwL2TlbHit" ),
"ICacheReq" -> (0x1028, "perfCntIcacheReqCnt" ),
"ICacheMiss" -> (0x1029, "perfCntIcacheMissCnt"),
"ICacheMMIO" -> (0x102a, "perfCntIcacheMMIOCnt")
"ICacheMMIO" -> (0x102a, "perfCntIcacheMMIOCnt"),
// "FetchFromLoopBuffer" -> (0x102b, "CntFetchFromLoopBuffer"),
// "ExitLoop1" -> (0x102c, "CntExitLoop1"),
// "ExitLoop2" -> (0x102d, "CntExitLoop2"),
// "ExitLoop3" -> (0x102e, "CntExitLoop3")
"ubtbRight" -> (0x1030, "perfCntubtbRight"),
"ubtbWrong" -> (0x1031, "perfCntubtbWrong"),
"btbRight" -> (0x1032, "perfCntbtbRight"),
"btbWrong" -> (0x1033, "perfCntbtbWrong"),
"tageRight" -> (0x1034, "perfCnttageRight"),
"tageWrong" -> (0x1035, "perfCnttageWrong"),
"loopRight" -> (0x1036, "perfCntloopRight"),
"loopWrong" -> (0x1037, "perfCntloopWrong")
// "L2cacheHit" -> (0x1023, "perfCntCondL2cacheHit")
) ++ (
(0 until dcacheParameters.nMissEntries).map(i =>
("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x102d + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
(0 until dcacheParameters.nMissEntries).map(i =>
("DCacheMissQueuePenalty" + Integer.toString(i, 10), (0x102a + i, "perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until icacheParameters.nMissEntries).map(i =>
("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x102d + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
("ICacheMissQueuePenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + i, "perfCntICacheMissQueuePenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until l1plusPrefetcherParameters.nEntries).map(i =>
("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x102d + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10)))
("L1+PrefetchPenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + i, "perfCntL1plusPrefetchPenaltyEntry" + Integer.toString(i, 10)))
).toMap
) ++ (
(0 until l2PrefetcherParameters.nEntries).map(i =>
("L2PrefetchPenalty" + Integer.toString(i, 10), (0x102d + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10)))
("L2PrefetchPenalty" + Integer.toString(i, 10), (0x102a + dcacheParameters.nMissEntries + icacheParameters.nMissEntries + l1plusPrefetcherParameters.nEntries + i, "perfCntL2PrefetchPenaltyEntry" + Integer.toString(i, 10)))
).toMap
)
......
......@@ -25,10 +25,13 @@ class Jump extends FunctionUnit with HasRedirectOut {
io.in.bits.uop
)
val offset = SignExt(Mux(JumpOpType.jumpOpIsJal(func),
ImmUnion.J.toImm32(immMin),
ImmUnion.I.toImm32(immMin)
), XLEN)
val isJalr = JumpOpType.jumpOpisJalr(func)
val isAuipc = JumpOpType.jumpOpisAuipc(func)
val offset = SignExt(Mux1H(Seq(
isJalr -> ImmUnion.I.toImm32(immMin),
isAuipc -> ImmUnion.U.toImm32(immMin),
!(isJalr || isAuipc) -> ImmUnion.J.toImm32(immMin)
)), XLEN)
val redirectHit = uop.roqIdx.needFlush(io.redirectIn)
val valid = io.in.valid
......@@ -53,7 +56,7 @@ class Jump extends FunctionUnit with HasRedirectOut {
brUpdate.taken := true.B
// Output
val res = snpc
val res = Mux(JumpOpType.jumpOpisAuipc(func), target, snpc)
io.in.ready := io.out.ready
io.out.valid := valid
......
......@@ -12,14 +12,16 @@ class FPToFP extends FPUPipelineModule{
override def latency: Int = FunctionUnit.f2iCfg.latency.latencyVal.get
val ctrl = io.in.bits.uop.ctrl.fpu
val ctrlIn = io.in.bits.uop.ctrl.fpu
val ctrl = S1Reg(ctrlIn)
val inTag = ctrl.typeTagIn
val outTag = ctrl.typeTagOut
val src1 = unbox(io.in.bits.src(0), inTag, None)
val src2 = unbox(io.in.bits.src(1), inTag, None)
val wflags = ctrl.wflags
val src1 = S1Reg(unbox(io.in.bits.src(0), ctrlIn.typeTagIn, None))
val src2 = S1Reg(unbox(io.in.bits.src(1), ctrlIn.typeTagIn, None))
val rmReg = S1Reg(rm)
val signNum = Mux(rm(1), src1 ^ src2, Mux(rm(0), ~src2, src2))
val signNum = Mux(rmReg(1), src1 ^ src2, Mux(rmReg(0), ~src2, src2))
val fsgnj = Cat(signNum(fLen), src1(fLen-1, 0))
val fsgnjMux = Wire(new Bundle() {
......@@ -32,7 +34,7 @@ class FPToFP extends FPUPipelineModule{
val dcmp = Module(new CompareRecFN(maxExpWidth, maxSigWidth))
dcmp.io.a := src1
dcmp.io.b := src2
dcmp.io.signaling := !rm(1)
dcmp.io.signaling := !rmReg(1)
val lt = dcmp.io.lt || (dcmp.io.a.asSInt() < 0.S && dcmp.io.b.asSInt() >= 0.S)
......@@ -41,7 +43,7 @@ class FPToFP extends FPUPipelineModule{
val isnan2 = maxType.isNaN(src2)
val isInvalid = maxType.isSNaN(src1) || maxType.isSNaN(src2)
val isNaNOut = isnan1 && isnan2
val isLHS = isnan2 || rm(0) =/= lt && !isnan1
val isLHS = isnan2 || rmReg(0) =/= lt && !isnan1
fsgnjMux.exc := isInvalid << 4
fsgnjMux.data := Mux(isNaNOut, maxType.qNaN, Mux(isLHS, src1, src2))
}
......@@ -67,7 +69,7 @@ class FPToFP extends FPUPipelineModule{
when(outTag === typeTag(outType).U && (typeTag(outType) == 0).B || (outTag < inTag)){
val narrower = Module(new hardfloat.RecFNToRecFN(maxType.exp, maxType.sig, outType.exp, outType.sig))
narrower.io.in := src1
narrower.io.roundingMode := rm
narrower.io.roundingMode := rmReg
narrower.io.detectTininess := hardfloat.consts.tininess_afterRounding
val narrowed = sanitizeNaN(narrower.io.out, outType)
mux.data := Cat(fsgnjMux.data >> narrowed.getWidth, narrowed)
......@@ -77,11 +79,6 @@ class FPToFP extends FPUPipelineModule{
}
}
var resVec = Seq(mux)
for(i <- 1 to latency){
resVec = resVec :+ PipelineReg(i)(resVec(i-1))
}
io.out.bits.data := resVec.last.data
fflags := resVec.last.exc
io.out.bits.data := S2Reg(mux.data)
fflags := S2Reg(mux.exc)
}
......@@ -18,30 +18,37 @@ class FPToInt extends FPUPipelineModule {
val ctrl = io.in.bits.uop.ctrl.fpu
val src1_s = unbox(src1, S, Some(FType.S))
val src1_d = unbox(src1, ctrl.typeTagIn, None)
val src2_d = unbox(src2, ctrl.typeTagIn, None)
val src1_ieee = ieee(src1)
val move_out = Mux(ctrl.typeTagIn === S, src1_ieee(31, 0), src1_ieee)
// stage 1: unbox inputs
val src1_d = S1Reg(unbox(src1, ctrl.typeTagIn, None))
val src2_d = S1Reg(unbox(src2, ctrl.typeTagIn, None))
val ctrl_reg = S1Reg(ctrl)
val rm_reg = S1Reg(rm)
// stage2
val src1_ieee = ieee(src1_d)
val move_out = Mux(ctrl_reg.typeTagIn === S,
src1_ieee(FType.S.ieeeWidth - 1, 0),
src1_ieee
)
val classify_out = Mux(ctrl.typeTagIn === S,
FType.S.classify(src1_s),
FType.D.classify(src1)
val classify_out = Mux(ctrl_reg.typeTagIn === S,
FType.S.classify(maxType.unsafeConvert(src1_d, FType.S)),
FType.D.classify(src1_d)
)
val dcmp = Module(new hardfloat.CompareRecFN(maxExpWidth, maxSigWidth))
dcmp.io.a := src1_d
dcmp.io.b := src2_d
dcmp.io.signaling := !rm(1)
dcmp.io.signaling := !rm_reg(1)
val dcmp_out = ((~rm).asUInt() & Cat(dcmp.io.lt, dcmp.io.eq)).orR()
val dcmp_out = ((~rm_reg).asUInt() & Cat(dcmp.io.lt, dcmp.io.eq)).orR()
val dcmp_exc = dcmp.io.exceptionFlags
val conv = Module(new RecFNToIN(maxExpWidth, maxSigWidth, XLEN))
conv.io.in := src1_d
conv.io.roundingMode := rm
conv.io.signedOut := ~ctrl.typ(0)
conv.io.roundingMode := rm_reg
conv.io.signedOut := ~ctrl_reg.typ(0)
val conv_out = WireInit(conv.io.out)
val conv_exc = WireInit(Cat(
......@@ -52,10 +59,10 @@ class FPToInt extends FPUPipelineModule {
val narrow = Module(new RecFNToIN(maxExpWidth, maxSigWidth, 32))
narrow.io.in := src1_d
narrow.io.roundingMode := rm
narrow.io.signedOut := ~ctrl.typ(0)
narrow.io.roundingMode := rm_reg
narrow.io.signedOut := ~ctrl_reg.typ(0)
when(!ctrl.typ(1)) { // fcvt.w/wu.fp
when(!ctrl_reg.typ(1)) { // fcvt.w/wu.fp
val excSign = src1_d(maxExpWidth + maxSigWidth) && !maxType.isNaN(src1_d)
val excOut = Cat(conv.io.signedOut === excSign, Fill(32 - 1, !excSign))
val invalid = conv.io.intExceptionFlags(2) || narrow.io.intExceptionFlags(1)
......@@ -67,26 +74,18 @@ class FPToInt extends FPUPipelineModule {
val intData = Wire(UInt(XLEN.W))
intData := Mux(ctrl.wflags,
Mux(ctrl.fcvt, conv_out, dcmp_out),
Mux(rm(0), classify_out, move_out)
intData := Mux(ctrl_reg.wflags,
Mux(ctrl_reg.fcvt, conv_out, dcmp_out),
Mux(rm_reg(0), classify_out, move_out)
)
val doubleOut = Mux(ctrl.fcvt, ctrl.typ(1), ctrl.fmt(0))
val intValue = Mux(doubleOut,
val doubleOut = Mux(ctrl_reg.fcvt, ctrl_reg.typ(1), ctrl_reg.fmt(0))
val intValue = S2Reg(Mux(doubleOut,
SignExt(intData, XLEN),
SignExt(intData(31, 0), XLEN)
)
val exc = Mux(ctrl.fcvt, conv_exc, dcmp_exc)
var dataVec = Seq(intValue)
var excVec = Seq(exc)
))
for (i <- 1 to latency) {
dataVec = dataVec :+ PipelineReg(i)(dataVec(i - 1))
excVec = excVec :+ PipelineReg(i)(excVec(i - 1))
}
val exc = S2Reg(Mux(ctrl_reg.fcvt, conv_exc, dcmp_exc))
io.out.bits.data := dataVec.last
fflags := excVec.last
io.out.bits.data := intValue
fflags := exc
}
......@@ -4,7 +4,8 @@ import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.decode.ImmUnion
import xiangshan.backend.SelImm
import xiangshan.backend.decode.{ImmUnion, Imm_U}
import xiangshan.backend.exu.{Exu, ExuConfig}
import xiangshan.backend.regfile.RfReadPort
......@@ -435,10 +436,10 @@ class ReservationStationData
io.srcRegValue(0)
)
dataWrite(enqPtrReg, 0, src1Mux)
// TODO: opt this, a full map is not necesscary here
val imm32 = LookupTree(
enqUopReg.ctrl.selImm,
ImmUnion.immSelMap.map(x => x._1 -> x._2.toImm32(enqUopReg.ctrl.imm))
// alu only need U type and I type imm
val imm32 = Mux(enqUopReg.ctrl.selImm === SelImm.IMM_U,
ImmUnion.U.toImm32(enqUopReg.ctrl.imm),
ImmUnion.I.toImm32(enqUopReg.ctrl.imm)
)
val imm64 = SignExt(imm32, XLEN)
val src2Mux = Mux(enqUopReg.ctrl.src2Type === SrcType.imm,
......@@ -500,6 +501,7 @@ class ReservationStationData
val exuInput = io.deq.bits
exuInput := DontCare
exuInput.uop := uop(deq)
exuInput.uop.cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
val regValues = List.tabulate(srcNum)(i => dataRead(Mux(sel.valid, sel.bits, deq), i))
XSDebug(io.deq.fire(), p"[regValues] " + List.tabulate(srcNum)(idx => p"reg$idx: ${Hexadecimal(regValues(idx))}").reduce((p1, p2) => p1 + " " + p2) + "\n")
exuInput.src1 := regValues(0)
......@@ -541,6 +543,7 @@ class ReservationStationData
bpQueue.io.redirect := io.redirect
io.selectedUop.valid := bpQueue.io.out.valid
io.selectedUop.bits := bpQueue.io.out.bits
io.selectedUop.bits.cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
XSDebug(io.selectedUop.valid, p"SelUop: pc:0x${Hexadecimal(io.selectedUop.bits.cf.pc)}" +
p" roqIdx:${io.selectedUop.bits.roqIdx} pdest:${io.selectedUop.bits.pdest} " +
......
......@@ -17,12 +17,13 @@ package object backend {
// jump
object JumpOpType {
def jal = "b11_000".U
def jalr = "b11_010".U
def jal = "b00".U
def jalr = "b01".U
def auipc = "b10".U
// def call = "b11_011".U
// def ret = "b11_100".U
def jumpOpIsJal(op: UInt) = !op(1)
def jumpOpisJalr(op: UInt) = op(1)
def jumpOpisJalr(op: UInt) = op(0)
def jumpOpisAuipc(op: UInt) = op(1)
}
object FenceOpType {
......
......@@ -3,7 +3,7 @@ package xiangshan.backend.rename
import chisel3._
import chisel3.util._
import xiangshan._
import utils.XSInfo
import utils._
class RenameBypassInfo extends XSBundle {
val lsrc1_bypass = MixedVec(List.tabulate(RenameWidth-1)(i => UInt((i+1).W)))
......@@ -77,6 +77,7 @@ class Rename extends XSModule {
uop.src3State := DontCare
uop.roqIdx := DontCare
uop.diffTestDebugLrScValid := DontCare
uop.debugInfo := DontCare
uop.lqIdx := DontCare
uop.sqIdx := DontCare
})
......
......@@ -54,12 +54,9 @@ class RoqEnqIO extends XSBundle {
class RoqDispatchData extends RoqCommitInfo {
val crossPageIPFFix = Bool()
val exceptionVec = Vec(16, Bool())
}
class RoqWbData extends XSBundle {
// mostly for exceptions
val exceptionVec = Vec(16, Bool())
val fflags = UInt(5.W)
val flushPipe = Bool()
}
......@@ -70,7 +67,7 @@ class RoqDeqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
val state = Input(UInt(2.W))
val deq_v = Vec(CommitWidth, Input(Bool()))
val deq_w = Vec(CommitWidth, Input(Bool()))
val deq_exceptionVec = Vec(CommitWidth, Input(UInt(16.W)))
val deq_exceptionVec = Vec(CommitWidth, Input(ExceptionVec()))
val deq_flushPipe = Vec(CommitWidth, Input(Bool()))
// for flush: when exception occurs, reset deqPtrs to range(0, CommitWidth)
val intrBitSetReg = Input(Bool())
......@@ -83,15 +80,16 @@ class RoqDeqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
val deqPtrVec = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new RoqPtr))))
val possibleException = VecInit(io.deq_exceptionVec.map(selectAll(_, false)))
// for exceptions (flushPipe included) and interrupts:
// only consider the first instruction
val intrEnable = io.intrBitSetReg && !io.hasNoSpecExec && !CommitType.isLoadStore(io.commitType)
val exceptionEnable = io.deq_w(0) && (io.deq_exceptionVec(0).orR || io.deq_flushPipe(0))
val exceptionEnable = io.deq_w(0) && (possibleException(0).asUInt.orR || io.deq_flushPipe(0))
val redirectOutValid = io.state === 0.U && io.deq_v(0) && (intrEnable || exceptionEnable)
// for normal commits: only to consider when there're no exceptions
// we don't need to consider whether the first instruction has exceptions since it wil trigger exceptions.
val commitBlocked = VecInit((0 until CommitWidth).map(i => if (i == 0) false.B else io.deq_exceptionVec(i).orR || io.deq_flushPipe(i)))
val commitBlocked = VecInit((0 until CommitWidth).map(i => if (i == 0) false.B else possibleException(i).asUInt.orR || io.deq_flushPipe(i)))
val canCommit = VecInit((0 until CommitWidth).map(i => io.deq_v(i) && io.deq_w(i) && !commitBlocked(i)))
val normalCommitCnt = PriorityEncoder(canCommit.map(c => !c) :+ true.B)
// when io.intrBitSetReg, only one instruction is allowed to commit
......@@ -118,7 +116,7 @@ class RoqEnqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
val state = Input(UInt(2.W))
val deq_v = Input(Bool())
val deq_w = Input(Bool())
val deq_exceptionVec = Input(UInt(16.W))
val deq_exceptionVec = Input(ExceptionVec())
val deq_flushPipe = Input(Bool())
val intrBitSetReg = Input(Bool())
val hasNoSpecExec = Input(Bool())
......@@ -137,7 +135,7 @@ class RoqEnqPtrWrapper extends XSModule with HasCircularQueuePtrHelper {
// for exceptions (flushPipe included) and interrupts:
// only consider the first instruction
val intrEnable = io.intrBitSetReg && !io.hasNoSpecExec && !CommitType.isLoadStore(io.commitType)
val exceptionEnable = io.deq_w && (io.deq_exceptionVec.orR || io.deq_flushPipe)
val exceptionEnable = io.deq_w && (selectAll(io.deq_exceptionVec, false).asUInt.orR || io.deq_flushPipe)
val redirectOutValid = io.state === 0.U && io.deq_v && (intrEnable || exceptionEnable)
// enqueue
......@@ -264,28 +262,8 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val writebackData = Module(new SyncDataModuleTemplate(new RoqWbData, RoqSize, CommitWidth, numWbPorts))
val writebackDataRead = writebackData.io.rdata
// Merge the exception vector captured at dispatch time (dpData) with the one
// produced at writeback time (wbData) into a single per-instruction vector.
// Dispatch-time bits are the default; bits that can only be known after
// execution are overridden from the writeback copy.
// NOTE(review): this SOURCE is a diff view — this helper appears to be the
// version being removed in favour of a dedicated exceptionData module; confirm
// against the full commit before relying on it.
def mergeExceptionVec(dpData: RoqDispatchData, wbData: RoqWbData) = {
  // these exceptions can be determined before dispatch.
  // by default, let all exceptions be determined by dispatch.
  // mergeVec(instrAddrMisaligned) := dpData(instrAddrMisaligned)
  // mergeVec(instrAccessFault) := dpData(instrAccessFault)
  // mergeVec(instrPageFault) := dpData(instrPageFault)
  // Start from the dispatch-time vector; unlisted bits keep their dispatch value.
  val mergeVec = WireInit(dpData.exceptionVec)
  // these exceptions are determined in execution units, so take them from
  // the writeback-time vector instead.
  mergeVec(illegalInstr) := wbData.exceptionVec(illegalInstr)
  mergeVec(breakPoint) := wbData.exceptionVec(breakPoint)
  mergeVec(loadAddrMisaligned) := wbData.exceptionVec(loadAddrMisaligned)
  mergeVec(loadAccessFault) := wbData.exceptionVec(loadAccessFault)
  mergeVec(storeAddrMisaligned) := wbData.exceptionVec(storeAddrMisaligned)
  mergeVec(storeAccessFault) := wbData.exceptionVec(storeAccessFault)
  mergeVec(ecallU) := wbData.exceptionVec(ecallU)
  mergeVec(ecallS) := wbData.exceptionVec(ecallS)
  mergeVec(ecallM) := wbData.exceptionVec(ecallM)
  mergeVec(loadPageFault) := wbData.exceptionVec(loadPageFault)
  mergeVec(storePageFault) := wbData.exceptionVec(storePageFault)
  // returns the merged exception vector
  mergeVec
}
val exceptionVecWritePortNum = RenameWidth + 1 + 2 + 2 // CSR, 2*load, 2*store
val exceptionData = Module(new SyncDataModuleTemplate(ExceptionVec(), RoqSize, CommitWidth, exceptionVecWritePortNum))
io.roqDeqPtr := deqPtr
......@@ -337,6 +315,8 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
debug_microOp(wbIdx).diffTestDebugLrScValid := io.exeWbResults(i).bits.uop.diffTestDebugLrScValid
debug_exuData(wbIdx) := io.exeWbResults(i).bits.data
debug_exuDebug(wbIdx) := io.exeWbResults(i).bits.debug
debug_microOp(wbIdx).debugInfo.issueTime := io.exeWbResults(i).bits.uop.debugInfo.issueTime
debug_microOp(wbIdx).debugInfo.writebackTime := io.exeWbResults(i).bits.uop.debugInfo.writebackTime
val debug_Uop = debug_microOp(wbIdx)
XSInfo(true.B,
......@@ -357,7 +337,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val deqWritebackData = writebackDataRead(0)
val debug_deqUop = debug_microOp(deqPtr.value)
val deqExceptionVec = mergeExceptionVec(deqDispatchData, deqWritebackData)
val deqExceptionVec = exceptionData.io.rdata(0)
// For MMIO instructions, they should not trigger interrupts since they may be sent to lower level before it writes back.
// However, we cannot determine whether a load/store instruction is MMIO.
// Thus, we don't allow load/store instructions to trigger an interrupt.
......@@ -419,7 +399,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.commits.isWalk := state =/= s_idle
val commit_v = Mux(state === s_idle, VecInit(deqPtrVec.map(ptr => valid(ptr.value))), VecInit(walkPtrVec.map(ptr => valid(ptr.value))))
val commit_w = VecInit(deqPtrVec.map(ptr => writebacked(ptr.value)))
val commit_exception = dispatchDataRead.zip(writebackDataRead).map{ case (d, w) => mergeExceptionVec(d, w).asUInt.orR }
val commit_exception = exceptionData.io.rdata.map(_.asUInt.orR)
val commit_block = VecInit((0 until CommitWidth).map(i => !commit_w(i) || commit_exception(i) || writebackDataRead(i).flushPipe))
for (i <- 0 until CommitWidth) {
// defaults: state === s_idle and instructions commit
......@@ -493,7 +473,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
deqPtrGenModule.io.state := state
deqPtrGenModule.io.deq_v := commit_v
deqPtrGenModule.io.deq_w := commit_w
deqPtrGenModule.io.deq_exceptionVec := VecInit(dispatchDataRead.zip(writebackDataRead).map{ case (d, w) => mergeExceptionVec(d, w).asUInt })
deqPtrGenModule.io.deq_exceptionVec := exceptionData.io.rdata
deqPtrGenModule.io.deq_flushPipe := writebackDataRead.map(_.flushPipe)
deqPtrGenModule.io.intrBitSetReg := intrBitSetReg
deqPtrGenModule.io.hasNoSpecExec := hasNoSpecExec
......@@ -505,7 +485,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
enqPtrGenModule.io.state := state
enqPtrGenModule.io.deq_v := commit_v(0)
enqPtrGenModule.io.deq_w := commit_w(0)
enqPtrGenModule.io.deq_exceptionVec := deqExceptionVec.asUInt
enqPtrGenModule.io.deq_exceptionVec := deqExceptionVec
enqPtrGenModule.io.deq_flushPipe := writebackDataRead(0).flushPipe
enqPtrGenModule.io.intrBitSetReg := intrBitSetReg
enqPtrGenModule.io.hasNoSpecExec := hasNoSpecExec
......@@ -598,7 +578,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
// enqueue logic set 6 writebacked to false
for (i <- 0 until RenameWidth) {
when (canEnqueue(i)) {
writebacked(enqPtrVec(i).value) := false.B
writebacked(enqPtrVec(i).value) := selectFrontend(io.enq.req(i).bits.cf.exceptionVec, false).asUInt.orR
}
}
// writeback logic set numWbPorts writebacked to true
......@@ -639,19 +619,41 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
wdata.sqIdx := req.sqIdx
wdata.pc := req.cf.pc
wdata.crossPageIPFFix := req.cf.crossPageIPFFix
wdata.exceptionVec := req.cf.exceptionVec
// wdata.exceptionVec := req.cf.exceptionVec
}
dispatchData.io.raddr := commitReadAddr_next
writebackData.io.wen := io.exeWbResults.map(_.valid)
writebackData.io.waddr := io.exeWbResults.map(_.bits.uop.roqIdx.value)
writebackData.io.wdata.zip(io.exeWbResults.map(_.bits)).map{ case (wdata, wb) =>
wdata.exceptionVec := wb.uop.cf.exceptionVec
wdata.fflags := wb.fflags
wdata.flushPipe := wb.uop.ctrl.flushPipe
}
writebackData.io.raddr := commitReadAddr_next
for (i <- 0 until RenameWidth) {
exceptionData.io.wen(i) := canEnqueue(i)
exceptionData.io.waddr(i) := enqPtrVec(i).value
exceptionData.io.wdata(i) := selectAll(io.enq.req(i).bits.cf.exceptionVec, false, true)
}
// Wire execution-unit writeback port `i` into exception-data write port
// `index`: write-enable follows the writeback's valid, and the write address
// is the instruction's ROQ index. The write DATA (wdata) is connected
// separately by the caller, since each functional unit filters its
// exception vector differently (selectCSR/selectAtomics/selectStore below).
def connectWbExc(index: Int, i: Int) = {
  exceptionData.io.wen(index) := io.exeWbResults(i).valid
  exceptionData.io.waddr(index) := io.exeWbResults(i).bits.uop.roqIdx.value
}
// csr
connectWbExc(RenameWidth, 6)
exceptionData.io.wdata(RenameWidth) := selectCSR(io.exeWbResults(6).bits.uop.cf.exceptionVec)
// load
connectWbExc(RenameWidth+1, 4)
exceptionData.io.wdata(RenameWidth+1) := selectAtomics(io.exeWbResults(4).bits.uop.cf.exceptionVec)
connectWbExc(RenameWidth+2, 5)
exceptionData.io.wdata(RenameWidth+2) := selectAtomics(io.exeWbResults(5).bits.uop.cf.exceptionVec)
// store
connectWbExc(RenameWidth+3, 16)
exceptionData.io.wdata(RenameWidth+3) := selectStore(io.exeWbResults(16).bits.uop.cf.exceptionVec)
connectWbExc(RenameWidth+4, 17)
exceptionData.io.wdata(RenameWidth+4) := selectStore(io.exeWbResults(17).bits.uop.cf.exceptionVec)
exceptionData.io.raddr := VecInit(deqPtrVec_next.map(_.value))
/**
* debug info
......@@ -674,6 +676,23 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
if(i % 4 == 3) XSDebug(false, true.B, "\n")
}
XSPerf("utilization", PopCount((0 until RoqSize).map(valid(_))))
XSPerf("commitInstr", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid)))
XSPerf("commitInstrLoad", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{ case (v, t) => v && t === CommitType.LOAD})))
XSPerf("commitInstrStore", Mux(io.commits.isWalk, 0.U, PopCount(io.commits.valid.zip(io.commits.info.map(_.commitType)).map{ case (v, t) => v && t === CommitType.STORE})))
XSPerf("writeback", PopCount((0 until RoqSize).map(i => valid(i) && writebacked(i))))
// XSPerf("enqInstr", PopCount(io.dp1Req.map(_.fire())))
// XSPerf("d2rVnR", PopCount(io.dp1Req.map(p => p.valid && !p.ready)))
XSPerf("walkInstr", Mux(io.commits.isWalk, PopCount(io.commits.valid), 0.U))
XSPerf("walkCycle", state === s_walk || state === s_extrawalk)
val deqNotWritebacked = valid(deqPtr.value) && !writebacked(deqPtr.value)
val deqUopCommitType = io.commits.info(0).commitType
XSPerf("waitNormalCycle", deqNotWritebacked && deqUopCommitType === CommitType.NORMAL)
XSPerf("waitBranchCycle", deqNotWritebacked && deqUopCommitType === CommitType.BRANCH)
XSPerf("waitLoadCycle", deqNotWritebacked && deqUopCommitType === CommitType.LOAD)
XSPerf("waitStoreCycle", deqNotWritebacked && deqUopCommitType === CommitType.STORE)
XSPerf("roqHeadPC", io.commits.info(0).pc)
val instrCnt = RegInit(0.U(64.W))
val retireCounter = Mux(state === s_idle, commitCnt, 0.U)
instrCnt := instrCnt + retireCounter
......@@ -751,7 +770,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
ExcitingUtils.addSource(RegNext(trapPC), "trapPC")
ExcitingUtils.addSource(RegNext(GTimer()), "trapCycleCnt")
ExcitingUtils.addSource(RegNext(instrCnt), "trapInstrCnt")
ExcitingUtils.addSource(state === s_walk || state === s_extrawalk, "perfCntCondRoqWalk", Perf)
if(EnableBPU){
ExcitingUtils.addSource(hitTrap, "XSTRAP", ConnectionType.Debug)
......
......@@ -269,6 +269,23 @@ class ICache extends ICacheModule
cutPacket.asUInt
}
// Cut an instruction packet out of MMIO response data that is not necessarily
// bus-width aligned. The MMIO beats in `sourceVec` are first flattened into a
// vector of insLen-wide instruction slots; the packet then starts at the slot
// selected by `pc` and is gated by `mask` shifted to that start position.
// Returns (packet bits, shifted mask) as a pair.
// NOTE(review): assumes mmioBusBytes is a multiple of instBytes so the
// per-beat slot split is exact — confirm against the cache parameters.
def cutHelperMMIO(sourceVec: Vec[UInt], pc: UInt, mask: UInt) = {
  // Flatten mmioBeats bus-wide beats into insLen-wide instruction slots.
  val sourceVec_inst = Wire(Vec(mmioBeats * mmioBusBytes/instBytes,UInt(insLen.W)))
  (0 until mmioBeats).foreach{ i =>
    (0 until mmioBusBytes/instBytes).foreach{ j =>
      sourceVec_inst(i*mmioBusBytes/instBytes + j) := sourceVec(i)(j*insLen+insLen-1, j*insLen)
    }
  }
  // Output packet, default-initialized to zero for masked-off slots.
  val cutPacket = WireInit(VecInit(Seq.fill(PredictWidth){0.U(insLen.W)}))
  val insLenLog = log2Ceil(insLen)
  // Index of the first instruction slot the pc points at within the MMIO data.
  val start = (pc >> insLenLog.U)(log2Ceil(mmioBeats * mmioBusBytes/instBytes) -1, 0)
  // Align the valid mask with the packet's start slot.
  val outMask = mask >> start
  (0 until PredictWidth ).foreach{ i =>
    cutPacket(i) := Mux(outMask(i).asBool,sourceVec_inst(start + i.U),0.U)
  }
  (cutPacket.asUInt, outMask.asUInt)
}
// generate the one hot code according to a UInt between 0-8
def PriorityMask(sourceVec: UInt) : UInt = {
val oneHot = Mux(sourceVec >= 8.U, "b1000".U,
......@@ -339,7 +356,6 @@ class ICache extends ICacheModule
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = PriorityMask(invalidVec)
val waymask = Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))
//deal with icache exception
val icacheExceptionVec = Wire(Vec(8,Bool()))
......@@ -351,6 +367,7 @@ class ICache extends ICacheModule
s2_mmio := s2_valid && io.tlb.resp.valid && s2_tlb_resp.mmio && !hasIcacheException
s2_hit := s2_valid && ParallelOR(hitVec)
val waymask = Mux(hasIcacheException,1.U(nWays.W),Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask)))
assert(!(s2_hit && s2_mmio),"MMIO address should not hit in icache")
......@@ -388,6 +405,7 @@ class ICache extends ICacheModule
(0 until blockWords).map{r =>
val row = dataHitWay.asTypeOf(Vec(blockWords,UInt(encRowBits.W)))(r)
val decodedRow = cacheParams.dataCode.decode(row)
// assert(!(s3_valid && s3_hit && decodedRow.uncorrectable))
decodedRow.corrected
}
)
......@@ -450,7 +468,9 @@ class ICache extends ICacheModule
val refillDataVecReg = RegEnable(next=refillDataVec, enable= (is_same_cacheline && icacheMissQueue.io.resp.fire()))
//FIXME!!
val mmio_packet = io.mmio_grant.bits.data
val mmioDataVec = io.mmio_grant.bits.data.asTypeOf(Vec(mmioBeats,UInt(mmioBusWidth.W)))
val mmio_packet = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._1
val mmio_mask = cutHelperMMIO(mmioDataVec, s3_req_pc, mmioMask)._2
XSDebug("mmio data %x\n", mmio_packet)
......@@ -465,7 +485,7 @@ class ICache extends ICacheModule
val refillData = Mux(useRefillReg,cutHelper(refillDataVecReg, s3_req_pc,s3_req_mask),cutHelper(refillDataVec, s3_req_pc,s3_req_mask))
wayResp.pc := s3_req_pc
wayResp.data := Mux(s3_valid && s3_hit, wayData, Mux(s3_mmio ,mmio_packet ,refillData))
wayResp.mask := Mux(s3_mmio,mmioMask,s3_req_mask)
wayResp.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
wayResp.ipf := s3_exception_vec(pageFault)
wayResp.acf := s3_exception_vec(accessFault)
wayResp.mmio := s3_mmio
......@@ -478,10 +498,6 @@ class ICache extends ICacheModule
// if a fetch packet triggers page fault, at least send a valid instruction
io.pd_out := Mux1H(s3_wayMask, pds.map(_.io.out))
val s3_noHit = s3_wayMask === 0.U
when ((io.prev_ipf || s3_tlb_resp.excp.pf.instr) && s3_noHit) {
io.pd_out.pc := pds(0).io.out.pc
io.pd_out.mask := 1.U(PredictWidth.W)
}
//TODO: coherence
XSDebug("[Stage 3] valid:%d miss:%d pc: 0x%x mmio :%d mask: %b ipf:%d\n",s3_valid, s3_miss,s3_req_pc,s3_req_mask,s3_tlb_resp.excp.pf.instr, s3_mmio)
......@@ -507,7 +523,7 @@ class ICache extends ICacheModule
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || s3_has_exception || icacheMissQueue.io.resp.valid || io.mmio_grant.valid)
io.resp.bits.data := Mux(s3_mmio,mmio_packet,Mux((s3_valid && s3_hit),outPacket,refillDataOut))
io.resp.bits.mask := Mux(s3_mmio,mmioMask,s3_req_mask)
io.resp.bits.mask := Mux(s3_mmio,mmio_mask,s3_req_mask)
io.resp.bits.pc := s3_req_pc
io.resp.bits.ipf := s3_tlb_resp.excp.pf.instr
io.resp.bits.acf := s3_exception_vec(accessFault)
......@@ -532,7 +548,7 @@ class ICache extends ICacheModule
//To icache Uncache
io.mmio_acquire.valid := s3_mmio && s3_valid
io.mmio_acquire.bits.addr := s3_tlb_resp.paddr
io.mmio_acquire.bits.addr := mmioBusAligned(s3_tlb_resp.paddr)
io.mmio_acquire.bits.id := cacheID.U
io.mmio_grant.ready := io.resp.ready
......
......@@ -11,7 +11,8 @@ case class StreamPrefetchParameters(
streamSize: Int,
ageWidth: Int,
blockBytes: Int,
reallocStreamOnMissInstantly: Boolean
reallocStreamOnMissInstantly: Boolean,
cacheName: String // distinguish between different prefetchers
) {
def streamWidth = log2Up(streamCnt)
def idxWidth = log2Up(streamSize)
......@@ -107,7 +108,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
val buf = RegInit(VecInit(Seq.fill(streamSize)(0.U.asTypeOf(new PrefetchReq))))
val valid = RegInit(VecInit(Seq.fill(streamSize)(false.B)))
val head = RegInit(0.U(log2Up(streamSize).W))
val tail = RegInit(0.U(log2Up(streamCnt).W))
val tail = RegInit(0.U(log2Up(streamSize).W))
val s_idle :: s_req :: s_resp :: s_finish :: Nil = Enum(4)
val state = RegInit(VecInit(Seq.fill(streamSize)(s_idle)))
......@@ -122,7 +123,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
// dequeue
val hitIdx = io.update.bits.hitIdx
when (io.update.valid && !empty && valid(hitIdx)) {
when (io.update.valid && !empty && (isPrefetching(hitIdx) || valid(hitIdx))) {
val headBeforehitIdx = head <= hitIdx && (hitIdx < tail || tail <= head)
val hitIdxBeforeHead = hitIdx < tail && tail <= head
when (headBeforehitIdx) {
......@@ -132,6 +133,8 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
when (hitIdxBeforeHead) {
(0 until streamSize).foreach(i => deqLater(i) := Mux(i.U >= head || i.U <= hitIdx, true.B, deqLater(i)))
}
XSDebug(io.update.valid && !empty && (isPrefetching(hitIdx) || valid(hitIdx)), p"hitIdx=${hitIdx} headBeforehitIdx=${headBeforehitIdx} hitIdxBeforeHead=${hitIdxBeforeHead}\n")
}
val deqValid = WireInit(VecInit(Seq.fill(streamSize)(false.B)))
......@@ -143,8 +146,15 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
deqValid(idx) := deq
}
(0 until streamSize).foreach(i => valid(i) := valid(i) && !deqValid(i))
(0 until streamSize).foreach(i => deqLater(i) := deqLater(i) && !deqValid(i))
// (0 until streamSize).foreach(i => valid(i) := valid(i) && !deqValid(i))
// (0 until streamSize).foreach(i => deqLater(i) := deqLater(i) && !deqValid(i))
for (i <- 0 until streamSize) {
when (deqValid(i)) {
valid(i) := false.B
deqLater(i) := false.B
}
}
val nextHead = head + PopCount(deqValid)
when (deqValid.asUInt.orR) {
head := nextHead
......@@ -198,13 +208,17 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
val finishArb = Module(new Arbiter(new StreamPrefetchFinish(p), streamSize))
for (i <- 0 until streamSize) {
prefetchPrior(i) := head + i.U
reqs(i).ready := false.B
reqArb.io.in(i) <> reqs(prefetchPrior(i))
finishs(i).ready := false.B
reqs(i).ready := DontCare
finishArb.io.in(i) <> finishs(prefetchPrior(i))
finishs(i).ready := DontCare
resps(i).bits := io.resp.bits
resps(i).valid := io.resp.valid && io.resp.bits.idx === i.U
}
for (i <- 0 until streamSize) {
reqs(prefetchPrior(i)).ready := reqArb.io.in(i).ready
finishs(prefetchPrior(i)).ready := finishArb.io.in(i).ready
}
io.req <> reqArb.io.out
io.finish <> finishArb.io.out
io.resp.ready := VecInit(resps.zipWithIndex.map{ case (r, i) =>
......@@ -225,6 +239,7 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
needRealloc := false.B
state.foreach(_ := s_idle)
valid.foreach(_ := false.B)
deqLater.foreach(_ := false.B)
}
for (i <- 0 until streamSize) {
......@@ -233,20 +248,20 @@ class StreamBuffer(p: StreamPrefetchParameters) extends PrefetchModule {
}
// debug info
XSDebug(p"StreamBuf ${io.streamBufId} io.req: v=${io.req.valid} r=${io.req.ready} ${io.req.bits}\n")
XSDebug(p"StreamBuf ${io.streamBufId} io.resp: v=${io.resp.valid} r=${io.resp.ready} ${io.resp.bits}\n")
XSDebug(p"StreamBuf ${io.streamBufId} io.finish: v=${io.finish.valid} r=${io.finish.ready} ${io.finish.bits}")
XSDebug(p"StreamBuf ${io.streamBufId} io.update: v=${io.update.valid} ${io.update.bits}\n")
XSDebug(p"StreamBuf ${io.streamBufId} io.alloc: v=${io.alloc.valid} ${io.alloc.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.req: v=${io.req.valid} r=${io.req.ready} ${io.req.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.resp: v=${io.resp.valid} r=${io.resp.ready} ${io.resp.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.finish: v=${io.finish.valid} r=${io.finish.ready} ${io.finish.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.update: v=${io.update.valid} ${io.update.bits}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} io.alloc: v=${io.alloc.valid} ${io.alloc.bits}\n")
for (i <- 0 until streamSize) {
XSDebug(p"StreamBuf ${io.streamBufId} [${i.U}] io.addrs: ${io.addrs(i).valid} 0x${Hexadecimal(io.addrs(i).bits)} " +
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} [${i.U}] io.addrs: ${io.addrs(i).valid} 0x${Hexadecimal(io.addrs(i).bits)} " +
p"buf: ${buf(i)} valid: ${valid(i)} state: ${state(i)} isPfting: ${isPrefetching(i)} " +
p"deqLater: ${deqLater(i)} deqValid: ${deqValid(i)}\n")
}
XSDebug(p"StreamBuf ${io.streamBufId} head: ${head} tail: ${tail} full: ${full} empty: ${empty} nextHead: ${nextHead}\n")
XSDebug(p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq}\n")
XSDebug(needRealloc, p"StreamBuf ${io.streamBufId} needRealloc: ${needRealloc} reallocReq: ${reallocReq}\n")
XSDebug(p"StreamBuf ${io.streamBufId} prefetchPrior: ")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} head: ${head} tail: ${tail} full: ${full} empty: ${empty} nextHead: ${nextHead} blockBytes: ${blockBytes.U}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} baseReq: v=${baseReq.valid} ${baseReq.bits} nextReq: ${nextReq}\n")
XSDebug(needRealloc, s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} needRealloc: ${needRealloc} reallocReq: ${reallocReq}\n")
XSDebug(s"${p.cacheName} " + p"StreamBuf ${io.streamBufId} prefetchPrior: ")
(0 until streamSize).foreach(i => XSDebug(false, true.B, p"${prefetchPrior(i)} "))
XSDebug(false, true.B, "\n")
}
......@@ -266,6 +281,8 @@ object ParallelMin {
class StreamPrefetch(p: StreamPrefetchParameters) extends PrefetchModule {
val io = IO(new StreamPrefetchIO(p))
require(p.blockBytes > 0)
// TODO: implement this
def streamCnt = p.streamCnt
......@@ -352,8 +369,8 @@ class StreamPrefetch(p: StreamPrefetchParameters) extends PrefetchModule {
i.U === io.resp.bits.stream && buf.io.resp.ready}).asUInt.orR
// debug info
XSDebug(p"io: ${io}\n")
XSDebug(p"bufValids: ${Binary(bufValids.asUInt)} hit: ${hit} ages: ")
XSDebug(s"${p.cacheName} " + p"io: ${io}\n")
XSDebug(s"${p.cacheName} " + p"bufValids: ${Binary(bufValids.asUInt)} hit: ${hit} ages: ")
(0 until streamCnt).foreach(i => XSDebug(false, true.B, p"${Hexadecimal(ages(i))} "))
XSDebug(false, true.B, "\n")
}
......@@ -156,6 +156,94 @@ class PtwEntries(num: Int, tagLen: Int) extends PtwBundle {
}
}
class L2TlbEntry extends TlbBundle {
  // One L2-TLB entry holding a single translation (any page size).
  val tag = UInt(vpnLen.W)          // tag is the full vpn
  val level = UInt(log2Up(Level).W) // 2 for 4KB, 1 for 2MB, 0 for 1GB
  val ppn = UInt(ppnLen.W)
  val perm = new PtePermBundle

  /** Tag-match `vpn` against this entry, masking off the low vpn bits
    * covered by the entry's page size (level 2 compares all vpn bits,
    * level 1 ignores one vpnn field, level 0 ignores two).
    */
  def hit(vpn: UInt): Bool = {
    val maskLevel = VecInit((Level-1 to 0 by -1).map{i => // NOTE: level 2 for 4KB, 1 for 2MB, 0 for 1GB
      Reverse(VecInit(Seq.fill(vpnLen-i*vpnnLen)(true.B) ++ Seq.fill(i*vpnnLen)(false.B)).asUInt)})
    val mask = maskLevel(level)
    (mask&this.tag) === (mask&vpn)
  }

  /** Fill this entry from a raw pte word at the given walk level;
    * returns `this` so callers can write `Wire(new L2TlbEntry()).apply(...)`.
    */
  def apply(pte: UInt, level: UInt, vpn: UInt) = {
    this.tag := vpn
    this.level := level
    this.ppn := pte.asTypeOf(pteBundle).ppn
    this.perm := pte.asTypeOf(pteBundle).perm
    this
  }

  override def toPrintable: Printable = {
    p"vpn:0x${Hexadecimal(tag)} level:${level} ppn:${Hexadecimal(ppn)} perm:${perm}"
  }
}
class L2TlbEntires(num: Int, tagLen: Int) extends TlbBundle {
  // A cache line of `num` 4KB-page translations sharing one tag.
  require(log2Up(num)==log2Down(num)) // num must be a power of two
  /* vpn can be divided into three parts */
  // vpn: tagPart(17bit) + addrPart(8bit) + cutLenPart(2bit)
  val cutLen = log2Up(num) // number of low vpn bits selecting an entry inside the line
  val tag = UInt(tagLen.W) // NOTE: high part of vpn
  val ppns = Vec(num, UInt(ppnLen.W))
  val perms = Vec(num, new PtePermBundle)
  val vs = Vec(num, Bool()) // per-entry valid (legal leaf pte)

  // full vpn => high tagLen bits used as the line tag
  def tagClip(vpn: UInt) = {
    vpn(vpn.getWidth-1, vpn.getWidth-tagLen)
  }

  // NOTE: get inside idx (which of the `num` entries within the line)
  def idxClip(vpn: UInt) = {
    vpn(cutLen-1, 0)
  }

  /** Line hit: tag matches and the selected in-line entry is valid. */
  def hit(vpn: UInt) = {
    (tag === tagClip(vpn)) && vs(idxClip(vpn))
  }

  /** Build a whole line from `num` consecutive raw ptes read from memory.
    * Only legal leaf ptes are marked valid; only level-2 (4KB) pages are
    * supported here (asserted).
    */
  def genEntries(data: UInt, level: UInt, vpn: UInt): L2TlbEntires = {
    require((data.getWidth / XLEN) == num,
      "input data length must be multiple of pte length")
    assert(level===2.U, "tlb entries only support 4K pages")

    val ts = Wire(new L2TlbEntires(num, tagLen))
    ts.tag := tagClip(vpn)
    for (i <- 0 until num) {
      val pte = data((i+1)*XLEN-1, i*XLEN).asTypeOf(new PteBundle)
      ts.ppns(i) := pte.ppn
      ts.perms(i):= pte.perm // this.perms has no v
      ts.vs(i)   := !pte.isPf(level) && pte.isLeaf() // legal and leaf, store to l2Tlb
    }

    ts
  }

  /** Extract the single entry selected by `vpn` as an L2TlbEntry. */
  def get(vpn: UInt): L2TlbEntry = {
    val t = Wire(new L2TlbEntry)
    val idx = idxClip(vpn)
    t.tag := vpn // Note: Use input vpn, not vpn in TlbL2
    t.level := 2.U // L2TlbEntries only support 4k page
    t.ppn := ppns(idx)
    t.perm := perms(idx)
    t
  }

  override def cloneType: this.type = (new L2TlbEntires(num, tagLen)).asInstanceOf[this.type]

  override def toPrintable: Printable = {
    require(num == 4, "if num is not 4, please comment this toPrintable")
    // NOTE: if num is not 4, please comment this toPrintable
    // fix: separating space between ppn(1) and ppn(2) so the fields don't fuse in the log
    p"tag:${Hexadecimal(tag)} ppn(0):${Hexadecimal(ppns(0))} ppn(1):${Hexadecimal(ppns(1))} " +
    p"ppn(2):${Hexadecimal(ppns(2))} ppn(3):${Hexadecimal(ppns(3))} " +
    p"perms(0):${perms(0)} perms(1):${perms(1)} perms(2):${perms(2)} perms(3):${perms(3)} vs:${Binary(vs.asUInt)}"
  }
}
class PtwReq extends PtwBundle {
val vpn = UInt(vpnLen.W)
......@@ -165,8 +253,8 @@ class PtwReq extends PtwBundle {
}
class PtwResp extends PtwBundle {
val entry = new TlbEntry
val pf = Bool() // simple pf no matter cmd
val entry = new L2TlbEntry
val pf = Bool()
override def toPrintable: Printable = {
p"entry:${entry} pf:${pf}"
......@@ -237,18 +325,25 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
// pde/pte-cache is cache of page-table, speeding up ptw
val tlbl2 = Module(new SRAMWrapper(
"L2TLB",
new TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen),
set = TlbL2LineNum
new L2TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen),
set = TlbL2LineNum,
singlePort = true
)) // (total 256, one line is 4 => 64 lines)
val tlbv = RegInit(0.U(TlbL2LineNum.W)) // valid
val tlbg = Reg(UInt(TlbL2LineNum.W)) // global
val sp = Reg(Vec(TlbL2SPEntrySize, new L2TlbEntry)) // (total 16, one is 4M or 1G)
val spv = RegInit(0.U(TlbL2SPEntrySize.W))
val spg = Reg(UInt(TlbL2SPEntrySize.W))
val ptwl1 = Reg(Vec(PtwL1EntrySize, new PtwEntry(tagLen = PtwL1TagLen)))
val l1v = RegInit(0.U(PtwL1EntrySize.W)) // valid
val l1g = Reg(UInt(PtwL1EntrySize.W))
val ptwl2 = Module(new SRAMWrapper(
"L2PTW",
new PtwEntries(num = PtwL2LineSize, tagLen = PtwL2TagLen),
set = PtwL2LineNum
set = PtwL2LineNum,
singlePort = true
)) // (total 256, one line is 4 => 64 lines)
val l2v = RegInit(0.U(PtwL2LineNum.W)) // valid
val l2g = Reg(UInt(PtwL2LineNum.W)) // global
......@@ -276,7 +371,6 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
* tlbl2
*/
val (tlbHit, tlbHitData) = {
assert(tlbl2.io.r.req.ready)
val ridx = genTlbL2Idx(req.vpn)
val vidx = RegEnable(tlbv(ridx), validOneCycle)
......@@ -284,10 +378,22 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
tlbl2.io.r.req.bits.apply(setIdx = ridx)
val ramData = tlbl2.io.r.resp.data(0)
assert(tlbl2.io.r.req.ready || !tlbl2.io.r.req.valid)
XSDebug(tlbl2.io.r.req.valid, p"tlbl2 Read rIdx:${Hexadecimal(ridx)}\n")
XSDebug(RegNext(tlbl2.io.r.req.valid), p"tlbl2 RamData:${ramData}\n")
XSDebug(RegNext(tlbl2.io.r.req.valid), p"tlbl2 v:${vidx} hit:${ramData.hit(req.vpn)} tlbPte:${ramData.get(req.vpn)}\n")
(ramData.hit(req.vpn) && vidx, ramData.get(req.vpn))
val spHitVec = sp.zipWithIndex.map{ case (a,i) =>
RegEnable(a.hit(req.vpn) && spv(i), validOneCycle)
}
val spHitData = ParallelMux(spHitVec zip sp)
val spHit = Cat(spHitVec).orR
XSDebug(RegNext(validOneCycle), p"tlbl2 sp: spHit:${spHit} spPte:${spHitData}\n")
assert(RegNext(!(ramData.hit(req.vpn) && vidx && spHit && RegNext(validOneCycle))), "pages should not be normal page and super page as well")
(ramData.hit(req.vpn) && vidx || spHit, Mux(spHit, spHitData, ramData.get(req.vpn)))
}
/*
......@@ -314,7 +420,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
val idx = RegEnable(l2addr(log2Up(PtwL2LineSize)+log2Up(XLEN/8)-1, log2Up(XLEN/8)), readRam)
val vidx = RegEnable(l2v(ridx), readRam)
assert(ptwl2.io.r.req.ready)
assert(ptwl2.io.r.req.ready || !readRam)
ptwl2.io.r.req.valid := readRam
ptwl2.io.r.req.bits.apply(setIdx = ridx)
val ramData = ptwl2.io.r.resp.data(0)
......@@ -369,7 +475,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
state := state_idle
}.otherwise {
state := state_wait_ready
latch.entry := new TlbEntry().genTlbEntry(memRdata, level, req.vpn)
latch.entry := Wire(new L2TlbEntry()).apply(memRdata, level, req.vpn)
latch.pf := memPte.isPf(level)
}
}.otherwise {
......@@ -426,7 +532,7 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
for(i <- 0 until PtwWidth) {
resp(i).valid := valid && arbChosen===i.U && ptwFinish // TODO: add resp valid logic
resp(i).bits.entry := Mux(tlbHit, tlbHitData,
Mux(state===state_wait_ready, latch.entry, new TlbEntry().genTlbEntry(memSelData, Mux(level===3.U, 2.U, level), req.vpn)))
Mux(state===state_wait_ready, latch.entry, Wire(new L2TlbEntry()).apply(memSelData, Mux(level===3.U, 2.U, level), req.vpn)))
resp(i).bits.pf := Mux(level===3.U || notFound, true.B, Mux(tlbHit, false.B, Mux(state===state_wait_ready, latch.pf, memPte.isPf(level))))
// TODO: the pf must not be correct, check it
}
......@@ -442,13 +548,15 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
when (memRespFire && !memPte.isPf(level) && !sfenceLatch) {
when (level===0.U && !memPte.isLeaf) {
val refillIdx = LFSR64()(log2Up(PtwL1EntrySize)-1,0) // TODO: may be LRU
val rfOH = UIntToOH(refillIdx)
ptwl1(refillIdx).refill(l1addr, memSelData)
l1v := l1v | UIntToOH(refillIdx)
l1g := (l1g & ~UIntToOH(refillIdx)) | Mux(memPte.perm.g, UIntToOH(refillIdx), 0.U)
l1v := l1v | rfOH
l1g := (l1g & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U)
}
when (level===1.U && !memPte.isLeaf) {
val l2addrStore = RegEnable(l2addr, memReqFire && state===state_req && level===1.U)
val refillIdx = genPtwL2Idx(l2addrStore) //getVpnn(req.vpn, 1)(log2Up(PtwL2EntrySize)-1, 0)
val rfOH = UIntToOH(refillIdx)
//TODO: check why the old refillIdx is right
assert(ptwl2.io.w.req.ready)
......@@ -459,26 +567,34 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
data = ps,
waymask = -1.S.asUInt
)
l2v := l2v | UIntToOH(refillIdx)
l2g := (l2g & ~UIntToOH(refillIdx)) | Mux(Cat(memPtes.map(_.perm.g)).andR, UIntToOH(refillIdx), 0.U)
l2v := l2v | rfOH
l2g := (l2g & ~rfOH) | Mux(Cat(memPtes.map(_.perm.g)).andR, rfOH, 0.U)
XSDebug(p"ptwl2 RefillIdx:${Hexadecimal(refillIdx)} ps:${ps}\n")
}
when (memPte.isLeaf() && (level===2.U)) {
val refillIdx = genTlbL2Idx(req.vpn)//getVpnn(req.vpn, 0)(log2Up(TlbL2EntrySize)-1, 0)
val rfOH = UIntToOH(refillIdx)
//TODO: check why the old refillIdx is right
assert(tlbl2.io.w.req.ready)
val ts = new TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn)
val ts = new L2TlbEntires(num = TlbL2LineSize, tagLen = TlbL2TagLen).genEntries(memRdata, level, req.vpn)
tlbl2.io.w.apply(
valid = true.B,
setIdx = refillIdx,
data = ts,
waymask = -1.S.asUInt
)
tlbv := tlbv | UIntToOH(refillIdx)
tlbg := (tlbg & ~UIntToOH(refillIdx)) | Mux(Cat(memPtes.map(_.perm.g)).andR, UIntToOH(refillIdx), 0.U)
tlbv := tlbv | rfOH
tlbg := (tlbg & ~rfOH) | Mux(Cat(memPtes.map(_.perm.g)).andR, rfOH, 0.U)
XSDebug(p"tlbl2 refillIdx:${Hexadecimal(refillIdx)} ts:${ts}\n")
}
when (memPte.isLeaf() && (level===1.U || level===0.U)) {
val refillIdx = LFSR64()(log2Up(TlbL2SPEntrySize)-1,0) // TODO: may be LRU
val rfOH = UIntToOH(refillIdx)
sp(refillIdx) := Wire(new L2TlbEntry()).apply(memSelData, Mux(level===3.U, 2.U, level), req.vpn)
spv := spv | rfOH
spg := (spg & ~rfOH) | Mux(memPte.perm.g, rfOH, 0.U)
}
}
/* sfence
......@@ -496,25 +612,29 @@ class PTWImp(outer: PTW) extends PtwModule(outer){
when (sfence.bits.rs2) {
// all va && all asid
tlbv := 0.U
tlbg := 0.U
spv := 0.U
// tlbg := 0.U
l1v := 0.U
l2v := 0.U
l2g := 0.U
// l2g := 0.U
} .otherwise {
// all va && specific asid except global
tlbv := tlbv & tlbg
spv := spv & spg
l1v := l1v & l1g
l2v := l2v & l2g
}
} .otherwise {
val sfenceTlbL2IdxOH = UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))
when (sfence.bits.rs2) {
// specific leaf of addr && all asid
tlbv := tlbv & ~UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))
tlbg := tlbg & ~UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))
tlbv := tlbv & ~sfenceTlbL2IdxOH
tlbg := tlbg & ~sfenceTlbL2IdxOH
} .otherwise {
// specific leaf of addr && specific asid
tlbv := tlbv & (~UIntToOH(genTlbL2Idx(sfence.bits.addr(sfence.bits.addr.getWidth-1, offLen)))| tlbg)
tlbv := tlbv & (~sfenceTlbL2IdxOH| tlbg)
}
spv := 0.U
}
}
......
......@@ -8,12 +8,14 @@ import utils._
import xiangshan.cache._
import chisel3.experimental.chiselName
import freechips.rocketchip.tile.HasLazyRoCC
import chisel3.ExcitingUtils._
trait HasInstrMMIOConst extends HasXSParameter{
trait HasInstrMMIOConst extends HasXSParameter with HasIFUConst{
def mmioBusWidth = 64
def mmioBusBytes = mmioBusWidth /8
def mmioBeats = FetchWidth * 4 * 8 / mmioBusWidth
def mmioMask = VecInit(List.fill(PredictWidth)(true.B)).asUInt
def mmioBusAligned(pc :UInt): UInt = align(pc, mmioBusBytes)
}
trait HasIFUConst extends HasXSParameter {
......@@ -490,6 +492,35 @@ class IFU extends XSModule with HasIFUConst
io.fetchPacket.bits := fetchPacketWire
io.fetchPacket.valid := fetchPacketValid
// if(IFUDebug) {
val predictor_s3 = RegEnable(Mux(if3_redirect, 1.U(log2Up(4).W), 0.U(log2Up(4).W)), if3_fire)
val predictor_s4 = Mux(if4_redirect, 2.U, predictor_s3)
val predictor = predictor_s4
fetchPacketWire.bpuMeta.map(_.predictor := predictor)
// }
// val predRight = cfiUpdate.valid && !cfiUpdate.bits.isMisPred && !cfiUpdate.bits.isReplay
// val predWrong = cfiUpdate.valid && cfiUpdate.bits.isMisPred && !cfiUpdate.bits.isReplay
// val ubtbRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 0.U
// val ubtbWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 0.U
// val btbRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 1.U
// val btbWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 1.U
// val tageRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 2.U
// val tageWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 2.U
// val loopRight = predRight && cfiUpdate.bits.bpuMeta.predictor === 3.U
// val loopWrong = predWrong && cfiUpdate.bits.bpuMeta.predictor === 3.U
// ExcitingUtils.addSource(ubtbRight, "perfCntubtbRight", Perf)
// ExcitingUtils.addSource(ubtbWrong, "perfCntubtbWrong", Perf)
// ExcitingUtils.addSource(btbRight, "perfCntbtbRight", Perf)
// ExcitingUtils.addSource(btbWrong, "perfCntbtbWrong", Perf)
// ExcitingUtils.addSource(tageRight, "perfCnttageRight", Perf)
// ExcitingUtils.addSource(tageWrong, "perfCnttageWrong", Perf)
// ExcitingUtils.addSource(loopRight, "perfCntloopRight", Perf)
// ExcitingUtils.addSource(loopWrong, "perfCntloopWrong", Perf)
// debug info
if (IFUDebug) {
XSDebug(RegNext(reset.asBool) && !reset.asBool, "Reseting...\n")
......@@ -501,6 +532,7 @@ class IFU extends XSModule with HasIFUConst
XSDebug("[IF2] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x snpc=%x\n", if2_valid, if2_ready, if2_fire, if2_redirect, if2_flush, if2_pc, if2_snpc)
XSDebug("[IF3] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if3_valid, if3_ready, if3_fire, if3_redirect, if3_flush, if3_pc, crossPageIPF, if3_bp.hasNotTakenBrs)
XSDebug("[IF4] v=%d r=%d fire=%d redirect=%d flush=%d pc=%x crossPageIPF=%d sawNTBrs=%d\n", if4_valid, if4_ready, if4_fire, if4_redirect, if4_flush, if4_pc, if4_crossPageIPF, if4_bp.hasNotTakenBrs)
XSDebug("[predictor] predictor_s3=%d, predictor_s4=%d, predictor=%d\n", predictor_s3, predictor_s4, predictor)
XSDebug("[IF1][icacheReq] v=%d r=%d addr=%x\n", icache.io.req.valid, icache.io.req.ready, icache.io.req.bits.addr)
XSDebug("[IF1][ghr] hist=%b\n", if1_gh.asUInt)
XSDebug("[IF1][ghr] extHist=%b\n\n", if1_gh.asUInt)
......@@ -542,4 +574,4 @@ class IFU extends XSModule with HasIFUConst
)
}
}
}
\ No newline at end of file
}
......@@ -206,4 +206,6 @@ class Ibuffer extends XSModule with HasCircularQueuePtrHelper {
// ibuf(i*8+7).inst
// )
// }
XSPerf("utilization", validEntries)
}
......@@ -41,7 +41,6 @@ class LsPipelineBundle extends XSBundle {
val miss = Bool()
val tlbMiss = Bool()
val mmio = Bool()
val rollback = Bool()
val forwardMask = Vec(8, Bool())
val forwardData = Vec(8, UInt(8.W))
......
......@@ -51,6 +51,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val uncache = new DCacheWordIO
val roqDeqPtr = Input(new RoqPtr)
val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool())
})
val loadQueue = Module(new LoadQueue)
......@@ -103,6 +104,8 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
loadQueue.io.load_s1 <> io.forward
storeQueue.io.forward <> io.forward // overlap forwardMask & forwardData, DO NOT CHANGE SEQUENCE
storeQueue.io.sqempty <> io.sqempty
io.exceptionAddr.vaddr := Mux(io.exceptionAddr.isStore, storeQueue.io.exceptionAddr.vaddr, loadQueue.io.exceptionAddr.vaddr)
// naive uncache arbiter
......
......@@ -10,6 +10,7 @@ import xiangshan.cache.{DCacheLineIO, DCacheWordIO, MemoryOpConstants, TlbReques
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.fu.HasExceptionNO
class LqPtr extends CircularQueuePtr(LqPtr.LoadQueueSize) { }
......@@ -58,6 +59,7 @@ class LoadQueue extends XSModule
with HasDCacheParameters
with HasCircularQueuePtrHelper
with HasLoadHelper
with HasExceptionNO
{
val io = IO(new Bundle() {
val enq = new LqEnqIO
......@@ -150,7 +152,7 @@ class LoadQueue extends XSModule
vaddrModule.io.wen(i) := false.B
when(io.loadIn(i).fire()) {
when(io.loadIn(i).bits.miss) {
XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
io.loadIn(i).bits.uop.lqIdx.asUInt,
io.loadIn(i).bits.uop.cf.pc,
io.loadIn(i).bits.vaddr,
......@@ -159,12 +161,10 @@ class LoadQueue extends XSModule
io.loadIn(i).bits.mask,
io.loadIn(i).bits.forwardData.asUInt,
io.loadIn(i).bits.forwardMask.asUInt,
io.loadIn(i).bits.mmio,
io.loadIn(i).bits.rollback,
io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
io.loadIn(i).bits.mmio
)
}.otherwise {
XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
XSInfo(io.loadIn(i).valid, "load hit write to cbd lqidx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x\n",
io.loadIn(i).bits.uop.lqIdx.asUInt,
io.loadIn(i).bits.uop.cf.pc,
io.loadIn(i).bits.vaddr,
......@@ -173,9 +173,7 @@ class LoadQueue extends XSModule
io.loadIn(i).bits.mask,
io.loadIn(i).bits.forwardData.asUInt,
io.loadIn(i).bits.forwardMask.asUInt,
io.loadIn(i).bits.mmio,
io.loadIn(i).bits.rollback,
io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
io.loadIn(i).bits.mmio
)
}
val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value
......@@ -187,7 +185,6 @@ class LoadQueue extends XSModule
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.data // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb.wen(i) := true.B
......@@ -196,11 +193,11 @@ class LoadQueue extends XSModule
vaddrModule.io.wen(i) := true.B
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
val dcacheMissed = io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
miss(loadWbIndex) := dcacheMissed && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
// listening(loadWbIndex) := dcacheMissed
pending(loadWbIndex) := io.loadIn(i).bits.mmio && !io.loadIn(i).bits.uop.cf.exceptionVec.asUInt.orR
miss(loadWbIndex) := dcacheMissed
pending(loadWbIndex) := io.loadIn(i).bits.mmio
uop(loadWbIndex).debugInfo.issueTime := io.loadIn(i).bits.uop.debugInfo.issueTime
}
}
......@@ -294,41 +291,57 @@ class LoadQueue extends XSModule
// Stage 0
// Generate writeback indexes
def getEvenBits(input: UInt): UInt = {
require(input.getWidth == LoadQueueSize)
VecInit((0 until LoadQueueSize/2).map(i => {input(2*i)})).asUInt
}
def getOddBits(input: UInt): UInt = {
require(input.getWidth == LoadQueueSize)
VecInit((0 until LoadQueueSize/2).map(i => {input(2*i+1)})).asUInt
}
val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W))) // index selected last cycle
val loadWbSelV = RegInit(VecInit(List.fill(LoadPipelineWidth)(false.B))) // index selected in last cycle is valid
val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
allocated(i) && !writebacked(i) && datavalid(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)}))
val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)}))
val evenDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i)})).asUInt
val oddDeqMask = VecInit((0 until LoadQueueSize/2).map(i => {deqMask(2*i+1)})).asUInt
val evenDeqMask = getEvenBits(deqMask)
val oddDeqMask = getOddBits(deqMask)
// generate lastCycleSelect mask
val evenSelectMask = Mux(loadWbSelV(0), getEvenBits(UIntToOH(loadWbSel(0))), 0.U)
val oddSelectMask = Mux(loadWbSelV(1), getOddBits(UIntToOH(loadWbSel(1))), 0.U)
// generate real select vec
val loadEvenSelVec = getEvenBits(loadWbSelVec) & ~evenSelectMask
val loadOddSelVec = getOddBits(loadWbSelVec) & ~oddSelectMask
def toVec(a: UInt): Vec[Bool] = {
VecInit(a.asBools)
}
val loadWbSelGen = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelVGen = Wire(Vec(LoadPipelineWidth, Bool()))
loadWbSelGen(0) := Cat(getFirstOne(loadEvenSelVec, evenDeqMask), 0.U(1.W))
loadWbSelGen(0) := Cat(getFirstOne(toVec(loadEvenSelVec), evenDeqMask), 0.U(1.W))
loadWbSelVGen(0):= loadEvenSelVec.asUInt.orR
loadWbSelGen(1) := Cat(getFirstOne(loadOddSelVec, oddDeqMask), 1.U(1.W))
loadWbSelGen(1) := Cat(getFirstOne(toVec(loadOddSelVec), oddDeqMask), 1.U(1.W))
loadWbSelVGen(1) := loadOddSelVec.asUInt.orR
val loadWbSel = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueSize).W)))
val loadWbSelV = RegInit(VecInit(List.fill(LoadPipelineWidth)(false.B)))
(0 until LoadPipelineWidth).map(i => {
val canGo = io.ldout(i).fire() || !loadWbSelV(i)
val valid = loadWbSelVGen(i)
// store selected index in pipeline reg
loadWbSel(i) := RegEnable(loadWbSelGen(i), valid && canGo)
// Mark them as writebacked, so they will not be selected in the next cycle
when(valid && canGo){
writebacked(loadWbSelGen(i)) := true.B
}
// update loadWbSelValidReg
when(io.ldout(i).fire()){
// Mark them as writebacked, so they will not be selected in the next cycle
writebacked(loadWbSel(i)) := true.B
// update loadWbSelValidReg
loadWbSelV(i) := false.B
}
when(valid && canGo){
loadWbSelV(i) := true.B
}
})
// Stage 1
// Use indexes generated in cycle 0 to read data
// writeback data to cdb
......@@ -352,10 +365,9 @@ class LoadQueue extends XSModule
val rdataPartialLoad = rdataHelper(seluop, rdataSel)
// writeback missed int/fp load
//
//
// Int load writeback will finish (if not blocked) in one cycle
io.ldout(i).bits.uop := seluop
io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.wb.rdata(i).exception.asBools
io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
io.ldout(i).bits.data := rdataPartialLoad
io.ldout(i).bits.redirectValid := false.B
......
......@@ -15,7 +15,6 @@ class LQDataEntry extends XSBundle {
val paddr = UInt(PAddrBits.W)
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val exception = UInt(16.W) // TODO: opt size
val fwdMask = Vec(8, Bool())
}
......@@ -236,7 +235,6 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
// data module
val paddrModule = Module(new PaddrModule(size, numRead = 3, numWrite = 2))
val maskModule = Module(new MaskModule(size, numRead = 3, numWrite = 2))
val exceptionModule = Module(new AsyncDataModuleTemplate(UInt(16.W), size, numRead = 3, numWrite = 2))
val coredataModule = Module(new CoredataModule(size, numRead = 3, numWrite = 3))
// read data
......@@ -244,26 +242,22 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
(0 until wbNumRead).map(i => {
paddrModule.io.raddr(i) := io.wb.raddr(i)
maskModule.io.raddr(i) := io.wb.raddr(i)
exceptionModule.io.raddr(i) := io.wb.raddr(i)
coredataModule.io.raddr(i) := io.wb.raddr(i)
io.wb.rdata(i).paddr := paddrModule.io.rdata(i)
io.wb.rdata(i).mask := maskModule.io.rdata(i)
io.wb.rdata(i).data := coredataModule.io.rdata(i)
io.wb.rdata(i).exception := exceptionModule.io.rdata(i)
io.wb.rdata(i).fwdMask := DontCare
})
// read port wbNumRead
paddrModule.io.raddr(wbNumRead) := io.uncache.raddr
maskModule.io.raddr(wbNumRead) := io.uncache.raddr
exceptionModule.io.raddr(wbNumRead) := io.uncache.raddr
coredataModule.io.raddr(wbNumRead) := io.uncache.raddr
io.uncache.rdata.paddr := paddrModule.io.rdata(wbNumRead)
io.uncache.rdata.mask := maskModule.io.rdata(wbNumRead)
io.uncache.rdata.data := exceptionModule.io.rdata(wbNumRead)
io.uncache.rdata.exception := coredataModule.io.rdata(wbNumRead)
io.uncache.rdata.data := coredataModule.io.rdata(wbNumRead)
io.uncache.rdata.fwdMask := DontCare
// write data
......@@ -271,19 +265,16 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
(0 until wbNumWrite).map(i => {
paddrModule.io.wen(i) := false.B
maskModule.io.wen(i) := false.B
exceptionModule.io.wen(i) := false.B
coredataModule.io.wen(i) := false.B
coredataModule.io.fwdMaskWen(i) := false.B
coredataModule.io.paddrWen(i) := false.B
paddrModule.io.waddr(i) := io.wb.waddr(i)
maskModule.io.waddr(i) := io.wb.waddr(i)
exceptionModule.io.waddr(i) := io.wb.waddr(i)
coredataModule.io.waddr(i) := io.wb.waddr(i)
paddrModule.io.wdata(i) := io.wb.wdata(i).paddr
maskModule.io.wdata(i) := io.wb.wdata(i).mask
exceptionModule.io.wdata(i) := io.wb.wdata(i).exception
coredataModule.io.wdata(i) := io.wb.wdata(i).data
coredataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt
coredataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr
......@@ -291,7 +282,6 @@ class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule
when(io.wb.wen(i)){
paddrModule.io.wen(i) := true.B
maskModule.io.wen(i) := true.B
exceptionModule.io.wen(i) := true.B
coredataModule.io.wen(i) := true.B
coredataModule.io.fwdMaskWen(i) := true.B
coredataModule.io.paddrWen(i) := true.B
......
......@@ -43,6 +43,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val roqDeqPtr = Input(new RoqPtr)
// val refill = Flipped(Valid(new DCacheLineReq ))
val exceptionAddr = new ExceptionAddrIO
val sqempty = Output(Bool())
})
// data modules
......@@ -52,8 +53,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
dataModule.io := DontCare
val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), StoreQueueSize, numRead = 1, numWrite = StorePipelineWidth))
vaddrModule.io := DontCare
val exceptionModule = Module(new AsyncDataModuleTemplate(UInt(16.W), StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth))
exceptionModule.io := DontCare
// state & misc
val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
......@@ -83,7 +82,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
dataModule.io.raddr(i) := deqPtrExt(i).value
}
vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.sqIdx.value
exceptionModule.io.raddr(0) := deqPtr // read exception
/**
* Enqueue at dispatch
......@@ -123,14 +121,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
for (i <- 0 until StorePipelineWidth) {
dataModule.io.wen(i) := false.B
vaddrModule.io.wen(i) := false.B
exceptionModule.io.wen(i) := false.B
when(io.storeIn(i).fire()) {
when (io.storeIn(i).fire()) {
val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
val hasException = io.storeIn(i).bits.uop.cf.exceptionVec.asUInt.orR
val hasWritebacked = !io.storeIn(i).bits.mmio || hasException
datavalid(stWbIndex) := hasWritebacked
writebacked(stWbIndex) := hasWritebacked
pending(stWbIndex) := !hasWritebacked // valid mmio require
datavalid(stWbIndex) := !io.storeIn(i).bits.mmio
writebacked(stWbIndex) := !io.storeIn(i).bits.mmio
pending(stWbIndex) := io.storeIn(i).bits.mmio
val storeWbData = Wire(new SQDataEntry)
storeWbData := DontCare
......@@ -145,21 +140,15 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
vaddrModule.io.wdata(i) := io.storeIn(i).bits.vaddr
vaddrModule.io.wen(i) := true.B
exceptionModule.io.waddr(i) := stWbIndex
exceptionModule.io.wdata(i) := io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
exceptionModule.io.wen(i) := true.B
mmio(stWbIndex) := io.storeIn(i).bits.mmio
XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x roll %x exc %x\n",
XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x\n",
io.storeIn(i).bits.uop.sqIdx.value,
io.storeIn(i).bits.uop.cf.pc,
io.storeIn(i).bits.vaddr,
io.storeIn(i).bits.paddr,
io.storeIn(i).bits.data,
io.storeIn(i).bits.mmio,
io.storeIn(i).bits.rollback,
io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
io.storeIn(i).bits.mmio
)
}
}
......@@ -258,7 +247,6 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.mmioStout.valid := allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr)
io.mmioStout.bits.uop := uop(deqPtr)
io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
io.mmioStout.bits.uop.cf.exceptionVec := exceptionModule.io.rdata(0).asBools
io.mmioStout.bits.data := dataModuleRead(0).data // dataModuleRead.read(deqPtr)
io.mmioStout.bits.redirectValid := false.B
io.mmioStout.bits.redirect := DontCare
......@@ -373,6 +361,12 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
)
)
// io.sqempty will be used by sbuffer
// We delay it for 1 cycle for better timing
// When sbuffer need to check if it is empty, the pipeline is blocked, which means delay io.sqempty
// for 1 cycle will also promise that sq is empty in that cycle
io.sqempty := RegNext(enqPtrExt(0).value === deqPtrExt(0).value && enqPtrExt(0).flag === deqPtrExt(0).flag)
// debug info
XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt(0).flag, deqPtr)
......
......@@ -25,6 +25,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
val s_invalid :: s_tlb :: s_flush_sbuffer_req :: s_flush_sbuffer_resp :: s_cache_req :: s_cache_resp :: s_finish :: Nil = Enum(7)
val state = RegInit(s_invalid)
val in = Reg(new ExuInput())
val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
val atom_override_xtval = RegInit(false.B)
// paddr after translation
val paddr = Reg(UInt())
......@@ -89,11 +90,11 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
"b10".U -> (in.src1(1,0) === 0.U), //w
"b11".U -> (in.src1(2,0) === 0.U) //d
))
in.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
in.uop.cf.exceptionVec(storePageFault) := io.dtlb.resp.bits.excp.pf.st
in.uop.cf.exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld
in.uop.cf.exceptionVec(storeAccessFault) := io.dtlb.resp.bits.excp.af.st
in.uop.cf.exceptionVec(loadAccessFault) := io.dtlb.resp.bits.excp.af.ld
exceptionVec(storeAddrMisaligned) := !addrAligned
exceptionVec(storePageFault) := io.dtlb.resp.bits.excp.pf.st
exceptionVec(loadPageFault) := io.dtlb.resp.bits.excp.pf.ld
exceptionVec(storeAccessFault) := io.dtlb.resp.bits.excp.af.st
exceptionVec(loadAccessFault) := io.dtlb.resp.bits.excp.af.ld
val exception = !addrAligned ||
io.dtlb.resp.bits.excp.pf.st ||
io.dtlb.resp.bits.excp.pf.ld ||
......@@ -215,6 +216,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
when (state === s_finish) {
io.out.valid := true.B
io.out.bits.uop := in.uop
io.out.bits.uop.cf.exceptionVec := exceptionVec
io.out.bits.uop.diffTestDebugLrScValid := is_lrsc_valid
io.out.bits.data := resp_data
io.out.bits.redirectValid := false.B
......
......@@ -91,7 +91,7 @@ class LoadUnit_S1 extends XSModule {
val s1_uop = io.in.bits.uop
val s1_paddr = io.dtlbResp.bits.paddr
val s1_exception = io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_exception = selectLoad(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = !s1_tlb_miss && io.dtlbResp.bits.mmio
val s1_mask = io.in.bits.mask
......@@ -148,7 +148,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val s2_paddr = io.in.bits.paddr
val s2_tlb_miss = io.in.bits.tlbMiss
val s2_mmio = io.in.bits.mmio
val s2_exception = io.in.bits.uop.cf.exceptionVec.asUInt.orR
val s2_exception = selectLoad(io.in.bits.uop.cf.exceptionVec, false).asUInt.orR
val s2_cache_miss = io.dcacheResp.bits.miss
val s2_cache_replay = io.dcacheResp.bits.replay
......@@ -193,7 +193,9 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
// so we do not need to care about flush in load / store unit's out.valid
io.out.bits := io.in.bits
io.out.bits.data := rdataPartialLoad
io.out.bits.miss := s2_cache_miss && !fullForward
// when exception occurs, set it to not miss and let it write back to roq (via int port)
io.out.bits.miss := s2_cache_miss && !fullForward && !s2_exception
io.out.bits.uop.ctrl.fpWen := io.in.bits.uop.ctrl.fpWen && !s2_exception
io.out.bits.mmio := s2_mmio
io.in.ready := io.out.ready || !io.in.valid
......@@ -270,12 +272,14 @@ class LoadUnit extends XSModule with HasLoadHelper {
// Load queue will be updated at s2 for both hit/miss int/fp load
io.lsq.loadIn.valid := load_s2.io.out.valid
io.lsq.loadIn.bits := load_s2.io.out.bits
val s2Valid = load_s2.io.out.valid && (!load_s2.io.out.bits.miss || load_s2.io.out.bits.uop.cf.exceptionVec.asUInt.orR)
// write to rob and writeback bus
val s2_wb_valid = load_s2.io.out.valid && !load_s2.io.out.bits.miss
val refillFpLoad = io.lsq.ldout.bits.uop.ctrl.fpWen
// Int load, if hit, will be writebacked at s2
val intHitLoadOut = Wire(Valid(new ExuOutput))
intHitLoadOut.valid := s2Valid && !load_s2.io.out.bits.uop.ctrl.fpWen
intHitLoadOut.valid := s2_wb_valid && !load_s2.io.out.bits.uop.ctrl.fpWen
intHitLoadOut.bits.uop := load_s2.io.out.bits.uop
intHitLoadOut.bits.data := load_s2.io.out.bits.data
intHitLoadOut.bits.redirectValid := false.B
......@@ -289,10 +293,10 @@ class LoadUnit extends XSModule with HasLoadHelper {
io.ldout.bits := Mux(intHitLoadOut.valid, intHitLoadOut.bits, io.lsq.ldout.bits)
io.ldout.valid := intHitLoadOut.valid || io.lsq.ldout.valid && !refillFpLoad
// Fp load, if hit, will be send to recoder at s2, then it will be recoded & writebacked at s3
val fpHitLoadOut = Wire(Valid(new ExuOutput))
fpHitLoadOut.valid := s2Valid && load_s2.io.out.bits.uop.ctrl.fpWen
fpHitLoadOut.valid := s2_wb_valid && load_s2.io.out.bits.uop.ctrl.fpWen
fpHitLoadOut.bits := intHitLoadOut.bits
val fpLoadOut = Wire(Valid(new ExuOutput))
......
......@@ -90,7 +90,7 @@ class StoreUnit_S1 extends XSModule {
io.lsq.bits.uop.cf.exceptionVec(storeAccessFault) := io.dtlbResp.bits.excp.af.st
// mmio inst with exception will be writebacked immediately
val hasException = io.out.bits.uop.cf.exceptionVec.asUInt.orR
val hasException = selectStore(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR
io.out.valid := io.in.valid && (!io.out.bits.mmio || hasException) && !s1_tlb_miss
io.out.bits := io.lsq.bits
......
......@@ -13,9 +13,9 @@ trait HasSbufferCst extends HasXSParameter {
def s_prepare = 2.U(2.W)
def s_inflight = 3.U(2.W)
val evictCycle = 8192
val evictCycle = 1 << 20
require(isPow2(evictCycle))
val countBits = 1 + log2Up(evictCycle)
val countBits = log2Up(evictCycle+1)
val SbufferIndexWidth: Int = log2Up(StoreBufferSize)
// paddr = tag + offset
......@@ -108,6 +108,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq))) //Todo: store logic only support Width == 2 now
val dcache = new DCacheLineIO
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val sqempty = Input(Bool())
val flush = new Bundle {
val valid = Input(Bool())
val empty = Output(Bool())
......@@ -291,7 +292,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
do_eviction := validCount >= 12.U
io.flush.empty := empty
io.flush.empty := empty && io.sqempty
lru.io.flush := sbuffer_state === x_drain_sbuffer && empty
switch(sbuffer_state){
is(x_idle){
......
......@@ -483,4 +483,6 @@ class Sbuffer extends XSModule with HasSBufferConst {
cache.zipWithIndex.foreach { case (line, i) => {
XSDebug(line.valid, "[#%d line] Tag: %x, data: %x, mask: %x\n", i.U, line.tag, line.data.asUInt(), line.mask.asUInt())
}}
XSPerf("waitResp", waitingCacheLine.valid)
}
......@@ -48,10 +48,11 @@ package object xiangshan {
def apply() = UInt(log2Up(num).W)
def isIntExu(fuType: UInt) = !fuType(3)
def isIntExu(fuType: UInt) = !fuType(3)
def isJumpExu(fuType: UInt) = fuType === jmp
def isFpExu(fuType: UInt) = fuType(3, 2) === "b10".U
def isMemExu(fuType: UInt) = fuType(3, 2) === "b11".U
def isLoadExu(fuType: UInt) = fuType === ldu || fuType===mou
def isLoadExu(fuType: UInt) = fuType === ldu || fuType === mou
def isStoreExu(fuType: UInt) = fuType === stu
val functionNameMap = Map(
......@@ -109,6 +110,10 @@ package object xiangshan {
def isException(level: UInt) = level(1) && level(0)
}
object ExceptionVec {
def apply() = Vec(16, Bool())
}
object PMAMode {
def R = "b1".U << 0 //readable
def W = "b1".U << 1 //writeable
......
......@@ -4,6 +4,7 @@
#include "ram.h"
#include "compress.h"
// #define TLB_UNITTEST
#ifdef WITH_DRAMSIM3
#include "cosimulation.h"
......@@ -84,8 +85,8 @@ void addpageSv39() {
//pdde[2] = ((0x80000000&0xc0000000) >> 2) | 0xf;
for(int i = 0; i < PTENUM ;i++) {
pde[i] = ((PTEADDR(i)&0xfffff000)>>2) | 0x1;
//pde[i] = (((0x8000000+i*2*1024*1024)&0xffe00000)>>2) | 0xf;
// pde[i] = ((PTEADDR(i)&0xfffff000)>>2) | 0x1;
pde[i] = (((0x80000000+i*2*1024*1024)&0xffe00000)>>2) | 0xf;
}
for(int outidx = 0; outidx < PTENUM; outidx++ ) {
......@@ -94,6 +95,7 @@ void addpageSv39() {
}
}
printf("try to add identical tlb page to ram\n");
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM+PTEDEVNUM)),ptedev,PAGESIZE*PTEDEVNUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM+PDEDEVNUM)),pdedev,PAGESIZE*PDEDEVNUM);
memcpy((char *)ram+(TOPSIZE-PAGESIZE*(PTENUM+PDDENUM+PDENUM+PDEMMIONUM+PTEMMIONUM)),ptemmio, PAGESIZE*PTEMMIONUM);
......@@ -117,6 +119,12 @@ void init_ram(const char *img) {
assert(0);
}
#ifdef TLB_UNITTEST
//new add
addpageSv39();
//new end
#endif
int ret;
if (isGzFile(img)) {
printf("Gzip file detected and loading image from extracted gz file\n");
......@@ -143,12 +151,6 @@ void init_ram(const char *img) {
fclose(fp);
}
#ifdef TLB_UNITTEST
//new add
addpageSv39();
//new end
#endif
#ifdef WITH_DRAMSIM3
#if !defined(DRAMSIM3_CONFIG) || !defined(DRAMSIM3_OUTDIR)
#error DRAMSIM3_CONFIG or DRAMSIM3_OUTDIR is not defined
......
......@@ -31,7 +31,15 @@ object AddSinks {
"perfCntCondMbpIWrong",
"perfCntCondMbpRRight",
"perfCntCondMbpRWrong",
"perfCntLoopExit"//,
"perfCntubtbRight",
"perfCntubtbWrong",
"perfCntbtbRight",
"perfCntbtbWrong",
"perfCnttageRight",
"perfCnttageWrong",
"perfCntloopRight",
"perfCntloopWrong",
"perfCntLoopExit",
// "CntFetchFromICache",
// "CntFetchFromLoopBuffer",
// "CntExitLoop1",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册