未验证 提交 9d05dce6 编写于 作者: Y Yinan Xu 提交者: GitHub

Merge pull request #283 from RISCVERS/opt-roq-timing

Opt roq timing
......@@ -60,15 +60,36 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val csr = new RoqCSRIO
})
val microOp = Mem(RoqSize, new MicroOp)
// instvalid field
val valid = RegInit(VecInit(List.fill(RoqSize)(false.B)))
val flag = RegInit(VecInit(List.fill(RoqSize)(false.B)))
// status
val writebacked = Reg(Vec(RoqSize, Bool()))
// data for redirect, exception, etc.
val flagBkup = RegInit(VecInit(List.fill(RoqSize)(false.B)))
val exuFflags = Mem(RoqSize, new Fflags)
val exuData = Reg(Vec(RoqSize, UInt(XLEN.W)))//for debug
val exuDebug = Reg(Vec(RoqSize, new DebugBundle))//for debug
// uop field used when commit
// flushPipe (wb) (commit) (used in roq)
// lidx (wb) (commit)
// sidx (wb) (commit)
// uop.ctrl.commitType (wb) (commit) (L/S)
// exceptionVec (wb) (commit)
// roqIdx (dispatch) (commit)
// crossPageIPFFix (dispatch) (commit)
// uop field used when walk
// ctrl.fpWen (dispatch) (walk)
// ctrl.rfWen (dispatch) (walk)
// ldest (dispatch) (walk)
// data for debug
val debug_microOp = Mem(RoqSize, new MicroOp)
val debug_exuData = Reg(Vec(RoqSize, UInt(XLEN.W)))//for debug
val debug_exuDebug = Reg(Vec(RoqSize, new DebugBundle))//for debug
// ptr
val enqPtrExt = RegInit(0.U.asTypeOf(new RoqPtr))
val deqPtrExt = RegInit(0.U.asTypeOf(new RoqPtr))
val walkPtrExt = Reg(new RoqPtr)
......@@ -80,11 +101,29 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val isFull = enqPtr === deqPtr && enqPtrExt.flag =/= deqPtrExt.flag
val notFull = !isFull
val emptyEntries = RoqSize.U - distanceBetween(enqPtrExt, deqPtrExt)
val s_idle :: s_walk :: s_extrawalk :: Nil = Enum(3)
val state = RegInit(s_idle)
io.roqDeqPtr := deqPtrExt
// common signal
val enqPtrValPlus = Wire(Vec(RenameWidth, UInt(log2Up(RoqSize).W)))
val enqPtrFlagPlus = Wire(Vec(RenameWidth, Bool()))
for (i <- 0 until RenameWidth) {
val offset = PopCount(io.enq.req.map(_.valid).take(i))
val roqIdxExt = enqPtrExt + offset
enqPtrValPlus(i) := roqIdxExt.value
enqPtrFlagPlus(i) := roqIdxExt.flag
}
val deqPtrExtPlus = Wire(Vec(RenameWidth, UInt(log2Up(RoqSize).W)))
for(i <- 0 until CommitWidth){
val roqIdxExt = deqPtrExt + i.U
deqPtrExtPlus(i) := roqIdxExt.value
}
// Dispatch
val hasBlockBackward = RegInit(false.B)
val hasNoSpecExec = RegInit(false.B)
......@@ -101,10 +140,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val roqIdx = roqIdxExt.value
when(io.enq.req(i).valid) {
microOp(roqIdx) := io.enq.req(i).bits
valid(roqIdx) := true.B
flag(roqIdx) := roqIdxExt.flag
writebacked(roqIdx) := false.B
debug_microOp(roqIdx) := io.enq.req(i).bits
when(io.enq.req(i).bits.ctrl.blockBackward) {
hasBlockBackward := true.B
}
......@@ -134,26 +170,25 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
when(io.exeWbResults(i).fire()){
val wbIdxExt = io.exeWbResults(i).bits.uop.roqIdx
val wbIdx = wbIdxExt.value
writebacked(wbIdx) := true.B
microOp(wbIdx).cf.exceptionVec := io.exeWbResults(i).bits.uop.cf.exceptionVec
microOp(wbIdx).lqIdx := io.exeWbResults(i).bits.uop.lqIdx
microOp(wbIdx).sqIdx := io.exeWbResults(i).bits.uop.sqIdx
microOp(wbIdx).ctrl.flushPipe := io.exeWbResults(i).bits.uop.ctrl.flushPipe
microOp(wbIdx).diffTestDebugLrScValid := io.exeWbResults(i).bits.uop.diffTestDebugLrScValid
exuData(wbIdx) := io.exeWbResults(i).bits.data
exuFflags(wbIdx) := io.exeWbResults(i).bits.fflags
exuDebug(wbIdx) := io.exeWbResults(i).bits.debug
val debugUop = microOp(wbIdx)
debug_microOp(wbIdx).cf.exceptionVec := io.exeWbResults(i).bits.uop.cf.exceptionVec
debug_microOp(wbIdx).lqIdx := io.exeWbResults(i).bits.uop.lqIdx
debug_microOp(wbIdx).sqIdx := io.exeWbResults(i).bits.uop.sqIdx
debug_microOp(wbIdx).ctrl.flushPipe := io.exeWbResults(i).bits.uop.ctrl.flushPipe
debug_microOp(wbIdx).diffTestDebugLrScValid := io.exeWbResults(i).bits.uop.diffTestDebugLrScValid
debug_exuData(wbIdx) := io.exeWbResults(i).bits.data
debug_exuDebug(wbIdx) := io.exeWbResults(i).bits.debug
val debug_Uop = debug_microOp(wbIdx)
XSInfo(true.B,
p"writebacked pc 0x${Hexadecimal(debugUop.cf.pc)} wen ${debugUop.ctrl.rfWen} " +
p"data 0x${Hexadecimal(io.exeWbResults(i).bits.data)} ldst ${debugUop.ctrl.ldest} pdst ${debugUop.ctrl.ldest} " +
p"writebacked pc 0x${Hexadecimal(debug_Uop.cf.pc)} wen ${debug_Uop.ctrl.rfWen} " +
p"data 0x${Hexadecimal(io.exeWbResults(i).bits.data)} ldst ${debug_Uop.ctrl.ldest} pdst ${debug_Uop.ctrl.ldest} " +
p"skip ${io.exeWbResults(i).bits.debug.isMMIO} roqIdx: ${wbIdxExt}\n"
)
}
}
val deqUop = microOp(deqPtr)
// Interrupt
val deqUop = debug_microOp(deqPtr)
val deqPtrWritebacked = writebacked(deqPtr) && valid(deqPtr)
val intrEnable = io.csr.intrBitSet && !isEmpty && !hasNoSpecExec &&
deqUop.ctrl.commitType =/= CommitType.STORE && deqUop.ctrl.commitType =/= CommitType.LOAD// TODO: wanna check why has hasCsr(hasNoSpec)
......@@ -169,19 +204,18 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
XSDebug(io.redirect.valid,
"generate redirect: pc 0x%x intr %d excp %d flushpp %d target:0x%x Traptarget 0x%x exceptionVec %b\n",
io.exception.cf.pc, intrEnable, exceptionEnable, isFlushPipe, io.redirect.bits.target, io.csr.trapTarget,
Cat(microOp(deqPtr).cf.exceptionVec))
Cat(debug_microOp(deqPtr).cf.exceptionVec))
// Commit uop to Rename (walk)
val walkCounter = Reg(UInt(log2Up(RoqSize).W))
val shouldWalkVec = Wire(Vec(CommitWidth, Bool()))
val walkPtrMatchVec = Wire(Vec(CommitWidth, Bool()))
val walkPtrVec = Wire(Vec(CommitWidth, new RoqPtr))
for(i <- shouldWalkVec.indices){
walkPtrVec(i) := walkPtrExt - i.U
walkPtrMatchVec(i) := walkPtrVec(i) === walkTgtExt
if(i == 0) shouldWalkVec(i) := !walkPtrMatchVec(i)
else shouldWalkVec(i) := shouldWalkVec(i-1) && !walkPtrMatchVec(i)
shouldWalkVec(i) := i.U < walkCounter
}
val walkFinished = Cat(walkPtrMatchVec).orR()
val walkFinished = walkCounter <= CommitWidth.U && // walk finish in this cycle
!io.brqRedirect.valid // no new redirect comes and update walkptr
// extra space is used when the roq does not have enough room, but mispredict recovery still needs this info to walk the rename map
val needExtraSpaceForMPR = WireInit(VecInit(
......@@ -200,7 +234,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
switch(state){
is(s_idle){
val commitIdx = deqPtr + i.U
val commitUop = microOp(commitIdx)
val commitUop = debug_microOp(commitIdx)
val hasException = Cat(commitUop.cf.exceptionVec).orR() || intrEnable
val canCommit = if(i!=0) (io.commits(i-1).valid && !io.commits(i-1).bits.uop.ctrl.flushPipe) else true.B
val v = valid(commitIdx)
......@@ -226,7 +260,6 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
}
}
when(io.commits(i).valid){v := false.B}
XSInfo(io.commits(i).valid,
"retired pc %x wen %d ldest %d pdest %x old_pdest %x data %x fflags: %b\n",
commitUop.cf.pc,
......@@ -234,10 +267,10 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
commitUop.ctrl.ldest,
commitUop.pdest,
commitUop.old_pdest,
exuData(commitIdx),
debug_exuData(commitIdx),
exuFflags(commitIdx).asUInt
)
XSInfo(io.commits(i).valid && exuDebug(commitIdx).isMMIO,
XSInfo(io.commits(i).valid && debug_exuDebug(commitIdx).isMMIO,
"difftest skiped pc0x%x\n",
commitUop.cf.pc
)
......@@ -246,7 +279,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
is(s_walk){
val idx = walkPtrVec(i).value
val v = valid(idx)
val walkUop = microOp(idx)
val walkUop = debug_microOp(idx)
io.commits(i).valid := v && shouldWalkVec(i)
io.commits(i).bits.uop := walkUop
when(shouldWalkVec(i)){
......@@ -256,7 +289,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
walkUop.cf.pc,
walkUop.ctrl.rfWen,
walkUop.ctrl.ldest,
exuData(idx)
debug_exuData(idx)
)
}
......@@ -280,18 +313,18 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
io.csr.dirty_fs := dirty_fs
val validCommit = io.commits.map(_.valid)
val commitCnt = PopCount(validCommit)
when(state===s_walk) {
// exit walk state when all roq entries have been committed
when(walkFinished) {
state := s_idle
}
walkPtrExt := walkPtrExt - CommitWidth.U
// ringBufferWalkExtended := ringBufferWalkExtended - validCommit
XSInfo("rolling back: enqPtr %d deqPtr %d walk %d:%d\n", enqPtr, deqPtr, walkPtrExt.flag, walkPtr)
walkCounter := walkCounter - commitCnt
XSInfo("rolling back: enqPtr %d deqPtr %d walk %d:%d walkcnt %d\n", enqPtr, deqPtr, walkPtrExt.flag, walkPtr, walkCounter)
}
// move tail ptr
val commitCnt = PopCount(validCommit)
when(state === s_idle){
deqPtrExt := deqPtrExt + commitCnt
}
......@@ -308,7 +341,11 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
when(io.brqRedirect.valid){ // TODO: need check if consider exception redirect?
state := s_walk
walkPtrExt := Mux(state === s_walk && !walkFinished, walkPtrExt - CommitWidth.U, Mux(state === s_extrawalk, walkPtrExt, enqPtrExt - 1.U + dispatchCnt))
walkTgtExt := io.brqRedirect.bits.roqIdx
// walkTgtExt := io.brqRedirect.bits.roqIdx
walkCounter := Mux(state === s_walk,
distanceBetween(walkPtrExt, io.brqRedirect.bits.roqIdx) - commitCnt,
distanceBetween(enqPtrExt, io.brqRedirect.bits.roqIdx) + dispatchCnt -1.U,
)
enqPtrExt := io.brqRedirect.bits.roqIdx + 1.U
}
......@@ -320,24 +357,103 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
XSDebug("roq full, switched to s_extrawalk. needExtraSpaceForMPR: %b\n", needExtraSpaceForMPR.asUInt)
}
// when exception occurs, cancels all
when (io.redirect.valid) { // TODO: need check for flushPipe
enqPtrExt := 0.U.asTypeOf(new RoqPtr)
deqPtrExt := 0.U.asTypeOf(new RoqPtr)
}
// instvalid field
// write
// enqueue logic writes 6 valid
for (i <- 0 until RenameWidth) {
when(io.enq.req(i).fire()){
valid(enqPtrValPlus(i)) := true.B
}
}
// dequeue/walk logic writes 6 valid, dequeue and walk will not happen at the same time
for(i <- 0 until CommitWidth){
switch(state){
is(s_idle){
when(io.commits(i).valid){valid(deqPtrExtPlus(i)) := false.B}
}
is(s_walk){
val idx = walkPtrVec(i).value
when(shouldWalkVec(i)){
valid(idx) := false.B
}
}
}
}
// read
// enqueue logic reads 6 valid
// dequeue/walk logic reads 6 valid, dequeue and walk will not happen at the same time
// rollback reads all valid? is it necessary?
// reset
// when exception, reset all valid to false
when (io.redirect.valid) {
for (i <- 0 until RoqSize) {
valid(i) := false.B
}
}
// status field: writebacked
// write
// enqueue logic set 6 writebacked to false
for (i <- 0 until RenameWidth) {
when(io.enq.req(i).fire()){
writebacked(enqPtrValPlus(i)) := false.B
}
}
// writeback logic set numWbPorts writebacked to true
for(i <- 0 until numWbPorts){
when(io.exeWbResults(i).fire()){
val wbIdxExt = io.exeWbResults(i).bits.uop.roqIdx
val wbIdx = wbIdxExt.value
writebacked(wbIdx) := true.B
}
}
// rollback: write all
// on rollback, clear the writebacked bit of valid entries younger than the redirect point
when(io.memRedirect.valid) { // TODO: opt timing
for (i <- 0 until RoqSize) {
val recRoqIdx = RoqPtr(flag(i), i.U)
val recRoqIdx = RoqPtr(flagBkup(i), i.U)
when (valid(i) && isAfter(recRoqIdx, io.memRedirect.bits.roqIdx)) {
writebacked(i) := false.B
}
}
}
// when exception occurs, cancels all
when (io.redirect.valid) { // TODO: need check for flushPipe
enqPtrExt := 0.U.asTypeOf(new RoqPtr)
deqPtrExt := 0.U.asTypeOf(new RoqPtr)
for (i <- 0 until RoqSize) {
valid(i) := false.B
// read
// deqPtrWritebacked
// gen io.commits(i).valid read 6 (CommitWidth)
// flagBkup
// write: update when enqueue
// enqueue logic set 6 flagBkup at most
for (i <- 0 until RenameWidth) {
when(io.enq.req(i).fire()){
flagBkup(enqPtrValPlus(i)) := enqPtrFlagPlus(i)
}
}
// read: used in rollback logic
// all flagBkup will be used
// exuFflags
// write: writeback logic set numWbPorts exuFflags
for(i <- 0 until numWbPorts){
when(io.exeWbResults(i).fire()){
val wbIdxExt = io.exeWbResults(i).bits.uop.roqIdx
val wbIdx = wbIdxExt.value
exuFflags(wbIdx) := io.exeWbResults(i).bits.fflags
}
}
// read: used in commit logic
// read CommitWidth exuFflags
// debug info
XSDebug(p"enqPtr ${enqPtrExt} deqPtr ${deqPtrExt}\n")
......@@ -351,7 +467,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
for(i <- 0 until RoqSize){
if(i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x ", microOp(i).cf.pc)
XSDebug(false, true.B, "%x ", debug_microOp(i).cf.pc)
XSDebug(false, !valid(i), "- ")
XSDebug(false, valid(i) && writebacked(i), "w ")
XSDebug(false, valid(i) && !writebacked(i), "v ")
......@@ -383,16 +499,16 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val uop = io.commits(i).bits.uop
val DifftestSkipSC = false
if(!DifftestSkipSC){
skip(i) := exuDebug(idx).isMMIO && io.commits(i).valid
skip(i) := debug_exuDebug(idx).isMMIO && io.commits(i).valid
}else{
skip(i) := (
exuDebug(idx).isMMIO ||
debug_exuDebug(idx).isMMIO ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w
) && io.commits(i).valid
}
wen(i) := io.commits(i).valid && uop.ctrl.rfWen && uop.ctrl.ldest =/= 0.U
wdata(i) := exuData(idx)
wdata(i) := debug_exuData(idx)
wdst(i) := uop.ctrl.ldest
diffTestDebugLrScValid(i) := uop.diffTestDebugLrScValid
wpc(i) := SignExt(uop.cf.pc, XLEN)
......@@ -409,8 +525,8 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
XSDebug(difftestIntrNO =/= 0.U, "difftest intrNO set %x\n", difftestIntrNO)
val retireCounterFix = Mux(io.redirect.valid, 1.U, retireCounter)
val retirePCFix = SignExt(Mux(io.redirect.valid, microOp(deqPtr).cf.pc, microOp(firstValidCommit).cf.pc), XLEN)
val retireInstFix = Mux(io.redirect.valid, microOp(deqPtr).cf.instr, microOp(firstValidCommit).cf.instr)
val retirePCFix = SignExt(Mux(io.redirect.valid, debug_microOp(deqPtr).cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN)
val retireInstFix = Mux(io.redirect.valid, debug_microOp(deqPtr).cf.instr, debug_microOp(firstValidCommit).cf.instr)
ExcitingUtils.addSource(RegNext(retireCounterFix), "difftestCommit", ExcitingUtils.Debug)
ExcitingUtils.addSource(RegNext(retirePCFix), "difftestThisPC", ExcitingUtils.Debug)//first valid PC
......
......@@ -58,31 +58,31 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
val pending = Reg(Vec(LoadQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
val ringBufferHeadExtended = RegInit(0.U.asTypeOf(new LqPtr))
val ringBufferTailExtended = RegInit(0.U.asTypeOf(new LqPtr))
val ringBufferHead = ringBufferHeadExtended.value
val ringBufferTail = ringBufferTailExtended.value
val ringBufferSameFlag = ringBufferHeadExtended.flag === ringBufferTailExtended.flag
val ringBufferEmpty = ringBufferHead === ringBufferTail && ringBufferSameFlag
val ringBufferFull = ringBufferHead === ringBufferTail && !ringBufferSameFlag
val ringBufferAllowin = !ringBufferFull
val enqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
val deqPtrExt = RegInit(0.U.asTypeOf(new LqPtr))
val enqPtr = enqPtrExt.value
val deqPtr = deqPtrExt.value
val sameFlag = enqPtrExt.flag === deqPtrExt.flag
val isEmpty = enqPtr === deqPtr && sameFlag
val isFull = enqPtr === deqPtr && !sameFlag
val allowIn = !isFull
val loadCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.LOAD)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.lqIdx.value)
val tailMask = (((1.U((LoadQueueSize + 1).W)) << ringBufferTail).asUInt - 1.U)(LoadQueueSize - 1, 0)
val headMask = (((1.U((LoadQueueSize + 1).W)) << ringBufferHead).asUInt - 1.U)(LoadQueueSize - 1, 0)
val tailMask = (((1.U((LoadQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0)
val headMask = (((1.U((LoadQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0)
val enqDeqMask1 = tailMask ^ headMask
val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)
val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
// Enqueue at dispatch
val validEntries = distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
val validEntries = distanceBetween(enqPtrExt, deqPtrExt)
val firedDispatch = io.enq.req.map(_.valid)
io.enq.canAccept := validEntries <= (LoadQueueSize - RenameWidth).U
XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
for (i <- 0 until RenameWidth) {
val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
val lqIdx = ringBufferHeadExtended + offset
val lqIdx = enqPtrExt + offset
val index = lqIdx.value
when(io.enq.req(i).valid) {
uop(index) := io.enq.req(i).bits
......@@ -100,7 +100,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
}
when(Cat(firedDispatch).orR) {
ringBufferHeadExtended := ringBufferHeadExtended + PopCount(firedDispatch)
enqPtrExt := enqPtrExt + PopCount(firedDispatch)
XSInfo("dispatched %d insts to lq\n", PopCount(firedDispatch))
}
......@@ -306,10 +306,10 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
// move tailPtr
// allocatedMask: dequeuePtr can go to the next 1-bit
val allocatedMask = VecInit((0 until LoadQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
// find the first one from deqPtr (ringBufferTail)
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, ringBufferTailExtended.flag)
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, ringBufferHeadExtended)
ringBufferTailExtended := nextTail
// find the first allocated entry starting from deqPtr
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, deqPtrExt.flag)
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, enqPtrExt)
deqPtrExt := nextTail
// When load commited, mark it as !allocated, this entry will be recycled later
(0 until CommitWidth).map(i => {
......@@ -380,7 +380,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
val startIndex = io.storeIn(i).bits.uop.lqIdx.value
val lqIdxMask = ((1.U((LoadQueueSize + 1).W) << startIndex).asUInt - 1.U)(LoadQueueSize - 1, 0)
val xorMask = lqIdxMask ^ headMask
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === ringBufferHeadExtended.flag
val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if load already in lq needs to be rolledback
......@@ -473,42 +473,42 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
// setup misc mem access req
// mask / paddr / data can be get from lq.data
val commitType = io.commits(0).bits.uop.ctrl.commitType
io.uncache.req.valid := pending(ringBufferTail) && allocated(ringBufferTail) &&
io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
commitType === CommitType.LOAD &&
io.roqDeqPtr === uop(ringBufferTail).roqIdx &&
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits(0).bits.isWalk
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.addr := dataModule.io.rdata(ringBufferTail).paddr
io.uncache.req.bits.data := dataModule.io.rdata(ringBufferTail).data
io.uncache.req.bits.mask := dataModule.io.rdata(ringBufferTail).mask
io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.id := DontCare // TODO: // FIXME
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(ringBufferTail).paddr
io.uncache.req.bits.meta.uop := uop(ringBufferTail)
io.uncache.req.bits.meta.mmio := true.B // dataModule.io.rdata(ringBufferTail).mmio
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B // dataModule.io.rdata(deqPtr).mmio
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.rdata(ringBufferTail).mask
io.uncache.req.bits.meta.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
when(io.uncache.req.fire()){
pending(ringBufferTail) := false.B
pending(deqPtr) := false.B
}
dataModule.io.uncache.wen := false.B
when(io.uncache.resp.fire()){
valid(ringBufferTail) := true.B
dataModule.io.uncacheWrite(ringBufferTail, io.uncache.resp.bits.data(XLEN-1, 0))
valid(deqPtr) := true.B
dataModule.io.uncacheWrite(deqPtr, io.uncache.resp.bits.data(XLEN-1, 0))
dataModule.io.uncache.wen := true.B
// TODO: write back exception info
}
when(io.uncache.req.fire()){
XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
uop(ringBufferTail).cf.pc,
uop(deqPtr).cf.pc,
io.uncache.req.bits.addr,
io.uncache.req.bits.data,
io.uncache.req.bits.cmd,
......@@ -541,7 +541,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
}
}
when (io.brqRedirect.valid && io.brqRedirect.bits.isMisPred) {
ringBufferHeadExtended := ringBufferHeadExtended - PopCount(needCancel)
enqPtrExt := enqPtrExt - PopCount(needCancel)
}
// assert(!io.rollback.valid)
......@@ -550,7 +550,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP
}
// debug info
XSDebug("head %d:%d tail %d:%d\n", ringBufferHeadExtended.flag, ringBufferHead, ringBufferTailExtended.flag, ringBufferTail)
XSDebug("head %d:%d tail %d:%d\n", enqPtrExt.flag, enqPtr, deqPtrExt.flag, deqPtr)
def PrintFlag(flag: Bool, name: String): Unit = {
when(flag) {
......
......@@ -52,31 +52,31 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been commited by roq
val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
val ringBufferHeadExtended = RegInit(0.U.asTypeOf(new SqPtr))
val ringBufferTailExtended = RegInit(0.U.asTypeOf(new SqPtr))
val ringBufferHead = ringBufferHeadExtended.value
val ringBufferTail = ringBufferTailExtended.value
val ringBufferSameFlag = ringBufferHeadExtended.flag === ringBufferTailExtended.flag
val ringBufferEmpty = ringBufferHead === ringBufferTail && ringBufferSameFlag
val ringBufferFull = ringBufferHead === ringBufferTail && !ringBufferSameFlag
val ringBufferAllowin = !ringBufferFull
val enqPtrExt = RegInit(0.U.asTypeOf(new SqPtr))
val deqPtrExt = RegInit(0.U.asTypeOf(new SqPtr))
val enqPtr = enqPtrExt.value
val deqPtr = deqPtrExt.value
val sameFlag = enqPtrExt.flag === deqPtrExt.flag
val isEmpty = enqPtr === deqPtr && sameFlag
val isFull = enqPtr === deqPtr && !sameFlag
val allowIn = !isFull
val storeCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.STORE)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.sqIdx.value)
val tailMask = (((1.U((StoreQueueSize + 1).W)) << ringBufferTail).asUInt - 1.U)(StoreQueueSize - 1, 0)
val headMask = (((1.U((StoreQueueSize + 1).W)) << ringBufferHead).asUInt - 1.U)(StoreQueueSize - 1, 0)
val tailMask = (((1.U((StoreQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0)
val headMask = (((1.U((StoreQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0)
val enqDeqMask1 = tailMask ^ headMask
val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)
val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1)
// Enqueue at dispatch. NOTE(review): io.enq.canAccept below compares validEntries against (LoadQueueSize - RenameWidth); in the store queue this presumably should be StoreQueueSize — confirm.
val validEntries = distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
val validEntries = distanceBetween(enqPtrExt, deqPtrExt)
val firedDispatch = io.enq.req.map(_.valid)
io.enq.canAccept := validEntries <= (LoadQueueSize - RenameWidth).U
XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(firedDispatch))}\n")
for (i <- 0 until RenameWidth) {
val offset = if (i == 0) 0.U else PopCount((0 until i).map(firedDispatch(_)))
val sqIdx = ringBufferHeadExtended + offset
val sqIdx = enqPtrExt + offset
val index = sqIdx.value
when(io.enq.req(i).valid) {
uop(index) := io.enq.req(i).bits
......@@ -92,7 +92,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
}
when(Cat(firedDispatch).orR) {
ringBufferHeadExtended := ringBufferHeadExtended + PopCount(firedDispatch)
enqPtrExt := enqPtrExt + PopCount(firedDispatch)
XSInfo("dispatched %d insts to sq\n", PopCount(firedDispatch))
}
......@@ -180,7 +180,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
val validStoreVec = VecInit((0 until StoreQueueSize).map(i => !(allocated(i) && datavalid(i))))
val storeNotValid = SqPtr(false.B, getFirstOne(validStoreVec, tailMask))
val storeValidIndex = (storeNotValid - 1.U).value
io.oldestStore.valid := allocated(ringBufferTailExtended.value) && datavalid(ringBufferTailExtended.value) && !commited(storeValidIndex)
io.oldestStore.valid := allocated(deqPtrExt.value) && datavalid(deqPtrExt.value) && !commited(storeValidIndex)
io.oldestStore.bits := uop(storeValidIndex).roqIdx
// writeback up to 2 store insts to CDB
......@@ -200,6 +200,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.stout(i).valid := storeWbSelVec(storeWbSel(i)) && storeWbValid(i)
when(io.stout(i).fire()) {
writebacked(storeWbSel(i)) := true.B
when(dataModule.io.rdata(storeWbSel(i)).mmio) {
allocated(storeWbSel(i)) := false.B // potential opt: move deqPtr immediately
}
}
io.stout(i).bits.fflags := DontCare
})
......@@ -210,19 +213,19 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// TailPtr slow recovery: recycle bubbles in store queue
// allocatedMask: dequeuePtr can go to the next 1-bit
val allocatedMask = VecInit((0 until StoreQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
// find the first one from deqPtr (ringBufferTail)
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, ringBufferTailExtended.flag)
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, ringBufferHeadExtended)
ringBufferTailExtended := nextTail
// find the first allocated entry starting from deqPtr
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, deqPtrExt.flag)
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, enqPtrExt)
deqPtrExt := nextTail
// TailPtr fast recovery
val tailRecycle = VecInit(List(
io.uncache.resp.fire() || io.sbuffer(0).fire(),
io.sbuffer(1).fire()
))
// val tailRecycle = VecInit(List(
// io.uncache.resp.fire() || io.sbuffer(0).fire(),
// io.sbuffer(1).fire()
// ))
when(tailRecycle.asUInt.orR){
ringBufferTailExtended := ringBufferTailExtended + PopCount(tailRecycle.asUInt)
when(io.sbuffer(0).fire()){
deqPtrExt := deqPtrExt + Mux(io.sbuffer(1).fire(), 2.U, 1.U)
}
// load forward query
......@@ -231,14 +234,14 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.forward(i).forwardMask := 0.U(8.W).asBools
io.forward(i).forwardData := DontCare
// Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
// Compare deqPtr and forward.sqIdx, we have two cases:
// (1) if they have the same flag, we need to check range(tail, sqIdx)
// (2) if they have different flags, we need to check range(tail, StoreQueueSize) and range(0, sqIdx)
// Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, StoreQueueSize))
// Forward2: Mux(same_flag, 0.U, range(0, sqIdx) )
// i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
val differentFlag = ringBufferTailExtended.flag =/= io.forward(i).sqIdx.flag
val differentFlag = deqPtrExt.flag =/= io.forward(i).sqIdx.flag
val forwardMask = ((1.U((StoreQueueSize + 1).W)) << io.forward(i).sqIdx.value).asUInt - 1.U
val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B)))
for (j <- 0 until StoreQueueSize) {
......@@ -270,7 +273,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
})
(0 until 2).map(i => {
val ptr = (ringBufferTailExtended + i.U).value
val ptr = (deqPtrExt + i.U).value
val mmio = dataModule.io.rdata(ptr).mmio
io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio
io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR
......@@ -294,39 +297,39 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
// setup misc mem access req
// mask / paddr / data can be get from sq.data
val commitType = io.commits(0).bits.uop.ctrl.commitType
io.uncache.req.valid := pending(ringBufferTail) && allocated(ringBufferTail) &&
io.uncache.req.valid := pending(deqPtr) && allocated(deqPtr) &&
commitType === CommitType.STORE &&
io.roqDeqPtr === uop(ringBufferTail).roqIdx &&
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits(0).bits.isWalk
io.uncache.req.bits.cmd := MemoryOpConstants.M_XWR
io.uncache.req.bits.addr := dataModule.io.rdata(ringBufferTail).paddr
io.uncache.req.bits.data := dataModule.io.rdata(ringBufferTail).data
io.uncache.req.bits.mask := dataModule.io.rdata(ringBufferTail).mask
io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.id := DontCare // TODO: // FIXME
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(ringBufferTail).paddr
io.uncache.req.bits.meta.uop := uop(ringBufferTail)
io.uncache.req.bits.meta.mmio := true.B // dataModule.io.rdata(ringBufferTail).mmio
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B // dataModule.io.rdata(deqPtr).mmio
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.rdata(ringBufferTail).mask
io.uncache.req.bits.meta.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
when(io.uncache.req.fire()){
pending(ringBufferTail) := false.B
pending(deqPtr) := false.B
}
when(io.uncache.resp.fire()){
datavalid(ringBufferTail) := true.B // will be writeback to CDB in the next cycle
datavalid(deqPtr) := true.B // will be writeback to CDB in the next cycle
// TODO: write back exception info
}
when(io.uncache.req.fire()){
XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
uop(ringBufferTail).cf.pc,
uop(deqPtr).cf.pc,
io.uncache.req.bits.addr,
io.uncache.req.bits.data,
io.uncache.req.bits.cmd,
......@@ -353,11 +356,11 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
}
}
when (io.brqRedirect.valid && io.brqRedirect.bits.isMisPred) {
ringBufferHeadExtended := ringBufferHeadExtended - PopCount(needCancel)
enqPtrExt := enqPtrExt - PopCount(needCancel)
}
// debug info
XSDebug("head %d:%d tail %d:%d\n", ringBufferHeadExtended.flag, ringBufferHead, ringBufferTailExtended.flag, ringBufferTail)
XSDebug("head %d:%d tail %d:%d\n", enqPtrExt.flag, enqPtr, deqPtrExt.flag, deqPtr)
def PrintFlag(flag: Bool, name: String): Unit = {
when(flag) {
......
......@@ -72,7 +72,8 @@ class LoadUnit_S0 extends XSModule {
"b11".U -> (s0_vaddr(2, 0) === 0.U) //d
))
io.out.valid := io.dcacheReq.fire() // dcache may not accept load request
io.out.valid := io.dcacheReq.fire() && // dcache may not accept load request
!io.in.bits.uop.roqIdx.needFlush(io.redirect)
io.out.bits := DontCare
io.out.bits.vaddr := s0_vaddr
io.out.bits.paddr := s0_paddr
......@@ -142,7 +143,7 @@ class LoadUnit_S1 extends XSModule {
io.sbuffer.forwardData.asUInt, io.sbuffer.forwardMask.asUInt
)
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.bits.paddr := s1_paddr
io.out.bits.mmio := s1_mmio
io.out.bits.tlbMiss := s1_tlb_miss
......@@ -219,6 +220,17 @@ class LoadUnit_S2 extends XSModule {
}
// class LoadUnit_S3 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
class LoadUnit extends XSModule {
val io = IO(new Bundle() {
......@@ -235,6 +247,7 @@ class LoadUnit extends XSModule {
val load_s0 = Module(new LoadUnit_S0)
val load_s1 = Module(new LoadUnit_S1)
val load_s2 = Module(new LoadUnit_S2)
// val load_s3 = Module(new LoadUnit_S3)
load_s0.io.in <> io.ldin
load_s0.io.redirect <> io.redirect
......@@ -243,7 +256,7 @@ class LoadUnit extends XSModule {
load_s0.io.dcacheReq <> io.dcache.req
load_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(load_s0.io.out, load_s1.io.in, load_s1.io.out.fire() || load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect), false.B)
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, false.B)
io.dcache.s1_paddr := load_s1.io.out.bits.paddr
load_s1.io.redirect <> io.redirect
......@@ -251,11 +264,14 @@ class LoadUnit extends XSModule {
io.sbuffer <> load_s1.io.sbuffer
io.lsq.forward <> load_s1.io.lsq
PipelineConnect(load_s1.io.out, load_s2.io.in, load_s2.io.out.fire() || load_s1.io.out.bits.tlbMiss, false.B)
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, false.B)
load_s2.io.redirect <> io.redirect
load_s2.io.dcacheResp <> io.dcache.resp
// PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B)
// load_s3.io.redirect <> io.redirect
XSDebug(load_s0.io.out.valid,
p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
p"vaddr ${Hexadecimal(load_s0.io.out.bits.vaddr)}, mask ${Hexadecimal(load_s0.io.out.bits.mask)}\n")
......
......@@ -4,135 +4,139 @@ import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache.{TlbRequestIO, TlbCmd}
import xiangshan.cache._
class StoreUnit extends XSModule {
// Store Pipeline Stage 0
// Generate addr, use addr to query DCache and DTLB
class StoreUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val stin = Flipped(Decoupled(new ExuInput))
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dtlb = new TlbRequestIO()
val lsq = ValidIO(new LsPipelineBundle)
})
//-------------------------------------------------------
// Store Pipeline
//-------------------------------------------------------
val s2_out = Wire(Decoupled(new LsPipelineBundle))
val s3_in = Wire(Decoupled(new LsPipelineBundle))
private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = {
XSDebug(cond,
p"$name" + p" pc ${Hexadecimal(pipeline.uop.cf.pc)} " +
p"addr ${Hexadecimal(pipeline.vaddr)} -> ${Hexadecimal(pipeline.paddr)} " +
p"op ${Binary(pipeline.uop.ctrl.fuOpType)} " +
p"data ${Hexadecimal(pipeline.data)} " +
p"mask ${Hexadecimal(pipeline.mask)}\n"
)
}
printPipeLine(s2_out.bits, s2_out.valid, "S2")
// TODO: is this nesscary ?
XSDebug(s2_out.fire(), "store req: pc 0x%x addr 0x%x -> 0x%x op %b data 0x%x\n",
s2_out.bits.uop.cf.pc,
s2_out.bits.vaddr,
s2_out.bits.paddr,
s2_out.bits.uop.ctrl.fuOpType,
s2_out.bits.data
)
printPipeLine(s3_in.bits, s3_in.valid, "S3")
//-------------------------------------------------------
// ST Pipeline Stage 2
// Generate addr, use addr to query DTLB
//-------------------------------------------------------
// send req to dtlb
val saddr = io.stin.bits.src1 + io.stin.bits.uop.ctrl.imm
io.dtlb.req.bits.vaddr := saddr
io.dtlb.req.valid := io.stin.valid
io.dtlb.req.bits.cmd := TlbCmd.write
io.dtlb.req.bits.roqIdx := io.stin.bits.uop.roqIdx
io.dtlb.req.bits.debug.pc := io.stin.bits.uop.cf.pc
io.dtlb.resp.ready := s2_out.ready
s2_out.bits := DontCare
s2_out.bits.vaddr := saddr
s2_out.bits.paddr := io.dtlb.resp.bits.paddr
s2_out.bits.data := genWdata(io.stin.bits.src2, io.stin.bits.uop.ctrl.fuOpType(1,0))
s2_out.bits.uop := io.stin.bits.uop
s2_out.bits.miss := io.dtlb.resp.bits.miss
s2_out.bits.mask := genWmask(s2_out.bits.vaddr, io.stin.bits.uop.ctrl.fuOpType(1,0))
s2_out.valid := io.stin.valid && !io.dtlb.resp.bits.miss && !s2_out.bits.uop.roqIdx.needFlush(io.redirect)
io.stin.ready := s2_out.ready
val saddr = io.in.bits.src1 + io.in.bits.uop.ctrl.imm
io.dtlbReq.bits.vaddr := saddr
io.dtlbReq.valid := io.in.valid
io.dtlbReq.bits.cmd := TlbCmd.write
io.dtlbReq.bits.roqIdx := io.in.bits.uop.roqIdx
io.dtlbReq.bits.debug.pc := io.in.bits.uop.cf.pc
io.dtlbResp.ready := true.B // TODO: why dtlbResp needs a ready?
io.out.bits := DontCare
io.out.bits.vaddr := saddr
io.out.bits.paddr := io.dtlbResp.bits.paddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := io.dtlbResp.bits.miss
io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.valid := io.in.valid && !io.dtlbResp.bits.miss && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
io.in.ready := io.out.ready
// exception check
val addrAligned = LookupTree(io.stin.bits.uop.ctrl.fuOpType(1,0), List(
val addrAligned = LookupTree(io.in.bits.uop.ctrl.fuOpType(1,0), List(
"b00".U -> true.B, //b
"b01".U -> (s2_out.bits.vaddr(0) === 0.U), //h
"b10".U -> (s2_out.bits.vaddr(1,0) === 0.U), //w
"b11".U -> (s2_out.bits.vaddr(2,0) === 0.U) //d
"b01".U -> (io.out.bits.vaddr(0) === 0.U), //h
"b10".U -> (io.out.bits.vaddr(1,0) === 0.U), //w
"b11".U -> (io.out.bits.vaddr(2,0) === 0.U) //d
))
s2_out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
s2_out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlb.resp.bits.excp.pf.st
PipelineConnect(s2_out, s3_in, true.B, false.B)
//-------------------------------------------------------
// ST Pipeline Stage 3
// Write paddr to LSQ
//-------------------------------------------------------
io.out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
// Send TLB feedback to store issue queue
io.tlbFeedback.valid := RegNext(io.stin.valid && s2_out.ready)
io.tlbFeedback.bits.hit := RegNext(!s2_out.bits.miss)
io.tlbFeedback.bits.roqIdx := RegNext(s2_out.bits.uop.roqIdx)
// TODO: should be moved to S1
io.tlbFeedback.valid := RegNext(io.in.valid && io.out.ready)
io.tlbFeedback.bits.hit := RegNext(!io.out.bits.miss)
io.tlbFeedback.bits.roqIdx := RegNext(io.out.bits.uop.roqIdx)
XSDebug(io.tlbFeedback.valid,
"S3 Store: tlbHit: %d roqIdx: %d\n",
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
}
// Load Pipeline Stage 1
// TLB resp (send paddr to dcache)
class StoreUnit_S1 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
// val fp_out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
})
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
// writeback to LSQ
s3_in.ready := true.B
io.lsq.bits := s3_in.bits
io.lsq.bits.miss := false.B
io.lsq.bits.mmio := AddressSpace.isMMIO(s3_in.bits.paddr)
io.lsq.valid := s3_in.fire()
//-------------------------------------------------------
// ST Pipeline Stage 4
// Store writeback, send store request to store buffer
//-------------------------------------------------------
io.in.ready := true.B
io.out.bits := io.in.bits
io.out.bits.miss := false.B
io.out.bits.mmio := AddressSpace.isMMIO(io.in.bits.paddr)
io.out.valid := io.in.fire() // TODO: && ! FP
// if fp
// io.fp_out.valid := ...
// io.fp_out.bits := ...
}
// class StoreUnit_S2 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
// Writeback to CDB
// (0 until LoadPipelineWidth).map(i => {
// io.ldout <> hitLoadOut
// })
class StoreUnit extends XSModule {
val io = IO(new Bundle() {
val stin = Flipped(Decoupled(new ExuInput))
val redirect = Flipped(ValidIO(new Redirect))
val tlbFeedback = ValidIO(new TlbFeedback)
val dtlb = new TlbRequestIO()
val lsq = ValidIO(new LsPipelineBundle)
})
//-------------------------------------------------------
// ST Pipeline Async Stage 1
// Read paddr from store buffer, query DTAG in DCache
//-------------------------------------------------------
val store_s0 = Module(new StoreUnit_S0)
val store_s1 = Module(new StoreUnit_S1)
// val store_s2 = Module(new StoreUnit_S2)
store_s0.io.in <> io.stin
store_s0.io.redirect <> io.redirect
store_s0.io.dtlbReq <> io.dtlb.req
store_s0.io.dtlbResp <> io.dtlb.resp
store_s0.io.tlbFeedback <> io.tlbFeedback
//-------------------------------------------------------
// ST Pipeline Async Stage 2
// DTAG compare, write data to DCache
//-------------------------------------------------------
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, false.B)
// PipelineConnect(store_s1.io.fp_out, store_s2.io.in, true.B, false.B)
// Done in DCache
store_s1.io.redirect <> io.redirect
// send result to sq
io.lsq.valid := store_s1.io.out.valid
io.lsq.bits := store_s1.io.out.bits
//-------------------------------------------------------
// ST Pipeline Async Stage 2
// DCache miss / Shared cache wirte
//-------------------------------------------------------
store_s1.io.out.ready := true.B
private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = {
XSDebug(cond,
p"$name" + p" pc ${Hexadecimal(pipeline.uop.cf.pc)} " +
p"addr ${Hexadecimal(pipeline.vaddr)} -> ${Hexadecimal(pipeline.paddr)} " +
p"op ${Binary(pipeline.uop.ctrl.fuOpType)} " +
p"data ${Hexadecimal(pipeline.data)} " +
p"mask ${Hexadecimal(pipeline.mask)}\n"
)
}
// update store buffer according to store fill buffer
printPipeLine(store_s0.io.out.bits, store_s0.io.out.valid, "S0")
printPipeLine(store_s1.io.out.bits, store_s1.io.out.valid, "S1")
}
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册