未验证 提交 31ebfb1d 编写于 作者: Y YikeZhou 提交者: GitHub

backend, rename: support elimination of move instruction whose lsrc is 0 + bug fix (#1008)

* backend, rename: support elimination of mv inst whose lsrc=0
[known bug] instruction page fault is not properly raised after sfence.vma

* backend, roq: [bug fix] don't mark an eliminated move (ME) that carries an exception as writebacked
上级 842f7991
......@@ -300,7 +300,8 @@ trait HasXSParameter {
val EnableIntMoveElim = coreParams.EnableIntMoveElim
val IntRefCounterWidth = coreParams.IntRefCounterWidth
val StdFreeListSize = NRPhyRegs - 32
val MEFreeListSize = NRPhyRegs - { if (IntRefCounterWidth > 0 && IntRefCounterWidth < 5) (32 / Math.pow(2, IntRefCounterWidth)).toInt else 1 }
// val MEFreeListSize = NRPhyRegs - { if (IntRefCounterWidth > 0 && IntRefCounterWidth < 5) (32 / Math.pow(2, IntRefCounterWidth)).toInt else 1 }
val MEFreeListSize = NRPhyRegs
val LoadQueueSize = coreParams.LoadQueueSize
val StoreQueueSize = coreParams.StoreQueueSize
val dpParams = coreParams.dpParams
......
......@@ -504,6 +504,7 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
cs.fpu := fpDecoder.io.fpCtrl
val isMove = BitPat("b000000000000_?????_000_?????_0010011") === ctrl_flow.instr
cs.isMove := isMove
// read src1~3 location
cs.lsrc(0) := Mux(ctrl_flow.instr === LUI, 0.U,ctrl_flow.instr(RS1_MSB,RS1_LSB))
......@@ -536,8 +537,6 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
}
))
cs.isMove := isMove && cs.lsrc(0) =/= 0.U /* TODO these special Move instructions can be optimized */
cf_ctrl.ctrl := cs
// TODO: do we still need this?
......
......@@ -186,7 +186,7 @@ class Rename(implicit p: Parameters) extends XSModule {
if (i == 0) {
// calculate meEnable
meEnable(i) := isMove(i) && !isMax.get(uops(i).psrc(0))
meEnable(i) := isMove(i) && (!isMax.get(uops(i).psrc(0)) || uops(i).ctrl.lsrc(0) === 0.U)
} else {
// compare psrc0
psrc_cmp(i-1) := Cat((0 until i).map(j => {
......@@ -194,7 +194,7 @@ class Rename(implicit p: Parameters) extends XSModule {
}) /* reverse is not necessary here */)
// calculate meEnable
meEnable(i) := isMove(i) && !(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax.get(uops(i).psrc(0)))
meEnable(i) := isMove(i) && (!(io.renameBypass.lsrc1_bypass(i-1).orR | psrc_cmp(i-1).orR | isMax.get(uops(i).psrc(0))) || uops(i).ctrl.lsrc(0) === 0.U)
}
uops(i).eliminatedMove := meEnable(i) || (uops(i).ctrl.isMove && uops(i).ctrl.ldest === 0.U)
......
......@@ -147,37 +147,24 @@ class MEFreeList(implicit val p: config.Parameters) extends MultiIOModule with M
val (pdests_is_last, pdests_has_same_before, pdests_times) = getCompareResult(pdests_cmp)
for (i <- 0 until CommitWidth) {
XSDebug(p"decReq:${freeReq(i)},dec_old_pdst:${freePhyReg(i)},dec_is_me:${eliminatedMove(i)},dec_pdest:${multiRefPhyReg(i)}(isWalk:${walk})\n")
val preg = freeRegCandidates(i) // physical register waiting for freeing
oldPdestIsUnique(i) := old_pdests_is_last(i) && !old_pdests_has_same_before(i)
oldPdestNotUniqueButLast(i) := old_pdests_is_last(i) && old_pdests_has_same_before(i)
XSDebug(freeReq(i), p"port[$i]:old_pdest:${freePhyReg(i)},isUnique:${oldPdestIsUnique(i)},notUniqueButLast:${oldPdestNotUniqueButLast(i)}\n")
pdestIsUnique(i) := pdests_is_last(i) && !pdests_has_same_before(i)
pdestNotUniqueButLast(i) := pdests_is_last(i) && pdests_has_same_before(i)
XSDebug(freeReq(i) && eliminatedMove(i), p"port[$i]:pdest:${multiRefPhyReg(i)},isUnique:${pdestIsUnique(i)},notUniqueButLast:${pdestNotUniqueButLast(i)}\n")
freeVec(i) := ((oldPdestIsUnique(i) && (cmtCounter(preg) === Mux(updateSpecRefCounter(preg), specRefCounterNext(preg), specRefCounter(preg))))
|| (oldPdestNotUniqueButLast(i) && (cmtCounter(preg) + old_pdests_times(i) === Mux(updateSpecRefCounter(preg), specRefCounterNext(preg), specRefCounter(preg))))) && freeReq(i) && !walk
|| (oldPdestNotUniqueButLast(i) && (cmtCounter(preg) + old_pdests_times(i) === Mux(updateSpecRefCounter(preg), specRefCounterNext(preg), specRefCounter(preg))))) &&
freeReq(i) && freePhyReg(i) =/= 0.U && !walk
updateCmtCounterVec(i) := freeReq(i) && (oldPdestIsUnique(i) || oldPdestNotUniqueButLast(i)) && !walk
XSDebug(p"port[$i]cmtCounterInfo:plus_1=${cmtCounter(preg) + 1.U},plus_1_plus_times=${cmtCounter(preg) + 1.U + old_pdests_times(i)}\n")
XSDebug(p"port[$i]cmtCounterCtl:plus_1=${(freeReq(i) && oldPdestIsUnique(i)).asBool()},plus_1_plus_times=${freeReq(i) && oldPdestNotUniqueButLast(i)},clear=${freeVec(i)}\n")
updateCmtCounterVec(i) := freeReq(i) && (oldPdestIsUnique(i) || oldPdestNotUniqueButLast(i)) && freePhyReg(i) =/= 0.U && !walk
updateArchRefCounterVec(i) := freeReq(i) && eliminatedMove(i) && (pdestIsUnique(i) || pdestNotUniqueButLast(i)) && freePhyReg(i) =/= 0.U && !walk
updateArchRefCounterVec(i) := freeReq(i) && eliminatedMove(i) && (pdestIsUnique(i) || pdestNotUniqueButLast(i)) && !walk
XSDebug((specRefCounter(preg) === 0.U) && freeVec(i), p"normal preg free, preg:${preg}\n")
XSDebug((cmtCounter(preg) === specRefCounter(preg) && (specRefCounter(preg) =/= 0.U)) && freeVec(i), p"multi referenced preg free, preg:${preg}\n")
decreaseSpecRefCounterVec(i) := freeReq(i) && eliminatedMove(i) && walk && (pdestIsUnique(i) || pdestNotUniqueButLast(i))
decreaseSpecRefCounterVec(i) := freeReq(i) && eliminatedMove(i) && freePhyReg(i) =/= 0.U && walk && (pdestIsUnique(i) || pdestNotUniqueButLast(i))
decreaseSpecRefCounterValueVec(i) := pdests_times(i) + 1.U
......@@ -192,6 +179,14 @@ class MEFreeList(implicit val p: config.Parameters) extends MultiIOModule with M
freeList(idx) := freeRegCandidates(i)
XSDebug(p"[$i] Free List enqueue: [ preg ${freeRegCandidates(i)} ]\n")
}
XSDebug(p"decReq:${freeReq(i)},dec_old_pdst:${freePhyReg(i)},dec_is_me:${eliminatedMove(i)},dec_pdest:${multiRefPhyReg(i)}(isWalk:${walk})\n")
XSDebug(freeReq(i), p"port[$i]:old_pdest:${freePhyReg(i)},isUnique:${oldPdestIsUnique(i)},notUniqueButLast:${oldPdestNotUniqueButLast(i)}\n")
XSDebug(freeReq(i) && eliminatedMove(i), p"port[$i]:pdest:${multiRefPhyReg(i)},isUnique:${pdestIsUnique(i)},notUniqueButLast:${pdestNotUniqueButLast(i)}\n")
XSDebug(p"port[$i]cmtCounterInfo:plus_1=${cmtCounter(preg) + 1.U},plus_1_plus_times=${cmtCounter(preg) + 1.U + old_pdests_times(i)}\n")
XSDebug(p"port[$i]cmtCounterCtl:plus_1=${(freeReq(i) && oldPdestIsUnique(i)).asBool()},plus_1_plus_times=${freeReq(i) && oldPdestNotUniqueButLast(i)},clear=${freeVec(i)}\n")
XSDebug((specRefCounter(preg) === 0.U) && freeVec(i), p"normal preg free, preg:${preg}\n")
XSDebug((cmtCounter(preg) === specRefCounter(preg) && (specRefCounter(preg) =/= 0.U)) && freeVec(i), p"multi referenced preg free, preg:${preg}\n")
}
// set counters-update flag
......@@ -227,6 +222,14 @@ class MEFreeList(implicit val p: config.Parameters) extends MultiIOModule with M
}
}
// Architectural reference counter kept separately for physical register #0:
// a 5-bit register that resets to 0.
// NOTE(review): the 5-bit width assumes the count never exceeds 31 — confirm.
val archRefCntZero = RegInit(0.U(5.W))
// Decrement amount: number of ports with freeReq asserted whose freePhyReg
// (the old pdest being released) is register 0.
val zeroCntDecValue = PopCount(freeReq.zip(freePhyReg).map{ case (v, r) => v && r === 0.U })
// Increment amount: number of ports with freeReq asserted that are eliminated
// moves and whose multiRefPhyReg (the pdest) is register 0.
val zeroCntIncValue = PopCount(freeReq.zip(eliminatedMove).zip(multiRefPhyReg).map{ case ((v, m), r) => v && m && r === 0.U })
// Apply both adjustments only when neither flush nor walk is asserted;
// otherwise the counter holds its current value.
archRefCntZero := Mux(!flush && !walk, archRefCntZero + zeroCntIncValue - zeroCntDecValue, archRefCntZero)
/*
Increments: from rename stage
......@@ -241,9 +244,7 @@ class MEFreeList(implicit val p: config.Parameters) extends MultiIOModule with M
needAllocatingVec(i) := allocateReq(i) && canAllocate && doAllocate && !flush && !psrcOfMove(i).valid && !redirect && !walk
// enqueue instr, is move elimination
when (allocateReq(i) && canAllocate && doAllocate && !flush && psrcOfMove(i).valid && !redirect && !walk) {
// specRefCounterNext(psrcOfMove(i).bits) := specRefCounter(psrcOfMove(i).bits) + 1.U
// updateSpecRefCounter(psrcOfMove(i).bits) := true.B
when (allocateReq(i) && canAllocate && doAllocate && !flush && psrcOfMove(i).valid && psrcOfMove(i).bits =/= 0.U && !redirect && !walk) {
increaseSpecRefCounterVec(i) := true.B
}
......@@ -276,8 +277,8 @@ class MEFreeList(implicit val p: config.Parameters) extends MultiIOModule with M
// update tail pointer
val tailPtrNext = Mux(walk, tailPtr, tailPtr + PopCount(freeVec))
// update head pointer
val dupRegVec = WireInit(VecInit(archRefCounter.zip(cmtCounter).map{ case (a, c) => a - c }))
val headPtrNext = Mux(flush, tailPtr - (NRPhyRegs-32).U - dupRegVec.reduceTree(_ +& _), // FIXME Maybe this is too complicated?
val dupRegVec = WireInit(VecInit(archRefCounter.zip(cmtCounter).drop(1).map{ case (a, c) => a - c }))
val headPtrNext = Mux(flush, tailPtr - (NRPhyRegs-32).U - dupRegVec.reduceTree(_ +& _) - archRefCntZero, // FIXME Maybe this is too complicated?
Mux(walk, headPtr - PopCount(freeReq.zip(eliminatedMove).map{ case (rq, em) => rq && !em }),
headPtr + PopCount(needAllocatingVec))) // when io.redirect is valid, needAllocatingVec is all-zero
......
......@@ -688,7 +688,7 @@ class Roq(numWbPorts: Int)(implicit p: Parameters) extends XSModule with HasCirc
when (canEnqueue(i)) {
if (EnableIntMoveElim) {
eliminatedMove(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
writebacked(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove
writebacked(enqPtrVec(i).value) := io.enq.req(i).bits.eliminatedMove && !io.enq.req(i).bits.cf.exceptionVec.asUInt().orR
} else {
writebacked(enqPtrVec(i).value) := false.B
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册