提交 2312328e 编写于 作者: Fa_wang

Sbuffer: opt store logic timing

上级 2a088b29
......@@ -22,6 +22,7 @@ trait HasSbufferCst extends HasXSParameter {
val CacheLineBytes: Int = CacheLineSize / 8
val CacheLineWords: Int = CacheLineBytes / DataBytes
val OffsetWidth: Int = log2Up(CacheLineBytes)
val WordsWidth: Int = log2Up(CacheLineWords)
val TagWidth: Int = PAddrBits - OffsetWidth
}
......@@ -37,29 +38,29 @@ class SbufferLine extends SbufferBundle {
}
}
/** Base module for store-buffer eviction policies.
  *
  * Declares only the IO: the per-entry state vector comes in and a single
  * `do_eviction` flag goes out. Nothing drives `do_eviction` here, so a
  * subclass is expected to do so.
  */
class AbstractEvictor extends XSModule with HasSbufferCst{
val io = IO(new Bundle{
// One state value per sbuffer entry, as wide as the `s_invalid` encoding.
val states = Input(Vec(StoreBufferSize, UInt(s_invalid.getWidth.W)))
// Asserted when the policy wants an entry evicted.
val do_eviction = Output(Bool())
})
}
/** Evictor that requests eviction once the number of non-invalid entries
  * reaches a fixed threshold.
  *
  * @param threshold entry count at or above which `do_eviction` is asserted;
  *                  must lie in [0, StoreBufferSize]
  */
class NaiveEvictor(threshold: Int) extends AbstractEvictor{
// Elaboration-time check of the parameter range.
require(threshold >= 0 && threshold <= StoreBufferSize)
// Number of entries whose state differs from s_invalid.
val entryCnt = PopCount(io.states.map(s => s=/=s_invalid))
// The threshold literal is SbufferIndexWidth+1 bits wide.
// NOTE(review): presumably the extra bit lets threshold == StoreBufferSize be
// represented -- confirm against the definition of SbufferIndexWidth.
io.do_eviction := entryCnt >= threshold.U((SbufferIndexWidth+1).W)
XSDebug("sbuffer entry cnt: %d\n", entryCnt)
}
//class AbstractEvictor extends XSModule with HasSbufferCst{
// val io = IO(new Bundle{
// val states = Input(Vec(StoreBufferSize, UInt(s_invalid.getWidth.W)))
// val do_eviction = Output(Bool())
// })
//}
//
//
//class NaiveEvictor(threshold: Int) extends AbstractEvictor{
//
// require(threshold >= 0 && threshold <= StoreBufferSize)
//
// val entryCnt = PopCount(io.states.map(s => s=/=s_invalid))
//
// io.do_eviction := entryCnt >= threshold.U((SbufferIndexWidth+1).W)
//
// XSDebug("sbuffer entry cnt: %d\n", entryCnt)
//
//}
class NewSbuffer extends XSModule with HasSbufferCst {
val io = IO(new Bundle() {
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq)))
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq))) //Todo: store logic only support Width == 2 now
val dcache = new DCacheLineIO
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
val flush = new Bundle {
......@@ -73,7 +74,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
//val lru = new SbufferLRU(StoreBufferSize)
val lru = new SbufferLRU(StoreBufferSize)
// 2 * enq + 1 * deq
val lruAccessWays = Wire(Vec(io.in.getWidth+1, new Valid(UInt(SbufferIndexWidth.W))))
val lruAccessWays = Wire(Vec(io.in.getWidth+io.forward.getWidth, new Valid(UInt(SbufferIndexWidth.W))))
for(w <- lruAccessWays){
w.bits := DontCare
w.valid := false.B
......@@ -87,115 +88,184 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val sbuffer_state = RegInit(x_idle)
// ---------------------- Store Enq Sbuffer ---------------------
// (state, lineBuf)
type SbufferEntry = (UInt, SbufferLine)
// Extract the tag: the TagWidth bits of `pa` ending at bit PAddrBits-1.
// Assumes `pa` is at least PAddrBits wide.
def getTag(pa: UInt): UInt =
pa(PAddrBits - 1, PAddrBits - TagWidth)
// Extract bits [OffsetWidth-1 : 3] of `pa`: the in-line offset with its low
// 3 bits dropped, i.e. an index at 8-byte granularity within the line.
def getWordOffset(pa: UInt): UInt =
pa(OffsetWidth-1, 3)
// Rebuild a PAddrBits-wide address from a tag by appending
// (PAddrBits - TagWidth) zero bits below it.
def getAddr(tag: UInt): UInt =
Cat(tag, 0.U((PAddrBits - TagWidth).W))
// In-line byte offset of `pa` with the low 3 bits forced to zero
// (bits [OffsetWidth-1 : 3] followed by three zero bits).
def getByteOffset(pa: UInt): UInt =
Cat(pa(OffsetWidth - 1, 3), 0.U(3.W))
// Extract bits [OffsetWidth-1 : 3] of `pa`: the in-line offset with its low
// 3 bits dropped, i.e. an index at 8-byte granularity within the line.
def getWordOffset(pa: UInt): UInt = pa(OffsetWidth-1, 3)
// Returns bits [OffsetWidth-1 : 3] of the argument followed by three zero
// bits, i.e. the in-line offset aligned down to 8 bytes.
// NOTE(review): the parameter name `offect` looks like a typo for `offset`;
// it is left unchanged here because callers could pass it by name.
def getByteOffset(offect: UInt): UInt =
Cat(offect(OffsetWidth - 1, 3), 0.U(3.W))
// True when `key` equals at least one element of `seq`.
// An empty `seq` yields a constant false.B rather than reducing an empty Cat.
def isOneOf(key: UInt, seq: Seq[UInt]): Bool =
if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR()
// Apply `f` to every entry index 0 until StoreBufferSize and collect the
// results as a Scala sequence (one element per sbuffer entry).
def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f
// TODO: appears unused in the visible code; consider removing.
// Zero every byte of `data` whose corresponding bit in `mask` is clear.
// `mask` carries one bit per byte, so `data` must be exactly 8x as wide.
def maskData(mask: UInt, data: UInt): UInt = {
assert(mask.getWidth * 8 == data.getWidth)
// Byte i is ANDed with mask(i) replicated to 8 bits. The sequence is reversed
// before Cat so that byte 0 stays in the least-significant position.
Cat((0 until mask.getWidth).map(i => data(i*8+7, i*8) & Fill(8, mask(i))).reverse)
}
def wordReqToBufLine(req: DCacheWordReq): SbufferLine = {
val bufLine = Wire(new SbufferLine)
val wordIdx = getWordOffset(req.addr)
val dataVec = Wire(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))
val mask = VecInit(Seq.fill(CacheLineWords){
VecInit(Seq.fill(DataBytes)(false.B))
})
dataVec := DontCare
// type ReqWithIdx = (DecoupledIO[DCacheWordReq], Int)
//
// def enqSbuffer(buf: Seq[SbufferEntry], reqWithIdx: ReqWithIdx): Seq[SbufferEntry] = {
// val req = reqWithIdx._1
// val reqIdx = reqWithIdx._2
// val state_old = VecInit(buf.map(_._1))
// val mem_old = VecInit(buf.map(_._2))
// val state_new = WireInit(state_old)
// val mem_new = WireInit(mem_old)
//
// def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req))
//
// val mergeMask = widthMap(i =>
// req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag
// )
// val canMerge = Cat(mergeMask).orR()
// val invalidMask = state_old.map(s => s===s_invalid)
// val notFull = Cat(invalidMask).orR()
// req.ready := notFull || canMerge
// val mergeIdx = PriorityEncoder(mergeMask)
// val insertIdx = PriorityEncoder(invalidMask)
// when(canMerge){
// mem_new(mergeIdx) := mergeWordReq(req.bits, mem_old(mergeIdx))
// lruAccessWays(reqIdx).valid := true.B
// lruAccessWays(reqIdx).bits := mergeIdx
// XSDebug(p"merge req $reqIdx to line [$mergeIdx]\n")
// }.elsewhen(notFull && req.valid){
// state_new(insertIdx) := s_valid
// mem_new(insertIdx) := wordReqToBufLine(req.bits)
// lruAccessWays(reqIdx).valid := true.B
// lruAccessWays(reqIdx).bits := insertIdx
// XSDebug(p"insert req $reqIdx to line[$insertIdx]\n")
// }
// state_new.zip(mem_new)
// }
// sbuffer entry count
val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W))
val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W))
val full = invalidCount === 0.U
val oneSpace = invalidCount === 1.U
val bufferRead = VecInit((0 until StoreBufferSize).map(i => buffer(i)))
val stateRead = VecInit((0 until StoreBufferSize).map(i => stateVec(i)))
val dataRead = VecInit((0 until StoreBufferSize).map(i => bufferRead(i).data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))))
val maskRead = VecInit((0 until StoreBufferSize).map(i => bufferRead(i).mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))))
val tagRead = VecInit((0 until StoreBufferSize).map(i => bufferRead(i).tag))
val dataUpdate = WireInit(dataRead)
val maskUpdate = WireInit(maskRead)
val tagUpdate = WireInit(tagRead)
val stateUpdate = WireInit(stateRead)
val bufferUpdate = Wire(Vec(StoreBufferSize, new SbufferLine))
(0 until StoreBufferSize) foreach { i =>
bufferUpdate(i).tag := tagUpdate(i)
bufferUpdate(i).data := dataUpdate(i).asUInt()
bufferUpdate(i).mask := maskUpdate(i).asUInt()
}
val tags = io.in.map(in => getTag(in.bits.addr))
val sameTag = tags(0) === tags(1)
val firstWord = getWordOffset(io.in(0).bits.addr)
val secondWord = getWordOffset(io.in(1).bits.addr)
val sameWord = firstWord === secondWord
val mergeMask = Wire(Vec(StorePipelineWidth, Vec(StoreBufferSize, Bool())))
val mergeIdx = mergeMask.map(PriorityEncoder(_))
val canMerge = mergeMask.map(ParallelOR(_))
//val canMerge = io.in.zip(mergeMask).map(i => i._1.valid && ParallelOR(i._2))
for(i <- 0 until StorePipelineWidth){
def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req))
mergeMask(i) := widthMap(j =>
Mux(tags(i) === bufferRead(j).tag && stateCanMerge(stateVec(j)), true.B ,false.B))
}
val invalidMask = stateRead.map(s => s === s_invalid)
val firstInsertMask = PriorityEncoderOH(invalidMask)
val secondInsertMask = Wire(Vec(StoreBufferSize, Bool()))
for (i <- 0 until StoreBufferSize){
secondInsertMask(i) := Mux(canMerge(0) || sameTag, firstInsertMask(i), invalidMask(i) - firstInsertMask(i))
}
val (firstInsertIdx, firstCanInsert) = PriorityEncoderWithFlag(invalidMask)
val (secondInsertIdx, secondCanInsert) = PriorityEncoderWithFlag(secondInsertMask)
io.in(0).ready := firstCanInsert || canMerge(0)
io.in(1).ready := (secondCanInsert || canMerge(1)) && !sameWord && io.in(0).ready
def wordReqToBufLine(req: DCacheWordReq, tag: UInt, insertIdx: UInt, wordOffset: UInt, flushMask: Bool): Unit = {
stateUpdate(insertIdx) := s_valid
tagUpdate(insertIdx) := tag
when(flushMask){
for(j <- 0 until CacheLineWords){
for(i <- 0 until DataBytes){
maskUpdate(insertIdx)(j)(i) := false.B
}
}
}
for(i <- 0 until DataBytes){
when(req.mask(i)){
mask(wordIdx)(i) := true.B
dataVec(wordIdx)(i) := req.data(i*8+7, i*8)
maskUpdate(insertIdx)(wordOffset)(i) := true.B
dataUpdate(insertIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
}
}
bufLine.tag := getTag(req.addr)
bufLine.mask := mask.asUInt()
bufLine.data := dataVec.asUInt()
bufLine
}
def mergeWordReq(req: DCacheWordReq, oldLine: SbufferLine): SbufferLine = {
val newLine = WireInit(oldLine)
val wordIdx = getWordOffset(req.addr)
val mask = oldLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))
val data = oldLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))
def mergeWordReq(req: DCacheWordReq, mergeIdx:UInt, wordOffset:UInt): Unit = {
for(i <- 0 until DataBytes){
when(req.mask(i)){
mask(wordIdx)(i) := true.B
data(wordIdx)(i) := req.data(i*8+7, i*8)
maskUpdate(mergeIdx)(wordOffset)(i) := true.B
dataUpdate(mergeIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
}
}
newLine.mask := mask.asUInt()
newLine.data := data.asUInt()
newLine
}
type ReqWithIdx = (DecoupledIO[DCacheWordReq], Int)
def enqSbuffer(buf: Seq[SbufferEntry], reqWithIdx: ReqWithIdx): Seq[SbufferEntry] = {
val req = reqWithIdx._1
val reqIdx = reqWithIdx._2
val state_old = VecInit(buf.map(_._1))
val mem_old = VecInit(buf.map(_._2))
val state_new = WireInit(state_old)
val mem_new = WireInit(mem_old)
def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req))
val mergeMask = widthMap(i =>
req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag
)
val canMerge = Cat(mergeMask).orR()
val invalidMask = state_old.map(s => s===s_invalid)
val notFull = Cat(invalidMask).orR()
req.ready := notFull || canMerge
val mergeIdx = PriorityEncoder(mergeMask)
val insertIdx = PriorityEncoder(invalidMask)
when(canMerge){
mem_new(mergeIdx) := mergeWordReq(req.bits, mem_old(mergeIdx))
lruAccessWays(reqIdx).valid := true.B
lruAccessWays(reqIdx).bits := mergeIdx
XSDebug(p"merge req $reqIdx to line [$mergeIdx]\n")
}.elsewhen(notFull && req.valid){
state_new(insertIdx) := s_valid
mem_new(insertIdx) := wordReqToBufLine(req.bits)
lruAccessWays(reqIdx).valid := true.B
lruAccessWays(reqIdx).bits := insertIdx
XSDebug(p"insert req $reqIdx to line[$insertIdx]\n")
// first store
when(io.in(0).fire()){
when(canMerge(0)){
mergeWordReq(io.in(0).bits, mergeIdx(0), firstWord)
lruAccessWays(0).valid := true.B
lruAccessWays(0).bits := mergeIdx(0)
XSDebug(p"merge req 0 to line [${mergeIdx(0)}]\n")
}.elsewhen(firstCanInsert){
wordReqToBufLine(io.in(0).bits, tags(0), firstInsertIdx, firstWord, true.B)
lruAccessWays(0).valid := true.B
lruAccessWays(0).bits := firstInsertIdx
XSDebug(p"insert req 0 to line[$firstInsertIdx]\n")
}
state_new.zip(mem_new)
}
val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer(i)))
val initialSbuffer = stateVec.zip(bufferRead)
val updatedSbuffer = io.in.zipWithIndex.foldLeft[Seq[SbufferEntry]](initialSbuffer)(enqSbuffer)
val updatedState = updatedSbuffer.map(_._1)
val updatedSbufferLine = VecInit(updatedSbuffer.map(_._2))
when (!io.in(0).ready) {
io.in(1).ready := false.B
// second store
when(io.in(1).fire()){
when(canMerge(1)){
mergeWordReq(io.in(1).bits, mergeIdx(1), secondWord)
lruAccessWays(1).valid := true.B
lruAccessWays(1).bits := mergeIdx(1)
XSDebug(p"merge req 1 to line [${mergeIdx(1)}]\n")
}.elsewhen(secondCanInsert){
wordReqToBufLine(io.in(1).bits, tags(1), secondInsertIdx, secondWord, !sameTag)
lruAccessWays(1).valid := true.B
lruAccessWays(1).bits := secondInsertIdx
XSDebug(p"insert req 1 to line[$secondInsertIdx]\n")
}
}
for(i <- 0 until StoreBufferSize){
buffer.write(i.U, updatedSbufferLine(i))
stateVec(i) := updatedState(i)
buffer.write(i.U, bufferUpdate(i))
stateVec(i) := stateUpdate(i)
}
for(i <- 0 until StoreBufferSize){
......@@ -224,9 +294,10 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val replaceIdx = lru.way(stateVec.map(s => s===s_valid))
val firstValidEntry = PriorityEncoder(stateVec.map(s => s===s_valid))
val evictor = Module(new NaiveEvictor(StoreBufferSize-4))
evictor.io.states := stateVec
do_eviction := evictor.io.do_eviction
//val evictor = Module(new NaiveEvictor(StoreBufferSize-4))
//evictor.io.states := stateVec
do_eviction := validCount >= 12.U
io.flush.empty := empty
switch(sbuffer_state){
......@@ -256,14 +327,14 @@ class NewSbuffer extends XSModule with HasSbufferCst {
//val evictionIdxWire = replaceIdx
val evictionIdxWire = Mux(stateVec(replaceIdx)===s_valid, replaceIdx, firstValidEntry)
val evictionIdxEnqReq = Wire(DecoupledIO(UInt(SbufferIndexWidth.W)))
val evictionIdxQueue = Module(new Queue(UInt(SbufferIndexWidth.W), StoreBufferSize, pipe = true, flow = false))
val evictionIdxQueue = Module(new Queue(UInt(SbufferIndexWidth.W), StoreBufferSize, pipe = false, flow = false))
def noSameBlockInflight(idx: UInt): Bool = {
val tag = updatedSbufferLine(idx).tag
val tag = bufferRead(idx).tag
!Cat(widthMap(i => {
// stateVec(idx) itself must not be s_inflight*
isOneOf(stateVec(i), Seq(s_inflight_req, s_inflight_resp)) &&
tag===updatedSbufferLine(i).tag
tag===bufferRead(i).tag
})).orR()
}
......@@ -273,28 +344,17 @@ class NewSbuffer extends XSModule with HasSbufferCst {
*/
evictionIdxEnqReq.valid :=
isOneOf(sbuffer_state, Seq(x_drain_sbuffer, x_replace)) &&
stateVec(evictionIdxWire)===s_valid &&
noSameBlockInflight(evictionIdxWire)
evictionIdxEnqReq.bits := evictionIdxWire
evictionIdxQueue.io.enq <> evictionIdxEnqReq
when(evictionIdxEnqReq.fire()){
stateVec(evictionIdxWire) := s_inflight_req
lruAccessWays.last.valid := true.B
lruAccessWays.last.bits := evictionIdxWire
}
// update lru
lru.access(lruAccessWays)
when(sbuffer_state === x_drain_sbuffer && empty){
lru.flush()
XSDebug("drain sbuffer finish, flush lru\n")
}
val wbIdx = evictionIdxQueue.io.deq.bits
val wbLine = updatedSbufferLine(wbIdx)
val wbLine = bufferRead(wbIdx)
io.dcache.req.valid := evictionIdxQueue.io.deq.valid
io.dcache.req.bits.addr := getAddr(wbLine.tag)
io.dcache.req.bits.data := wbLine.data
......@@ -317,6 +377,13 @@ class NewSbuffer extends XSModule with HasSbufferCst {
XSDebug(p"recv cache resp: id=[$respId]\n")
}
// Number of entries newly claimed this cycle (0, 1 or 2): a firing store
// needs a fresh entry when it cannot merge; the second store additionally
// needs one only if its tag differs from the first store's tag.
// `+&` is the width-expanding add, so the sum of two Bools cannot overflow.
val needSpace = (io.in(0).fire && !canMerge(0)) +& (io.in(1).fire && !canMerge(1) && !sameTag)
// invalidCount drops by the entries claimed and rises by one when a dcache
// response fires.
invalidCount := invalidCount - needSpace + io.dcache.resp.fire
// validCount rises by the entries claimed and drops by one when an eviction
// index is enqueued.
validCount := validCount + needSpace - evictionIdxEnqReq.fire()
XSDebug(p"needSpace[$needSpace] invalidCount[$invalidCount] validCount[$validCount]\n")
// ---------------------- Load Data Forward ---------------------
for ((forward, i) <- io.forward.zipWithIndex) {
......@@ -354,10 +421,27 @@ class NewSbuffer extends XSModule with HasSbufferCst {
}
}
// forward lru
val lru_tag_matches = widthMap(i =>
tag_matches(i) && (stateVec(i)===s_valid || stateVec(i)===s_inflight_req)
)
val forward_lru = PriorityEncoderWithFlag(lru_tag_matches)
when(forward_lru._2){
lruAccessWays(io.in.getWidth + i).valid:= true.B
lruAccessWays(io.in.getWidth + i).bits := forward_lru._1
}
XSDebug(Cat(inflight_tag_matches).orR || Cat(valid_tag_matches).orR,
p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n"
)
}
// Update the LRU tracker with every access port collected in lruAccessWays.
// NOTE(review): ports whose `valid` is false are presumably ignored by
// SbufferLRU.access -- confirm in its implementation.
lru.access(lruAccessWays)
// When a drain has completed (state is x_drain_sbuffer and the buffer is
// empty), flush the LRU tracker.
when(sbuffer_state === x_drain_sbuffer && empty){
lru.flush()
XSDebug("drain sbuffer finish, flush lru\n")
}
}
object NewSbuffer extends App {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册