From 2312328e9b0647ff8029e0aa13f7719283d6ede2 Mon Sep 17 00:00:00 2001 From: Zhangfw <471348957@qq.com> Date: Mon, 28 Dec 2020 16:35:14 +0800 Subject: [PATCH] Sbuffer: opt store logic timing --- .../xiangshan/mem/sbuffer/NewSbuffer.scala | 308 +++++++++++------- 1 file changed, 196 insertions(+), 112 deletions(-) diff --git a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala index 82c1b54f6..e11a3f7f7 100644 --- a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala +++ b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala @@ -22,6 +22,7 @@ trait HasSbufferCst extends HasXSParameter { val CacheLineBytes: Int = CacheLineSize / 8 val CacheLineWords: Int = CacheLineBytes / DataBytes val OffsetWidth: Int = log2Up(CacheLineBytes) + val WordsWidth: Int = log2Up(CacheLineWords) val TagWidth: Int = PAddrBits - OffsetWidth } @@ -37,29 +38,29 @@ class SbufferLine extends SbufferBundle { } } -class AbstractEvictor extends XSModule with HasSbufferCst{ - val io = IO(new Bundle{ - val states = Input(Vec(StoreBufferSize, UInt(s_invalid.getWidth.W))) - val do_eviction = Output(Bool()) - }) -} - - -class NaiveEvictor(threshold: Int) extends AbstractEvictor{ - - require(threshold >= 0 && threshold <= StoreBufferSize) - - val entryCnt = PopCount(io.states.map(s => s=/=s_invalid)) - - io.do_eviction := entryCnt >= threshold.U((SbufferIndexWidth+1).W) - - XSDebug("sbuffer entry cnt: %d\n", entryCnt) - -} +//class AbstractEvictor extends XSModule with HasSbufferCst{ +// val io = IO(new Bundle{ +// val states = Input(Vec(StoreBufferSize, UInt(s_invalid.getWidth.W))) +// val do_eviction = Output(Bool()) +// }) +//} +// +// +//class NaiveEvictor(threshold: Int) extends AbstractEvictor{ +// +// require(threshold >= 0 && threshold <= StoreBufferSize) +// +// val entryCnt = PopCount(io.states.map(s => s=/=s_invalid)) +// +// io.do_eviction := entryCnt >= threshold.U((SbufferIndexWidth+1).W) +// +// XSDebug("sbuffer entry cnt: %d\n", entryCnt) +// +//} class NewSbuffer extends XSModule with HasSbufferCst { val io = IO(new Bundle() { - val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq))) + val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq))) //Todo: store logic only support Width == 2 now val dcache = new DCacheLineIO val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) val flush = new Bundle { @@ -73,7 +74,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { //val lru = new SbufferLRU(StoreBufferSize) val lru = new SbufferLRU(StoreBufferSize) // 2 * enq + 1 * deq - val lruAccessWays = Wire(Vec(io.in.getWidth+1, new Valid(UInt(SbufferIndexWidth.W)))) + val lruAccessWays = Wire(Vec(io.in.getWidth+io.forward.getWidth, new Valid(UInt(SbufferIndexWidth.W)))) for(w <- lruAccessWays){ w.bits := DontCare w.valid := false.B @@ -87,115 +88,184 @@ class NewSbuffer extends XSModule with HasSbufferCst { val sbuffer_state = RegInit(x_idle) // ---------------------- Store Enq Sbuffer --------------------- - // (state, lineBuf) - type SbufferEntry = (UInt, SbufferLine) def getTag(pa: UInt): UInt = pa(PAddrBits - 1, PAddrBits - TagWidth) + def getWordOffset(pa: UInt): UInt = + pa(OffsetWidth-1, 3) + def getAddr(tag: UInt): UInt = Cat(tag, 0.U((PAddrBits - TagWidth).W)) - def getByteOffset(pa: UInt): UInt = - Cat(pa(OffsetWidth - 1, 3), 0.U(3.W)) - - def getWordOffset(pa: UInt): UInt = pa(OffsetWidth-1, 3) + def getByteOffset(offect: UInt): UInt = + Cat(offect(OffsetWidth - 1, 3), 0.U(3.W)) def isOneOf(key: UInt, seq: Seq[UInt]): Bool = if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR() def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f - + // TODO:useless def maskData(mask: UInt, data: UInt): UInt = { assert(mask.getWidth * 8 == data.getWidth) Cat((0 until mask.getWidth).map(i => data(i*8+7, i*8) & Fill(8, mask(i))).reverse) } - def wordReqToBufLine(req: DCacheWordReq): SbufferLine = { - val bufLine = Wire(new SbufferLine) - val wordIdx = getWordOffset(req.addr) - val dataVec = Wire(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))) - val mask = VecInit(Seq.fill(CacheLineWords){ - VecInit(Seq.fill(DataBytes)(false.B)) - }) - dataVec := DontCare + + + // type ReqWithIdx = (DecoupledIO[DCacheWordReq], Int) + // + // def enqSbuffer(buf: Seq[SbufferEntry], reqWithIdx: ReqWithIdx): Seq[SbufferEntry] = { + // val req = reqWithIdx._1 + // val reqIdx = reqWithIdx._2 + // val state_old = VecInit(buf.map(_._1)) + // val mem_old = VecInit(buf.map(_._2)) + // val state_new = WireInit(state_old) + // val mem_new = WireInit(mem_old) + // + // def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req)) + // + // val mergeMask = widthMap(i => + // req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag + // ) + // val canMerge = Cat(mergeMask).orR() + // val invalidMask = state_old.map(s => s===s_invalid) + // val notFull = Cat(invalidMask).orR() + // req.ready := notFull || canMerge + // val mergeIdx = PriorityEncoder(mergeMask) + // val insertIdx = PriorityEncoder(invalidMask) + // when(canMerge){ + // mem_new(mergeIdx) := mergeWordReq(req.bits, mem_old(mergeIdx)) + // lruAccessWays(reqIdx).valid := true.B + // lruAccessWays(reqIdx).bits := mergeIdx + // XSDebug(p"merge req $reqIdx to line [$mergeIdx]\n") + // }.elsewhen(notFull && req.valid){ + // state_new(insertIdx) := s_valid + // mem_new(insertIdx) := wordReqToBufLine(req.bits) + // lruAccessWays(reqIdx).valid := true.B + // lruAccessWays(reqIdx).bits := insertIdx + // XSDebug(p"insert req $reqIdx to line[$insertIdx]\n") + // } + // state_new.zip(mem_new) + // } + + // sbuffer entry count + val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W)) + val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W)) + val full = invalidCount === 0.U + val oneSpace = invalidCount === 1.U + + val bufferRead = VecInit((0 until StoreBufferSize).map(i => buffer(i))) + val stateRead = VecInit((0 until StoreBufferSize).map(i => stateVec(i))) + val dataRead = VecInit((0 until StoreBufferSize).map(i => bufferRead(i).data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))) + val maskRead = VecInit((0 until StoreBufferSize).map(i => bufferRead(i).mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))) + val tagRead = VecInit((0 until StoreBufferSize).map(i => bufferRead(i).tag)) + + val dataUpdate = WireInit(dataRead) + val maskUpdate = WireInit(maskRead) + val tagUpdate = WireInit(tagRead) + val stateUpdate = WireInit(stateRead) + val bufferUpdate = Wire(Vec(StoreBufferSize, new SbufferLine)) + (0 until StoreBufferSize) foreach { i => + bufferUpdate(i).tag := tagUpdate(i) + bufferUpdate(i).data := dataUpdate(i).asUInt() + bufferUpdate(i).mask := maskUpdate(i).asUInt() + } + + val tags = io.in.map(in => getTag(in.bits.addr)) + val sameTag = tags(0) === tags(1) + val firstWord = getWordOffset(io.in(0).bits.addr) + val secondWord = getWordOffset(io.in(1).bits.addr) + val sameWord = firstWord === secondWord + + val mergeMask = Wire(Vec(StorePipelineWidth, Vec(StoreBufferSize, Bool()))) + val mergeIdx = mergeMask.map(PriorityEncoder(_)) + val canMerge = mergeMask.map(ParallelOR(_)) + //val canMerge = io.in.zip(mergeMask).map(i => i._1.valid && ParallelOR(i._2)) + for(i <- 0 until StorePipelineWidth){ + def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req)) + mergeMask(i) := widthMap(j => + Mux(tags(i) === bufferRead(j).tag && stateCanMerge(stateVec(j)), true.B ,false.B)) + } + + val invalidMask = stateRead.map(s => s === s_invalid) + val firstInsertMask = PriorityEncoderOH(invalidMask) + val secondInsertMask = Wire(Vec(StoreBufferSize, Bool())) + for (i <- 0 until StoreBufferSize){ + secondInsertMask(i) := Mux(canMerge(0) || sameTag, firstInsertMask(i), invalidMask(i) - firstInsertMask(i)) + } + + val (firstInsertIdx, firstCanInsert) = PriorityEncoderWithFlag(invalidMask) + val (secondInsertIdx, secondCanInsert) = PriorityEncoderWithFlag(secondInsertMask) + + + io.in(0).ready := firstCanInsert || canMerge(0) + io.in(1).ready := (secondCanInsert || canMerge(1)) && !sameWord && io.in(0).ready + + def wordReqToBufLine(req: DCacheWordReq, tag: UInt, insertIdx: UInt, wordOffset: UInt, flushMask: Bool): Unit = { + stateUpdate(insertIdx) := s_valid + tagUpdate(insertIdx) := tag + + when(flushMask){ + for(j <- 0 until CacheLineWords){ + for(i <- 0 until DataBytes){ + maskUpdate(insertIdx)(j)(i) := false.B + } + } + } + for(i <- 0 until DataBytes){ when(req.mask(i)){ - mask(wordIdx)(i) := true.B - dataVec(wordIdx)(i) := req.data(i*8+7, i*8) + maskUpdate(insertIdx)(wordOffset)(i) := true.B + dataUpdate(insertIdx)(wordOffset)(i) := req.data(i*8+7, i*8) } } - bufLine.tag := getTag(req.addr) - bufLine.mask := mask.asUInt() - bufLine.data := dataVec.asUInt() - bufLine } - def mergeWordReq(req: DCacheWordReq, oldLine: SbufferLine): SbufferLine = { - val newLine = WireInit(oldLine) - val wordIdx = getWordOffset(req.addr) - val mask = oldLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))) - val data = oldLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))) + def mergeWordReq(req: DCacheWordReq, mergeIdx:UInt, wordOffset:UInt): Unit = { for(i <- 0 until DataBytes){ when(req.mask(i)){ - mask(wordIdx)(i) := true.B - data(wordIdx)(i) := req.data(i*8+7, i*8) + maskUpdate(mergeIdx)(wordOffset)(i) := true.B + dataUpdate(mergeIdx)(wordOffset)(i) := req.data(i*8+7, i*8) } } - newLine.mask := mask.asUInt() - newLine.data := data.asUInt() - newLine } - type ReqWithIdx = (DecoupledIO[DCacheWordReq], Int) - - def enqSbuffer(buf: Seq[SbufferEntry], reqWithIdx: ReqWithIdx): Seq[SbufferEntry] = { - val req = reqWithIdx._1 - val reqIdx = reqWithIdx._2 - val state_old = VecInit(buf.map(_._1)) - val mem_old = VecInit(buf.map(_._2)) - val state_new = WireInit(state_old) - val mem_new = WireInit(mem_old) - - def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req)) - - val mergeMask = widthMap(i => - req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag - ) - val canMerge = Cat(mergeMask).orR() - val invalidMask = state_old.map(s => s===s_invalid) - val notFull = Cat(invalidMask).orR() - req.ready := notFull || canMerge - val mergeIdx = PriorityEncoder(mergeMask) - val insertIdx = PriorityEncoder(invalidMask) - when(canMerge){ - mem_new(mergeIdx) := mergeWordReq(req.bits, mem_old(mergeIdx)) - lruAccessWays(reqIdx).valid := true.B - lruAccessWays(reqIdx).bits := mergeIdx - XSDebug(p"merge req $reqIdx to line [$mergeIdx]\n") - }.elsewhen(notFull && req.valid){ - state_new(insertIdx) := s_valid - mem_new(insertIdx) := wordReqToBufLine(req.bits) - lruAccessWays(reqIdx).valid := true.B - lruAccessWays(reqIdx).bits := insertIdx - XSDebug(p"insert req $reqIdx to line[$insertIdx]\n") + // first store + when(io.in(0).fire()){ + when(canMerge(0)){ + mergeWordReq(io.in(0).bits, mergeIdx(0), firstWord) + lruAccessWays(0).valid := true.B + lruAccessWays(0).bits := mergeIdx(0) + XSDebug(p"merge req 0 to line [${mergeIdx(0)}]\n") + }.elsewhen(firstCanInsert){ + wordReqToBufLine(io.in(0).bits, tags(0), firstInsertIdx, firstWord, true.B) + lruAccessWays(0).valid := true.B + lruAccessWays(0).bits := firstInsertIdx + XSDebug(p"insert req 0 to line[$firstInsertIdx]\n") } - state_new.zip(mem_new) } - val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer(i))) - val initialSbuffer = stateVec.zip(bufferRead) - val updatedSbuffer = io.in.zipWithIndex.foldLeft[Seq[SbufferEntry]](initialSbuffer)(enqSbuffer) - val updatedState = updatedSbuffer.map(_._1) - val updatedSbufferLine = VecInit(updatedSbuffer.map(_._2)) - when (!io.in(0).ready) { - io.in(1).ready := false.B + // second store + when(io.in(1).fire()){ + when(canMerge(1)){ + mergeWordReq(io.in(1).bits, mergeIdx(1), secondWord) + lruAccessWays(1).valid := true.B + lruAccessWays(1).bits := mergeIdx(1) + XSDebug(p"merge req 1 to line [${mergeIdx(1)}]\n") + }.elsewhen(secondCanInsert){ + wordReqToBufLine(io.in(1).bits, tags(1), secondInsertIdx, secondWord, !sameTag) + lruAccessWays(1).valid := true.B + lruAccessWays(1).bits := secondInsertIdx + XSDebug(p"insert req 1 to line[$secondInsertIdx]\n") + } } + for(i <- 0 until StoreBufferSize){ - buffer.write(i.U, updatedSbufferLine(i)) - stateVec(i) := updatedState(i) + buffer.write(i.U, bufferUpdate(i)) + stateVec(i) := stateUpdate(i) } for(i <- 0 until StoreBufferSize){ @@ -224,9 +294,10 @@ class NewSbuffer extends XSModule with HasSbufferCst { val replaceIdx = lru.way(stateVec.map(s => s===s_valid)) val firstValidEntry = PriorityEncoder(stateVec.map(s => s===s_valid)) - val evictor = Module(new NaiveEvictor(StoreBufferSize-4)) - evictor.io.states := stateVec - do_eviction := evictor.io.do_eviction + //val evictor = Module(new NaiveEvictor(StoreBufferSize-4)) + //evictor.io.states := stateVec + + do_eviction := validCount >= 12.U io.flush.empty := empty switch(sbuffer_state){ @@ -256,14 +327,14 @@ class NewSbuffer extends XSModule with HasSbufferCst { //val evictionIdxWire = replaceIdx val evictionIdxWire = Mux(stateVec(replaceIdx)===s_valid, replaceIdx, firstValidEntry) val evictionIdxEnqReq = Wire(DecoupledIO(UInt(SbufferIndexWidth.W))) - val evictionIdxQueue = Module(new Queue(UInt(SbufferIndexWidth.W), StoreBufferSize, pipe = true, flow = false)) + val evictionIdxQueue = Module(new Queue(UInt(SbufferIndexWidth.W), StoreBufferSize, pipe = false, flow = false)) def noSameBlockInflight(idx: UInt): Bool = { - val tag = updatedSbufferLine(idx).tag + val tag = bufferRead(idx).tag !Cat(widthMap(i => { // stateVec(idx) itself must not be s_inflight* isOneOf(stateVec(i), Seq(s_inflight_req, s_inflight_resp)) && - tag===updatedSbufferLine(i).tag + tag===bufferRead(i).tag })).orR() } @@ -273,28 +344,17 @@ class NewSbuffer extends XSModule with HasSbufferCst { */ evictionIdxEnqReq.valid := isOneOf(sbuffer_state, Seq(x_drain_sbuffer, x_replace)) && - stateVec(evictionIdxWire)===s_valid && noSameBlockInflight(evictionIdxWire) evictionIdxEnqReq.bits := evictionIdxWire evictionIdxQueue.io.enq <> evictionIdxEnqReq - when(evictionIdxEnqReq.fire()){ stateVec(evictionIdxWire) := s_inflight_req - lruAccessWays.last.valid := true.B - lruAccessWays.last.bits := evictionIdxWire - } - - // update lru - lru.access(lruAccessWays) - when(sbuffer_state === x_drain_sbuffer && empty){ - lru.flush() - XSDebug("drain sbuffer finish, flush lru\n") } val wbIdx = evictionIdxQueue.io.deq.bits - val wbLine = updatedSbufferLine(wbIdx) + val wbLine = bufferRead(wbIdx) io.dcache.req.valid := evictionIdxQueue.io.deq.valid io.dcache.req.bits.addr := getAddr(wbLine.tag) io.dcache.req.bits.data := wbLine.data @@ -317,6 +377,13 @@ class NewSbuffer extends XSModule with HasSbufferCst { XSDebug(p"recv cache resp: id=[$respId]\n") } + + val needSpace = (io.in(0).fire && !canMerge(0)) +& (io.in(1).fire && !canMerge(1) && !sameTag) + invalidCount := invalidCount - needSpace + io.dcache.resp.fire + validCount := validCount + needSpace - evictionIdxEnqReq.fire() + + XSDebug(p"needSpace[$needSpace] invalidCount[$invalidCount] validCount[$validCount]\n") + // ---------------------- Load Data Forward --------------------- for ((forward, i) <- io.forward.zipWithIndex) { @@ -354,10 +421,27 @@ class NewSbuffer extends XSModule with HasSbufferCst { } } + // forward lru + val lru_tag_matches = widthMap(i => + tag_matches(i) && (stateVec(i)===s_valid || stateVec(i)===s_inflight_req) + ) + val forward_lru = PriorityEncoderWithFlag(lru_tag_matches) + when(forward_lru._2){ + lruAccessWays(io.in.getWidth + i).valid:= true.B + lruAccessWays(io.in.getWidth + i).bits := forward_lru._1 + } + XSDebug(Cat(inflight_tag_matches).orR || Cat(valid_tag_matches).orR, p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n" ) } + + // update lru + lru.access(lruAccessWays) + when(sbuffer_state === x_drain_sbuffer && empty){ + lru.flush() + XSDebug("drain sbuffer finish, flush lru\n") + } } object NewSbuffer extends App { -- GitLab