Unverified · Commit 6008d57d · Authored by: happy-lx · Committed by: GitHub

dcache: optimize the ready signal of missqueue (#1965)

Add a custom arbiter. When multiple sources request the same cache
block address, the arbiter allocates only one entry in the missqueue
but asserts ready for all requests to that cache block address.

This reduces the number of replays of load instructions that cannot
enter the missqueue.
Parent ece4c195
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package utils
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import chisel3.internal.naming.chiselName
import xiangshan.cache._
object ArbiterCtrl {
  def apply(request: Seq[Bool]): Seq[Bool] = request.length match {
    case 0 => Seq()
    case 1 => Seq(true.B)
    case _ => true.B +: request.tail.init.scanLeft(request.head)(_ || _).map(!_)
  }
}
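// ---------------------------------------------------------------------------
// Editor's note (illustrative sketch, not part of this commit): ArbiterCtrl
// above builds the usual priority-grant vector -- grant(i) is high exactly
// when no lower-indexed request is valid, and grant(0) is always high. The
// pure-Scala model below (ArbiterCtrlModel is a hypothetical name) mirrors
// that computation with software Booleans so the priority order can be
// checked at a glance.
object ArbiterCtrlModel {
  def apply(request: Seq[Boolean]): Seq[Boolean] = request.length match {
    case 0 => Seq()
    case 1 => Seq(true)
    case _ => true +: request.tail.init.scanLeft(request.head)(_ || _).map(!_)
  }

  def main(args: Array[String]): Unit = {
    // requests (false, true, true) -> grants (true, true, false):
    // input 1 wins because input 0 is idle; input 2 loses to input 1.
    println(ArbiterCtrlModel(Seq(false, true, true)))
  }
}
// ---------------------------------------------------------------------------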
/** Hardware module that is used to sequence n producers into 1 consumer.
 * Priority is given to the lower-indexed producer.
 * In addition, if a producer's cache block address matches that of the chosen producer,
 * that producer is also served in the same cycle.
 *
 * @param gen data type; must carry an `addr` field holding the physical address
 * @param n number of inputs
 * @param offset_width cache line offset width
 * @param paddr_bits number of bits in the physical address
 *
 * @example {{{
 * val arb = Module(new ArbiterFilterByCacheLineAddr(new MissReqWoStoreData, 2, blockOffBits, PAddrBits))
 * arb.io.in(0) <> producer0.io.out
 * arb.io.in(1) <> producer1.io.out
 * consumer.io.in <> arb.io.out
 * }}}
 */
@chiselName
class ArbiterFilterByCacheLineAddr[T <: MissReqWoStoreData](val gen: T, val n: Int, val offset_width: Int, val paddr_bits: Int) extends Module {
  val io = IO(new ArbiterIO(gen, n))

  // Priority selection: the lowest-indexed valid input is chosen.
  io.chosen := (n - 1).asUInt
  io.out.bits := io.in(n - 1).bits
  for (i <- n - 2 to 0 by -1) {
    when(io.in(i).valid) {
      io.chosen := i.asUInt
      io.out.bits := io.in(i).bits
    }
  }

  // An input is ready if it wins the priority grant, or if its cache line address
  // (physical-address bits above the block offset) matches that of the chosen input.
  val grant = ArbiterCtrl(io.in.map(_.valid))
  for ((in, g) <- io.in.zip(grant))
    in.ready := (g || (in.bits.addr(paddr_bits - 1, offset_width) === io.out.bits.addr(paddr_bits - 1, offset_width))) && io.out.ready
  io.out.valid := !grant.last || io.in.last.valid
}
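The essential difference from the stock Chisel `Arbiter` is the `ready` assignment: an input is acknowledged either because it wins the priority grant or because its cache-line address (the physical-address bits above the block offset) equals that of the chosen input. Several load misses to the same cache block are therefore drained in one cycle while the miss queue sees only the single chosen request, which is what the commit message means by "allocate only one entry but ready for all same cache block address requests". The sketch below is a small software model of that ready rule under assumed parameters (a hypothetical 6-bit block offset, i.e. 64-byte lines); `CacheLineFilterModel` and `readyMask` are illustrative names, not part of this commit or of XiangShan.

```scala
// Minimal software model of the ready rule above.
// Hypothetical parameters: 64-byte cache lines (offset_width = 6).
object CacheLineFilterModel {
  val offsetWidth = 6

  def lineAddr(paddr: BigInt): BigInt = paddr >> offsetWidth

  // valids/paddrs: one element per arbiter input, lower index = higher priority.
  // Returns the ready mask for a cycle in which the consumer is ready.
  def readyMask(valids: Seq[Boolean], paddrs: Seq[BigInt]): Seq[Boolean] = {
    // chosen: lowest-indexed valid input (defaults to the last input, as in the RTL)
    val chosen = valids.indexWhere(identity) match {
      case -1 => valids.length - 1
      case i  => i
    }
    // grant(i): no lower-indexed input is valid
    val grants = valids.indices.map(i => !valids.take(i).exists(identity))
    valids.indices.map { i =>
      grants(i) || lineAddr(paddrs(i)) == lineAddr(paddrs(chosen))
    }
  }

  def main(args: Array[String]): Unit = {
    // Same 64-byte line (0x80001000 and 0x80001020): both inputs become ready.
    println(readyMask(Seq(true, true), Seq(BigInt(0x80001000L), BigInt(0x80001020L))))
    // Different lines: only the priority winner (input 0) is ready.
    println(readyMask(Seq(true, true), Seq(BigInt(0x80001000L), BigInt(0x80002000L))))
  }
}
```

Running the model prints `List(true, true)` for the same-line case and `List(true, false)` for the different-line case, matching the intended single-allocation, multi-acknowledge behaviour.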
@@ -861,7 +861,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters
val MainPipeMissReqPort = 0
// Request
- val missReqArb = Module(new Arbiter(new MissReq, MissReqPortCount))
+ val missReqArb = Module(new ArbiterFilterByCacheLineAddr(new MissReq, MissReqPortCount, blockOffBits, PAddrBits))
missReqArb.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req
for (w <- 0 until LoadPipelineWidth) { missReqArb.io.in(w + 1) <> ldu(w).io.miss_req }
@@ -879,6 +879,9 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters
missReqArb.io.out.ready := false.B
}
XSPerfAccumulate("miss_queue_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) >= 1.U)
XSPerfAccumulate("miss_queue_muti_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) > 1.U)
// forward missqueue
(0 until LoadPipelineWidth).map(i => io.lsu.forward_mshr(i).connect(missQueue.io.forward(i)))