未验证 提交 6cdd85d9 编写于 作者: Y Yinan Xu 提交者: GitHub

backend,fu: add InputBuffer for fdivSqrt (#990)

This commit adds an 8-entry buffer for fdivSqrt function unit input.
Set hasInputBuffer to true to enable input buffers for other function
units.
上级 a0301c0d
......@@ -97,16 +97,6 @@ case class ExuConfig
abstract class Exu(val config: ExuConfig)(implicit p: Parameters) extends XSModule {
val functionUnits = config.fuConfigs.map(cfg => {
val mod = Module(cfg.fuGen(p))
mod.suggestName(cfg.name)
mod
})
val fuSel = functionUnits.zip(config.fuConfigs.map(_.fuSel)).map {
case (fu, sel) => sel(fu)
}
val io = IO(new Bundle() {
val fromInt = if (config.readIntRf) Flipped(DecoupledIO(new ExuInput)) else null
val fromFp = if (config.readFpRf) Flipped(DecoupledIO(new ExuInput)) else null
......@@ -114,44 +104,52 @@ abstract class Exu(val config: ExuConfig)(implicit p: Parameters) extends XSModu
val flush = Input(Bool())
val out = DecoupledIO(new ExuOutput)
})
val csrio = if (config == JumpCSRExeUnitCfg) Some(IO(new CSRFileIO)) else None
val fenceio = if (config == JumpCSRExeUnitCfg) Some(IO(new FenceIO)) else None
val frm = if (config == FmacExeUnitCfg || config == FmiscExeUnitCfg) Some(IO(Input(UInt(3.W)))) else None
val stData = if (config == StdExeUnitCfg) Some(IO(ValidIO(new StoreDataBundle))) else None
for ((fuCfg, (fu, sel)) <- config.fuConfigs.zip(functionUnits.zip(fuSel))) {
val functionUnits = config.fuConfigs.map(cfg => {
val mod = Module(cfg.fuGen(p))
mod.suggestName(cfg.name)
mod
})
val in = if (fuCfg.numIntSrc > 0) {
val fuIn = config.fuConfigs.map(fuCfg =>
if (fuCfg.numIntSrc > 0) {
assert(fuCfg.numFpSrc == 0 || config == StdExeUnitCfg)
io.fromInt
} else {
assert(fuCfg.numFpSrc > 0)
io.fromFp
}
)
val fuSel = fuIn.zip(config.fuConfigs).map { case (in, cfg) => cfg.fuSel(in.bits.uop) }
val src1 = in.bits.src(0)
val src2 = in.bits.src(1)
val src3 = in.bits.src(2)
val fuInReady = config.fuConfigs.zip(fuIn).zip(functionUnits.zip(fuSel)).map { case ((fuCfg, in), (fu, sel)) =>
fu.io.redirectIn := io.redirect
fu.io.flushIn := io.flush
fu.io.in.valid := in.valid && sel
fu.io.in.bits.uop := in.bits.uop
fu.io.in.bits.src.foreach(_ <> DontCare)
if (fuCfg.srcCnt > 0) {
fu.io.in.bits.src(0) := src1
if (fuCfg.hasInputBuffer) {
val buffer = Module(new InputBuffer(8))
buffer.io.redirect <> io.redirect
buffer.io.flush <> io.flush
buffer.io.in.valid := in.valid && sel
buffer.io.in.bits.uop := in.bits.uop
buffer.io.in.bits.src := in.bits.src
buffer.io.out <> fu.io.in
buffer.io.in.ready
}
if (fuCfg.srcCnt > 1 || fuCfg == jmpCfg) { // jump is special for jalr target
fu.io.in.bits.src(1) := src2
else {
fu.io.in.valid := in.valid && sel
fu.io.in.bits.uop := in.bits.uop
fu.io.in.bits.src := in.bits.src
fu.io.in.ready
}
if (fuCfg.srcCnt > 2) {
fu.io.in.bits.src(2) := src3
}
fu.io.redirectIn := io.redirect
fu.io.flushIn := io.flush
}
val needArbiter = !(config.latency.latencyVal.nonEmpty && (config.latency.latencyVal.get == 0))
def writebackArb(in: Seq[DecoupledIO[FuOutput]], out: DecoupledIO[ExuOutput]): Seq[Bool] = {
if (needArbiter) {
if(in.size == 1){
......@@ -196,23 +194,23 @@ abstract class Exu(val config: ExuConfig)(implicit p: Parameters) extends XSModu
}
val readIntFu = config.fuConfigs
.zip(functionUnits.zip(fuSel))
.zip(fuInReady.zip(fuSel))
.filter(_._1.numIntSrc > 0)
.map(_._2)
val readFpFu = config.fuConfigs
.zip(functionUnits.zip(fuSel))
.zip(fuInReady.zip(fuSel))
.filter(_._1.numFpSrc > 0)
.map(_._2)
def inReady(s: Seq[(FunctionUnit, Bool)]): Bool = {
def inReady(s: Seq[(Bool, Bool)]): Bool = {
if (s.size == 1) {
s.head._1.io.in.ready
s.head._1
} else {
if (needArbiter) {
Cat(s.map(x => x._1.io.in.ready && x._2)).orR()
Cat(s.map(x => x._1 && x._2)).orR()
} else {
Cat(s.map(x => x._1.io.in.ready)).andR()
Cat(s.map(x => x._1)).andR()
}
}
}
......
......@@ -28,15 +28,9 @@ class FmiscExeUnit(implicit p: Parameters) extends ExeUnit(FmiscExeUnitCfg) {
val fus = functionUnits.map(fu => fu.asInstanceOf[FPUSubModule])
val input = io.fromFp
val isRVF = input.bits.uop.ctrl.isRVF
val instr_rm = input.bits.uop.ctrl.fpu.rm
val (src1, src2) = (input.bits.src(0), input.bits.src(1))
functionUnits.foreach { module =>
module.io.in.bits.src(0) := src1
module.io.in.bits.src(1) := src2
module.asInstanceOf[FPUSubModule].rm := Mux(instr_rm =/= 7.U, instr_rm, frm.get)
fus.foreach { module =>
val instr_rm = module.io.in.bits.uop.ctrl.fpu.rm
module.rm := Mux(instr_rm =/= 7.U, instr_rm, frm.get)
}
require(config.hasFastUopOut)
......
......@@ -40,7 +40,7 @@ case class FuConfig
(
name: String,
fuGen: Parameters => FunctionUnit,
fuSel: FunctionUnit => Bool,
fuSel: MicroOp => Bool,
fuType: UInt,
numIntSrc: Int,
numFpSrc: Int,
......@@ -49,7 +49,8 @@ case class FuConfig
hasRedirect: Boolean,
latency: HasFuLatency = CertainLatency(0),
fastUopOut: Boolean = false,
fastImplemented: Boolean = false
fastImplemented: Boolean = false,
hasInputBuffer: Boolean = false
) {
def srcCnt: Int = math.max(numIntSrc, numFpSrc)
}
......@@ -60,12 +61,13 @@ class FuOutput(val len: Int)(implicit p: Parameters) extends XSBundle {
val uop = new MicroOp
}
/**
  * Payload carried on a function unit's input port: the source operands plus the micro-op.
  *
  * Declared as a named Bundle so the same type can be used both for `FunctionUnitIO.in`
  * and for the `InputBuffer` ports and storage.
  *
  * @param len bit width of each source operand
  */
class FunctionUnitInput(val len: Int)(implicit p: Parameters) extends XSBundle {
// Always three operand slots, each `len` bits wide.
val src = Vec(3, UInt(len.W))
// The micro-op that accompanies the operands.
val uop = new MicroOp
}
class FunctionUnitIO(val len: Int)(implicit p: Parameters) extends XSBundle {
val in = Flipped(DecoupledIO(new Bundle() {
val src = Vec(3, UInt(len.W))
val uop = new MicroOp
}))
val in = Flipped(DecoupledIO(new FunctionUnitInput(len)))
val out = DecoupledIO(new FuOutput(len))
......
/***************************************************************************************
* * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* * Copyright (c) 2020-2021 Peng Cheng Laboratory
* *
* * XiangShan is licensed under Mulan PSL v2.
* * You can use this software according to the terms and conditions of the Mulan PSL v2.
* * You may obtain a copy of Mulan PSL v2 at:
* * http://license.coscl.org.cn/MulanPSL2
* *
* * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* *
* * See the Mulan PSL v2 for more details.
* ***************************************************************************************/
package xiangshan.backend.fu
import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.issue.AgeDetector
/**
  * Buffer for function unit inputs.
  *
  * Requests arriving on `io.in` are stored in one of `numEntries` slots and a stored request
  * is presented on `io.out`. Slot occupancy is tracked by `emptyVec`; the slot to present is
  * chosen by an `AgeDetector` instance. Occupied slots whose uop matches the incoming
  * redirect/flush are released without being issued.
  *
  * NOTE: `emptyVec` is written from three places (enqueue, dequeue, flush). Chisel resolves
  * multiple connections with last-connect semantics, so the order of the `when` blocks below
  * is significant.
  *
  * @param numEntries number of storage slots in the buffer
  */
class InputBuffer(numEntries: Int)(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
// Redirect and flush are both passed to roqIdx.needFlush to decide which uops to discard.
val redirect = Flipped(ValidIO(new Redirect))
val flush = Input(Bool())
// Upstream request port (this module drives `ready`).
val in = Flipped(DecoupledIO(new FunctionUnitInput(XLEN)))
// Downstream request port (this module drives `valid` and `bits`).
val out = DecoupledIO(new FunctionUnitInput(XLEN))
})
// Payload storage; no reset value, validity is tracked separately by emptyVec.
val data = Reg(Vec(numEntries, new FunctionUnitInput(XLEN)))
// emptyVec(i) is true when slot i holds no request. All slots are empty after reset.
val emptyVec = RegInit(VecInit(Seq.fill(numEntries)(true.B)))
// NOTE(review): SelectOne is defined elsewhere; presumably this picks one empty slot and
// returns (valid, one-hot select) — confirm against its definition.
val selectEnq = SelectOne("naive", emptyVec, 1).getNthOH(1)
// Accept a new request whenever at least one slot is empty.
io.in.ready := emptyVec.asUInt.orR
// Second element of the selection result, used below as the per-slot enqueue select.
val enqVec = selectEnq._2
// enqueue
// Only store the request if the handshake fires and needFlush does not report the incoming
// uop as flushed by the current redirect/flush.
val doEnqueue = io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirect, io.flush)
when (doEnqueue) {
for (i <- 0 until numEntries) {
when (enqVec(i)) {
data(i) := io.in.bits
emptyVec(i) := false.B
}
}
}
// dequeue
// NOTE(review): AgeDetector is defined elsewhere; presumably age.io.out is a one-hot mask
// of the oldest occupied slot — confirm against its definition.
val age = Module(new AgeDetector(numEntries, 1))
// Report the newly written slot to the age tracker (zero when nothing is enqueued).
age.io.enq(0) := Mux(doEnqueue, enqVec.asUInt, 0.U)
// Output is valid whenever not all slots are empty.
// NOTE(review): this does not take flushVec into account, so a slot being flushed in this
// cycle can still be presented on io.out — verify the consumer also handles redirect/flush.
io.out.valid := !emptyVec.asUInt.andR
// Select the payload of the slot indicated by age.io.out.
io.out.bits := Mux1H(age.io.out, data)
when (io.out.fire) {
for (i <- 0 until numEntries) {
when (age.io.out(i)) {
emptyVec(i) := true.B
// emptyVec(i) read here is the register's current value, i.e. the slot state before this
// dequeue takes effect; the check targets dequeuing a slot that was already empty.
XSError(emptyVec(i), "should not deq an empty entry\n")
}
}
}
// flush
// A slot is flushed when it is occupied and needFlush reports its uop as flushed.
val flushVec = data.map(_.uop.roqIdx).zip(emptyVec).map{ case (r, e) => !e && r.needFlush(io.redirect, io.flush) }
for (i <- 0 until numEntries) {
// Connected last, so this release takes priority over the earlier writes to emptyVec(i).
when (flushVec(i)) {
emptyVec(i) := true.B
}
}
val flushDeq = VecInit(flushVec).asUInt
// Tell the age tracker about every slot released this cycle: the issued one (if any) and
// all flushed ones.
age.io.deq := Mux(io.out.fire, age.io.out, 0.U) | flushDeq
// Performance counters on the number of occupied slots.
val numValid = PopCount(emptyVec.map(e => !e))
XSPerfHistogram("num_valid", numValid, true.B, 0, numEntries, 1)
XSPerfAccumulate("aver_num_valid", numValid)
}
......@@ -422,28 +422,28 @@ package object xiangshan {
def fdivSqrtGen(p: Parameters) = new FDivSqrt()(p)
def stdGen(p: Parameters) = new Std()(p)
def f2iSel(x: FunctionUnit): Bool = {
x.io.in.bits.uop.ctrl.rfWen
def f2iSel(uop: MicroOp): Bool = {
uop.ctrl.rfWen
}
def i2fSel(x: FunctionUnit): Bool = {
x.io.in.bits.uop.ctrl.fpu.fromInt
def i2fSel(uop: MicroOp): Bool = {
uop.ctrl.fpu.fromInt
}
def f2fSel(x: FunctionUnit): Bool = {
val ctrl = x.io.in.bits.uop.ctrl.fpu
def f2fSel(uop: MicroOp): Bool = {
val ctrl = uop.ctrl.fpu
ctrl.fpWen && !ctrl.div && !ctrl.sqrt
}
def fdivSqrtSel(x: FunctionUnit): Bool = {
val ctrl = x.io.in.bits.uop.ctrl.fpu
def fdivSqrtSel(uop: MicroOp): Bool = {
val ctrl = uop.ctrl.fpu
ctrl.div || ctrl.sqrt
}
val aluCfg = FuConfig(
name = "alu",
fuGen = aluGen,
fuSel = (x: FunctionUnit) => x.io.in.bits.uop.ctrl.fuType === FuType.alu,
fuSel = (uop: MicroOp) => uop.ctrl.fuType === FuType.alu,
fuType = FuType.alu,
numIntSrc = 2,
numFpSrc = 0,
......@@ -455,7 +455,7 @@ package object xiangshan {
val jmpCfg = FuConfig(
name = "jmp",
fuGen = jmpGen,
fuSel = (x: FunctionUnit) => x.io.in.bits.uop.ctrl.fuType === FuType.jmp,
fuSel = (uop: MicroOp) => uop.ctrl.fuType === FuType.jmp,
fuType = FuType.jmp,
numIntSrc = 1,
numFpSrc = 0,
......@@ -467,7 +467,7 @@ package object xiangshan {
val fenceCfg = FuConfig(
name = "fence",
fuGen = fenceGen,
fuSel = (x: FunctionUnit) => x.io.in.bits.uop.ctrl.fuType === FuType.fence,
fuSel = (uop: MicroOp) => uop.ctrl.fuType === FuType.fence,
FuType.fence, 1, 0, writeIntRf = false, writeFpRf = false, hasRedirect = false,
UncertainLatency() // TODO: need rewrite latency structure, not just this value
)
......@@ -475,7 +475,7 @@ package object xiangshan {
val csrCfg = FuConfig(
name = "csr",
fuGen = csrGen,
fuSel = (x: FunctionUnit) => x.io.in.bits.uop.ctrl.fuType === FuType.csr,
fuSel = (uop: MicroOp) => uop.ctrl.fuType === FuType.csr,
fuType = FuType.csr,
numIntSrc = 1,
numFpSrc = 0,
......@@ -501,7 +501,7 @@ package object xiangshan {
val divCfg = FuConfig(
name = "div",
fuGen = dividerGen,
fuSel = (x: FunctionUnit) => MDUOpType.isDiv(x.io.in.bits.uop.ctrl.fuOpType),
fuSel = (uop: MicroOp) => MDUOpType.isDiv(uop.ctrl.fuOpType),
FuType.div,
2,
0,
......@@ -516,7 +516,7 @@ package object xiangshan {
val mulCfg = FuConfig(
name = "mul",
fuGen = multiplierGen,
fuSel = (x: FunctionUnit) => MDUOpType.isMul(x.io.in.bits.uop.ctrl.fuOpType),
fuSel = (uop: MicroOp) => MDUOpType.isMul(uop.ctrl.fuOpType),
FuType.mul,
2,
0,
......@@ -531,7 +531,7 @@ package object xiangshan {
val bmuCfg = FuConfig(
name = "bmu",
fuGen = bmuGen,
fuSel = (x: FunctionUnit) => x.io.in.bits.uop.ctrl.fuType === FuType.bmu,
fuSel = (uop: MicroOp) => uop.ctrl.fuType === FuType.bmu,
fuType = FuType.bmu,
numIntSrc = 2,
numFpSrc = 0,
......@@ -572,7 +572,7 @@ package object xiangshan {
fuGen = fdivSqrtGen,
fuSel = fdivSqrtSel,
FuType.fDivSqrt, 0, 2, writeIntRf = false, writeFpRf = true, hasRedirect = false, UncertainLatency(),
fastUopOut = true, fastImplemented = false
fastUopOut = true, fastImplemented = false, hasInputBuffer = true
)
val lduCfg = FuConfig(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册