提交 e50fb2d7 编写于 作者: L LinJiawei

use berkeley-hardfloat instead xs-fpu

上级 4f70f012
......@@ -194,7 +194,6 @@ class FPUCtrlSignals extends XSBundle {
val div = Bool()
val sqrt = Bool()
val fcvt = Bool()
val fma = Bool()
val typ = UInt(2.W)
val fmt = UInt(2.W)
val ren3 = Bool() //TODO: remove SrcType.fp
......
......@@ -18,7 +18,7 @@ class FPDecoder extends XSModule{
val s = BitPat(S)
val d = BitPat(D)
val default = List(X,X,X,X,X,X,X,X,X)
val default = List(X,X,X,X,N,N,X,X,X)
// isAddSub tagIn tagOut fromInt wflags fpWen div sqrt fcvt
val single: Array[(BitPat, List[BitPat])] = Array(
......@@ -52,7 +52,42 @@ class FPDecoder extends XSModule{
FSQRT_S -> List(N,s,s,N,Y,Y,N,Y,N)
)
val table = single
// isAddSub tagIn tagOut fromInt wflags fpWen div sqrt fcvt
val double: Array[(BitPat, List[BitPat])] = Array(
FMV_D_X -> List(N,d,d,Y,N,Y,N,N,N),
FCVT_D_W -> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_WU-> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_L -> List(N,d,d,Y,Y,Y,N,N,Y),
FCVT_D_LU-> List(N,d,d,Y,Y,Y,N,N,Y),
FMV_X_D -> List(N,d,X,N,N,N,N,N,N),
FCLASS_D -> List(N,d,X,N,N,N,N,N,N),
FCVT_W_D -> List(N,d,X,N,Y,N,N,N,Y),
FCVT_WU_D-> List(N,d,X,N,Y,N,N,N,Y),
FCVT_L_D -> List(N,d,X,N,Y,N,N,N,Y),
FCVT_LU_D-> List(N,d,X,N,Y,N,N,N,Y),
FCVT_S_D -> List(N,d,s,N,Y,Y,N,N,Y),
FCVT_D_S -> List(N,s,d,N,Y,Y,N,N,Y),
FEQ_D -> List(N,d,X,N,Y,N,N,N,N),
FLT_D -> List(N,d,X,N,Y,N,N,N,N),
FLE_D -> List(N,d,X,N,Y,N,N,N,N),
FSGNJ_D -> List(N,d,d,N,N,Y,N,N,N),
FSGNJN_D -> List(N,d,d,N,N,Y,N,N,N),
FSGNJX_D -> List(N,d,d,N,N,Y,N,N,N),
FMIN_D -> List(N,d,d,N,Y,Y,N,N,N),
FMAX_D -> List(N,d,d,N,Y,Y,N,N,N),
FADD_D -> List(Y,d,d,N,Y,Y,N,N,N),
FSUB_D -> List(Y,d,d,N,Y,Y,N,N,N),
FMUL_D -> List(N,d,d,N,Y,Y,N,N,N),
FMADD_D -> List(N,d,d,N,Y,Y,N,N,N),
FMSUB_D -> List(N,d,d,N,Y,Y,N,N,N),
FNMADD_D -> List(N,d,d,N,Y,Y,N,N,N),
FNMSUB_D -> List(N,d,d,N,Y,Y,N,N,N),
FDIV_D -> List(N,d,d,N,Y,Y,Y,N,N),
FSQRT_D -> List(N,d,d,N,Y,Y,N,Y,N)
)
val table = single ++ double
val decoder = DecodeLogic(io.instr, default, table)
......@@ -67,16 +102,23 @@ class FPDecoder extends XSModule{
ctrl.fmt := io.instr(26,25)
val fmaTable: Array[(BitPat, List[BitPat])] = Array(
FADD_S -> List(BitPat("b00"),N,Y),
FSUB_S -> List(BitPat("b01"),N,Y),
FMUL_S -> List(BitPat("b00"),N,Y),
FMADD_S -> List(BitPat("b00"),Y,Y),
FMSUB_S -> List(BitPat("b01"),Y,Y),
FNMADD_S-> List(BitPat("b11"),Y,Y),
FNMSUB_S-> List(BitPat("b10"),Y,Y)
FADD_S -> List(BitPat("b00"),N),
FADD_D -> List(BitPat("b00"),N),
FSUB_S -> List(BitPat("b01"),N),
FSUB_D -> List(BitPat("b01"),N),
FMUL_S -> List(BitPat("b00"),N),
FMUL_D -> List(BitPat("b00"),N),
FMADD_S -> List(BitPat("b00"),Y),
FMADD_D -> List(BitPat("b00"),Y),
FMSUB_S -> List(BitPat("b01"),Y),
FMSUB_D -> List(BitPat("b01"),Y),
FNMADD_S-> List(BitPat("b11"),Y),
FNMADD_D-> List(BitPat("b11"),Y),
FNMSUB_S-> List(BitPat("b10"),Y),
FNMSUB_D-> List(BitPat("b10"),Y)
)
val fmaDefault = List(BitPat("b??"), N, N)
Seq(ctrl.fmaCmd, ctrl.ren3, ctrl.fma).zip(
val fmaDefault = List(BitPat("b??"), N)
Seq(ctrl.fmaCmd, ctrl.ren3).zip(
DecodeLogic(io.instr, fmaDefault, fmaTable)
).foreach({
case (s, d) => s := d
......
......@@ -120,7 +120,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
def writebackArb(in: Seq[DecoupledIO[FuOutput]], out: DecoupledIO[ExuOutput]): Arbiter[FuOutput] = {
if (needArbiter) {
val arb = Module(new Arbiter(new FuOutput, in.size))
val arb = Module(new Arbiter(new FuOutput(in.head.bits.len), in.size))
arb.io.in <> in
arb.io.out.ready := out.ready
out.bits.data := arb.io.out.bits.data
......
......@@ -35,24 +35,21 @@ case class FuConfig
}
class FuOutput extends XSBundle {
val data = UInt(XLEN.W)
class FuOutput(val len: Int) extends XSBundle {
val data = UInt(len.W)
val uop = new MicroOp
}
class FunctionUnitIO(len: Int) extends XSBundle {
class FunctionUnitIO(val len: Int) extends XSBundle {
val in = Flipped(DecoupledIO(new Bundle() {
val src = Vec(3, UInt(len.W))
val uop = new MicroOp
}))
val out = DecoupledIO(new FuOutput)
val out = DecoupledIO(new FuOutput(len))
val redirectIn = Flipped(ValidIO(new Redirect))
override def cloneType: FunctionUnitIO.this.type =
new FunctionUnitIO(len).asInstanceOf[this.type]
}
abstract class FunctionUnit(len: Int = 64) extends XSModule {
......@@ -130,14 +127,10 @@ object FunctionUnit extends HasXSParameter {
def f2f = new FPToFP
def fdivSqrt = new FDIvSqrt
def fmiscSel(fu: String)(x: FunctionUnit): Bool = {
x.io.in.bits.uop.ctrl.fuOpType.head(4) === s"b$fu".U
}
def fdivSqrt = new FDivSqrt
def f2iSel(x: FunctionUnit): Bool = {
x.io.in.bits.uop.ctrl.fuType === FuType.i2f
x.io.in.bits.uop.ctrl.rfWen
}
def i2fSel(x: FunctionUnit): Bool = {
......@@ -233,7 +226,7 @@ object FunctionUnit extends HasXSParameter {
val fmacCfg = FuConfig(
fuGen = fmac _,
fuSel = _ => true.B,
FuType.fmac, 0, 3, writeIntRf = false, writeFpRf = true, hasRedirect = false, CertainLatency(5)
FuType.fmac, 0, 3, writeIntRf = false, writeFpRf = true, hasRedirect = false, CertainLatency(4)
)
val f2iCfg = FuConfig(
......@@ -244,7 +237,7 @@ object FunctionUnit extends HasXSParameter {
val f2fCfg = FuConfig(
fuGen = f2f _,
fuSel = f2iSel,
fuSel = f2fSel,
FuType.fmisc, 0, 1, writeIntRf = false, writeFpRf = true, hasRedirect = false, CertainLatency(2)
)
......
package xiangshan.backend.fu.fpu
import xiangshan.NeedImpl
class FDIvSqrt extends FPUSubModule with NeedImpl {
}
package xiangshan.backend.fu.fpu
import chisel3._
import chisel3.util._
import freechips.rocketchip.tile.FType
import hardfloat.{DivSqrtRecFNToRaw_small, RoundAnyRawFNToRecFN}
class FDivSqrt extends FPUSubModule {
val s_idle :: s_div :: s_finish :: Nil = Enum(3)
val state = RegInit(s_idle)
val divSqrt = Module(new DivSqrtRecFNToRaw_small(FType.D.exp, FType.D.sig, 0))
val divSqrtRawValid = divSqrt.io.rawOutValid_sqrt || divSqrt.io.rawOutValid_div
val fpCtrl = io.in.bits.uop.ctrl.fpu
val tag = fpCtrl.typeTagIn
val uopReg = RegEnable(io.in.bits.uop, io.in.fire())
val single = RegEnable(tag === S, io.in.fire())
val kill = uopReg.roqIdx.needFlush(io.redirectIn)
val killReg = RegInit(false.B)
switch(state){
is(s_idle){
when(io.in.fire() && !io.in.bits.uop.roqIdx.needFlush(io.redirectIn)){ state := s_div }
}
is(s_div){
when(divSqrtRawValid){
when(kill || killReg){
state := s_idle
}.otherwise({
state := s_finish
})
}.elsewhen(kill){
killReg := true.B
}
}
is(s_finish){
state := s_idle
killReg := false.B
}
}
val src1 = unbox(io.in.bits.src(0), tag, None)
val src2 = unbox(io.in.bits.src(1), tag, None)
divSqrt.io.inValid := io.in.fire()
divSqrt.io.sqrtOp := fpCtrl.sqrt
divSqrt.io.a := src1
divSqrt.io.b := src2
divSqrt.io.roundingMode := rm
val round32 = Module(new RoundAnyRawFNToRecFN(
FType.D.exp, FType.D.sig+2, FType.S.exp, FType.S.sig, 0
))
val round64 = Module(new RoundAnyRawFNToRecFN(
FType.D.exp, FType.D.sig+2, FType.D.exp, FType.D.sig, 0
))
for(rounder <- Seq(round32, round64)){
rounder.io.invalidExc := divSqrt.io.invalidExc
rounder.io.infiniteExc := divSqrt.io.infiniteExc
rounder.io.in := divSqrt.io.rawOut
rounder.io.roundingMode := rm
rounder.io.detectTininess := hardfloat.consts.tininess_afterRounding
}
val data = Mux(single, round32.io.out, round64.io.out)
val flags = Mux(single, round32.io.exceptionFlags, round64.io.exceptionFlags)
io.in.ready := state===s_idle
io.out.valid := state===s_finish && !(killReg || kill)
io.out.bits.uop := uopReg
io.out.bits.data := RegNext(data, divSqrtRawValid)
fflags := RegNext(flags, divSqrtRawValid)
}
package xiangshan.backend.fu.fpu
import chisel3._
import xiangshan.NeedImpl
import freechips.rocketchip.tile.FType
import hardfloat.MulAddRecFN_pipeline
import xiangshan.backend.fu.FunctionUnit
class FMA extends FPUSubModule with NeedImpl {
class FMA extends FPUPipelineModule {
override def latency: Int = FunctionUnit.fmacCfg.latency.latencyVal.get
io.in.ready := true.B
val sfma = Module(new MulAddRecFN_pipeline(FType.S.exp, FType.S.sig))
val dfma = Module(new MulAddRecFN_pipeline(FType.D.exp, FType.D.sig))
val fpCtrl = io.in.bits.uop.ctrl.fpu
val typeTagIn = fpCtrl.typeTagIn
val singleIn = typeTagIn === S
sfma.io.in.valid := io.in.valid && singleIn
dfma.io.in.valid := io.in.valid && !singleIn
for((t, fma) <- Seq(FType.S, FType.D).zip(Seq(sfma, dfma))){
val src1 = unbox(io.in.bits.src(0), typeTagIn, Some(t))
val src2 = unbox(io.in.bits.src(1), typeTagIn, Some(t))
val src3 = unbox(io.in.bits.src(2), typeTagIn, Some(t))
val (in1, in2, in3) = (
WireInit(src1), WireInit(src2), WireInit(Mux(fpCtrl.isAddSub, src2, src3))
)
val one = 1.U << (t.sig + t.exp - 1)
val zero = (src1 ^ src2) & (1.U << (t.sig + t.exp))
when(fpCtrl.isAddSub){ in2 := one }
when(!(fpCtrl.isAddSub || fpCtrl.ren3)){ in3 := zero }
fma.io.in.bits.a := in1
fma.io.in.bits.b := in2
fma.io.in.bits.c := in3
fma.io.in.bits.op := fpCtrl.fmaCmd
fma.io.in.bits.roundingMode := rm
fma.io.in.bits.detectTininess := hardfloat.consts.tininess_afterRounding
fma.io.out.ready := io.out.ready
}
val singleOut = io.out.bits.uop.ctrl.fpu.typeTagOut === S
io.out.bits.data := Mux(singleOut,
sanitizeNaN(sfma.io.out.bits.out, FType.S),
sanitizeNaN(dfma.io.out.bits.out, FType.D)
)
fflags := Mux(singleOut,
sfma.io.out.bits.exceptionFlags,
dfma.io.out.bits.exceptionFlags
)
}
......@@ -16,8 +16,8 @@ class FPToInt extends FPUPipelineModule {
val ctrl = io.in.bits.uop.ctrl.fpu
val src1_s = unbox(src1, S, Some(FType.S))
val src1_d = unbox(src1, D, Some(FType.D))
val src2_d = unbox(src2, D, Some(FType.D))
val src1_d = unbox(src1, ctrl.typeTagIn, None)
val src2_d = unbox(src2, ctrl.typeTagIn, None)
val src1_ieee = ieee(src1)
val move_out = Mux(ctrl.typeTagIn === S, src1_ieee(31, 0), src1_ieee)
......@@ -68,7 +68,8 @@ class FPToInt extends FPUPipelineModule {
Mux(ctrl.fcvt, conv_out, dcmp_out),
Mux(rm(0), classify_out, move_out)
)
val intValue = Mux(ctrl.typ(1),
val doubleOut = Mux(ctrl.fcvt, ctrl.typ(1), ctrl.fmt(0))
val intValue = Mux(doubleOut,
SignExt(intData, XLEN),
SignExt(intData(31, 0), XLEN)
)
......
......@@ -13,7 +13,7 @@ class IntToFP extends FPUSubModule {
val src1 = io.in.bits.src(0)(XLEN-1, 0)
val mux = Wire(new Bundle() {
val data = UInt(XLEN.W)
val data = UInt((XLEN+1).W)
val exc = UInt(5.W)
})
mux.data := recode(src1, tag)
......@@ -40,7 +40,7 @@ class IntToFP extends FPUSubModule {
fflags := mux.exc
io.out.bits.uop := io.in.bits.uop
io.out.bits.data := mux.data
io.out.bits.data := box(mux.data, io.in.bits.uop.ctrl.fpu.typeTagOut)
io.out.valid := io.in.valid
io.in.ready := io.out.ready
}
......@@ -329,7 +329,7 @@ class ReservationStationData
// listen to write back data bus(certain latency)
// and extra wrtie back(uncertan latency)
val writeBackedData = Vec(wakeupCnt, Input(UInt(XLEN.W)))
val writeBackedData = Vec(wakeupCnt, Input(UInt((XLEN+1).W)))
val extraListenPorts = Vec(extraListenPortsCnt, Flipped(ValidIO(new ExuOutput)))
// tlb feedback
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册