提交 4ee69032 编写于 作者: Z zhanglyGit 提交者: Xuan Hu

VldIssue: backend support Vld issue

上级 b536da76
......@@ -366,6 +366,10 @@ case class XSCoreParameters
ExeUnitParams(Seq(StdCfg, MoudCfg), Seq(), Seq(Seq(IntRD(12, 0), VfRD(12, 0)))),
ExeUnitParams(Seq(StdCfg, MoudCfg), Seq(), Seq(Seq(IntRD(13, 0), VfRD(13, 0)))),
), numEntries = 8, pregBits = pregBits, numWakeupFromWB = 16, numEnq = 2),
IssueBlockParams(Seq(
ExeUnitParams(Seq(VlduCfg), Seq(VecWB(6, 0)), Seq(Seq(VfRD(0, 0)), Seq(VfRD(1, 0)), Seq(VfRD(2, 0)), Seq(VfRD(3, 0)), Seq(VfRD(4, 0)))),
ExeUnitParams(Seq(VlduCfg), Seq(VecWB(7, 0)), Seq(Seq(VfRD(5, 0)), Seq(VfRD(6, 0)), Seq(VfRD(7, 0)), Seq(VfRD(8, 0)), Seq(VfRD(9, 0)))),
), numEntries = 8, pregBits = pregBits, numWakeupFromWB = 16, numEnq = 2),
),
numPregs = intPreg.numEntries max vfPreg.numEntries,
numRfReadWrite = None,
......
......@@ -133,9 +133,11 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
backend.io.mem.lqCancelCnt := memBlock.io.lqCancelCnt
backend.io.mem.sqCancelCnt := memBlock.io.sqCancelCnt
backend.io.mem.otherFastWakeup := memBlock.io.otherFastWakeup
backend.io.mem.writeBack <> memBlock.io.writeback
backend.io.mem.ldaIqFeedback <> memBlock.io.ldaIqFeedback
backend.io.mem.staIqFeedback <> memBlock.io.staIqFeedback
backend.io.mem.writeBack.zip(memBlock.io.writeback).foreach { case(back, mem) =>
back <> mem
}
frontend.io.reset_vector := io.reset_vector
......@@ -148,7 +150,9 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
io.beu_errors.dcache <> memBlock.io.error.toL1BusErrorUnitInfo()
memBlock.io.hartId := io.hartId
memBlock.io.issue <> backend.io.mem.issueUops
memBlock.io.issue.zip(backend.io.mem.issueUops).foreach { case(memIssue, backIssue) =>
memIssue <> backIssue
}
// By default, instructions do not have exceptions when they enter the function units.
memBlock.io.issue.map(_.bits.uop.clearExceptions())
backend.io.mem.loadFastMatch <> memBlock.io.loadFastMatch
......
......@@ -281,7 +281,7 @@ class BackendMemIO(implicit p: Parameters, params: BackendParams) extends XSBund
val loadPcRead = Vec(params.LduCnt, Flipped(new FtqRead(UInt(VAddrBits.W))))
// Input
val writeBack = Vec(params.LduCnt + params.StaCnt * 2, Flipped(DecoupledIO(new MemExuOutput())))
val writeBack = MixedVec(Seq.fill(params.LduCnt + params.StaCnt * 2)(Flipped(DecoupledIO(new MemExuOutput()))) ++ Seq.fill(params.VlduCnt)(Flipped(DecoupledIO(new MemExuOutput(true)))))
val s3_delayed_load_error = Input(Vec(LoadPipelineWidth, Bool()))
val stIn = Input(Vec(params.StaCnt, ValidIO(new DynInst())))
......@@ -300,7 +300,7 @@ class BackendMemIO(implicit p: Parameters, params: BackendParams) extends XSBund
// Output
val redirect = ValidIO(new Redirect) // rob flush MemBlock
val issueUops = Vec(params.LduCnt + 2 * params.StaCnt, DecoupledIO(new MemExuInput()))
val issueUops = MixedVec(Seq.fill(params.LduCnt + params.StaCnt * 2)(DecoupledIO(new MemExuInput())) ++ Seq.fill(params.VlduCnt)(DecoupledIO(new MemExuInput(true))))
val loadFastMatch = Vec(params.LduCnt, Output(UInt(params.LduCnt.W)))
val loadFastImm = Vec(params.LduCnt, Output(UInt(12.W))) // Imm_I
......
......@@ -56,6 +56,7 @@ case class BackendParams(
def StaCnt = allSchdParams.map(_.StaCnt).sum
def StdCnt = allSchdParams.map(_.StdCnt).sum
def LduCnt = allSchdParams.map(_.LduCnt).sum
def VlduCnt = allSchdParams.map(_.VlduCnt).sum
def LsExuCnt = StaCnt + LduCnt
def JmpCnt = allSchdParams.map(_.JmpCnt).sum
def BrhCnt = allSchdParams.map(_.BrhCnt).sum
......
......@@ -482,16 +482,16 @@ object Bundles {
val isInterrupt = Bool()
}
class MemExuInput(implicit p: Parameters) extends XSBundle {
class MemExuInput(isVector: Boolean = false)(implicit p: Parameters) extends XSBundle {
val uop = new DynInst
val src = Vec(3, UInt(XLEN.W))
val src = if(isVector) Vec(5, UInt(VLEN.W)) else Vec(3, UInt(XLEN.W))
val iqIdx = UInt(log2Up(MemIQSizeMax).W)
val isFirstIssue = Bool()
}
class MemExuOutput(implicit p: Parameters) extends XSBundle {
class MemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends XSBundle {
val uop = new DynInst
val data = UInt(XLEN.W)
val data = if(isVector) UInt(VLEN.W) else UInt(XLEN.W)
val debug = new DebugBundle
}
......
......@@ -71,6 +71,8 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
val src1 = Cat(0.U(1.W), staticInst.instr(19, 15))
val src2 = Cat(0.U(1.W), staticInst.instr(24, 20))
val dest = Cat(0.U(1.W), staticInst.instr(11, 7))
val width = staticInst.instr(14, 12) //Vector LS eew
val eew = Cat(0.U(1.W), width(1, 0))
//output bits
val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst))
......@@ -102,6 +104,9 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
//Type of uop Div
val typeOfDiv = decodedInsts_u.uopSplitType
val sew = Cat(0.U(1.W), simple.io.enq.vtype.vsew)
val vlmul = simple.io.enq.vtype.vlmul
//LMUL
val lmul = MuxLookup(simple.io.enq.vtype.vlmul, 1.U(4.W), Array(
"b001".U -> 2.U,
......@@ -113,6 +118,13 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
"b010".U -> 10.U,
"b011".U -> 36.U
))
// Encoded EMUL for vector load/store, computed as eew + vlmul - sew with the subtraction
// written in two's-complement form (~sew + 1).
// NOTE(review): eew/sew are built above as a zero bit concatenated with a narrower field;
// presumably all operands are 3 bits wide so the sum wraps modulo 8 — confirm vlmul's width.
val vemul : UInt = eew.asUInt + 1.U + vlmul.asUInt + ~sew.asUInt
// Register-group count selected by the vemul code: 1 -> 2, 2 -> 4, 3 -> 8.
// Every other code (0 and any wrapped/negative result) falls back to the default of 1.
val emul = MuxLookup(vemul, 1.U(4.W), Array(
"b001".U -> 2.U,
"b010".U -> 4.U,
"b011".U -> 8.U
)) //TODO : eew and emul illegal exception need to be handled
//number of uop
val numOfUop = MuxLookup(typeOfDiv, 1.U(log2Up(maxUopSize+1).W), Array(
UopSplitType.VEC_0XV -> 2.U,
......@@ -144,6 +156,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
UopSplitType.VEC_M0X -> (lmul +& 1.U),
UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) -1.U),
UopSplitType.VEC_M0X_VFIRST -> 2.U,
UopSplitType.VEC_US_LD -> (emul +& 1.U),
))
//uop div up to maxUopSize
......@@ -481,7 +494,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(0).lsrc(2) := dest
csBundle(0).ldest := dest
csBundle(0).uopIdx := 0.U
for(i <- 1 until MAX_VLMUL) {
for (i <- 1 until MAX_VLMUL) {
csBundle(i).lsrc(0) := src1 + i.U
csBundle(i).lsrc(1) := src2 + i.U
csBundle(i).lsrc(2) := dest
......@@ -605,12 +618,12 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
for (i <- 0 until MAX_VLMUL) {
csBundle(2 * i + 1).srcType(0) := SrcType.vp
csBundle(2 * i + 1).srcType(1) := SrcType.vp
csBundle(2 * i + 1).lsrc(0) := src2 + (i+1).U
csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
csBundle(2 * i + 1).lsrc(1) := src2 + i.U
csBundle(2 * i + 1).lsrc(2) := dest + i.U
csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U
csBundle(2 * i + 1).uopIdx := (2 * i).U
if (2 * i + 2 < MAX_VLMUL * 2 ){
if (2 * i + 2 < MAX_VLMUL * 2) {
csBundle(2 * i + 2).srcType(0) := SrcType.fp
csBundle(2 * i + 2).lsrc(0) := FP_TMP_REG_MV.U
// csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare
......@@ -628,7 +641,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
for (i <- 0 until MAX_VLMUL) {
csBundle(2 * i).srcType(0) := SrcType.vp
csBundle(2 * i).srcType(1) := SrcType.vp
csBundle(2 * i).lsrc(0) := src2 + (i+1).U
csBundle(2 * i).lsrc(0) := src2 + (i + 1).U
csBundle(2 * i).lsrc(1) := src2 + i.U
csBundle(2 * i).lsrc(2) := dest + i.U
csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U
......@@ -644,7 +657,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
}
is(UopSplitType.VEC_VRED) {
when(simple.io.enq.vtype.vlmul === "b001".U){
when(simple.io.enq.vtype.vlmul === "b001".U) {
csBundle(0).srcType(2) := SrcType.DC
csBundle(0).lsrc(0) := src2 + 1.U
csBundle(0).lsrc(1) := src2
......@@ -661,26 +674,26 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(1).srcType(2) := SrcType.DC
csBundle(1).lsrc(0) := src2 + 3.U
csBundle(1).lsrc(1) := src2 + 2.U
csBundle(1).ldest := (VECTOR_TMP_REG_LMUL+1).U
csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
csBundle(1).uopIdx := 1.U
csBundle(2).srcType(2) := SrcType.DC
csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL+1).U
csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
csBundle(2).ldest := (VECTOR_TMP_REG_LMUL+2).U
csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
csBundle(2).uopIdx := 2.U
}
when(simple.io.enq.vtype.vlmul === "b011".U) {
for(i <- 0 until MAX_VLMUL){
if(i < MAX_VLMUL - MAX_VLMUL/2){
for (i <- 0 until MAX_VLMUL) {
if (i < MAX_VLMUL - MAX_VLMUL / 2) {
csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
csBundle(i).lsrc(1) := src2 + (i * 2).U
csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
} else if (i < MAX_VLMUL - MAX_VLMUL/4) {
csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL/2)*2 + 1).U
csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL/2)*2).U
} else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
}else if (i < MAX_VLMUL - MAX_VLMUL/8) {
} else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
......@@ -689,7 +702,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(i).uopIdx := i.U
}
}
when (simple.io.enq.vtype.vlmul.orR()){
when(simple.io.enq.vtype.vlmul.orR()) {
csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
csBundle(numOfUop - 1.U).lsrc(0) := src1
csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
......@@ -719,29 +732,37 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(0).fpu.sqrt := false.B
csBundle(0).fpu.fcvt := false.B
// LMUL
for(i <- 0 until MAX_VLMUL)
for(j <- 0 to i){
val old_vd = if (j==0) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j-1).U
val vd = if (j==i) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j).U
csBundle(i*(i+1)/2+j+1).srcType(0) := SrcType.fp
csBundle(i*(i+1)/2+j+1).lsrc(0) := FP_TMP_REG_MV.U
csBundle(i*(i+1)/2+j+1).lsrc(1) := src2 + j.U
csBundle(i*(i+1)/2+j+1).lsrc(2) := old_vd
csBundle(i*(i+1)/2+j+1).ldest := vd
csBundle(i*(i+1)/2+j+1).uopIdx := (i*(i+1)/2+j).U
for (i <- 0 until MAX_VLMUL)
for (j <- 0 to i) {
val old_vd = if (j == 0) {
dest + i.U
} else (VECTOR_TMP_REG_LMUL + j - 1).U
val vd = if (j == i) {
dest + i.U
} else (VECTOR_TMP_REG_LMUL + j).U
csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.fp
csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := FP_TMP_REG_MV.U
csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
}
}
is(UopSplitType.VEC_ISLIDEUP) {
// LMUL
for(i <- 0 until MAX_VLMUL)
for(j <- 0 to i){
val old_vd = if (j==0) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j-1).U
val vd = if (j==i) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j).U
csBundle(i*(i+1)/2+j).lsrc(1) := src2 + j.U
csBundle(i*(i+1)/2+j).lsrc(2) := old_vd
csBundle(i*(i+1)/2+j).ldest := vd
csBundle(i*(i+1)/2+j).uopIdx := (i*(i+1)/2+j).U
for (i <- 0 until MAX_VLMUL)
for (j <- 0 to i) {
val old_vd = if (j == 0) {
dest + i.U
} else (VECTOR_TMP_REG_LMUL + j - 1).U
val vd = if (j == i) {
dest + i.U
} else (VECTOR_TMP_REG_LMUL + j).U
csBundle(i * (i + 1) / 2 + j).lsrc(1) := src2 + j.U
csBundle(i * (i + 1) / 2 + j).lsrc(2) := old_vd
csBundle(i * (i + 1) / 2 + j).ldest := vd
csBundle(i * (i + 1) / 2 + j).uopIdx := (i * (i + 1) / 2 + j).U
}
}
......@@ -765,32 +786,40 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(0).fpu.sqrt := false.B
csBundle(0).fpu.fcvt := false.B
// LMUL
for(i <- 0 until MAX_VLMUL)
for(j <- (0 to i).reverse){
when(i.U < lmul){
val old_vd = if (j==0) {dest + lmul -1.U - i.U} else (VECTOR_TMP_REG_LMUL+j-1).U
val vd = if (j==i) {dest + lmul - 1.U - i.U} else (VECTOR_TMP_REG_LMUL+j).U
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).srcType(0) := SrcType.fp
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(0) := FP_TMP_REG_MV.U
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(1) := src2 + lmul - 1.U - j.U
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(2) := old_vd
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).ldest := vd
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).uopIdx := numOfUop-(i*(i+1)/2+i-j+2).U
for (i <- 0 until MAX_VLMUL)
for (j <- (0 to i).reverse) {
when(i.U < lmul) {
val old_vd = if (j == 0) {
dest + lmul - 1.U - i.U
} else (VECTOR_TMP_REG_LMUL + j - 1).U
val vd = if (j == i) {
dest + lmul - 1.U - i.U
} else (VECTOR_TMP_REG_LMUL + j).U
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.fp
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := FP_TMP_REG_MV.U
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
}
}
}
is(UopSplitType.VEC_ISLIDEDOWN) {
// LMUL
for(i <- 0 until MAX_VLMUL)
for(j <- (0 to i).reverse){
when(i.U < lmul){
val old_vd = if (j==0) {dest + lmul -1.U - i.U} else (VECTOR_TMP_REG_LMUL+j-1).U
val vd = if (j==i) {dest + lmul - 1.U - i.U} else (VECTOR_TMP_REG_LMUL+j).U
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(1) := src2 + lmul - 1.U - j.U
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(2) := old_vd
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).ldest := vd
csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).uopIdx := numOfUop-(i*(i+1)/2+i-j+1).U
for (i <- 0 until MAX_VLMUL)
for (j <- (0 to i).reverse) {
when(i.U < lmul) {
val old_vd = if (j == 0) {
dest + lmul - 1.U - i.U
} else (VECTOR_TMP_REG_LMUL + j - 1).U
val vd = if (j == i) {
dest + lmul - 1.U - i.U
} else (VECTOR_TMP_REG_LMUL + j).U
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 1).U
}
}
}
......@@ -798,7 +827,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
is(UopSplitType.VEC_M0X) {
// LMUL
for (i <- 0 until MAX_VLMUL) {
val srcType0 = if (i==0) SrcType.DC else SrcType.vp
val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
val ldest = (VECTOR_TMP_REG_LMUL + i).U
csBundle(i).srcType(0) := srcType0
csBundle(i).srcType(1) := SrcType.vp
......@@ -810,9 +839,9 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(i).ldest := ldest
csBundle(i).uopIdx := i.U
}
csBundle(lmul-1.U).vecWen := false.B
csBundle(lmul-1.U).fpWen := true.B
csBundle(lmul-1.U).ldest := FP_TMP_REG_MV.U
csBundle(lmul - 1.U).vecWen := false.B
csBundle(lmul - 1.U).fpWen := true.B
csBundle(lmul - 1.U).ldest := FP_TMP_REG_MV.U
// FMV_X_D
csBundle(lmul).srcType(0) := SrcType.fp
csBundle(lmul).srcType(1) := SrcType.imm
......@@ -837,22 +866,22 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
is(UopSplitType.VEC_MVV) {
// LMUL
for (i <- 0 until MAX_VLMUL) {
val srcType0 = if (i==0) SrcType.DC else SrcType.vp
csBundle(i*2+0).srcType(0) := srcType0
csBundle(i*2+0).srcType(1) := SrcType.vp
csBundle(i*2+0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
csBundle(i*2+0).lsrc(1) := src2
csBundle(i*2+0).lsrc(2) := dest + i.U
csBundle(i*2+0).ldest := dest + i.U
csBundle(i*2+0).uopIdx := (i*2+0).U
csBundle(i*2+1).srcType(0) := srcType0
csBundle(i*2+1).srcType(1) := SrcType.vp
csBundle(i*2+1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
csBundle(i*2+1).lsrc(1) := src2
val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
csBundle(i * 2 + 0).srcType(0) := srcType0
csBundle(i * 2 + 0).srcType(1) := SrcType.vp
csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
csBundle(i * 2 + 0).lsrc(1) := src2
csBundle(i * 2 + 0).lsrc(2) := dest + i.U
csBundle(i * 2 + 0).ldest := dest + i.U
csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
csBundle(i * 2 + 1).srcType(0) := srcType0
csBundle(i * 2 + 1).srcType(1) := SrcType.vp
csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
csBundle(i * 2 + 1).lsrc(1) := src2
// csBundle(i).lsrc(2) := dest + i.U DontCare
csBundle(i*2+1).ldest := (VECTOR_TMP_REG_LMUL + i).U
csBundle(i*2+1).uopIdx := (i*2+1).U
csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
}
}
......@@ -881,6 +910,35 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit
csBundle(1).fpu.sqrt := false.B
csBundle(1).fpu.fcvt := false.B
}
// Uop split for VEC_US_LD: one scalar-to-FP move uop followed by one uop per destination
// vector register. The uop count for this split type is set to (emul +& 1.U) in numOfUop.
is(UopSplitType.VEC_US_LD) {
/*
FMV.D.X
*/
// uop 0: integer-register source moved into the temporary FP register FP_TMP_REG_MV
// through the i2f unit. lsrc(0) is not assigned here — presumably it keeps the decoded
// rs1 default; TODO confirm.
csBundle(0).srcType(0) := SrcType.reg
csBundle(0).srcType(1) := SrcType.imm
csBundle(0).lsrc(1) := 0.U
csBundle(0).ldest := FP_TMP_REG_MV.U
csBundle(0).fuType := FuType.i2f.U
// Only the FP register file is written by this uop.
csBundle(0).rfWen := false.B
csBundle(0).fpWen := true.B
csBundle(0).vecWen := false.B
// FPU control: double-precision in/out, integer source, no flag update, no div/sqrt/fcvt.
csBundle(0).fpu.isAddSub := false.B
csBundle(0).fpu.typeTagIn := FPU.D
csBundle(0).fpu.typeTagOut := FPU.D
csBundle(0).fpu.fromInt := true.B
csBundle(0).fpu.wflags := false.B
csBundle(0).fpu.fpWen := true.B
csBundle(0).fpu.div := false.B
csBundle(0).fpu.sqrt := false.B
csBundle(0).fpu.fcvt := false.B
// uops 1..MAX_VLMUL: each reads FP_TMP_REG_MV as FP source 0 and writes vector register
// dest + i; uopIdx restarts at 0 for the first of these uops.
// All MAX_VLMUL slots are filled regardless of emul — presumably only the first numOfUop
// entries are consumed downstream; TODO confirm.
for (i <- 0 until MAX_VLMUL) {
csBundle(i + 1).srcType(0) := SrcType.fp
csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
csBundle(i + 1).ldest := dest + i.U
csBundle(i + 1).uopIdx := i.U
}
}
}
//uops dispatch
......
......@@ -84,7 +84,7 @@ case class VSET(vli: Boolean, vtypei: Boolean, fuOp: BitPat, flushPipe: Boolean,
}
case class VLD(src2: BitPat, fuOp: BitPat, strided: Boolean = false, indexed: Boolean = false, ff: Boolean = false,
mask: Boolean = false, whole: Boolean = false, ordered: Boolean = false, uopSplitType: BitPat = UopSplitType.dummy) extends XSDecodeBase {
mask: Boolean = false, whole: Boolean = false, ordered: Boolean = false, uopSplitType: BitPat = UopSplitType.VEC_US_LD) extends XSDecodeBase {
def generate() : List[BitPat] = {
val fu = FuType.vldu
val src1 = SrcType.xp
......
......@@ -84,9 +84,12 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
))
val isFp = VecInit(io.fromRename.map(req => FuType.isFp (req.bits.fuType) ||
FuType.isVpu (req.bits.fuType)))
val isMem = VecInit(io.fromRename.map(req => FuType.isMem(req.bits.fuType)))
val isMem = VecInit(io.fromRename.map(req => FuType.isMem(req.bits.fuType) ||
FuType.isVls (req.bits.fuType)))
val isLs = VecInit(io.fromRename.map(req => FuType.isLoadStore(req.bits.fuType)))
val isVls = VecInit(io.fromRename.map(req => FuType.isVls (req.bits.fuType)))
val isStore = VecInit(io.fromRename.map(req => FuType.isStore(req.bits.fuType)))
val isVStore = VecInit(io.fromRename.map(req => FuType.isVStore(req.bits.fuType)))
val isAMO = VecInit(io.fromRename.map(req => FuType.isAMO(req.bits.fuType)))
val isBlockBackward = VecInit(io.fromRename.map(_.bits.blockBackward))
val isWaitForward = VecInit(io.fromRename.map(_.bits.waitForward))
......@@ -108,7 +111,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
for (i <- 0 until RenameWidth) {
updatedCommitType(i) := Cat(isLs(i), (isStore(i) && !isAMO(i)) | isBranch(i))
updatedCommitType(i) := Cat(isLs(i) | isVls(i), (isStore(i) && !isAMO(i)) | isVStore(i) | isBranch(i))
updatedUop(i) := io.fromRename(i).bits
updatedUop(i).debugInfo.eliminatedMove := io.fromRename(i).bits.eliminatedMove
......@@ -122,7 +125,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
when (io.fromRename(i).bits.isLUI) {
updatedUop(i).psrc(0) := 0.U
}
//TODO: vec ls mdp
io.lfst.req(i).valid := io.fromRename(i).fire && updatedUop(i).storeSetHit
io.lfst.req(i).bits.isstore := isStore(i)
io.lfst.req(i).bits.ssid := updatedUop(i).ssid
......
......@@ -63,11 +63,13 @@ case class ExeUnitParams(
def hasLoadFu = fuConfigs.map(_.fuType == FuType.ldu).reduce(_ || _)
def hasVLoadFu = fuConfigs.map(_.fuType == FuType.vldu).reduce(_ || _)
def hasStoreAddrFu = fuConfigs.map(_.name == "sta").reduce(_ || _)
def hasStdFu = fuConfigs.map(_.name == "std").reduce(_ || _)
def hasMemAddrFu = hasLoadFu || hasStoreAddrFu
def hasMemAddrFu = hasLoadFu || hasStoreAddrFu || hasVLoadFu
def hasVecFu = fuConfigs.map(x => FuConfig.VecArithFuConfigs.contains(x)).reduce(_ || _)
......
......@@ -500,13 +500,31 @@ object FuConfig {
writeVecRf = true,
latency = UncertainLatency(),
)
// Todo
// def VlduCfg = FuConfig ()
// Function-unit configuration for the vector load unit (FuType.vldu).
// It declares five sources (vs1, vs2, vd_old, v0 mask, vconfig), writes the vector register
// file, has uncertain latency, and can raise the three load exceptions listed below.
val VlduCfg: FuConfig = FuConfig (
name = "vldu",
fuType = FuType.vldu,
// NOTE(review): no generator is supplied (null) — presumably this unit is instantiated
// outside the FuConfig-driven path; confirm nothing dereferences fuGen for this config.
fuGen = null,
srcData = Seq(
Seq(VecData(), VecData(), VecData(), MaskSrcData(), VConfigData()), //vs1, vs2, vd_old, v0, vconfig
),
piped = false, // Todo: check it
writeVecRf = true,
latency = UncertainLatency(),
exceptionOut = Seq(loadAddrMisaligned, loadAccessFault, loadPageFault),
flushPipe = true,
replayInst = true,
hasLoadError = true,
// Wake-up is enabled for both the vconfig and the mask source.
vconfigWakeUp = true,
maskWakeUp = true,
// NOTE(review): presumably matches the vector register width (VLEN) — confirm.
dataBits = 128,
)
//TODO
// def VstuCfg = FuConfig ()
def allConfigs = Seq(
JmpCfg, BrhCfg, I2fCfg, CsrCfg, AluCfg, MulCfg, DivCfg, FenceCfg, BkuCfg, VSetRvfWvfCfg, VSetRiWvfCfg, VSetRiWiCfg,
FmacCfg, F2iCfg, F2fCfg, FDivSqrtCfg, LduCfg, StaCfg, StdCfg, MouCfg, MoudCfg, VialuCfg, VipuCfg, VfpuCfg
FmacCfg, F2iCfg, F2fCfg, FDivSqrtCfg, LduCfg, StaCfg, StdCfg, MouCfg, MoudCfg, VialuCfg, VipuCfg, VfpuCfg, VlduCfg
)
def VecArithFuConfigs = Seq(
......
......@@ -62,7 +62,13 @@ object FuType {
def isFence(fuType: UInt): Bool = fuType(7)
def isVpu(fuType: UInt): Bool = fuType(19, 16).orR || fuType(21) || fuType(24)
def isVpu(fuType: UInt): Bool = fuType(18, 16).orR || fuType(21) || fuType(24)
// Vector memory classification on the fuType bit vector:
// bit 19 marks a vector load, bit 20 a vector store, and isVls is true for either.
def isVLoad(fuType: UInt): Bool = fuType(19)
def isVStore(fuType: UInt): Bool = fuType(20)
def isVls(fuType: UInt): Bool = isVLoad(fuType) || isVStore(fuType)
def storeIsAMO(fuType: UInt): Bool = fuType(15)
......
......@@ -288,12 +288,14 @@ class Dispatch2IqMemImp(override val wrapper: Dispatch2Iq)(implicit p: Parameter
private val dispatchCfg: Seq[(Seq[Int], Int)] = Seq(
(Seq(ldu), 2),
(Seq(stu, mou), 2),
(Seq(vldu), 2),
)
private val enqLsqIO = io.enqLsqIO.get
private val numLoadDeq = LoadPipelineWidth
private val numStoreAMODeq = StorePipelineWidth
private val numVLoadDeq = LoadPipelineWidth
private val numDeq = enqLsqIO.req.size
private val numEnq = io.in.size
......@@ -314,17 +316,23 @@ class Dispatch2IqMemImp(override val wrapper: Dispatch2Iq)(implicit p: Parameter
private val isStoreVec = VecInit(io.in.map(x => x.valid && FuType.isStore(x.bits.fuType)))
private val isAMOVec = io.in.map(x => x.valid && FuType.isAMO(x.bits.fuType))
private val isStoreAMOVec = io.in.map(x => x.valid && (FuType.isStore(x.bits.fuType) || FuType.isAMO(x.bits.fuType)))
private val isVLoadVec = VecInit(io.in.map(x => x.valid && FuType.isVLoad(x.bits.fuType)))
private val isVStoreVec = VecInit(io.in.map(x => x.valid && FuType.isVStore(x.bits.fuType)))
private val loadCntVec = VecInit(isLoadVec.indices.map(x => PopCount(isLoadVec.slice(0, x + 1))))
private val storeAMOCntVec = VecInit(isStoreAMOVec.indices.map(x => PopCount(isStoreAMOVec.slice(0, x + 1))))
private val vloadCntVec = VecInit(isVLoadVec.indices.map(x => PopCount(isVLoadVec.slice(0, x + 1))))
val loadBlockVec = VecInit(loadCntVec.map(_ > numLoadDeq.U))
val storeAMOBlockVec = VecInit(storeAMOCntVec.map(_ > numStoreAMODeq.U))
val lsStructBlockVec = VecInit(loadBlockVec.zip(storeAMOBlockVec).map(x => x._1 || x._2))
val vloadBlockVec = VecInit(vloadCntVec.map(_ > numVLoadDeq.U))
val lsStructBlockVec = VecInit((loadBlockVec.zip(storeAMOBlockVec)).zip(vloadBlockVec).map(x => x._1._1 || x._1._2 || x._2))
dontTouch(loadBlockVec)
dontTouch(storeAMOBlockVec)
dontTouch(lsStructBlockVec)
dontTouch(vloadBlockVec)
dontTouch(isLoadVec)
dontTouch(isVLoadVec)
dontTouch(loadCntVec)
s0_in <> io.in
......@@ -342,10 +350,10 @@ class Dispatch2IqMemImp(override val wrapper: Dispatch2Iq)(implicit p: Parameter
for (i <- enqLsqIO.req.indices) {
when (!io.in(i).valid) {
enqLsqIO.needAlloc(i) := 0.U
}.elsewhen(isStoreAMOVec(i)) {
enqLsqIO.needAlloc(i) := 2.U // store | amo
}.elsewhen(isStoreAMOVec(i) || isVStoreVec(i)) {
enqLsqIO.needAlloc(i) := 2.U // store | amo | vstore
}.otherwise {
enqLsqIO.needAlloc(i) := 1.U // load
enqLsqIO.needAlloc(i) := 1.U // load | vload
}
enqLsqIO.req(i).valid := io.in(i).valid && !s0_blockedVec(i) && !iqNotAllReady && !lsqCannotAccept && !FuType.isAMO(io.in(i).bits.fuType)
enqLsqIO.req(i).bits := io.in(i).bits
......
......@@ -461,7 +461,7 @@ class IssueQueueMemIO(implicit p: Parameters, params: IssueBlockParams) extends
class IssueQueueMemAddrImp(override val wrapper: IssueQueue)(implicit p: Parameters, params: IssueBlockParams)
extends IssueQueueImp(wrapper) with HasCircularQueuePtrHelper {
require(params.StdCnt == 0 && (params.LduCnt + params.StaCnt) > 0, "IssueQueueMemAddrImp can only be instance of MemAddr IQ")
require(params.StdCnt == 0 && (params.LduCnt + params.StaCnt + params.VlduCnt) > 0, "IssueQueueMemAddrImp can only be instance of MemAddr IQ")
io.suggestName("none")
override lazy val io = IO(new IssueQueueMemIO).suggestName("io")
......
......@@ -98,6 +98,7 @@ class StatusArray()(implicit p: Parameters, params: IssueBlockParams) extends XS
val flushedVec = Wire(Vec(params.numEntries, Bool()))
val clearVec = Wire(Vec(params.numEntries, Bool()))
val deqSelVec = Wire(Vec(params.numEntries, Bool()))
val deqSelVec2 = Wire(Vec(params.numDeq, Vec(params.numEntries, Bool()))) // per deq's deqSelVec
dontTouch(deqRespVec)
// Reg
......@@ -159,6 +160,12 @@ class StatusArray()(implicit p: Parameters, params: IssueBlockParams) extends XS
deqSel := VecInit(io.deq.map(x => x.deqSelOH.valid && x.deqSelOH.bits(i))).asUInt.orR
}
// Expand each dequeue port's one-hot selection into a per-entry Bool vector:
// entry i of a port's vector is set when that port's select is valid and picks entry i.
for ((deqSingle, deqSelVecSingle) <- io.deq.zip(deqSelVec2)) {
  for (i <- deqSelVecSingle.indices) {
    deqSelVecSingle(i) := deqSingle.deqSelOH.valid && deqSingle.deqSelOH.bits(i)
  }
}
val resps = params.schdType match {
case IntScheduler() => io.deqResp ++ io.og0Resp ++ io.og1Resp
case MemScheduler() => io.deqResp ++ io.og1Resp
......@@ -195,7 +202,9 @@ class StatusArray()(implicit p: Parameters, params: IssueBlockParams) extends XS
io.canIssue := canIssueVec.asUInt
io.clear := clearVec.asUInt
io.rsFeedback := 0.U.asTypeOf(io.rsFeedback)
io.deq.foreach(_.isFirstIssue := Mux1H(deqSelVec, statusVec.map(!_.firstIssue)))
// Drive isFirstIssue per dequeue port from that port's own selection vector, so each port
// reports the entry it selected. The flag is true when the selected entry's firstIssue
// status bit is still clear.
io.deq.zip(deqSelVec2).foreach { case (deqSingle, deqSelVecSingle) =>
deqSingle.isFirstIssue := Mux1H(deqSelVecSingle, statusVec.map(!_.firstIssue))
}
dontTouch(io.deq)
}
......
......@@ -602,6 +602,7 @@ package object xiangshan {
def VEC_RGATHER_VX = "b101110".U // vrgather.vx
def VEC_RGATHEREI16 = "b101111".U // vrgatherei16.vv
def VEC_COMPRESS = "b110000".U // vcompress.vm
def VEC_US_LD = "b110001".U // vector unit-stride load
def VEC_M0M = "b000000".U // VEC_M0M
def VEC_MMM = "b000000".U // VEC_MMM
def dummy = "b111111".U
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册