diff --git a/src/main/scala/xiangshan/Parameters.scala b/src/main/scala/xiangshan/Parameters.scala index 092afad33e44fe430cfcd03f343c98c63acfcea5..24a986ede2b73c91c2e9d16a92e00befa1d91c18 100644 --- a/src/main/scala/xiangshan/Parameters.scala +++ b/src/main/scala/xiangshan/Parameters.scala @@ -366,6 +366,10 @@ case class XSCoreParameters ExeUnitParams(Seq(StdCfg, MoudCfg), Seq(), Seq(Seq(IntRD(12, 0), VfRD(12, 0)))), ExeUnitParams(Seq(StdCfg, MoudCfg), Seq(), Seq(Seq(IntRD(13, 0), VfRD(13, 0)))), ), numEntries = 8, pregBits = pregBits, numWakeupFromWB = 16, numEnq = 2), + IssueBlockParams(Seq( + ExeUnitParams(Seq(VlduCfg), Seq(VecWB(6, 0)), Seq(Seq(VfRD(0, 0)), Seq(VfRD(1, 0)), Seq(VfRD(2, 0)), Seq(VfRD(3, 0)), Seq(VfRD(4, 0)))), + ExeUnitParams(Seq(VlduCfg), Seq(VecWB(7, 0)), Seq(Seq(VfRD(5, 0)), Seq(VfRD(6, 0)), Seq(VfRD(7, 0)), Seq(VfRD(8, 0)), Seq(VfRD(9, 0)))), + ), numEntries = 8, pregBits = pregBits, numWakeupFromWB = 16, numEnq = 2), ), numPregs = intPreg.numEntries max vfPreg.numEntries, numRfReadWrite = None, diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala index 99369d4926e4c3e6607c0c16b2dbed02ff50e2f4..abaaf555326b00e63531964af28825d11123316d 100644 --- a/src/main/scala/xiangshan/XSCore.scala +++ b/src/main/scala/xiangshan/XSCore.scala @@ -133,9 +133,11 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer) backend.io.mem.lqCancelCnt := memBlock.io.lqCancelCnt backend.io.mem.sqCancelCnt := memBlock.io.sqCancelCnt backend.io.mem.otherFastWakeup := memBlock.io.otherFastWakeup - backend.io.mem.writeBack <> memBlock.io.writeback backend.io.mem.ldaIqFeedback <> memBlock.io.ldaIqFeedback backend.io.mem.staIqFeedback <> memBlock.io.staIqFeedback + backend.io.mem.writeBack.zip(memBlock.io.writeback).foreach { case(back, mem) => + back <> mem + } frontend.io.reset_vector := io.reset_vector @@ -148,7 +150,9 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer) io.beu_errors.dcache <> memBlock.io.error.toL1BusErrorUnitInfo() memBlock.io.hartId := io.hartId - memBlock.io.issue <> backend.io.mem.issueUops + memBlock.io.issue.zip(backend.io.mem.issueUops).foreach { case(memIssue, backIssue) => + memIssue <> backIssue + } // By default, instructions do not have exceptions when they enter the function units. memBlock.io.issue.map(_.bits.uop.clearExceptions()) backend.io.mem.loadFastMatch <> memBlock.io.loadFastMatch diff --git a/src/main/scala/xiangshan/backend/Backend.scala b/src/main/scala/xiangshan/backend/Backend.scala index f2a394ce9c037f2db317d5f9b184315b10d2e6a6..1f88a530496c436f3c5252f7a5fb23a911bbc4d3 100644 --- a/src/main/scala/xiangshan/backend/Backend.scala +++ b/src/main/scala/xiangshan/backend/Backend.scala @@ -281,7 +281,7 @@ class BackendMemIO(implicit p: Parameters, params: BackendParams) extends XSBund val loadPcRead = Vec(params.LduCnt, Flipped(new FtqRead(UInt(VAddrBits.W)))) // Input - val writeBack = Vec(params.LduCnt + params.StaCnt * 2, Flipped(DecoupledIO(new MemExuOutput()))) + val writeBack = MixedVec(Seq.fill(params.LduCnt + params.StaCnt * 2)(Flipped(DecoupledIO(new MemExuOutput()))) ++ Seq.fill(params.VlduCnt)(Flipped(DecoupledIO(new MemExuOutput(true))))) val s3_delayed_load_error = Input(Vec(LoadPipelineWidth, Bool())) val stIn = Input(Vec(params.StaCnt, ValidIO(new DynInst()))) @@ -300,7 +300,7 @@ class BackendMemIO(implicit p: Parameters, params: BackendParams) extends XSBund // Output val redirect = ValidIO(new Redirect) // rob flush MemBlock - val issueUops = Vec(params.LduCnt + 2 * params.StaCnt, DecoupledIO(new MemExuInput())) + val issueUops = MixedVec(Seq.fill(params.LduCnt + params.StaCnt * 2)(DecoupledIO(new MemExuInput())) ++ Seq.fill(params.VlduCnt)(DecoupledIO(new MemExuInput(true)))) val loadFastMatch = Vec(params.LduCnt, Output(UInt(params.LduCnt.W))) val loadFastImm = Vec(params.LduCnt, Output(UInt(12.W))) // Imm_I diff --git a/src/main/scala/xiangshan/backend/BackendParams.scala b/src/main/scala/xiangshan/backend/BackendParams.scala index ec2dfdc9e14a49a37704d87f1ada7d332cbdb815..962945572a4e6ef7c78a252bc1e4965b75994b84 100644 --- a/src/main/scala/xiangshan/backend/BackendParams.scala +++ b/src/main/scala/xiangshan/backend/BackendParams.scala @@ -56,6 +56,7 @@ case class BackendParams( def StaCnt = allSchdParams.map(_.StaCnt).sum def StdCnt = allSchdParams.map(_.StdCnt).sum def LduCnt = allSchdParams.map(_.LduCnt).sum + def VlduCnt = allSchdParams.map(_.VlduCnt).sum def LsExuCnt = StaCnt + LduCnt def JmpCnt = allSchdParams.map(_.JmpCnt).sum def BrhCnt = allSchdParams.map(_.BrhCnt).sum diff --git a/src/main/scala/xiangshan/backend/Bundles.scala b/src/main/scala/xiangshan/backend/Bundles.scala index 1374d3870d3a464e96b883a078428ddf4ec62dea..ad04031e8a6b8168fd3f19fb08880add5b59102a 100644 --- a/src/main/scala/xiangshan/backend/Bundles.scala +++ b/src/main/scala/xiangshan/backend/Bundles.scala @@ -482,16 +482,16 @@ object Bundles { val isInterrupt = Bool() } - class MemExuInput(implicit p: Parameters) extends XSBundle { + class MemExuInput(isVector: Boolean = false)(implicit p: Parameters) extends XSBundle { val uop = new DynInst - val src = Vec(3, UInt(XLEN.W)) + val src = if(isVector) Vec(5, UInt(VLEN.W)) else Vec(3, UInt(XLEN.W)) val iqIdx = UInt(log2Up(MemIQSizeMax).W) val isFirstIssue = Bool() } - class MemExuOutput(implicit p: Parameters) extends XSBundle { + class MemExuOutput(isVector: Boolean = false)(implicit p: Parameters) extends XSBundle { val uop = new DynInst - val data = UInt(XLEN.W) + val data = if(isVector) UInt(VLEN.W) else UInt(XLEN.W) val debug = new DebugBundle } diff --git a/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala b/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala index 94fb09fd42de130668f9fe512d8f454feca7f319..a3f114dc58c8faf0b96a16552071b2dc695986ed 100644 --- a/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala +++ b/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala @@ -71,6 +71,8 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit val src1 = Cat(0.U(1.W), staticInst.instr(19, 15)) val src2 = Cat(0.U(1.W), staticInst.instr(24, 20)) val dest = Cat(0.U(1.W), staticInst.instr(11, 7)) + val width = staticInst.instr(14, 12) //Vector LS eew + val eew = Cat(0.U(1.W), width(1, 0)) //output bits val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst)) @@ -102,6 +104,9 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit //Type of uop Div val typeOfDiv = decodedInsts_u.uopSplitType + val sew = Cat(0.U(1.W), simple.io.enq.vtype.vsew) + val vlmul = simple.io.enq.vtype.vlmul + //LMUL val lmul = MuxLookup(simple.io.enq.vtype.vlmul, 1.U(4.W), Array( "b001".U -> 2.U, @@ -113,6 +118,13 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit "b010".U -> 10.U, "b011".U -> 36.U )) + val vemul : UInt = eew.asUInt + 1.U + vlmul.asUInt + ~sew.asUInt + val emul = MuxLookup(vemul, 1.U(4.W), Array( + "b001".U -> 2.U, + "b010".U -> 4.U, + "b011".U -> 8.U + )) //TODO : eew and emul illegal exception need to be handled + //number of uop val numOfUop = MuxLookup(typeOfDiv, 1.U(log2Up(maxUopSize+1).W), Array( UopSplitType.VEC_0XV -> 2.U, @@ -144,6 +156,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit UopSplitType.VEC_M0X -> (lmul +& 1.U), UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) -1.U), UopSplitType.VEC_M0X_VFIRST -> 2.U, + UopSplitType.VEC_US_LD -> (emul +& 1.U), )) //uop div up to maxUopSize @@ -481,7 +494,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(0).lsrc(2) := dest csBundle(0).ldest := dest csBundle(0).uopIdx := 0.U - for(i <- 1 until MAX_VLMUL) { + for (i <- 1 until MAX_VLMUL) { csBundle(i).lsrc(0) := src1 + i.U csBundle(i).lsrc(1) := src2 + i.U csBundle(i).lsrc(2) := dest @@ -605,12 +618,12 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit for (i <- 0 until MAX_VLMUL) { csBundle(2 * i + 1).srcType(0) := SrcType.vp csBundle(2 * i + 1).srcType(1) := SrcType.vp - csBundle(2 * i + 1).lsrc(0) := src2 + (i+1).U + csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U csBundle(2 * i + 1).lsrc(1) := src2 + i.U csBundle(2 * i + 1).lsrc(2) := dest + i.U csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U csBundle(2 * i + 1).uopIdx := (2 * i).U - if (2 * i + 2 < MAX_VLMUL * 2 ){ + if (2 * i + 2 < MAX_VLMUL * 2) { csBundle(2 * i + 2).srcType(0) := SrcType.fp csBundle(2 * i + 2).lsrc(0) := FP_TMP_REG_MV.U // csBundle(2 * i + 2).lsrc(1) := src2 + i.U // DontCare @@ -628,7 +641,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit for (i <- 0 until MAX_VLMUL) { csBundle(2 * i).srcType(0) := SrcType.vp csBundle(2 * i).srcType(1) := SrcType.vp - csBundle(2 * i).lsrc(0) := src2 + (i+1).U + csBundle(2 * i).lsrc(0) := src2 + (i + 1).U csBundle(2 * i).lsrc(1) := src2 + i.U csBundle(2 * i).lsrc(2) := dest + i.U csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U @@ -644,7 +657,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U } is(UopSplitType.VEC_VRED) { - when(simple.io.enq.vtype.vlmul === "b001".U){ + when(simple.io.enq.vtype.vlmul === "b001".U) { csBundle(0).srcType(2) := SrcType.DC csBundle(0).lsrc(0) := src2 + 1.U csBundle(0).lsrc(1) := src2 @@ -661,26 +674,26 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(1).srcType(2) := SrcType.DC csBundle(1).lsrc(0) := src2 + 3.U csBundle(1).lsrc(1) := src2 + 2.U - csBundle(1).ldest := (VECTOR_TMP_REG_LMUL+1).U + csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U csBundle(1).uopIdx := 1.U csBundle(2).srcType(2) := SrcType.DC - csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL+1).U + csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U - csBundle(2).ldest := (VECTOR_TMP_REG_LMUL+2).U + csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U csBundle(2).uopIdx := 2.U } when(simple.io.enq.vtype.vlmul === "b011".U) { - for(i <- 0 until MAX_VLMUL){ - if(i < MAX_VLMUL - MAX_VLMUL/2){ + for (i <- 0 until MAX_VLMUL) { + if (i < MAX_VLMUL - MAX_VLMUL / 2) { csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U csBundle(i).lsrc(1) := src2 + (i * 2).U csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U - } else if (i < MAX_VLMUL - MAX_VLMUL/4) { - csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL/2)*2 + 1).U - csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL/2)*2).U + } else if (i < MAX_VLMUL - MAX_VLMUL / 4) { + csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U + csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U - }else if (i < MAX_VLMUL - MAX_VLMUL/8) { + } else if (i < MAX_VLMUL - MAX_VLMUL / 8) { csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U @@ -689,7 +702,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(i).uopIdx := i.U } } - when (simple.io.enq.vtype.vlmul.orR()){ + when(simple.io.enq.vtype.vlmul.orR()) { csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp csBundle(numOfUop - 1.U).lsrc(0) := src1 csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U @@ -719,29 +732,37 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(0).fpu.sqrt := false.B csBundle(0).fpu.fcvt := false.B // LMUL - for(i <- 0 until MAX_VLMUL) - for(j <- 0 to i){ - val old_vd = if (j==0) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j-1).U - val vd = if (j==i) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j).U - csBundle(i*(i+1)/2+j+1).srcType(0) := SrcType.fp - csBundle(i*(i+1)/2+j+1).lsrc(0) := FP_TMP_REG_MV.U - csBundle(i*(i+1)/2+j+1).lsrc(1) := src2 + j.U - csBundle(i*(i+1)/2+j+1).lsrc(2) := old_vd - csBundle(i*(i+1)/2+j+1).ldest := vd - csBundle(i*(i+1)/2+j+1).uopIdx := (i*(i+1)/2+j).U + for (i <- 0 until MAX_VLMUL) + for (j <- 0 to i) { + val old_vd = if (j == 0) { + dest + i.U + } else (VECTOR_TMP_REG_LMUL + j - 1).U + val vd = if (j == i) { + dest + i.U + } else (VECTOR_TMP_REG_LMUL + j).U + csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.fp + csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := FP_TMP_REG_MV.U + csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U + csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd + csBundle(i * (i + 1) / 2 + j + 1).ldest := vd + csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U } } is(UopSplitType.VEC_ISLIDEUP) { // LMUL - for(i <- 0 until MAX_VLMUL) - for(j <- 0 to i){ - val old_vd = if (j==0) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j-1).U - val vd = if (j==i) {dest + i.U} else (VECTOR_TMP_REG_LMUL+j).U - csBundle(i*(i+1)/2+j).lsrc(1) := src2 + j.U - csBundle(i*(i+1)/2+j).lsrc(2) := old_vd - csBundle(i*(i+1)/2+j).ldest := vd - csBundle(i*(i+1)/2+j).uopIdx := (i*(i+1)/2+j).U + for (i <- 0 until MAX_VLMUL) + for (j <- 0 to i) { + val old_vd = if (j == 0) { + dest + i.U + } else (VECTOR_TMP_REG_LMUL + j - 1).U + val vd = if (j == i) { + dest + i.U + } else (VECTOR_TMP_REG_LMUL + j).U + csBundle(i * (i + 1) / 2 + j).lsrc(1) := src2 + j.U + csBundle(i * (i + 1) / 2 + j).lsrc(2) := old_vd + csBundle(i * (i + 1) / 2 + j).ldest := vd + csBundle(i * (i + 1) / 2 + j).uopIdx := (i * (i + 1) / 2 + j).U } } @@ -765,32 +786,40 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(0).fpu.sqrt := false.B csBundle(0).fpu.fcvt := false.B // LMUL - for(i <- 0 until MAX_VLMUL) - for(j <- (0 to i).reverse){ - when(i.U < lmul){ - val old_vd = if (j==0) {dest + lmul -1.U - i.U} else (VECTOR_TMP_REG_LMUL+j-1).U - val vd = if (j==i) {dest + lmul - 1.U - i.U} else (VECTOR_TMP_REG_LMUL+j).U - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).srcType(0) := SrcType.fp - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(0) := FP_TMP_REG_MV.U - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(1) := src2 + lmul - 1.U - j.U - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(2) := old_vd - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).ldest := vd - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).uopIdx := numOfUop-(i*(i+1)/2+i-j+2).U + for (i <- 0 until MAX_VLMUL) + for (j <- (0 to i).reverse) { + when(i.U < lmul) { + val old_vd = if (j == 0) { + dest + lmul - 1.U - i.U + } else (VECTOR_TMP_REG_LMUL + j - 1).U + val vd = if (j == i) { + dest + lmul - 1.U - i.U + } else (VECTOR_TMP_REG_LMUL + j).U + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.fp + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := FP_TMP_REG_MV.U + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U } } } is(UopSplitType.VEC_ISLIDEDOWN) { // LMUL - for(i <- 0 until MAX_VLMUL) - for(j <- (0 to i).reverse){ - when(i.U < lmul){ - val old_vd = if (j==0) {dest + lmul -1.U - i.U} else (VECTOR_TMP_REG_LMUL+j-1).U - val vd = if (j==i) {dest + lmul - 1.U - i.U} else (VECTOR_TMP_REG_LMUL+j).U - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(1) := src2 + lmul - 1.U - j.U - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).lsrc(2) := old_vd - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).ldest := vd - csBundle(numOfUop-(i*(i+1)/2+i-j+1).U).uopIdx := numOfUop-(i*(i+1)/2+i-j+1).U + for (i <- 0 until MAX_VLMUL) + for (j <- (0 to i).reverse) { + when(i.U < lmul) { + val old_vd = if (j == 0) { + dest + lmul - 1.U - i.U + } else (VECTOR_TMP_REG_LMUL + j - 1).U + val vd = if (j == i) { + dest + lmul - 1.U - i.U + } else (VECTOR_TMP_REG_LMUL + j).U + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd + csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 1).U } } } @@ -798,7 +827,7 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit is(UopSplitType.VEC_M0X) { // LMUL for (i <- 0 until MAX_VLMUL) { - val srcType0 = if (i==0) SrcType.DC else SrcType.vp + val srcType0 = if (i == 0) SrcType.DC else SrcType.vp val ldest = (VECTOR_TMP_REG_LMUL + i).U csBundle(i).srcType(0) := srcType0 csBundle(i).srcType(1) := SrcType.vp @@ -810,9 +839,9 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(i).ldest := ldest csBundle(i).uopIdx := i.U } - csBundle(lmul-1.U).vecWen := false.B - csBundle(lmul-1.U).fpWen := true.B - csBundle(lmul-1.U).ldest := FP_TMP_REG_MV.U + csBundle(lmul - 1.U).vecWen := false.B + csBundle(lmul - 1.U).fpWen := true.B + csBundle(lmul - 1.U).ldest := FP_TMP_REG_MV.U // FMV_X_D csBundle(lmul).srcType(0) := SrcType.fp csBundle(lmul).srcType(1) := SrcType.imm @@ -837,22 +866,22 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit is(UopSplitType.VEC_MVV) { // LMUL for (i <- 0 until MAX_VLMUL) { - val srcType0 = if (i==0) SrcType.DC else SrcType.vp - csBundle(i*2+0).srcType(0) := srcType0 - csBundle(i*2+0).srcType(1) := SrcType.vp - csBundle(i*2+0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U - csBundle(i*2+0).lsrc(1) := src2 - csBundle(i*2+0).lsrc(2) := dest + i.U - csBundle(i*2+0).ldest := dest + i.U - csBundle(i*2+0).uopIdx := (i*2+0).U - - csBundle(i*2+1).srcType(0) := srcType0 - csBundle(i*2+1).srcType(1) := SrcType.vp - csBundle(i*2+1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U - csBundle(i*2+1).lsrc(1) := src2 + val srcType0 = if (i == 0) SrcType.DC else SrcType.vp + csBundle(i * 2 + 0).srcType(0) := srcType0 + csBundle(i * 2 + 0).srcType(1) := SrcType.vp + csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U + csBundle(i * 2 + 0).lsrc(1) := src2 + csBundle(i * 2 + 0).lsrc(2) := dest + i.U + csBundle(i * 2 + 0).ldest := dest + i.U + csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U + + csBundle(i * 2 + 1).srcType(0) := srcType0 + csBundle(i * 2 + 1).srcType(1) := SrcType.vp + csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U + csBundle(i * 2 + 1).lsrc(1) := src2 // csBundle(i).lsrc(2) := dest + i.U DontCare - csBundle(i*2+1).ldest := (VECTOR_TMP_REG_LMUL + i).U - csBundle(i*2+1).uopIdx := (i*2+1).U + csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U + csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U } } @@ -881,6 +910,35 @@ class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnit csBundle(1).fpu.sqrt := false.B csBundle(1).fpu.fcvt := false.B } + is(UopSplitType.VEC_US_LD) { + /* + FMV.D.X + */ + csBundle(0).srcType(0) := SrcType.reg + csBundle(0).srcType(1) := SrcType.imm + csBundle(0).lsrc(1) := 0.U + csBundle(0).ldest := FP_TMP_REG_MV.U + csBundle(0).fuType := FuType.i2f.U + csBundle(0).rfWen := false.B + csBundle(0).fpWen := true.B + csBundle(0).vecWen := false.B + csBundle(0).fpu.isAddSub := false.B + csBundle(0).fpu.typeTagIn := FPU.D + csBundle(0).fpu.typeTagOut := FPU.D + csBundle(0).fpu.fromInt := true.B + csBundle(0).fpu.wflags := false.B + csBundle(0).fpu.fpWen := true.B + csBundle(0).fpu.div := false.B + csBundle(0).fpu.sqrt := false.B + csBundle(0).fpu.fcvt := false.B + //LMUL + for (i <- 0 until MAX_VLMUL) { + csBundle(i + 1).srcType(0) := SrcType.fp + csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U + csBundle(i + 1).ldest := dest + i.U + csBundle(i + 1).uopIdx := i.U + } + } } //uops dispatch diff --git a/src/main/scala/xiangshan/backend/decode/VecDecoder.scala b/src/main/scala/xiangshan/backend/decode/VecDecoder.scala index 7b27be25567e891aa5ed440bbc0e123bd4ed0edd..8e2b2480485fa9176ee73ee3fbdb2723d90e3b6c 100644 --- a/src/main/scala/xiangshan/backend/decode/VecDecoder.scala +++ b/src/main/scala/xiangshan/backend/decode/VecDecoder.scala @@ -84,7 +84,7 @@ case class VSET(vli: Boolean, vtypei: Boolean, fuOp: BitPat, flushPipe: Boolean, } case class VLD(src2: BitPat, fuOp: BitPat, strided: Boolean = false, indexed: Boolean = false, ff: Boolean = false, - mask: Boolean = false, whole: Boolean = false, ordered: Boolean = false, uopSplitType: BitPat = UopSplitType.dummy) extends XSDecodeBase { + mask: Boolean = false, whole: Boolean = false, ordered: Boolean = false, uopSplitType: BitPat = UopSplitType.VEC_US_LD) extends XSDecodeBase { def generate() : List[BitPat] = { val fu = FuType.vldu val src1 = SrcType.xp diff --git a/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala b/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala index 377d4f634f98aac57d1ca07e18a197dabe2e925f..fad13e043c0f70e1d2055d3857bb35c0c421eb4f 100644 --- a/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala +++ b/src/main/scala/xiangshan/backend/dispatch/Dispatch.scala @@ -84,9 +84,12 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents { )) val isFp = VecInit(io.fromRename.map(req => FuType.isFp (req.bits.fuType) || FuType.isVpu (req.bits.fuType))) - val isMem = VecInit(io.fromRename.map(req => FuType.isMem(req.bits.fuType))) + val isMem = VecInit(io.fromRename.map(req => FuType.isMem(req.bits.fuType) || + FuType.isVls (req.bits.fuType))) val isLs = VecInit(io.fromRename.map(req => FuType.isLoadStore(req.bits.fuType))) + val isVls = VecInit(io.fromRename.map(req => FuType.isVls (req.bits.fuType))) val isStore = VecInit(io.fromRename.map(req => FuType.isStore(req.bits.fuType))) + val isVStore = VecInit(io.fromRename.map(req => FuType.isVStore(req.bits.fuType))) val isAMO = VecInit(io.fromRename.map(req => FuType.isAMO(req.bits.fuType))) val isBlockBackward = VecInit(io.fromRename.map(_.bits.blockBackward)) val isWaitForward = VecInit(io.fromRename.map(_.bits.waitForward)) @@ -108,7 +111,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents { for (i <- 0 until RenameWidth) { - updatedCommitType(i) := Cat(isLs(i), (isStore(i) && !isAMO(i)) | isBranch(i)) + updatedCommitType(i) := Cat(isLs(i) | isVls(i), (isStore(i) && !isAMO(i)) | isVStore(i) | isBranch(i)) updatedUop(i) := io.fromRename(i).bits updatedUop(i).debugInfo.eliminatedMove := io.fromRename(i).bits.eliminatedMove @@ -122,7 +125,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents { when (io.fromRename(i).bits.isLUI) { updatedUop(i).psrc(0) := 0.U } - + //TODO: vec ls mdp io.lfst.req(i).valid := io.fromRename(i).fire && updatedUop(i).storeSetHit io.lfst.req(i).bits.isstore := isStore(i) io.lfst.req(i).bits.ssid := updatedUop(i).ssid diff --git a/src/main/scala/xiangshan/backend/exu/ExeUnitParams.scala b/src/main/scala/xiangshan/backend/exu/ExeUnitParams.scala index e098f522b36cc68b436c92c523cc3fe69dc169b8..eb9b8d8fe475e7513bbd5ec0d1a9590a17d17593 100644 --- a/src/main/scala/xiangshan/backend/exu/ExeUnitParams.scala +++ b/src/main/scala/xiangshan/backend/exu/ExeUnitParams.scala @@ -63,11 +63,13 @@ case class ExeUnitParams( def hasLoadFu = fuConfigs.map(_.fuType == FuType.ldu).reduce(_ || _) + def hasVLoadFu = fuConfigs.map(_.fuType == FuType.vldu).reduce(_ || _) + def hasStoreAddrFu = fuConfigs.map(_.name == "sta").reduce(_ || _) def hasStdFu = fuConfigs.map(_.name == "std").reduce(_ || _) - def hasMemAddrFu = hasLoadFu || hasStoreAddrFu + def hasMemAddrFu = hasLoadFu || hasStoreAddrFu || hasVLoadFu def hasVecFu = fuConfigs.map(x => FuConfig.VecArithFuConfigs.contains(x)).reduce(_ || _) diff --git a/src/main/scala/xiangshan/backend/fu/FuConfig.scala b/src/main/scala/xiangshan/backend/fu/FuConfig.scala index 0aa942960d1b053e98ccd2773d922fb247fe6e74..e4c23b9117d1eafd188543c3d29314b2c144ccc7 100644 --- a/src/main/scala/xiangshan/backend/fu/FuConfig.scala +++ b/src/main/scala/xiangshan/backend/fu/FuConfig.scala @@ -500,13 +500,31 @@ object FuConfig { writeVecRf = true, latency = UncertainLatency(), ) - // Todo - // def VlduCfg = FuConfig () + + val VlduCfg: FuConfig = FuConfig ( + name = "vldu", + fuType = FuType.vldu, + fuGen = null, + srcData = Seq( + Seq(VecData(), VecData(), VecData(), MaskSrcData(), VConfigData()), //vs1, vs2, vd_old, v0, vconfig + ), + piped = false, // Todo: check it + writeVecRf = true, + latency = UncertainLatency(), + exceptionOut = Seq(loadAddrMisaligned, loadAccessFault, loadPageFault), + flushPipe = true, + replayInst = true, + hasLoadError = true, + vconfigWakeUp = true, + maskWakeUp = true, + dataBits = 128, + ) + //TODO // def VstuCfg = FuConfig () def allConfigs = Seq( JmpCfg, BrhCfg, I2fCfg, CsrCfg, AluCfg, MulCfg, DivCfg, FenceCfg, BkuCfg, VSetRvfWvfCfg, VSetRiWvfCfg, VSetRiWiCfg, - FmacCfg, F2iCfg, F2fCfg, FDivSqrtCfg, LduCfg, StaCfg, StdCfg, MouCfg, MoudCfg, VialuCfg, VipuCfg, VfpuCfg + FmacCfg, F2iCfg, F2fCfg, FDivSqrtCfg, LduCfg, StaCfg, StdCfg, MouCfg, MoudCfg, VialuCfg, VipuCfg, VfpuCfg, VlduCfg ) def VecArithFuConfigs = Seq( diff --git a/src/main/scala/xiangshan/backend/fu/FuType.scala b/src/main/scala/xiangshan/backend/fu/FuType.scala index a2523294a577280554878d344ae475f985c9f55e..422ee1f183c31e48d759f7a1c9f9a8a0ce74e66f 100644 --- a/src/main/scala/xiangshan/backend/fu/FuType.scala +++ b/src/main/scala/xiangshan/backend/fu/FuType.scala @@ -62,7 +62,13 @@ object FuType { def isFence(fuType: UInt): Bool = fuType(7) - def isVpu(fuType: UInt): Bool = fuType(19, 16).orR || fuType(21) || fuType(24) + def isVpu(fuType: UInt): Bool = fuType(18, 16).orR || fuType(21) || fuType(24) + + def isVls(fuType: UInt): Bool = fuType(20, 19).orR + + def isVLoad(fuType: UInt): Bool = fuType(19) + + def isVStore(fuType: UInt): Bool = fuType(20) def storeIsAMO(fuType: UInt): Bool = fuType(15) diff --git a/src/main/scala/xiangshan/backend/issue/Dispatch2Iq.scala b/src/main/scala/xiangshan/backend/issue/Dispatch2Iq.scala index 0bb0d61d687e237eddfe4e857bd886a3528713ba..deb6e75a10b82649d377475b981c7fdfabe669f4 100644 --- a/src/main/scala/xiangshan/backend/issue/Dispatch2Iq.scala +++ b/src/main/scala/xiangshan/backend/issue/Dispatch2Iq.scala @@ -288,12 +288,14 @@ class Dispatch2IqMemImp(override val wrapper: Dispatch2Iq)(implicit p: Parameter private val dispatchCfg: Seq[(Seq[Int], Int)] = Seq( (Seq(ldu), 2), (Seq(stu, mou), 2), + (Seq(vldu), 2), ) private val enqLsqIO = io.enqLsqIO.get private val numLoadDeq = LoadPipelineWidth private val numStoreAMODeq = StorePipelineWidth + private val numVLoadDeq = LoadPipelineWidth private val numDeq = enqLsqIO.req.size private val numEnq = io.in.size @@ -314,17 +316,23 @@ class Dispatch2IqMemImp(override val wrapper: Dispatch2Iq)(implicit p: Parameter private val isStoreVec = VecInit(io.in.map(x => x.valid && FuType.isStore(x.bits.fuType))) private val isAMOVec = io.in.map(x => x.valid && FuType.isAMO(x.bits.fuType)) private val isStoreAMOVec = io.in.map(x => x.valid && (FuType.isStore(x.bits.fuType) || FuType.isAMO(x.bits.fuType))) + private val isVLoadVec = VecInit(io.in.map(x => x.valid && FuType.isVLoad(x.bits.fuType))) + private val isVStoreVec = VecInit(io.in.map(x => x.valid && FuType.isVStore(x.bits.fuType))) private val loadCntVec = VecInit(isLoadVec.indices.map(x => PopCount(isLoadVec.slice(0, x + 1)))) private val storeAMOCntVec = VecInit(isStoreAMOVec.indices.map(x => PopCount(isStoreAMOVec.slice(0, x + 1)))) + private val vloadCntVec = VecInit(isVLoadVec.indices.map(x => PopCount(isVLoadVec.slice(0, x + 1)))) val loadBlockVec = VecInit(loadCntVec.map(_ > numLoadDeq.U)) val storeAMOBlockVec = VecInit(storeAMOCntVec.map(_ > numStoreAMODeq.U)) - val lsStructBlockVec = VecInit(loadBlockVec.zip(storeAMOBlockVec).map(x => x._1 || x._2)) + val vloadBlockVec = VecInit(vloadCntVec.map(_ > numVLoadDeq.U)) + val lsStructBlockVec = VecInit((loadBlockVec.zip(storeAMOBlockVec)).zip(vloadBlockVec).map(x => x._1._1 || x._1._2 || x._2)) dontTouch(loadBlockVec) dontTouch(storeAMOBlockVec) dontTouch(lsStructBlockVec) + dontTouch(vloadBlockVec) dontTouch(isLoadVec) + dontTouch(isVLoadVec) dontTouch(loadCntVec) s0_in <> io.in @@ -342,10 +350,10 @@ class Dispatch2IqMemImp(override val wrapper: Dispatch2Iq)(implicit p: Parameter for (i <- enqLsqIO.req.indices) { when (!io.in(i).valid) { enqLsqIO.needAlloc(i) := 0.U - }.elsewhen(isStoreAMOVec(i)) { - enqLsqIO.needAlloc(i) := 2.U // store | amo + }.elsewhen(isStoreAMOVec(i) || isVStoreVec(i)) { + enqLsqIO.needAlloc(i) := 2.U // store | amo | vstore }.otherwise { - enqLsqIO.needAlloc(i) := 1.U // load + enqLsqIO.needAlloc(i) := 1.U // load | vload } enqLsqIO.req(i).valid := io.in(i).valid && !s0_blockedVec(i) && !iqNotAllReady && !lsqCannotAccept && !FuType.isAMO(io.in(i).bits.fuType) enqLsqIO.req(i).bits := io.in(i).bits diff --git a/src/main/scala/xiangshan/backend/issue/IssueQueue.scala b/src/main/scala/xiangshan/backend/issue/IssueQueue.scala index 625273485ae6fcfbfbbdb1ee5849c040a00ef0b7..86b4e461bc15385674004f4e49fbf6ed70ba28f1 100644 --- a/src/main/scala/xiangshan/backend/issue/IssueQueue.scala +++ b/src/main/scala/xiangshan/backend/issue/IssueQueue.scala @@ -461,7 +461,7 @@ class IssueQueueMemIO(implicit p: Parameters, params: IssueBlockParams) extends class IssueQueueMemAddrImp(override val wrapper: IssueQueue)(implicit p: Parameters, params: IssueBlockParams) extends IssueQueueImp(wrapper) with HasCircularQueuePtrHelper { - require(params.StdCnt == 0 && (params.LduCnt + params.StaCnt) > 0, "IssueQueueMemAddrImp can only be instance of MemAddr IQ") + require(params.StdCnt == 0 && (params.LduCnt + params.StaCnt + params.VlduCnt) > 0, "IssueQueueMemAddrImp can only be instance of MemAddr IQ") io.suggestName("none") override lazy val io = IO(new IssueQueueMemIO).suggestName("io") diff --git a/src/main/scala/xiangshan/backend/issue/StatusArray.scala b/src/main/scala/xiangshan/backend/issue/StatusArray.scala index a30149809b1faeacd5447c3d4e1b3ed0f38c338d..8e67cfeea0dd956f4ec7e6bd3ec943d199dab70e 100644 --- a/src/main/scala/xiangshan/backend/issue/StatusArray.scala +++ b/src/main/scala/xiangshan/backend/issue/StatusArray.scala @@ -98,6 +98,7 @@ class StatusArray()(implicit p: Parameters, params: IssueBlockParams) extends XS val flushedVec = Wire(Vec(params.numEntries, Bool())) val clearVec = Wire(Vec(params.numEntries, Bool())) val deqSelVec = Wire(Vec(params.numEntries, Bool())) + val deqSelVec2 = Wire(Vec(params.numDeq, Vec(params.numEntries, Bool()))) // per deq's deqSelVec dontTouch(deqRespVec) // Reg @@ -159,6 +160,12 @@ class StatusArray()(implicit p: Parameters, params: IssueBlockParams) extends XS deqSel := VecInit(io.deq.map(x => x.deqSelOH.valid && x.deqSelOH.bits(i))).asUInt.orR } + deqSelVec2.zip(io.deq).foreach { case (deqSelVecSingle, deqSingle) => + deqSelVecSingle.zipWithIndex.foreach { case (deqSelBool, i) => + deqSelBool := deqSingle.deqSelOH.valid && deqSingle.deqSelOH.bits(i) + } + } + val resps = params.schdType match { case IntScheduler() => io.deqResp ++ io.og0Resp ++ io.og1Resp case MemScheduler() => io.deqResp ++ io.og1Resp @@ -195,7 +202,9 @@ class StatusArray()(implicit p: Parameters, params: IssueBlockParams) extends XS io.canIssue := canIssueVec.asUInt io.clear := clearVec.asUInt io.rsFeedback := 0.U.asTypeOf(io.rsFeedback) - io.deq.foreach(_.isFirstIssue := Mux1H(deqSelVec, statusVec.map(!_.firstIssue))) + io.deq.zip(deqSelVec2).foreach { case (deqSingle, deqSelVecSingle) => + deqSingle.isFirstIssue := Mux1H(deqSelVecSingle, statusVec.map(!_.firstIssue)) + } dontTouch(io.deq) } diff --git a/src/main/scala/xiangshan/package.scala b/src/main/scala/xiangshan/package.scala index 973c0e7d0fd5a9d650c75d848d09c9469c54e959..5279ea6ffe022b65fb1e7bfa5aec07be1b690590 100644 --- a/src/main/scala/xiangshan/package.scala +++ b/src/main/scala/xiangshan/package.scala @@ -602,6 +602,7 @@ package object xiangshan { def VEC_RGATHER_VX = "b101110".U // vrgather.vx def VEC_RGATHEREI16 = "b101111".U // vrgatherei16.vv def VEC_COMPRESS = "b110000".U // vcompress.vm + def VEC_US_LD = "b110001".U // vector unit strided load def VEC_M0M = "b000000".U // VEC_M0M def VEC_MMM = "b000000".U // VEC_MMM def dummy = "b111111".U