未验证 提交 184a1958 编写于 作者: Z zfw 提交者: GitHub

Alu: optimize timing for bitmanip (#959)

* Separate the ALU instructions into 64-bit data instructions and w-suffix (word) instructions
* Optimize the result-select logic for the instructions
上级 0a6329f1
......@@ -333,13 +333,13 @@ object FDecode extends DecodeConstants{
object BDecode extends DecodeConstants{
val table: Array[(BitPat, List[BitPat])] = Array(
// Basic bit manipulation
CLZ -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, ALUOpType.clz, Y, N, N, N, N, N, N, SelImm.IMM_X),
CTZ -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, ALUOpType.ctz, Y, N, N, N, N, N, N, SelImm.IMM_X),
CPOP -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, ALUOpType.cpop, Y, N, N, N, N, N, N, SelImm.IMM_X),
CLZ -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, BMUOpType.clz, Y, N, N, N, N, N, N, SelImm.IMM_X),
CTZ -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, BMUOpType.ctz, Y, N, N, N, N, N, N, SelImm.IMM_X),
CPOP -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, BMUOpType.cpop, Y, N, N, N, N, N, N, SelImm.IMM_X),
CLZW -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, ALUOpType.clzw, Y, N, N, N, N, N, N, SelImm.IMM_X),
CTZW -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, ALUOpType.ctzw, Y, N, N, N, N, N, N, SelImm.IMM_X),
CPOPW -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, ALUOpType.cpopw, Y, N, N, N, N, N, N, SelImm.IMM_X),
CLZW -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, BMUOpType.clzw, Y, N, N, N, N, N, N, SelImm.IMM_X),
CTZW -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, BMUOpType.ctzw, Y, N, N, N, N, N, N, SelImm.IMM_X),
CPOPW -> List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.bmu, BMUOpType.cpopw, Y, N, N, N, N, N, N, SelImm.IMM_X),
CLMUL -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.bmu, BMUOpType.clmul, Y, N, N, N, N, N, N, SelImm.IMM_X),
CLMULH -> List(SrcType.reg, SrcType.reg, SrcType.DC, FuType.bmu, BMUOpType.clmulh, Y, N, N, N, N, N, N, SelImm.IMM_X),
......
......@@ -25,17 +25,17 @@ import xiangshan._
/** Adder over two XLEN-bit operands.
  *
  * Uses the width-expanding add `+&`, so the result is XLEN+1 bits wide and the
  * carry-out of the addition is available as the top bit.
  */
class AddModule(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
// The two XLEN-bit addends.
val src = Vec(2, Input(UInt(XLEN.W)))
// NOTE(review): `out` and `add` are declared with the same width and are driven
// with the same sum below — this looks like the before/after lines of a port
// rename shown together; confirm which port is meant to remain.
val out = Output(UInt((XLEN+1).W))
val add = Output(UInt((XLEN+1).W))
})
// Both ports carry src(0) + src(1) including the carry bit.
io.out := io.src(0) +& io.src(1)
io.add := io.src(0) +& io.src(1)
}
/** Subtractor over two XLEN-bit operands.
  *
  * Computes src(0) - src(1) as src(0) + ~src(1) + 1. The first addition uses the
  * width-expanding `+&`, so the result is XLEN+1 bits wide and the top bit holds
  * the carry of the inverted-operand addition.
  */
class SubModule(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
// src(0) is the minuend, src(1) the subtrahend.
val src = Vec(2, Input(UInt(XLEN.W)))
// NOTE(review): `out` and `sub` are declared with the same width and are driven
// with the same expression below — this looks like the before/after lines of a
// port rename shown together; confirm which port is meant to remain.
val out = Output(UInt((XLEN+1).W))
val sub = Output(UInt((XLEN+1).W))
})
// (a +& ~b) keeps the carry in bit XLEN; the final "+ 1.U" completes the
// two's-complement negation of src(1) without growing the width further.
io.out := (io.src(0) +& (~io.src(1)).asUInt()) + 1.U
io.sub := (io.src(0) +& (~io.src(1)).asUInt()) + 1.U
}
class LeftShiftModule(implicit p: Parameters) extends XSModule {
......@@ -44,105 +44,107 @@ class LeftShiftModule(implicit p: Parameters) extends XSModule {
val sllSrc = Input(UInt(XLEN.W))
val sll = Output(UInt(XLEN.W))
})
io.sll := (io.sllSrc << io.shamt)(XLEN - 1, 0)
io.sll := io.sllSrc << io.shamt
}
/** Logical left shifter for half-XLEN ("word") operands.
  *
  * Shifts the XLEN/2-bit `sllSrc` left by the 5-bit `shamt` and drives the
  * result onto the XLEN/2-bit `sllw` port.
  */
class LeftShiftWordModule(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle() {
    val shamt = Input(UInt(5.W))
    val sllSrc = Input(UInt((XLEN/2).W))
    val sllw = Output(UInt((XLEN/2).W))
  })

  // NOTE(review): the dynamic shift result is wider than `sllw`; this relies on
  // the connection keeping only the low XLEN/2 bits — confirm.
  val shiftedWord = io.sllSrc << io.shamt
  io.sllw := shiftedWord
}
/** Right shifter producing logical and arithmetic results for a 6-bit shift amount.
  *
  * NOTE(review): two generations of ports appear here side by side — the
  * `srl_l`/`srl_w`/`sra_l`/`sra_w` set and the plain `srl`/`sra` pair. This looks
  * like the before/after lines of a refactor shown together; confirm which set
  * is meant to remain.
  */
class RightShiftModule(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val shamt = Input(UInt(6.W))
// Separate sources for the logical and the arithmetic shift.
val srlSrc, sraSrc = Input(UInt(XLEN.W))
val srl_l, srl_w, sra_l, sra_w = Output(UInt(XLEN.W))
val srl, sra = Output(UInt(XLEN.W))
})
// Logical right shift of the full-width source.
io.srl_l := io.srlSrc >> io.shamt
// Logical right shift of the low 32 bits only.
io.srl_w := io.srlSrc(31, 0) >> io.shamt
// Arithmetic right shift: the source is reinterpreted as signed before shifting.
io.sra_l := (io.sraSrc.asSInt() >> io.shamt).asUInt()
// Arithmetic right shift of the low word: bit 31 is replicated into the upper
// 32 bits first, then the value is shifted as a signed number.
io.sra_w := (Cat(Fill(32, io.sraSrc(31)), io.sraSrc(31, 0)).asSInt() >> io.shamt).asUInt()
// Full-width logical and arithmetic shifts (same expressions as srl_l / sra_l).
io.srl := io.srlSrc >> io.shamt
io.sra := (io.sraSrc.asSInt() >> io.shamt).asUInt()
}
class RotateShiftModule(implicit p: Parameters) extends XSModule {
class RightShiftWordModule(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val shamt = Input(UInt(6.W))
val roSrc = Input(UInt(XLEN.W))
val rol_l, rol_w, ror_l, ror_w = Output(UInt(XLEN.W))
val shamt = Input(UInt(5.W))
val srlSrc, sraSrc = Input(UInt((XLEN/2).W))
val srlw, sraw = Output(UInt((XLEN/2).W))
})
io.rol_l := io.roSrc << io.shamt | io.roSrc >> ((~io.shamt).asUInt()+&1.U)
io.rol_w := (io.roSrc << io.shamt | io.roSrc >> (32.U-io.shamt))(31,0)
io.ror_l := io.roSrc>>io.shamt | io.roSrc << ((~io.shamt).asUInt()+&1.U)
io.ror_w := (io.roSrc>>io.shamt | io.roSrc << (32.U-io.shamt))(31,0)
io.srlw := io.srlSrc >> io.shamt
io.sraw := (io.sraSrc.asSInt() >> io.shamt).asUInt()
}
class MiscResultSelect(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val func = Input(UInt())
val andn, orn, xnor, and, or, xor, sltu, slt, maxMin, maxMinU, sextb, sexth, zexth, rev8, orcb = Input(UInt(XLEN.W))
val andn, orn, xnor, and, or, xor, sextb, sexth, zexth, rev8, orcb = Input(UInt(XLEN.W))
val miscRes = Output(UInt(XLEN.W))
})
val (func, andn, orn, xnor, and, or, xor, sltu, slt, maxMin, maxMinU, sextb, sexth, zexth, rev8, orcb) =
(io.func, io.andn, io.orn, io.xnor, io.and, io.or, io.xor, io.sltu, io.slt, io.maxMin, io.maxMinU, io.sextb, io.sexth, io.zexth, io.rev8, io.orcb)
val baseMisc = ParallelMux(List(
ALUOpType.andn -> andn,
ALUOpType.and -> and,
ALUOpType.orn -> orn,
ALUOpType.or -> or,
ALUOpType.xnor -> xnor,
ALUOpType.xor -> xor
).map(x => (x._1(2, 0) === io.func(2, 0), x._2)))
val bitMisc = ParallelMux(List(
ALUOpType.sext_b -> sextb,
ALUOpType.sext_h -> sexth,
ALUOpType.zext_h -> zexth,
ALUOpType.orc_b -> orcb,
ALUOpType.rev8 -> rev8
).map(x => (x._1(2, 0) === io.func(2, 0), x._2)))
val compMisc = Mux(func(2),
Mux(func(1), maxMinU, maxMin),
Mux(func(1), sltu, slt))
io.miscRes := Mux(func(4), compMisc, Mux(func(3), bitMisc, baseMisc))
val miscRes = ParallelMux(List(
ALUOpType.andn -> io.andn,
ALUOpType.and -> io.and,
ALUOpType.orn -> io.orn,
ALUOpType.or -> io.or,
ALUOpType.xnor -> io.xnor,
ALUOpType.xor -> io.xor,
ALUOpType.sext_b -> io.sextb,
ALUOpType.sext_h -> io.sexth,
ALUOpType.zext_h -> io.zexth,
ALUOpType.orc_b -> io.orcb,
ALUOpType.rev8 -> io.rev8
).map(x => (x._1(3, 0) === io.func(3, 0), x._2)))
io.miscRes := miscRes
}
class ShiftResultSelect(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val func = Input(UInt())
val sll, srl, sra, rol, ror, bclr, bset, binv, bext = Input(UInt(XLEN.W))
val sll, srl, sra, rot, bclr, bset, binv, bext = Input(UInt(XLEN.W))
val shiftRes = Output(UInt(XLEN.W))
})
val (func, sll, srl, sra, rol, ror, bclr, bset, binv, bext) =
(io.func, io.sll, io.srl, io.sra, io.rol, io.ror, io.bclr, io.bset, io.binv, io.bext)
val singleBitRes = ParallelMux(List(
ALUOpType.bclr -> bclr,
ALUOpType.binv -> binv,
ALUOpType.bset -> bset,
ALUOpType.bext -> bext
).map(x => (x._1(1, 0) === io.func(1, 0), x._2)))
val leftBit = Mux(io.func(1), io.binv, Mux(io.func(0), io.bset, io.bclr))
val leftRes = Mux(io.func(2), leftBit, io.sll)
val rightRes = Mux(io.func(2), io.sra, Mux(io.func(1), io.bext, io.srl))
io.shiftRes := Mux(io.func(4), io.rot, Mux(io.func(3), rightRes, leftRes))
}
val lrShiftRes = Mux(func(1), Mux(func(0), sra, srl), sll)
val rotateShiftRes = Mux(func(3), ror, rol)
class WordResultSelect(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val func = Input(UInt())
val sllw, srlw, sraw, rotw, addw, subw = Input(UInt((XLEN/2).W))
val wordRes = Output(UInt(XLEN.W))
})
io.shiftRes := Mux(func(4), rotateShiftRes, Mux(func(2), singleBitRes, lrShiftRes))
val addsubRes = Mux(io.func(6), io.subw, io.addw)
val shiftRes = Mux(io.func(4), io.rotw,
Mux(io.func(3),
Mux(io.func(2), io.sraw, io.srlw),
io.sllw))
val wordRes = Mux(io.func(6,5) === 2.U, shiftRes, addsubRes)
io.wordRes := SignExt(wordRes, XLEN)
}
class AluResSel(implicit p: Parameters) extends XSModule {
val io = IO(new Bundle() {
val func = Input(UInt())
// val addSubRes, shiftRes, miscRes, countRes = Input(UInt(XLEN.W))
val addSubRes, shiftRes, miscRes = Input(UInt(XLEN.W))
val addRes, shiftRes, miscRes, compareRes, wordRes = Input(UInt(XLEN.W))
val aluRes = Output(UInt(XLEN.W))
})
val res = Mux(io.func(6),
io.shiftRes,
Mux(io.func(5), io.addSubRes, io.miscRes)
)
val h32 = Mux(io.func(7), Fill(32, res(31)), res(63, 32))
io.aluRes := Cat(h32, res(31, 0))
val res = Mux(io.func(7), io.wordRes, Mux(io.func(6),
Mux(io.func(5), io.compareRes, io.shiftRes),
Mux(io.func(5), io.addRes, io.miscRes)
))
io.aluRes := res
}
class AluDataModule(implicit p: Parameters) extends XSModule {
......@@ -155,35 +157,65 @@ class AluDataModule(implicit p: Parameters) extends XSModule {
})
val (src1, src2, func) = (io.src(0), io.src(1), io.func)
val isW = ALUOpType.isWordOp(func)
val addModule = Module(new AddModule)
val shaddShamt = func(2,1)
val add = addModule.io.add
val addw = addModule.io.add
addModule.io.src(0) := Mux(func(0),src1(31,0), src1) << shaddShamt
addModule.io.src(1) := src2
// Operand 0 of the shared adder, selected by func(2,0):
//   bit 0     - use the zero-extended low word src1(31,0) instead of the full src1
//               (the "_uw" encodings such as add_uw / sh3add_uw),
//   bits 2:1  - left-shift amount 0..3 applied to that operand (shNadd forms).
// The "_uw" shifted entries must keep all 32 bits of src1(31,0): the shift is
// applied to the zero-extended word, so the result is 33/34/35 bits wide.
// Slicing (30,0) / (29,0) / (28,0) instead would drop the top 1/2/3 bits of the
// word before shifting and produce a wrong address for inputs with those bits set.
addModule.io.src(0) := ParallelMux(List(
  "b000".U -> src1,
  "b001".U -> src1(31,0),
  "b010".U -> Cat(src1(62,0), 0.U(1.W)),
  "b011".U -> Cat(src1(31,0), 0.U(1.W)),
  "b100".U -> Cat(src1(61,0), 0.U(2.W)),
  "b101".U -> Cat(src1(31,0), 0.U(2.W)),
  "b110".U -> Cat(src1(60,0), 0.U(3.W)),
  "b111".U -> Cat(src1(31,0), 0.U(3.W)),
).map(x => (x._1(2, 0) === func(2,0), x._2)))
val subModule = Module(new SubModule)
val sub = subModule.io.sub
val subw = subModule.io.sub
subModule.io.src(0) := src1
subModule.io.src(1) := src2
val add = addModule.io.out
val sub = subModule.io.out
val shamt = src2(5, 0)
val revShamt = ~src2(5,0) + 1.U
val leftShiftModule = Module(new LeftShiftModule)
val sll = leftShiftModule.io.sll
leftShiftModule.io.sllSrc := Mux(func(0), src1(31,0), src1)
leftShiftModule.io.shamt := Mux(func(3), revShamt, shamt)
val leftShiftWordModule = Module(new LeftShiftWordModule)
val sllw = leftShiftWordModule.io.sllw
leftShiftWordModule.io.sllSrc := src1
leftShiftWordModule.io.shamt := Mux(func(3), revShamt, shamt)
// Full-width right shifter; its srl/sra outputs feed the shift, rotate and bext paths.
val rightShiftModule = Module(new RightShiftModule)
val srl = rightShiftModule.io.srl
val sra = rightShiftModule.io.sra
// func(3) set: shift right by the plain amount. func(3) clear: shift right by the
// negated amount (revShamt), which is the complementary half of a left rotate
// when OR-ed with the left-shifter output.
rightShiftModule.io.shamt := Mux(func(3), shamt, revShamt)
rightShiftModule.io.srlSrc := src1
rightShiftModule.io.sraSrc := src1 // This could be optimized to use a src1 that does not pass through the Mux
val rightShiftWordModule = Module(new RightShiftWordModule)
val srlw = rightShiftWordModule.io.srlw
val sraw = rightShiftWordModule.io.sraw
rightShiftWordModule.io.shamt := Mux(func(3), shamt, revShamt)
rightShiftWordModule.io.srlSrc := src1
rightShiftWordModule.io.sraSrc := src1
val rot = srl | sll
val rotw = srlw | sllw
val bitShift = 1.U << src2(5, 0)
val bset = src1 | bitShift
val bclr = src1 & ~bitShift
val binv = src1 ^ bitShift
val bext = srl(0)
// Misc
val andn = ~(src1 & src2)
val orn = ~(src1 | src2)
val xnor = ~(src1 ^ src2)
val and = ~andn
val or = ~orn
val xor = ~xnor
val sltu = !sub(XLEN)
val sgtu = sub(XLEN)
val sltu = !sgtu
val slt = xor(XLEN-1) ^ sltu
val maxMin = Mux(slt ^ func(0), src2, src1)
val maxMinU = Mux(sltu^ func(0), src2, src1)
......@@ -201,47 +233,32 @@ class AluDataModule(implicit p: Parameters) extends XSModule {
ALUOpType.getBranchType(ALUOpType.bltu) -> sltu
)
val taken = LookupTree(ALUOpType.getBranchType(func), branchOpTable) ^ ALUOpType.isBranchInvert(func)
// Shift
val isW = ALUOpType.isWordOp(func)
val shamt = Cat(!isW && src2(5), src2(4, 0))
val leftShiftModule = Module(new LeftShiftModule)
leftShiftModule.io.sllSrc := Mux(func(2), 1.U, Mux(func(0), src1(31,0), src1))
leftShiftModule.io.shamt := shamt
val sll = leftShiftModule.io.sll
val bset = src1 | sll
val bclr = src1 & ~sll
val binv = src1 ^ sll
// Result Select
val rightShiftModule = Module(new RightShiftModule)
rightShiftModule.io.shamt := shamt
rightShiftModule.io.srlSrc := src1
rightShiftModule.io.sraSrc := src1
val srl = Mux(isW, rightShiftModule.io.srl_w, rightShiftModule.io.srl_l)
val sra = Mux(isW, rightShiftModule.io.sra_w, rightShiftModule.io.sra_l)
val bext = srl(0)
val compareRes = Mux(func(2), Mux(func(1), maxMin, maxMinU), Mux(func(1), slt, Mux(func(0), sltu, sub)))
val rotateShiftModule = Module(new RotateShiftModule)
rotateShiftModule.io.shamt := Mux(isW, src2(4,0), src2(5,0))
rotateShiftModule.io.roSrc := Mux(isW, src1(31,0), src1)
val rol = Mux(isW, rotateShiftModule.io.rol_w, rotateShiftModule.io.rol_l)
val ror = Mux(isW, rotateShiftModule.io.ror_w, rotateShiftModule.io.ror_l)
val shiftResSel = Module(new ShiftResultSelect)
shiftResSel.io.func := func(4,0)
shiftResSel.io.sll := sll
shiftResSel.io.srl := srl
shiftResSel.io.sra := sra
shiftResSel.io.rot := rot
shiftResSel.io.bclr := bclr
shiftResSel.io.binv := binv
shiftResSel.io.bset := bset
shiftResSel.io.bext := bext
val shiftRes = shiftResSel.io.shiftRes
val miscResSel = Module(new MiscResultSelect)
miscResSel.io.func := func(4, 0)
miscResSel.io.func := func(3, 0)
miscResSel.io.andn := andn
miscResSel.io.orn := orn
miscResSel.io.xnor := xnor
miscResSel.io.and := and
miscResSel.io.or := or
miscResSel.io.xor := xor
miscResSel.io.sltu := sltu
miscResSel.io.slt := slt
miscResSel.io.maxMin := maxMin
miscResSel.io.maxMinU := maxMinU
miscResSel.io.sextb := sextb
miscResSel.io.sexth := sexth
miscResSel.io.zexth := zexth
......@@ -249,26 +266,23 @@ class AluDataModule(implicit p: Parameters) extends XSModule {
miscResSel.io.orcb := orcb
val miscRes = miscResSel.io.miscRes
val addSubRes = Mux(func(3), sub, add)
val shiftResSel = Module(new ShiftResultSelect)
shiftResSel.io.func := func(4,0)
shiftResSel.io.sll := sll
shiftResSel.io.srl := srl
shiftResSel.io.sra := sra
shiftResSel.io.rol := rol
shiftResSel.io.ror := ror
shiftResSel.io.bclr := bclr
shiftResSel.io.binv := binv
shiftResSel.io.bset := bset
shiftResSel.io.bext := bext
val shiftRes = shiftResSel.io.shiftRes
val wordResSel = Module(new WordResultSelect)
wordResSel.io.func := func
wordResSel.io.addw := addw
wordResSel.io.subw := subw
wordResSel.io.sllw := sllw
wordResSel.io.srlw := srlw
wordResSel.io.sraw := sraw
wordResSel.io.rotw := rotw
val wordRes = wordResSel.io.wordRes
val aluResSel = Module(new AluResSel)
aluResSel.io.func := func
aluResSel.io.addSubRes := addSubRes
aluResSel.io.addRes := add
aluResSel.io.compareRes := compareRes
aluResSel.io.shiftRes := shiftRes
aluResSel.io.miscRes := miscRes
aluResSel.io.wordRes := wordRes
val aluRes = aluResSel.io.aluRes
io.result := aluRes
......
......@@ -226,13 +226,6 @@ package object xiangshan {
def bltu = "b0_00_10_110".U
def bgeu = "b0_00_10_111".U
def slt = "b0_00_11_000".U
def sltu = "b0_00_11_010".U
def max = "b0_00_11_100".U
def min = "b0_00_11_101".U
def maxu = "b0_00_11_110".U
def minu = "b0_00_11_111".U
// add & sub optype
def add = "b0_01_00_000".U
def add_uw = "b0_01_00_001".U
......@@ -243,39 +236,40 @@ package object xiangshan {
def sh3add = "b0_01_00_110".U
def sh3add_uw = "b0_01_00_111".U
def sub = "b0_01_01_000".U
// shift optype
def sll = "b0_10_00_000".U
def slli_uw = "b0_10_00_001".U
def bclr = "b0_10_00_100".U
def binv = "b0_10_00_101".U
def bset = "b0_10_00_110".U
def bext = "b0_10_00_111".U
def srl = "b0_10_01_010".U
def sra = "b0_10_01_011".U
def bset = "b0_10_00_101".U
def binv = "b0_10_00_110".U
def srl = "b0_10_01_001".U
def bext = "b0_10_01_010".U
def sra = "b0_10_01_100".U
def rol = "b0_10_10_000".U
def ror = "b0_10_11_000".U
// count optype
def clz = "b0_11_00_000".U
def ctz = "b0_11_00_001".U
def cpop = "b0_11_00_010".U
def sub = "b0_11_00_000".U
def sltu = "b0_11_00_001".U
def slt = "b0_11_00_010".U
def maxu = "b0_11_00_100".U
def minu = "b0_11_00_101".U
def max = "b0_11_00_110".U
def min = "b0_11_00_111".U
// RV64 32bit optype
def addw = "b1_01_00_000".U
def subw = "b1_01_01_000".U
def subw = "b1_11_00_000".U
def sllw = "b1_10_00_000".U
def srlw = "b1_10_01_010".U
def sraw = "b1_10_01_011".U
def srlw = "b1_10_01_001".U
def sraw = "b1_10_01_100".U
def rolw = "b1_10_10_000".U
def rorw = "b1_10_11_000".U
def clzw = "b1_11_00_000".U
def ctzw = "b1_11_00_001".U
def cpopw = "b1_11_00_010".U
def isWordOp(func: UInt) = func(7)
def isBranch(func: UInt) = func(6, 3) === "b0010".U
......@@ -614,4 +608,4 @@ package object xiangshan {
val LdExeUnitCfg = ExuConfig("LoadExu", "Mem", Seq(lduCfg), wbIntPriority = 0, wbFpPriority = 0)
val StaExeUnitCfg = ExuConfig("StaExu", "Mem", Seq(staCfg, mouCfg), wbIntPriority = Int.MaxValue, wbFpPriority = Int.MaxValue)
val StdExeUnitCfg = ExuConfig("StdExu", "Mem", Seq(stdCfg), wbIntPriority = Int.MaxValue, wbFpPriority = Int.MaxValue)
}
\ No newline at end of file
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册