提交 7607a96b 编写于 作者: Z Zihao Yu

noop.MDU: merge div32 into div

* This will increase the latency of word operations:
  IPC: 0.544714 -> 0.528246
* Should perform more optimization on earlyFinish.
上级 82d49555
...@@ -23,7 +23,7 @@ object MDUOpType { ...@@ -23,7 +23,7 @@ object MDUOpType {
def remuw = "b1111".U def remuw = "b1111".U
def isDiv(op: UInt) = op(2) def isDiv(op: UInt) = op(2)
def isSign(op: UInt) = isDiv(op) && !op(0) def isDivSign(op: UInt) = isDiv(op) && !op(0)
def isW(op: UInt) = op(3) def isW(op: UInt) = op(3)
} }
...@@ -146,13 +146,14 @@ class MDU extends NOOPModule { ...@@ -146,13 +146,14 @@ class MDU extends NOOPModule {
io.out.bits io.out.bits
} }
val isDiv = MDUOpType.isDiv(func)
val isDivSign = MDUOpType.isDivSign(func)
val isW = MDUOpType.isW(func)
val mul = Module(new Multiplier(XLEN + 1)) val mul = Module(new Multiplier(XLEN + 1))
val div = Module(new Divider(64)) val div = Module(new Divider(64))
val div32 = Module(new Divider(32))
List(mul.io, div.io).map { case x => List(mul.io, div.io).map { case x =>
x.in.bits(0) := src1 x.sign := isDivSign
x.in.bits(1) := src2
x.sign := MDUOpType.isSign(func)
x.out.ready := io.out.ready x.out.ready := io.out.ready
} }
...@@ -167,36 +168,21 @@ class MDU extends NOOPModule { ...@@ -167,36 +168,21 @@ class MDU extends NOOPModule {
mul.io.in.bits(0) := LookupTree(func(1,0), mulInputFuncTable.map(p => (p._1(1,0), p._2._1(src1)))) mul.io.in.bits(0) := LookupTree(func(1,0), mulInputFuncTable.map(p => (p._1(1,0), p._2._1(src1))))
mul.io.in.bits(1) := LookupTree(func(1,0), mulInputFuncTable.map(p => (p._1(1,0), p._2._2(src2)))) mul.io.in.bits(1) := LookupTree(func(1,0), mulInputFuncTable.map(p => (p._1(1,0), p._2._2(src2))))
List(div32.io).map { case x => val divInputFunc = (x: UInt) => Mux(isW, Mux(isDivSign, SignExt(x(31,0), XLEN), ZeroExt(x(31,0), XLEN)), x)
x.in.bits(0) := src1(31,0) div.io.in.bits(0) := divInputFunc(src1)
x.in.bits(1) := src2(31,0) div.io.in.bits(1) := divInputFunc(src2)
x.sign := MDUOpType.isSign(func)
x.out.ready := io.out.ready
}
val isDiv = MDUOpType.isDiv(func)
val isW = MDUOpType.isW(func)
mul.io.in.valid := io.in.valid && !isDiv mul.io.in.valid := io.in.valid && !isDiv
div.io.in.valid := io.in.valid && isDiv && !isW div.io.in.valid := io.in.valid && isDiv
div32.io.in.valid := io.in.valid && isDiv && isW
val mulRes = Mux(func(1,0) === MDUOpType.mul(1,0), mul.io.out.bits(XLEN-1,0), mul.io.out.bits(2*XLEN-1,XLEN)) val mulRes = Mux(func(1,0) === MDUOpType.mul(1,0), mul.io.out.bits(XLEN-1,0), mul.io.out.bits(2*XLEN-1,XLEN))
val divRes = LookupTree(func, List( val divRes = Mux(func(1) /* rem */, div.io.out.bits(2*XLEN-1,XLEN), div.io.out.bits(XLEN-1,0))
MDUOpType.div -> div.io.out.bits(XLEN-1,0),
MDUOpType.divu -> div.io.out.bits(XLEN-1,0),
MDUOpType.rem -> div.io.out.bits(2*XLEN-1,XLEN),
MDUOpType.remu -> div.io.out.bits(2*XLEN-1,XLEN),
MDUOpType.divw -> SignExt(div32.io.out.bits(31,0), XLEN),
MDUOpType.divuw-> SignExt(div32.io.out.bits(31,0), XLEN),
MDUOpType.remw -> SignExt(div32.io.out.bits(63,32), XLEN),
MDUOpType.remuw-> SignExt(div32.io.out.bits(63,32), XLEN)
))
val res = Mux(isDiv, divRes, mulRes) val res = Mux(isDiv, divRes, mulRes)
io.out.bits := Mux(isW, SignExt(res(31,0),64), res) io.out.bits := Mux(isW, SignExt(res(31,0),XLEN), res)
val isDivReg = Mux(io.in.fire(), isDiv, RegNext(isDiv)) val isDivReg = Mux(io.in.fire(), isDiv, RegNext(isDiv))
io.in.ready := Mux(isDiv, div.io.in.ready, mul.io.in.ready) io.in.ready := Mux(isDiv, div.io.in.ready, mul.io.in.ready)
io.out.valid := Mux(isDivReg, div.io.out.valid || div32.io.out.valid, mul.io.out.valid) io.out.valid := Mux(isDivReg, div.io.out.valid, mul.io.out.valid)
BoringUtils.addSource(mul.io.out.fire(), "perfCntCondMmulInstr") BoringUtils.addSource(mul.io.out.fire(), "perfCntCondMmulInstr")
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册