未验证 提交 41760677 编写于 作者: L ljw 提交者: GitHub

Merge pull request #94 from RISCVERS/fix-mul-dispatch

Fix mul/dispatch/lsu bugs, CoreMark passed
...@@ -83,7 +83,10 @@ class Dispatch2 extends XSModule { ...@@ -83,7 +83,10 @@ class Dispatch2 extends XSModule {
val readPortSrc = Seq(aluInstIdxs(i), bruInstIdx, mulInstIdx, muldivInstIdx) val readPortSrc = Seq(aluInstIdxs(i), bruInstIdx, mulInstIdx, muldivInstIdx)
val wantReadPort = (0 until 4).map(j => ( val wantReadPort = (0 until 4).map(j => (
if (i == 0) !readPortSrc(j)(2) if (i == 0) !readPortSrc(j)(2)
else !readPortSrc(j)(2) && (j.U > intDeqChoice(i-1) || j.U === 0.U))) else {
val prevMax = (0 until i).map(intDeqChoice(_)).reduce((a, b) => Mux(a > b, a, b))
!readPortSrc(j)(2) && (j.U > prevMax || j.U === 0.U)
}))
val readIdxVec = Wire(Vec(4, UInt(2.W))) val readIdxVec = Wire(Vec(4, UInt(2.W)))
for (j <- 0 until 4) { for (j <- 0 until 4) {
readIdxVec(j) := readPortSrc(j)(1, 0) readIdxVec(j) := readPortSrc(j)(1, 0)
...@@ -109,7 +112,10 @@ class Dispatch2 extends XSModule { ...@@ -109,7 +112,10 @@ class Dispatch2 extends XSModule {
val readPortSrc = Seq(fmacInstIdxs(i), fmisc0InstIdx, fmisc1InstIdx) val readPortSrc = Seq(fmacInstIdxs(i), fmisc0InstIdx, fmisc1InstIdx)
val wantReadPort = (0 until 3).map(j => ( val wantReadPort = (0 until 3).map(j => (
if (i == 0) !readPortSrc(j)(2) if (i == 0) !readPortSrc(j)(2)
else !readPortSrc(j)(2) && (j.U > fpDeqChoice(i-1) || j.U === 0.U))) else {
val prevMax = (0 until i).map(fpDeqChoice(_)).reduce((a, b) => Mux(a > b, a, b))
!readPortSrc(j)(2) && (j.U > prevMax || j.U === 0.U)
}))
val readIdxVec = Wire(Vec(3, UInt(2.W))) val readIdxVec = Wire(Vec(3, UInt(2.W)))
for (j <- 0 until 3) { for (j <- 0 until 3) {
readIdxVec(j) := readPortSrc(j)(1, 0) readIdxVec(j) := readPortSrc(j)(1, 0)
......
...@@ -234,7 +234,10 @@ class Lsu extends Exu( ...@@ -234,7 +234,10 @@ class Lsu extends Exu(
io.in.ready := io.out.fire() io.in.ready := io.out.fire()
io.out.valid := (!isStoreIn && !retiringStore && Mux(partialLoad, state === s_partialLoad, dmem.resp.fire() && (state === s_wait_resp)) || stqEnqueue) && io.in.valid val validLoad = RegInit(false.B)
when(state =/= s_idle && !io.in.valid) { validLoad := false.B }
when(state === s_idle && io.in.valid && !retiringStore && dmem.req.fire()) { validLoad := true.B }
io.out.valid := (!isStoreIn && !retiringStore && validLoad && Mux(partialLoad, state === s_partialLoad, dmem.resp.fire() && (state === s_wait_resp)) || stqEnqueue) && io.in.valid
io.out.bits.uop <> io.in.bits.uop io.out.bits.uop <> io.in.bits.uop
io.out.bits.data := Mux(partialLoad, rdataPartialLoad, rdata) io.out.bits.data := Mux(partialLoad, rdataPartialLoad, rdata)
// io.out.bits.debug.isMMIO := AddressSpace.isMMIO(addr) && io.out.valid // io.out.bits.debug.isMMIO := AddressSpace.isMMIO(addr) && io.out.valid
......
...@@ -4,7 +4,7 @@ import chisel3._ ...@@ -4,7 +4,7 @@ import chisel3._
import chisel3.util._ import chisel3.util._
import xiangshan._ import xiangshan._
import utils.{LookupTree, SignExt, ZeroExt, _} import utils.{LookupTree, SignExt, ZeroExt, _}
import xiangshan.backend.{MULOpType, MDUOpType} import xiangshan.backend.{MDUOpType, MULOpType}
class Mul extends Exu(FuType.mul.litValue()){ class Mul extends Exu(FuType.mul.litValue()){
override def toString: String = "Mul" override def toString: String = "Mul"
...@@ -59,6 +59,8 @@ class Mul extends Exu(FuType.mul.litValue()){ ...@@ -59,6 +59,8 @@ class Mul extends Exu(FuType.mul.litValue()){
XSDebug(io.out.valid, "Out(%d %d) res:%x pc:%x\n", XSDebug(io.out.valid, "Out(%d %d) res:%x pc:%x\n",
io.out.valid, io.out.ready, io.out.bits.data, io.out.bits.uop.cf.pc io.out.valid, io.out.ready, io.out.bits.data, io.out.bits.uop.cf.pc
) )
XSDebug(io.redirect.valid, p"redirect: ${io.redirect.bits.brTag}\n")
} }
// A wrapper of Divider // A wrapper of Divider
...@@ -187,23 +189,16 @@ trait HasPipelineReg { this: ArrayMultiplier => ...@@ -187,23 +189,16 @@ trait HasPipelineReg { this: ArrayMultiplier =>
val validVec = io.in.valid +: Array.fill(latency)(RegInit(false.B)) val validVec = io.in.valid +: Array.fill(latency)(RegInit(false.B))
val rdyVec = Array.fill(latency)(Wire(Bool())) :+ io.out.ready val rdyVec = Array.fill(latency)(Wire(Bool())) :+ io.out.ready
val ctrlVec = io.in.bits.ctrl +: Array.fill(latency)(Reg(new MulDivCtrl)) val ctrlVec = io.in.bits.ctrl +: Array.fill(latency)(Reg(new MulDivCtrl))
val flushVec = ctrlVec.map(_.uop.brTag.needFlush(io.redirect)) val flushVec = ctrlVec.zip(validVec).map(x => x._2 && x._1.uop.brTag.needFlush(io.redirect))
for(i <- 0 until latency){ for(i <- 0 until latency){
rdyVec(i) := !validVec(i+1) || rdyVec(i+1) rdyVec(i) := !validVec(i+1) || rdyVec(i+1)
} }
when(io.out.fire()){
validVec.last := false.B
}
for(i <- 1 to latency){ for(i <- 1 to latency){
when(flushVec(i)){ when(flushVec(i) || rdyVec(i) && !validVec(i-1)){
validVec(i) := false.B validVec(i) := false.B
} }.elsewhen(rdyVec(i-1) && validVec(i-1) && !flushVec(i-1)){
when(rdyVec(i-1) && validVec(i-1) && !flushVec(i-1)){
if(i-1 !=0 ) validVec(i-1) := false.B
validVec(i) := validVec(i-1) validVec(i) := validVec(i-1)
ctrlVec(i) := ctrlVec(i-1) ctrlVec(i) := ctrlVec(i-1)
} }
...@@ -250,6 +245,8 @@ class ArrayMultiplier ...@@ -250,6 +245,8 @@ class ArrayMultiplier
val res = Mux(ctrlVec.last.isHi, dataVec.last.head(xlen), dataVec.last.tail(xlen)) val res = Mux(ctrlVec.last.isHi, dataVec.last.head(xlen), dataVec.last.tail(xlen))
io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31,0),xlen), res) io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31,0),xlen), res)
XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")(this.name)
// printf(p"t=${GTimer()} in: v${io.in.valid} r:${io.in.ready}\n") // printf(p"t=${GTimer()} in: v${io.in.valid} r:${io.in.ready}\n")
// printf(p"t=${GTimer()} out: v:${io.out.valid} r:${io.out.ready} vec:${Binary(Cat(validVec))}\n") // printf(p"t=${GTimer()} out: v:${io.out.valid} r:${io.out.ready} vec:${Binary(Cat(validVec))}\n")
} }
......
...@@ -73,13 +73,6 @@ class FakeIFU extends XSModule with HasIFUConst { ...@@ -73,13 +73,6 @@ class FakeIFU extends XSModule with HasIFUConst {
io.fetchPacket.bits.pc := pc io.fetchPacket.bits.pc := pc
io.fetchPacket.bits.instrs := fakeCache.io.rdata io.fetchPacket.bits.instrs := fakeCache.io.rdata
Debug(cond=io.fetchPacket.fire()){
printf(p"==========FetchGroup==========\nfirst pc:${Hexadecimal(pc)}\n")
for(i <- io.fetchPacket.bits.instrs.indices){
printf(p"inst$i: ${Hexadecimal(io.fetchPacket.bits.instrs(i))} v:${io.fetchPacket.bits.mask(i)} isRVC:${io.fetchPacket.bits.instrs(i)(1,0)=/="b11".U}\n")
}
}
XSDebug(p"pc=${Hexadecimal(pc)}\n") XSDebug(p"pc=${Hexadecimal(pc)}\n")
} }
...@@ -137,7 +137,7 @@ int difftest_step(int commit, uint64_t *reg_scala, uint32_t this_inst, ...@@ -137,7 +137,7 @@ int difftest_step(int commit, uint64_t *reg_scala, uint32_t this_inst,
wb_pointer = (wb_pointer+1) % DEBUG_WB_TRACE_SIZE; wb_pointer = (wb_pointer+1) % DEBUG_WB_TRACE_SIZE;
if(selectBit(skip, i)){ if(selectBit(skip, i)){
// MMIO accessing should not be a branch or jump, just +2/+4 to get the next pc // MMIO accessing should not be a branch or jump, just +2/+4 to get the next pc
printf("SKIP %d\n", i); // printf("SKIP %d\n", i);
// to skip the checking of an instruction, just copy the reg state to reference design // to skip the checking of an instruction, just copy the reg state to reference design
ref_difftest_getregs(&ref_r); ref_difftest_getregs(&ref_r);
ref_r[DIFFTEST_THIS_PC] += 4; //TODO: RVC ref_r[DIFFTEST_THIS_PC] += 4; //TODO: RVC
......
...@@ -166,7 +166,7 @@ class Emulator { ...@@ -166,7 +166,7 @@ class Emulator {
set_abort(); set_abort();
} }
printf("xsstatus pc=%lx commit=%d\n", dut_ptr->io_difftest_thisPC, dut_ptr->io_difftest_commit);//FIXIT: delete me when dummy test is passed //printf("xsstatus pc=%lx commit=%d\n", dut_ptr->io_difftest_thisPC, dut_ptr->io_difftest_commit);//FIXIT: delete me when dummy test is passed
if (!hascommit && dut_ptr->io_difftest_thisPC == 0x80000000u) { if (!hascommit && dut_ptr->io_difftest_thisPC == 0x80000000u) {
hascommit = 1; hascommit = 1;
......
package xiangshan.backend.exu
import org.scalatest._
import chiseltest._
import chisel3._
import chisel3.experimental.BundleLiterals._
import chisel3.util.experimental.BoringUtils
import chiseltest.experimental.TestOptionBuilder._
import chiseltest.internal.VerilatorBackendAnnotation
import noop.MDUOpType
import xiangshan._
import xiangshan.testutils._
import xiangshan.testutils.TestCaseGenerator._
import scala.util.Random
/**
 * Handshake-level tests for the `Mul` and `Mdu` execution units.
 *
 * Every test builds the unit under test as an anonymous subclass that feeds the
 * "DISPLAY_LOG_START" / "DISPLAY_LOG_END" BoringUtils sinks with 0 and an all-ones
 * 64-bit value respectively (presumably this opens the debug-log window for the whole
 * simulation -- TODO confirm against the logging utilities).
 *
 * The expected outputs are partial bundle literals that only specify `uop.cf.pc`;
 * the `*Partial*` driver helpers come from `HasPartialDecoupledDriver` and are assumed
 * to compare only the fields present in the literal -- verify against that trait.
 */
class MduTest extends FlatSpec
  with ChiselScalatestTester
  with Matchers
  with ParallelTestExecution
  with HasPartialDecoupledDriver
{

  // Pushes a stream of multiply uops with random PCs through Mul and expects them to
  // come out in the same order, identified by their PC.
  "MUL" should "random enq and deq correctly" in {
    test(new Mul{
      val disp_begin = WireInit(0.S(64.W).asUInt())
      val disp_end = WireInit((-1).S(64.W).asUInt())
      BoringUtils.addSource(disp_begin, "DISPLAY_LOG_START")
      BoringUtils.addSource(disp_end, "DISPLAY_LOG_END")
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      val testSize = 100
      // Random non-negative PCs are used purely as tags to match inputs with outputs.
      val pcSeq = (0 until testSize).map(_ => Random.nextInt(0x7fffffff))

      // Producer and consumer run concurrently so that the pipeline sees back-to-back traffic.
      fork{
        c.io.in.enqueuePartialSeq(pcSeq.map(pc => genMul(c.io.in.bits, pc)))
      }.fork{
        c.io.out.expectDequeuePartialSeq(pcSeq.map(
          pc => chiselTypeOf(c.io.out.bits).Lit(
            _.uop.cf.pc -> pc.U
          )
        ))
      }.join()

    }
  }

  // Enqueues a single uop tagged with brTag(flag = true, value = 15), then raises a
  // non-exception redirect carrying the very same brTag for one cycle. The uop must
  // still be dequeued, i.e. a redirect must not flush an instruction with an identical tag.
  "MUL" should "dont flush same br tag" in {
    test(new Mul{
      val disp_begin = WireInit(0.S(64.W).asUInt())
      val disp_end = WireInit((-1).S(64.W).asUInt())
      BoringUtils.addSource(disp_begin, "DISPLAY_LOG_START")
      BoringUtils.addSource(disp_end, "DISPLAY_LOG_END")
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      // NOTE(review): the numbers 53..56 below look like cycle indices taken from a
      // debug trace of the original failure -- confirm or drop them.
      fork{
        // 53
        c.io.in.enqueuePartial(chiselTypeOf(c.io.in.bits).Lit(
          _.uop.cf.pc -> 666.U,
          _.uop.brTag.flag -> true.B,
          _.uop.brTag.value -> 15.U
        ))
        // 54
        c.clock.step(1)
        // 55
        c.io.redirect.valid.poke(true.B)
        c.io.redirect.bits.pokePartial(chiselTypeOf(c.io.redirect.bits).Lit(
          _.isException -> false.B,
          _.brTag.flag -> true.B,
          _.brTag.value -> 15.U
        ))
        c.clock.step(1)
        // 56
        c.io.redirect.valid.poke(false.B)
      }.fork{
        c.io.out.expectDequeuePartial(chiselTypeOf(c.io.out.bits).Lit(_.uop.cf.pc -> 666.U))
      }.join()

    }
  }

  // Same streaming check as the first MUL test, but drives divide uops through Mdu.
  "MDU" should "random enq and deq correctly" in {
    test(new Mdu{
      val disp_begin = WireInit(0.S(64.W).asUInt())
      val disp_end = WireInit((-1).S(64.W).asUInt())
      BoringUtils.addSource(disp_begin, "DISPLAY_LOG_START")
      BoringUtils.addSource(disp_end, "DISPLAY_LOG_END")
    }){ c =>

      c.io.in.initSource().setSourceClock(c.clock)
      c.io.out.initSink().setSinkClock(c.clock)

      val testSize = 50
      // Random non-negative PCs are used purely as tags to match inputs with outputs.
      val pcSeq = (0 until testSize).map(_ => Random.nextInt(0x7fffffff))

      fork{
        c.io.in.enqueuePartialSeq(pcSeq.map(pc => {
          genDiv(c.io.in.bits, pc)
        }))
      }.fork{
        c.io.out.expectDequeuePartialSeq(pcSeq.map(
          pc => chiselTypeOf(c.io.out.bits).Lit(
            _.uop.cf.pc -> pc.U
          )
        ))
      }.join()

    }
  }

}
...@@ -4,12 +4,32 @@ import chisel3._ ...@@ -4,12 +4,32 @@ import chisel3._
import chisel3.util._ import chisel3.util._
import chisel3.experimental.BundleLiterals._ import chisel3.experimental.BundleLiterals._
import chiseltest._ import chiseltest._
import noop.MDUOpType
import xiangshan._ import xiangshan._
import xiangshan.backend.exu.{ALUOpType, LSUOpType} import xiangshan.backend.exu.{ALUOpType, LSUOpType}
object TestCaseGenerator { object TestCaseGenerator {
/*
Generate MUL/DIV Input
*/
/**
 * Builds a partial `ExuInput` literal describing a multiply request.
 *
 * Only two fields are set: the function-unit op type (`MDUOpType.mulw`) and the
 * instruction PC; every other field of the bundle is left unspecified.
 *
 * @param x  a hardware `ExuInput` used only as a type template (passed by name)
 * @param pc value written into `uop.cf.pc`
 * @return   a literal of the same Chisel type as `x`
 */
def genMul(x: => ExuInput, pc: Long): ExuInput = {
  val inputType = chiselTypeOf(x)
  inputType.Lit(
    in => in.uop.ctrl.fuOpType -> MDUOpType.mulw,
    in => in.uop.cf.pc -> pc.U
  )
}
/**
 * Builds a partial `ExuInput` literal describing a divide request.
 *
 * Only two fields are set: the function-unit op type (`MDUOpType.div`) and the
 * instruction PC; every other field of the bundle is left unspecified.
 *
 * @param x  a hardware `ExuInput` used only as a type template (passed by name)
 * @param pc value written into `uop.cf.pc`
 * @return   a literal of the same Chisel type as `x`
 */
def genDiv(x: => ExuInput, pc: Long): ExuInput = {
  val inputType = chiselTypeOf(x)
  inputType.Lit(
    in => in.uop.ctrl.fuOpType -> MDUOpType.div,
    in => in.uop.cf.pc -> pc.U
  )
}
/* /*
Generate ALU Input Generate ALU Input
*/ */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册