Commit ffc2f15b authored by LinJiawei

Merge remote-tracking branch 'origin/master' into fix-dispatch-order

......@@ -3,20 +3,34 @@ name: EMU Test
on:
push:
branches: [ master, update-ci]
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build-emu:
generate-verilog:
runs-on: self-hosted
name: Make EMU
name: Generate Verilog
steps:
- uses: actions/checkout@v2
with:
submodules: 'recursive'
- name: Check Wiring
run: bash .github/workflows/check-usage.sh "BoringUtils" $GITHUB_WORKSPACE
- name: set env
run: |
echo "NEMU_HOME=/home/ci-runner/xsenv/NEMU" >> $GITHUB_ENV
echo "NOOP_HOME=$GITHUB_WORKSPACE" >> $GITHUB_ENV
- name: generate verilog file
run:
make verilog SIM_ARGS=--dual-core
build-emu:
runs-on: self-hosted
name: Make EMU
steps:
- uses: actions/checkout@v2
with:
submodules: 'recursive'
- name: Set env
run: |
echo "NEMU_HOME=/home/ci-runner/xsenv/NEMU" >> $GITHUB_ENV
......@@ -25,7 +39,7 @@ jobs:
echo "AM_HOME=/home/ci-runner/xsenv/nexus-am" >> $GITHUB_ENV
- name: Build EMU
run:
make ./build/emu SIM_ARGS=--disable-all NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j60
make ./build/emu SIM_ARGS=--disable-all NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME -j220
- name: Run cputest
run: |
CPU_TEST_DIR=$AM_HOME/tests/cputest
......
......@@ -29,7 +29,7 @@ trait CommonModule extends ScalaModule {
}
val chisel = Agg(
ivy"edu.berkeley.cs::chisel3:3.4.0"
ivy"edu.berkeley.cs::chisel3:3.4.1"
)
object `api-config-chipsalliance` extends CommonModule {
......@@ -103,4 +103,4 @@ object XiangShan extends CommonModule with SbtModule {
}
}
}
\ No newline at end of file
}
......@@ -3,7 +3,7 @@ package utils
import chisel3._
import chisel3.util._
class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends Module {
class DataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int, isSync: Boolean) extends Module {
val io = IO(new Bundle {
val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
val rdata = Vec(numRead, Output(gen))
......@@ -15,8 +15,9 @@ class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int,
val data = Mem(numEntries, gen)
// read ports
val raddr = if (isSync) (RegNext(io.raddr)) else io.raddr
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
io.rdata(i) := data(raddr(i))
}
// below is the write ports (with priorities)
......@@ -34,34 +35,5 @@ class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int,
}
}
class SyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends Module {
val io = IO(new Bundle {
val raddr = Vec(numRead, Input(UInt(log2Up(numEntries).W)))
val rdata = Vec(numRead, Output(gen))
val wen = Vec(numWrite, Input(Bool()))
val waddr = Vec(numWrite, Input(UInt(log2Up(numEntries).W)))
val wdata = Vec(numWrite, Input(gen))
})
val data = Mem(numEntries, gen)
// read ports
val raddr_reg = RegNext(io.raddr)
for (i <- 0 until numRead) {
io.rdata(i) := data(raddr_reg(i))
}
// below is the write ports (with priorities)
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
}
}
// DataModuleTemplate should not be used when there are any write conflicts
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
}
}
}
class SyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends DataModuleTemplate(gen, numEntries, numRead, numWrite, true)
class AsyncDataModuleTemplate[T <: Data](gen: T, numEntries: Int, numRead: Int, numWrite: Int) extends DataModuleTemplate(gen, numEntries, numRead, numWrite, false)
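Review note: the refactor above folds the synchronous and asynchronous variants into a single `DataModuleTemplate` parameterized by `isSync`, with the two former classes kept as thin subclasses. A minimal usage sketch of the read-latency contract (the wrapper module here is hypothetical, not from this diff):

```scala
import chisel3._

// With isSync = true the read address is registered, so rdata corresponds
// to the raddr presented one cycle earlier.
class DataModuleUser extends Module {
  val io = IO(new Bundle {
    val addr = Input(UInt(4.W))
    val out  = Output(UInt(64.W))
  })
  val mod = Module(new DataModuleTemplate(UInt(64.W), numEntries = 16,
    numRead = 1, numWrite = 1, isSync = true))
  mod.io.raddr(0) := io.addr          // address presented this cycle
  io.out          := mod.io.rdata(0)  // data valid in the next cycle
  mod.io.wen(0)   := false.B          // write port tied off in this sketch
  mod.io.waddr(0) := 0.U
  mod.io.wdata(0) := 0.U
}
```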
......@@ -284,6 +284,7 @@ class ReplayPregReq extends XSBundle {
class DebugBundle extends XSBundle{
val isMMIO = Bool()
val isPerfCnt = Bool()
}
class ExuInput extends XSBundle {
......
......@@ -425,6 +425,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
integerBlock.io.csrio.memExceptionVAddr <> memBlock.io.lsqio.exceptionAddr.vaddr
integerBlock.io.csrio.externalInterrupt <> io.externalInterrupt
integerBlock.io.csrio.tlb <> memBlock.io.tlbCsr
integerBlock.io.csrio.perfinfo <> ctrlBlock.io.roqio.toCSR.perfinfo
integerBlock.io.fenceio.sfence <> memBlock.io.sfence
integerBlock.io.fenceio.sbuffer <> memBlock.io.fenceToSbuffer
......
......@@ -82,6 +82,9 @@ class IntegerBlock
val memExceptionVAddr = Input(UInt(VAddrBits.W)) // from lsq
val externalInterrupt = new ExternalInterruptIO // from outside
val tlb = Output(new TlbCsrBundle) // from tlb
val perfinfo = new Bundle {
val retiredInstr = Input(UInt(3.W))
}
}
val fenceio = new Bundle {
val sfence = Output(new SfenceBundle) // to front,mem
......
......@@ -306,16 +306,16 @@ class Brq extends XSModule with HasCircularQueuePtrHelper {
val mbpRWrong = predWrong && isRType
if(!env.FPGAPlatform){
ExcitingUtils.addSource(mbpInstr, "perfCntCondMbpInstr", Perf)
ExcitingUtils.addSource(mbpRight, "perfCntCondMbpRight", Perf)
ExcitingUtils.addSource(mbpWrong, "perfCntCondMbpWrong", Perf)
ExcitingUtils.addSource(mbpBRight, "perfCntCondMbpBRight", Perf)
ExcitingUtils.addSource(mbpBWrong, "perfCntCondMbpBWrong", Perf)
ExcitingUtils.addSource(mbpJRight, "perfCntCondMbpJRight", Perf)
ExcitingUtils.addSource(mbpJWrong, "perfCntCondMbpJWrong", Perf)
ExcitingUtils.addSource(mbpIRight, "perfCntCondMbpIRight", Perf)
ExcitingUtils.addSource(mbpIWrong, "perfCntCondMbpIWrong", Perf)
ExcitingUtils.addSource(mbpRRight, "perfCntCondMbpRRight", Perf)
ExcitingUtils.addSource(mbpRWrong, "perfCntCondMbpRWrong", Perf)
ExcitingUtils.addSource(mbpInstr, "perfCntCondBpInstr", Perf)
ExcitingUtils.addSource(mbpRight, "perfCntCondBpRight", Perf)
ExcitingUtils.addSource(mbpWrong, "perfCntCondBpWrong", Perf)
ExcitingUtils.addSource(mbpBRight, "perfCntCondBpBRight", Perf)
ExcitingUtils.addSource(mbpBWrong, "perfCntCondBpBWrong", Perf)
ExcitingUtils.addSource(mbpJRight, "perfCntCondBpJRight", Perf)
ExcitingUtils.addSource(mbpJWrong, "perfCntCondBpJWrong", Perf)
ExcitingUtils.addSource(mbpIRight, "perfCntCondBpIRight", Perf)
ExcitingUtils.addSource(mbpIWrong, "perfCntCondBpIWrong", Perf)
ExcitingUtils.addSource(mbpRRight, "perfCntCondBpRRight", Perf)
ExcitingUtils.addSource(mbpRWrong, "perfCntCondBpRWrong", Perf)
}
}
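Review note: each `addSource` above is paired with an `addSink` using the same string key on the consumer side (the perf-counter logic), so the Mbp→Bp rename has to land on both ends of every pair. A hedged sketch of the sink side (the counter register is illustrative, not part of this diff):

```scala
// Illustrative sink: the string key is the whole contract between the ends.
val bpRight = WireInit(false.B)
ExcitingUtils.addSink(bpRight, "perfCntCondBpRight", Perf)

val bpRightCnt = RegInit(0.U(64.W))
when (bpRight) { bpRightCnt := bpRightCnt + 1.U }
```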
......@@ -183,6 +183,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
out.fflags := DontCare
out.debug <> DontCare
out.debug.isMMIO := false.B
out.debug.isPerfCnt := false.B
out.redirect <> DontCare
out.redirectValid := false.B
}
......
......@@ -21,6 +21,9 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
val memExceptionVAddr = Input(UInt(VAddrBits.W))
val externalInterrupt = new ExternalInterruptIO
val tlb = Output(new TlbCsrBundle)
val perfinfo = new Bundle {
val retiredInstr = Input(UInt(3.W))
}
})
val fenceio = IO(new Bundle {
val sfence = Output(new SfenceBundle)
......@@ -42,6 +45,7 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
}.get
csr.csrio.perf <> DontCare
csr.csrio.perf.retiredInstr <> csrio.perfinfo.retiredInstr
csr.csrio.fpu.fflags <> csrio.fflags
csr.csrio.fpu.isIllegal := false.B
csr.csrio.fpu.dirty_fs <> csrio.dirty_fs
......@@ -73,6 +77,7 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
io.toInt.bits.redirect.roqIdx := uop.roqIdx
io.toInt.bits.redirect.target := csr.csrio.redirectOut.bits
io.toInt.bits.redirect.pc := uop.cf.pc
io.toInt.bits.debug.isPerfCnt := csr.csrio.isPerfCnt
}.elsewhen(jmp.io.out.valid){
io.toInt.bits.redirectValid := jmp.redirectOutValid
io.toInt.bits.redirect := jmp.redirectOut
......
package xiangshan.backend.fu.util
import chisel3._
import chisel3.ExcitingUtils.{ConnectionType, Debug}
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend._
import utils.XSDebug
trait HasCSRConst {
// User Trap Setup
val Ustatus = 0x000
val Uie = 0x004
val Utvec = 0x005
// User Trap Handling
val Uscratch = 0x040
val Uepc = 0x041
val Ucause = 0x042
val Utval = 0x043
val Uip = 0x044
// User Floating-Point CSRs (not implemented)
val Fflags = 0x001
val Frm = 0x002
val Fcsr = 0x003
// User Counter/Timers
val Cycle = 0xC00
val Time = 0xC01
val Instret = 0xC02
// Supervisor Trap Setup
val Sstatus = 0x100
val Sedeleg = 0x102
val Sideleg = 0x103
val Sie = 0x104
val Stvec = 0x105
val Scounteren = 0x106
// Supervisor Trap Handling
val Sscratch = 0x140
val Sepc = 0x141
val Scause = 0x142
val Stval = 0x143
val Sip = 0x144
// Supervisor Protection and Translation
val Satp = 0x180
// Machine Information Registers
val Mvendorid = 0xF11
val Marchid = 0xF12
val Mimpid = 0xF13
val Mhartid = 0xF14
// Machine Trap Setup
val Mstatus = 0x300
val Misa = 0x301
val Medeleg = 0x302
val Mideleg = 0x303
val Mie = 0x304
val Mtvec = 0x305
val Mcounteren = 0x306
// Machine Trap Handling
val Mscratch = 0x340
val Mepc = 0x341
val Mcause = 0x342
val Mtval = 0x343
val Mip = 0x344
// Machine Memory Protection
// TBD
val Pmpcfg0 = 0x3A0
val Pmpcfg1 = 0x3A1
val Pmpcfg2 = 0x3A2
val Pmpcfg3 = 0x3A3
val PmpaddrBase = 0x3B0
// Machine Counter/Timers
// Currently, we use the perfcnt csr set instead of the standard Machine Counter/Timers
// 0xB80 - 0xB9F are also used as perfcnt csrs
val Mcycle = 0xb00
val Minstret = 0xb02
val Mhpmcounter3 = 0xB03
val Mhpmcounter4 = 0xB04
val Mhpmcounter5 = 0xB05
val Mhpmcounter6 = 0xB06
val Mhpmcounter7 = 0xB07
val Mhpmcounter8 = 0xB08
val Mhpmcounter9 = 0xB09
val Mhpmcounter10 = 0xB0A
val Mhpmcounter11 = 0xB0B
val Mhpmcounter12 = 0xB0C
val Mhpmcounter13 = 0xB0D
val Mhpmcounter14 = 0xB0E
val Mhpmcounter15 = 0xB0F
val Mhpmcounter16 = 0xB10
val Mhpmcounter17 = 0xB11
val Mhpmcounter18 = 0xB12
val Mhpmcounter19 = 0xB13
val Mhpmcounter20 = 0xB14
val Mhpmcounter21 = 0xB15
val Mhpmcounter22 = 0xB16
val Mhpmcounter23 = 0xB17
val Mhpmcounter24 = 0xB18
val Mhpmcounter25 = 0xB19
val Mhpmcounter26 = 0xB1A
val Mhpmcounter27 = 0xB1B
val Mhpmcounter28 = 0xB1C
val Mhpmcounter29 = 0xB1D
val Mhpmcounter30 = 0xB1E
val Mhpmcounter31 = 0xB1F
// Machine Counter Setup (not implemented)
val Mcountinhibit = 0x320
val Mhpmevent3 = 0x323
val Mhpmevent4 = 0x324
val Mhpmevent5 = 0x325
val Mhpmevent6 = 0x326
val Mhpmevent7 = 0x327
val Mhpmevent8 = 0x328
val Mhpmevent9 = 0x329
val Mhpmevent10 = 0x32A
val Mhpmevent11 = 0x32B
val Mhpmevent12 = 0x32C
val Mhpmevent13 = 0x32D
val Mhpmevent14 = 0x32E
val Mhpmevent15 = 0x32F
val Mhpmevent16 = 0x330
val Mhpmevent17 = 0x331
val Mhpmevent18 = 0x332
val Mhpmevent19 = 0x333
val Mhpmevent20 = 0x334
val Mhpmevent21 = 0x335
val Mhpmevent22 = 0x336
val Mhpmevent23 = 0x337
val Mhpmevent24 = 0x338
val Mhpmevent25 = 0x339
val Mhpmevent26 = 0x33A
val Mhpmevent27 = 0x33B
val Mhpmevent28 = 0x33C
val Mhpmevent29 = 0x33D
val Mhpmevent30 = 0x33E
val Mhpmevent31 = 0x33F
// Debug/Trace Registers (shared with Debug Mode) (not implemented)
// Debug Mode Registers (not implemented)
def privEcall = 0x000.U
def privEbreak = 0x001.U
def privMret = 0x302.U
def privSret = 0x102.U
def privUret = 0x002.U
def ModeM = 0x3.U
def ModeH = 0x2.U
def ModeS = 0x1.U
def ModeU = 0x0.U
def IRQ_UEIP = 0
def IRQ_SEIP = 1
def IRQ_MEIP = 3
def IRQ_UTIP = 4
def IRQ_STIP = 5
def IRQ_MTIP = 7
def IRQ_USIP = 8
def IRQ_SSIP = 9
def IRQ_MSIP = 11
val IntPriority = Seq(
IRQ_MEIP, IRQ_MSIP, IRQ_MTIP,
IRQ_SEIP, IRQ_SSIP, IRQ_STIP,
IRQ_UEIP, IRQ_USIP, IRQ_UTIP
)
def csrAccessPermissionCheck(addr: UInt, wen: Bool, mode: UInt): Bool = {
val readOnly = addr(11,10) === "b11".U
val lowestAccessPrivilegeLevel = addr(9,8)
mode >= lowestAccessPrivilegeLevel && !(wen && readOnly)
}
}
\ No newline at end of file
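Review note: `csrAccessPermissionCheck` relies on the standard RISC-V CSR address convention: bits [11:10] encode accessibility (0b11 means read-only) and bits [9:8] the lowest privilege level that may access the register. A worked example using constants from this trait:

```scala
// Cycle  = 0xC00 = 0b1100_0000_0000: addr(11,10) = 0b11 -> read-only,
//                                    addr(9,8)   = 0b00 -> U-mode and above
// Mcycle = 0xB00 = 0b1011_0000_0000: addr(11,10) = 0b10 -> read/write,
//                                    addr(9,8)   = 0b11 -> M-mode only
// Expected results (for illustration):
//   csrAccessPermissionCheck(Cycle.U,  true.B,  ModeM) -> false.B // write to read-only
//   csrAccessPermissionCheck(Cycle.U,  false.B, ModeU) -> true.B
//   csrAccessPermissionCheck(Mcycle.U, true.B,  ModeM) -> true.B
//   csrAccessPermissionCheck(Mcycle.U, false.B, ModeS) -> false.B // M-only CSR
```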
......@@ -358,7 +358,7 @@ class ReservationStationData
// Data
// ------------------------
val data = List.tabulate(srcNum)(_ => Module(new SyncDataModuleTemplate(UInt((XLEN + 1).W), iqSize, if (!env.FPGAPlatform) iqSize else 1, iqSize)))
val data = List.tabulate(srcNum)(_ => Module(new SyncDataModuleTemplate(UInt((XLEN + 1).W), iqSize, numRead = iqSize + 1, numWrite = iqSize)))
data.foreach(_.io <> DontCare)
data.foreach(_.io.wen.foreach(_ := false.B))
......@@ -366,14 +366,13 @@ class ReservationStationData
// ! warning: reading has a 1-cycle delay, so the input addr is used in the next cycle
// luckily, for the fpga platform, the read port has a fixed value
// otherwise, the read port has the same value as the read addr
def dataRead(iqIdx: UInt, srcIdx: Int): UInt = {
if (env.FPGAPlatform) {
data(srcIdx).io.raddr(0) := iqIdx
data(srcIdx).io.rdata(0)
} else {
data(srcIdx).io.raddr(iqIdx) := iqIdx
data(srcIdx).io.rdata(iqIdx)
}
def dataDebugRead(iqIdx: UInt, srcIdx: Int): UInt = {
data(srcIdx).io.raddr(iqIdx + 1.U) := iqIdx
data(srcIdx).io.rdata(iqIdx + 1.U)
}
def dataRead(nextIqIdx: UInt, srcIdx: Int): UInt = {
data(srcIdx).io.raddr(0) := nextIqIdx
data(srcIdx).io.rdata(0)
}
def dataWrite(iqIdx: UInt, srcIdx: Int, wdata: UInt) = {
data(srcIdx).io.waddr(iqIdx) := iqIdx
......@@ -381,7 +380,7 @@ class ReservationStationData
data(srcIdx).io.wen(iqIdx) := true.B
}
// debug data: only for XSDebug log printing!
val debug_data = if (!env.FPGAPlatform) List.tabulate(srcNum)(i => WireInit(VecInit((0 until iqSize).map(j => dataRead(j.U, i))))) else null
val debug_data = List.tabulate(srcNum)(i => WireInit(VecInit((0 until iqSize).map(j => dataDebugRead(j.U, i)))))
// Uop
// ------------------------
......@@ -501,7 +500,7 @@ class ReservationStationData
val exuInput = io.deq.bits
exuInput := DontCare
exuInput.uop := uop(deq)
val regValues = List.tabulate(srcNum)(i => dataRead(/* Mux(sel.valid, sel.bits, deq), i */deq, i))
val regValues = List.tabulate(srcNum)(i => dataRead(Mux(sel.valid, sel.bits, deq), i))
XSDebug(io.deq.fire(), p"[regValues] " + List.tabulate(srcNum)(idx => p"reg$idx: ${Hexadecimal(regValues(idx))}").reduce((p1, p2) => p1 + " " + p2) + "\n")
exuInput.src1 := regValues(0)
if (srcNum > 1) exuInput.src2 := regValues(1)
......
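Review note: because `SyncDataModuleTemplate` registers the read address, the issue-side read (port 0) must be driven with the index of the entry that will issue in the *next* cycle, which is why `dataRead` now takes `Mux(sel.valid, sel.bits, deq)`; ports 1..iqSize are reserved for the per-entry debug reads. A short sketch of the timing assumption (names from the diff, selection logic simplified):

```scala
// cycle T:   present the index of the entry chosen to issue at T+1
data(srcIdx).io.raddr(0) := Mux(sel.valid, sel.bits, deq)
// cycle T+1: the registered read port returns that entry's operand,
//            just in time to drive exuInput.src1/src2/src3 below
val operand = data(srcIdx).io.rdata(0)
```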
......@@ -38,6 +38,9 @@ class RoqCSRIO extends XSBundle {
val fflags = Output(Valid(UInt(5.W)))
val dirty_fs = Output(Bool())
val perfinfo = new Bundle {
val retiredInstr = Output(UInt(3.W))
}
}
class RoqEnqIO extends XSBundle {
......@@ -671,11 +674,10 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
if(i % 4 == 3) XSDebug(false, true.B, "\n")
}
val id = roqDebugId()
val difftestIntrNO = WireInit(0.U(XLEN.W))
val difftestCause = WireInit(0.U(XLEN.W))
ExcitingUtils.addSink(difftestIntrNO, s"difftestIntrNOfromCSR$id")
ExcitingUtils.addSink(difftestCause, s"difftestCausefromCSR$id")
val instrCnt = RegInit(0.U(64.W))
val retireCounter = Mux(state === s_idle, commitCnt, 0.U)
instrCnt := instrCnt + retireCounter
io.csr.perfinfo.retiredInstr := RegNext(retireCounter)
if(!env.FPGAPlatform) {
......@@ -696,10 +698,11 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val uop = debug_microOp(idx)
val DifftestSkipSC = false
if(!DifftestSkipSC){
skip(i) := debug_exuDebug(idx).isMMIO && io.commits.valid(i)
skip(i) := (debug_exuDebug(idx).isMMIO || debug_exuDebug(idx).isPerfCnt) && io.commits.valid(i)
}else{
skip(i) := (
debug_exuDebug(idx).isMMIO ||
debug_exuDebug(idx).isPerfCnt ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_d ||
uop.ctrl.fuType === FuType.mou && uop.ctrl.fuOpType === LSUOpType.sc_w
) && io.commits.valid(i)
......@@ -717,10 +720,10 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
debug_deqUop.ctrl.fuType === FuType.mou &&
(debug_deqUop.ctrl.fuOpType === LSUOpType.sc_d || debug_deqUop.ctrl.fuOpType === LSUOpType.sc_w)
val instrCnt = RegInit(0.U(64.W))
val retireCounter = Mux(state === s_idle, commitCnt, 0.U)
instrCnt := instrCnt + retireCounter
val difftestIntrNO = WireInit(0.U(XLEN.W))
val difftestCause = WireInit(0.U(XLEN.W))
ExcitingUtils.addSink(difftestIntrNO, "difftestIntrNOfromCSR")
ExcitingUtils.addSink(difftestCause, "difftestCausefromCSR")
XSDebug(difftestIntrNO =/= 0.U, "difftest intrNO set %x\n", difftestIntrNO)
val retireCounterFix = Mux(io.redirectOut.valid, 1.U, retireCounter)
val retirePCFix = SignExt(Mux(io.redirectOut.valid, debug_deqUop.cf.pc, debug_microOp(firstValidCommit).cf.pc), XLEN)
......
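Review note: with this change the Roq counts retired instructions unconditionally (not only in the difftest build) and exports the per-cycle count to the CSRs; `retiredInstr` is 3 bits wide, presumably because at most CommitWidth (≤ 7 here) instructions retire per cycle. A hedged sketch of the consumer side, which is not part of this diff:

```scala
// Hypothetical CSR-side use: accumulate retiredInstr into minstret so the
// counter advances by the number of instructions retired each cycle.
val minstret = RegInit(0.U(64.W))
minstret := minstret + csrio.perfinfo.retiredInstr
```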
......@@ -441,28 +441,28 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr)
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
store_addr_match || atomics_addr_match || prober_addr_match || miss_idx_match
store_addr_match || atomics_addr_match || prober_idx_match || miss_idx_match
}
def block_store(addr: UInt) = {
val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr)
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_addr_match || miss_idx_match
prober_idx_match || miss_idx_match
}
def block_atomics(addr: UInt) = {
val prober_addr_match = prober.io.inflight_req_block_addr.valid && prober.io.inflight_req_block_addr.bits === get_block_addr(addr)
val prober_idx_match = prober.io.inflight_req_block_addr.valid && get_idx(prober.io.inflight_req_block_addr.bits) === get_idx(addr)
val miss_idx_matches = VecInit(missQueue.io.inflight_req_idxes map (entry => entry.valid && entry.bits === get_idx(addr)))
val miss_idx_match = miss_idx_matches.reduce(_||_)
prober_addr_match || miss_idx_match
prober_idx_match || miss_idx_match
}
def block_miss(addr: UInt) = {
......@@ -475,11 +475,11 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
}
def block_probe(addr: UInt) = {
val store_addr_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val store_addr_match = store_addr_matches.reduce(_||_)
val store_idx_matches = VecInit(stu.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr)))
val store_idx_match = store_idx_matches.reduce(_||_)
val atomics_addr_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && entry.bits === get_block_addr(addr)))
val atomics_addr_match = atomics_addr_matches.reduce(_||_)
val atomics_idx_matches = VecInit(atomics.io.inflight_req_block_addrs map (entry => entry.valid && get_idx(entry.bits) === get_idx(addr)))
val atomics_idx_match = atomics_idx_matches.reduce(_||_)
val lrsc_addr_match = atomics.io.block_probe_addr.valid && atomics.io.block_probe_addr.bits === get_block_addr(addr)
......@@ -489,7 +489,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// the missed req
val miss_req_idx_match = missReq.fire() && get_idx(missReq.bits.addr) === get_idx(addr)
store_addr_match || atomics_addr_match || lrsc_addr_match || miss_idx_match || miss_req_idx_match
store_idx_match || atomics_idx_match || lrsc_addr_match || miss_idx_match || miss_req_idx_match
}
def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
......
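Review note: the blocking predicates above switch from comparing full block addresses to comparing set indexes, which is strictly coarser and therefore more conservative: any two addresses in the same set now block each other while a probe or miss is in flight. A sketch of the two helpers being contrasted (illustrative definitions under typical L1 parameters, not copied from this repo):

```scala
// blockOffBits = log2(block bytes), idxBits = log2(number of sets)
def get_block_addr(addr: UInt): UInt =
  (addr >> blockOffBits) << blockOffBits                 // whole-block address
def get_idx(addr: UInt): UInt =
  addr(blockOffBits + idxBits - 1, blockOffBits)         // set index only
// Equal block addresses imply equal indexes, so idx matching blocks a superset.
```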
......@@ -5,7 +5,7 @@ import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend.roq.RoqPtr
import xiangshan.backend.fu.HasCSRConst
import xiangshan.backend.fu.util.HasCSRConst
import chisel3.ExcitingUtils._
trait HasTlbConst extends HasXSParameter {
......
......@@ -403,7 +403,9 @@ class LoopPredictor extends BasePredictor with LTBParams {
io.meta.specCnts(i) := ltbResps(i).meta
}
ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf)
if (!env.FPGAPlatform) {
ExcitingUtils.addSource(io.resp.exit.reduce(_||_), "perfCntLoopExit", Perf)
}
if (BPUDebug && debug) {
// debug info
......@@ -422,4 +424,4 @@ class LoopPredictor extends BasePredictor with LTBParams {
XSDebug(false, out_fire && (i.U === 3.U || i.U === 7.U || i.U === 11.U || i.U === 15.U), "\n")
}
}
}
\ No newline at end of file
}
......@@ -16,210 +16,11 @@ class ExceptionAddrIO extends XSBundle {
val vaddr = Output(UInt(VAddrBits.W))
}
class LsqEntry extends XSBundle {
val vaddr = UInt(VAddrBits.W) // TODO: need opt
val paddr = UInt(PAddrBits.W)
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val exception = UInt(16.W) // TODO: opt size
val fwdMask = Vec(8, Bool())
val fwdData = Vec(8, UInt(8.W))
}
class FwdEntry extends XSBundle {
val mask = Vec(8, Bool())
val data = Vec(8, UInt(8.W))
}
class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val wb = Vec(nchannel, new Bundle() {
val wen = Input(Bool())
val index = Input(UInt(log2Up(size).W))
val wdata = Input(new LsqEntry)
})
val uncache = new Bundle() {
val wen = Input(Bool())
val index = Input(UInt(log2Up(size).W))
val wdata = Input(UInt(XLEN.W))
}
val refill = new Bundle() {
val wen = Input(Vec(size, Bool()))
val data = Input(UInt((cfg.blockBytes * 8).W))
}
val needForward = Input(Vec(nchannel, Vec(2, UInt(size.W))))
val forward = Vec(nchannel, Flipped(new LoadForwardQueryIO))
val rdata = Output(Vec(size, new LsqEntry))
// val debug = new Bundle() {
// val debug_data = Vec(LoadQueueSize, new LsqEntry)
// }
def wbWrite(channel: Int, index: UInt, wdata: LsqEntry): Unit = {
require(channel < nchannel && channel >= 0)
// need extra "this.wb(channel).wen := true.B"
this.wb(channel).index := index
this.wb(channel).wdata := wdata
}
def uncacheWrite(index: UInt, wdata: UInt): Unit = {
// need extra "this.uncache.wen := true.B"
this.uncache.index := index
this.uncache.wdata := wdata
}
def forwardQuery(channel: Int, paddr: UInt, needForward1: Data, needForward2: Data): Unit = {
this.needForward(channel)(0) := needForward1
this.needForward(channel)(1) := needForward2
this.forward(channel).paddr := paddr
}
// def refillWrite(ldIdx: Int): Unit = {
// }
// use "this.refill.wen(ldIdx) := true.B" instead
})
io := DontCare
val data = Reg(Vec(size, new LsqEntry))
// writeback to lq/sq
(0 until 2).map(i => {
when(io.wb(i).wen){
data(io.wb(i).index) := io.wb(i).wdata
}
})
when(io.uncache.wen){
data(io.uncache.index).data := io.uncache.wdata
}
// refill missed load
def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
val res = Wire(Vec(8, UInt(8.W)))
(0 until 8).foreach(i => {
res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i))
})
res.asUInt
}
// split dcache result into words
val words = VecInit((0 until blockWords) map { i => io.refill.data(DataBits * (i + 1) - 1, DataBits * i)})
(0 until size).map(i => {
when(io.refill.wen(i) ){
val refillData = words(get_word(data(i).paddr))
data(i).data := mergeRefillData(refillData, data(i).fwdData.asUInt, data(i).fwdMask.asUInt)
XSDebug("miss resp: pos %d addr %x data %x + %x(%b)\n", i.U, data(i).paddr, refillData, data(i).fwdData.asUInt, data(i).fwdMask.asUInt)
}
})
// forwarding
// Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
// (1) if they have the same flag, we need to check range(tail, sqIdx)
// (2) if they have different flags, we need to check range(tail, LoadQueueSize) and range(0, sqIdx)
// Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, LoadQueueSize))
// Forward2: Mux(same_flag, 0.U, range(0, sqIdx) )
// i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
// entry with larger index should have higher priority since its data is younger
// FIXME: old fwd logic for assertion, remove when rtl freeze
(0 until nchannel).map(i => {
val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B)))
val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
for (j <- 0 until size) {
val needCheck = io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
(0 until XLEN / 8).foreach(k => {
when (needCheck && data(j).mask(k)) {
when (io.needForward(i)(0)(j)) {
forwardMask1(k) := true.B
forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
}
when (io.needForward(i)(1)(j)) {
forwardMask2(k) := true.B
forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
}
XSDebug(io.needForward(i)(0)(j) || io.needForward(i)(1)(j),
p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " +
p"from ptr $j\n")
}
})
}
// merge forward lookup results
// forward2 is younger than forward1 and should have higher priority
val oldFwdResult = Wire(new FwdEntry)
(0 until XLEN / 8).map(k => {
oldFwdResult.mask(k) := RegNext(forwardMask1(k) || forwardMask2(k))
oldFwdResult.data(k) := RegNext(Mux(forwardMask2(k), forwardData2(k), forwardData1(k)))
})
// parallel fwd logic
val paddrMatch = Wire(Vec(size, Bool()))
val matchResultVec = Wire(Vec(size * 2, new FwdEntry))
def parallelFwd(xs: Seq[Data]): Data = {
ParallelOperation(xs, (a: Data, b: Data) => {
val l = a.asTypeOf(new FwdEntry)
val r = b.asTypeOf(new FwdEntry)
val res = Wire(new FwdEntry)
(0 until 8).map(p => {
res.mask(p) := l.mask(p) || r.mask(p)
res.data(p) := Mux(r.mask(p), r.data(p), l.data(p))
})
res
})
}
for (j <- 0 until size) {
paddrMatch(j) := io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
}
for (j <- 0 until size) {
val needCheck0 = RegNext(paddrMatch(j) && io.needForward(i)(0)(j))
val needCheck1 = RegNext(paddrMatch(j) && io.needForward(i)(1)(j))
(0 until XLEN / 8).foreach(k => {
matchResultVec(j).mask(k) := needCheck0 && data(j).mask(k)
matchResultVec(j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
matchResultVec(size + j).mask(k) := needCheck1 && data(j).mask(k)
matchResultVec(size + j).data(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
})
}
val parallelFwdResult = parallelFwd(matchResultVec).asTypeOf(new FwdEntry)
io.forward(i).forwardMask := parallelFwdResult.mask
io.forward(i).forwardData := parallelFwdResult.data
when(
oldFwdResult.mask.asUInt =/= parallelFwdResult.mask.asUInt
){
printf("%d: mask error: right: %b false %b\n", GTimer(), oldFwdResult.mask.asUInt, parallelFwdResult.mask.asUInt)
}
for (p <- 0 until 8) {
when(
oldFwdResult.data(p) =/= parallelFwdResult.data(p) && oldFwdResult.mask(p)
){
printf("%d: data "+p+" error: right: %x false %x\n", GTimer(), oldFwdResult.data(p), parallelFwdResult.data(p))
}
}
})
// data read
io.rdata := data
// io.debug.debug_data := data
}
// inflight miss block reqs
class InflightBlockInfo extends XSBundle {
val block_addr = UInt(PAddrBits.W)
......
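Review note: the deleted forwarding logic splits a wrap-around range of the circular queue into two masks, as the comments above describe: `needForward1` covers entries with the same flag bits as the tail and `needForward2` the wrapped part. A hedged sketch of how a caller could derive the two masks, assuming `utils.UIntToMask(x, len)` returns a len-bit mask with the low x bits set:

```scala
// range1 = Mux(sameFlag, [tail, sqIdx), [tail, size))
// range2 = Mux(sameFlag, empty,         [0, sqIdx))
def forwardRanges(tail: UInt, sqIdx: UInt, sameFlag: Bool, size: Int): (UInt, UInt) = {
  val sqIdxMask = UIntToMask(sqIdx, size) // entries [0, sqIdx)
  val tailMask  = UIntToMask(tail, size)  // entries [0, tail)
  val range1 = Mux(sameFlag, sqIdxMask & ~tailMask, ~tailMask)
  val range2 = Mux(sameFlag, 0.U(size.W), sqIdxMask)
  (range1, range2)
}
```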
......@@ -76,8 +76,10 @@ class LoadQueue extends XSModule
val uop = Reg(Vec(LoadQueueSize, new MicroOp))
// val data = Reg(Vec(LoadQueueSize, new LsRoqEntry))
val dataModule = Module(new LSQueueData(LoadQueueSize, LoadPipelineWidth))
val dataModule = Module(new LoadQueueData(LoadQueueSize, wbNumRead = LoadPipelineWidth, wbNumWrite = LoadPipelineWidth))
dataModule.io := DontCare
val vaddrModule = Module(new AsyncDataModuleTemplate(UInt(VAddrBits.W), LoadQueueSize, numRead = 1, numWrite = LoadPipelineWidth))
vaddrModule.io := DontCare
val allocated = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // lq entry has been allocated
val datavalid = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // data is valid
val writebacked = RegInit(VecInit(List.fill(LoadQueueSize)(false.B))) // inst has been writebacked to CDB
......@@ -144,7 +146,8 @@ class LoadQueue extends XSModule
* After cache refills, it will write back through arbiter with loadUnit.
*/
for (i <- 0 until LoadPipelineWidth) {
dataModule.io.wb(i).wen := false.B
dataModule.io.wb.wen(i) := false.B
vaddrModule.io.wen(i) := false.B
when(io.loadIn(i).fire()) {
when(io.loadIn(i).bits.miss) {
XSInfo(io.loadIn(i).valid, "load miss write to lq idx %d pc 0x%x vaddr %x paddr %x data %x mask %x forwardData %x forwardMask: %x mmio %x roll %x exc %x\n",
......@@ -179,16 +182,18 @@ class LoadQueue extends XSModule
datavalid(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
writebacked(loadWbIndex) := !io.loadIn(i).bits.miss && !io.loadIn(i).bits.mmio
val loadWbData = Wire(new LsqEntry)
val loadWbData = Wire(new LQDataEntry)
loadWbData.paddr := io.loadIn(i).bits.paddr
loadWbData.vaddr := io.loadIn(i).bits.vaddr
loadWbData.mask := io.loadIn(i).bits.mask
loadWbData.data := io.loadIn(i).bits.data // for mmio / misc / debug
loadWbData.data := io.loadIn(i).bits.data // fwd data
loadWbData.fwdMask := io.loadIn(i).bits.forwardMask
loadWbData.fwdData := io.loadIn(i).bits.forwardData
loadWbData.exception := io.loadIn(i).bits.uop.cf.exceptionVec.asUInt
dataModule.io.wbWrite(i, loadWbIndex, loadWbData)
dataModule.io.wb(i).wen := true.B
dataModule.io.wb.wen(i) := true.B
vaddrModule.io.waddr(i) := loadWbIndex
vaddrModule.io.wdata(i) := io.loadIn(i).bits.vaddr
vaddrModule.io.wen(i) := true.B
debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
......@@ -270,13 +275,13 @@ class LoadQueue extends XSModule
// Refill 64 bit in a cycle
// Refill data comes back from io.dcache.resp
dataModule.io.refill.valid := io.dcache.valid
dataModule.io.refill.paddr := io.dcache.bits.addr
dataModule.io.refill.data := io.dcache.bits.data
(0 until LoadQueueSize).map(i => {
val blockMatch = get_block_addr(dataModule.io.rdata(i).paddr) === get_block_addr(io.dcache.bits.addr)
dataModule.io.refill.wen(i) := false.B
when(allocated(i) && miss(i) && blockMatch && io.dcache.valid) {
dataModule.io.refill.wen(i) := true.B
dataModule.io.refill.refillMask(i) := allocated(i) && miss(i)
when(dataModule.io.refill.valid && dataModule.io.refill.refillMask(i) && dataModule.io.refill.matchMask(i)) {
datavalid(i) := true.B
miss(i) := false.B
}
......@@ -290,7 +295,7 @@ class LoadQueue extends XSModule
// Stage 0
// Generate writeback indexes
val loadWbSelVec = VecInit((0 until LoadQueueSize).map(i => {
allocated(i) && !writebacked(i) && (datavalid(i) || dataModule.io.refill.wen(i))
allocated(i) && !writebacked(i) && datavalid(i)
})).asUInt() // use uint instead vec to reduce verilog lines
val loadEvenSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i)}))
val loadOddSelVec = VecInit((0 until LoadQueueSize/2).map(i => {loadWbSelVec(2*i+1)}))
......@@ -329,10 +334,11 @@ class LoadQueue extends XSModule
// writeback data to cdb
(0 until LoadPipelineWidth).map(i => {
// data select
val rdata = dataModule.io.rdata(loadWbSel(i)).data
dataModule.io.wb.raddr(i) := loadWbSel(i)
val rdata = dataModule.io.wb.rdata(i).data
val seluop = uop(loadWbSel(i))
val func = seluop.ctrl.fuOpType
val raddr = dataModule.io.rdata(loadWbSel(i)).paddr
val raddr = dataModule.io.wb.rdata(i).paddr
val rdataSel = LookupTree(raddr(2, 0), List(
"b000".U -> rdata(63, 0),
"b001".U -> rdata(63, 8),
......@@ -349,13 +355,14 @@ class LoadQueue extends XSModule
//
// Int load writeback will finish (if not blocked) in one cycle
io.ldout(i).bits.uop := seluop
io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(loadWbSel(i)).exception.asBools
io.ldout(i).bits.uop.cf.exceptionVec := dataModule.io.wb.rdata(i).exception.asBools
io.ldout(i).bits.uop.lqIdx := loadWbSel(i).asTypeOf(new LqPtr)
io.ldout(i).bits.data := rdataPartialLoad
io.ldout(i).bits.redirectValid := false.B
io.ldout(i).bits.redirect := DontCare
io.ldout(i).bits.brUpdate := DontCare
io.ldout(i).bits.debug.isMMIO := debug_mmio(loadWbSel(i))
io.ldout(i).bits.debug.isPerfCnt := false.B
io.ldout(i).bits.fflags := DontCare
io.ldout(i).valid := loadWbSelV(i)
......@@ -364,8 +371,8 @@ class LoadQueue extends XSModule
io.ldout(i).bits.uop.roqIdx.asUInt,
io.ldout(i).bits.uop.lqIdx.asUInt,
io.ldout(i).bits.uop.cf.pc,
dataModule.io.rdata(loadWbSel(i)).paddr,
dataModule.io.rdata(loadWbSel(i)).data,
dataModule.io.debug(loadWbSel(i)).paddr,
dataModule.io.debug(loadWbSel(i)).data,
debug_mmio(loadWbSel(i))
)
}
......@@ -433,18 +440,14 @@ class LoadQueue extends XSModule
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
// check if load already in lq needs to be rolledback
val addrMatch = RegNext(VecInit((0 until LoadQueueSize).map(j => {
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3)
})))
dataModule.io.violation(i).paddr := io.storeIn(i).bits.paddr
dataModule.io.violation(i).mask := io.storeIn(i).bits.mask
val addrMaskMatch = RegNext(dataModule.io.violation(i).violationMask)
val entryNeedCheck = RegNext(VecInit((0 until LoadQueueSize).map(j => {
allocated(j) && toEnqPtrMask(j) && (datavalid(j) || miss(j))
})))
val overlap = RegNext(VecInit((0 until LoadQueueSize).map(j => {
val overlapVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k))
Cat(overlapVec).orR()
})))
val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => {
addrMatch(j) && entryNeedCheck(j) && overlap(j)
addrMaskMatch(j) && entryNeedCheck(j)
}))
val lqViolation = lqViolationVec.asUInt().orR()
val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask))
......@@ -552,18 +555,20 @@ class LoadQueue extends XSModule
io.roqDeqPtr === uop(deqPtr).roqIdx &&
!io.commits.isWalk
dataModule.io.uncache.raddr := deqPtr
io.uncache.req.bits.cmd := MemoryOpConstants.M_XRD
io.uncache.req.bits.addr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.data := dataModule.io.rdata(deqPtr).data
io.uncache.req.bits.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.addr := dataModule.io.uncache.rdata.paddr
io.uncache.req.bits.data := dataModule.io.uncache.rdata.data
io.uncache.req.bits.mask := dataModule.io.uncache.rdata.mask
io.uncache.req.bits.meta.id := DontCare
io.uncache.req.bits.meta.vaddr := DontCare
io.uncache.req.bits.meta.paddr := dataModule.io.rdata(deqPtr).paddr
io.uncache.req.bits.meta.paddr := dataModule.io.uncache.rdata.paddr
io.uncache.req.bits.meta.uop := uop(deqPtr)
io.uncache.req.bits.meta.mmio := true.B
io.uncache.req.bits.meta.tlb_miss := false.B
io.uncache.req.bits.meta.mask := dataModule.io.rdata(deqPtr).mask
io.uncache.req.bits.meta.mask := dataModule.io.uncache.rdata.mask
io.uncache.req.bits.meta.replay := false.B
io.uncache.resp.ready := true.B
......@@ -590,7 +595,8 @@ class LoadQueue extends XSModule
}
// Read vaddr for mem exception
io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.lqIdx.value).vaddr
vaddrModule.io.raddr(0) := io.exceptionAddr.lsIdx.lqIdx.value
io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)
// misprediction recovery / exception redirect
// invalidate lq term using robIdx
......@@ -646,7 +652,7 @@ class LoadQueue extends XSModule
for (i <- 0 until LoadQueueSize) {
if (i % 4 == 0) XSDebug("")
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.debug(i).paddr)
PrintFlag(allocated(i), "a")
PrintFlag(allocated(i) && datavalid(i), "v")
PrintFlag(allocated(i) && writebacked(i), "w")
......
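Review note: the rollback check above replaces the per-entry addrMatch/overlap loop with a single CAM lookup inside the data module: `violationMask(j)` is the AND of a paddr match (bits above the byte offset) and a byte-mask overlap. A minimal reference model of the per-entry predicate, for illustration (assumes the usual PAddrBits from the XiangShan parameters in scope):

```scala
def violates(storePaddr: UInt, storeMask: UInt,
             entryPaddr: UInt, entryMask: UInt): Bool = {
  val addrMatch   = storePaddr(PAddrBits - 1, 3) === entryPaddr(PAddrBits - 1, 3)
  val maskOverlap = (storeMask & entryMask).orR  // any byte written by both
  addrMatch && maskOverlap  // same predicate the old addrMatch && overlap computed
}
```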
package xiangshan.mem
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.LSUOpType
import xiangshan.mem._
import xiangshan.backend.roq.RoqPtr
class LQDataEntry extends XSBundle {
// val vaddr = UInt(VAddrBits.W)
val paddr = UInt(PAddrBits.W)
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val exception = UInt(16.W) // TODO: opt size
val fwdMask = Vec(8, Bool())
}
// Data module define
// These data modules are like SyncDataModuleTemplate, but support CAM-like operations
class PaddrModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters {
val io = IO(new Bundle {
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, UInt((PAddrBits).W)))
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
val violationMdata = Input(Vec(2, UInt((PAddrBits).W)))
val violationMmask = Output(Vec(2, Vec(numEntries, Bool())))
val refillMdata = Input(UInt((PAddrBits).W))
val refillMmask = Output(Vec(numEntries, Bool()))
})
val data = Reg(Vec(numEntries, UInt((PAddrBits).W)))
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
}
// below is the write ports (with priorities)
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
}
}
// content addressed match
for (i <- 0 until 2) {
for (j <- 0 until numEntries) {
io.violationMmask(i)(j) := io.violationMdata(i)(PAddrBits-1, 3) === data(j)(PAddrBits-1, 3)
}
}
for (j <- 0 until numEntries) {
io.refillMmask(j) := get_block_addr(io.refillMdata) === get_block_addr(data(j))
}
// This module should not be used when there are any write conflicts
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
}
}
}
class MaskModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule {
val io = IO(new Bundle {
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, UInt(8.W)))
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt(8.W)))
val violationMdata = Input(Vec(2, UInt((PAddrBits).W)))
val violationMmask = Output(Vec(2, Vec(numEntries, Bool())))
})
val data = Reg(Vec(numEntries, UInt(8.W)))
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
}
// below is the write ports (with priorities)
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
}
}
// content addressed match
for (i <- 0 until 2) {
for (j <- 0 until numEntries) {
io.violationMmask(i)(j) := (io.violationMdata(i) & data(j)).orR
}
}
// This module should not be used when there are any write conflicts
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
}
}
}
class CoredataModule(numEntries: Int, numRead: Int, numWrite: Int) extends XSModule with HasDCacheParameters {
val io = IO(new Bundle {
// data io
// read
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, UInt(XLEN.W)))
// address indexed write
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt(XLEN.W)))
// masked write
val mwmask = Input(Vec(numEntries, Bool()))
val refillData = Input(UInt((cfg.blockBytes * 8).W))
// fwdMask io
val fwdMaskWdata = Input(Vec(numWrite, UInt(8.W)))
val fwdMaskWen = Input(Vec(numWrite, Bool()))
// fwdMaskWaddr = waddr
// paddr io
// 3 bits in paddr need to be stored in CoredataModule for refilling
val paddrWdata = Input(Vec(numWrite, UInt((PAddrBits).W)))
val paddrWen = Input(Vec(numWrite, Bool()))
})
val data = Reg(Vec(numEntries, UInt(XLEN.W)))
val fwdMask = Reg(Vec(numEntries, UInt(8.W)))
val wordIndex = Reg(Vec(numEntries, UInt((blockOffBits - wordOffBits).W)))
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(io.raddr(i))
}
// below is the write ports (with priorities)
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
}
when (io.fwdMaskWen(i)) {
fwdMask(io.waddr(i)) := io.fwdMaskWdata(i)
}
when (io.paddrWen(i)) {
wordIndex(io.waddr(i)) := get_word(io.paddrWdata(i))
}
}
// masked write
// refill missed load
def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
val res = Wire(Vec(8, UInt(8.W)))
(0 until 8).foreach(i => {
res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i))
})
res.asUInt
}
// split dcache result into words
val words = VecInit((0 until blockWords) map { i => io.refillData(DataBits * (i + 1) - 1, DataBits * i)})
// refill data according to matchMask, refillMask and refill.valid
for (j <- 0 until numEntries) {
when (io.mwmask(j)) {
val refillData = words(wordIndex(j)) // TODO
data(j) := mergeRefillData(refillData, data(j), fwdMask(j))
}
}
// This module should not be used when there are any write conflicts
for (i <- 0 until numWrite) {
for (j <- i+1 until numWrite) {
assert(!(io.wen(i) && io.wen(j) && io.waddr(i) === io.waddr(j)))
}
}
}
class LoadQueueData(size: Int, wbNumRead: Int, wbNumWrite: Int) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
val io = IO(new Bundle() {
val wb = new Bundle() {
val wen = Vec(wbNumWrite, Input(Bool()))
val waddr = Input(Vec(wbNumWrite, UInt(log2Up(size).W)))
val wdata = Input(Vec(wbNumWrite, new LQDataEntry))
val raddr = Input(Vec(wbNumRead, UInt(log2Up(size).W)))
val rdata = Output(Vec(wbNumRead, new LQDataEntry))
}
val uncache = new Bundle() {
val wen = Input(Bool())
val waddr = Input(UInt(log2Up(size).W))
val wdata = Input(UInt(XLEN.W)) // only write back uncache data
val raddr = Input(UInt(log2Up(size).W))
val rdata = Output(new LQDataEntry)
}
val refill = new Bundle() {
val valid = Input(Bool())
val paddr = Input(UInt(PAddrBits.W))
val data = Input(UInt((cfg.blockBytes * 8).W))
val refillMask = Input(Vec(size, Bool()))
val matchMask = Output(Vec(size, Bool()))
}
val violation = Vec(StorePipelineWidth, new Bundle() {
val paddr = Input(UInt(PAddrBits.W))
val mask = Input(UInt(8.W))
val violationMask = Output(Vec(size, Bool()))
})
val debug = Output(Vec(size, new LQDataEntry))
def wbWrite(channel: Int, waddr: UInt, wdata: LQDataEntry): Unit = {
require(channel < wbNumWrite && channel >= 0)
// need extra "this.wb(channel).wen := true.B"
this.wb.waddr(channel) := waddr
this.wb.wdata(channel) := wdata
}
def uncacheWrite(waddr: UInt, wdata: UInt): Unit = {
// need extra "this.uncache.wen := true.B"
this.uncache.waddr := waddr
this.uncache.wdata := wdata
}
// def refillWrite(ldIdx: Int): Unit = {
// }
// use "this.refill.wen(ldIdx) := true.B" instead
})
// val data = Reg(Vec(size, new LQDataEntry))
// data module
val paddrModule = Module(new PaddrModule(size, numRead = 3, numWrite = 2))
val maskModule = Module(new MaskModule(size, numRead = 3, numWrite = 2))
val exceptionModule = Module(new AsyncDataModuleTemplate(UInt(16.W), size, numRead = 3, numWrite = 2))
val coredataModule = Module(new CoredataModule(size, numRead = 3, numWrite = 3))
// read data
// read port 0 -> wbNumRead-1
(0 until wbNumRead).map(i => {
paddrModule.io.raddr(i) := io.wb.raddr(i)
maskModule.io.raddr(i) := io.wb.raddr(i)
exceptionModule.io.raddr(i) := io.wb.raddr(i)
coredataModule.io.raddr(i) := io.wb.raddr(i)
io.wb.rdata(i).paddr := paddrModule.io.rdata(i)
io.wb.rdata(i).mask := maskModule.io.rdata(i)
io.wb.rdata(i).data := coredataModule.io.rdata(i)
io.wb.rdata(i).exception := exceptionModule.io.rdata(i)
io.wb.rdata(i).fwdMask := DontCare
})
// read port wbNumRead
paddrModule.io.raddr(wbNumRead) := io.uncache.raddr
maskModule.io.raddr(wbNumRead) := io.uncache.raddr
exceptionModule.io.raddr(wbNumRead) := io.uncache.raddr
coredataModule.io.raddr(wbNumRead) := io.uncache.raddr
io.uncache.rdata.paddr := paddrModule.io.rdata(wbNumRead)
io.uncache.rdata.mask := maskModule.io.rdata(wbNumRead)
io.uncache.rdata.data := coredataModule.io.rdata(wbNumRead)
io.uncache.rdata.exception := exceptionModule.io.rdata(wbNumRead)
io.uncache.rdata.fwdMask := DontCare
// write data
// write port 0 -> wbNumWrite-1
(0 until wbNumWrite).map(i => {
paddrModule.io.wen(i) := false.B
maskModule.io.wen(i) := false.B
exceptionModule.io.wen(i) := false.B
coredataModule.io.wen(i) := false.B
coredataModule.io.fwdMaskWen(i) := false.B
coredataModule.io.paddrWen(i) := false.B
paddrModule.io.waddr(i) := io.wb.waddr(i)
maskModule.io.waddr(i) := io.wb.waddr(i)
exceptionModule.io.waddr(i) := io.wb.waddr(i)
coredataModule.io.waddr(i) := io.wb.waddr(i)
paddrModule.io.wdata(i) := io.wb.wdata(i).paddr
maskModule.io.wdata(i) := io.wb.wdata(i).mask
exceptionModule.io.wdata(i) := io.wb.wdata(i).exception
coredataModule.io.wdata(i) := io.wb.wdata(i).data
coredataModule.io.fwdMaskWdata(i) := io.wb.wdata(i).fwdMask.asUInt
coredataModule.io.paddrWdata(i) := io.wb.wdata(i).paddr
when(io.wb.wen(i)){
paddrModule.io.wen(i) := true.B
maskModule.io.wen(i) := true.B
exceptionModule.io.wen(i) := true.B
coredataModule.io.wen(i) := true.B
coredataModule.io.fwdMaskWen(i) := true.B
coredataModule.io.paddrWen(i) := true.B
}
})
// write port wbNumWrite
// exceptionModule.io.wen(wbNumWrite) := false.B
coredataModule.io.wen(wbNumWrite) := io.uncache.wen
coredataModule.io.fwdMaskWen(wbNumWrite) := false.B
coredataModule.io.paddrWen(wbNumWrite) := false.B
coredataModule.io.waddr(wbNumWrite) := io.uncache.waddr
coredataModule.io.fwdMaskWdata(wbNumWrite) := DontCare
coredataModule.io.paddrWdata(wbNumWrite) := DontCare
coredataModule.io.wdata(wbNumWrite) := io.uncache.wdata
// mem access violation check, gen violationMask
(0 until StorePipelineWidth).map(i => {
paddrModule.io.violationMdata(i) := io.violation(i).paddr
maskModule.io.violationMdata(i) := io.violation(i).mask
io.violation(i).violationMask := (paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt).asBools
// VecInit((0 until size).map(j => {
// val addrMatch = io.violation(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
// val violationVec = (0 until 8).map(k => data(j).mask(k) && io.violation(i).mask(k))
// Cat(violationVec).orR() && addrMatch
// }))
})
// refill missed load
def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
val res = Wire(Vec(8, UInt(8.W)))
(0 until 8).foreach(i => {
res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i))
})
res.asUInt
}
// gen paddr match mask
paddrModule.io.refillMdata := io.refill.paddr
io.refill.matchMask := paddrModule.io.refillMmask
// per-entry form: io.refill.matchMask(i) := get_block_addr(data(i).paddr) === get_block_addr(io.refill.paddr)
// refill data according to matchMask, refillMask and refill.valid
coredataModule.io.refillData := io.refill.data
(0 until size).map(i => {
coredataModule.io.mwmask(i) := io.refill.valid && io.refill.matchMask(i) && io.refill.refillMask(i)
})
// debug data read
io.debug := DontCare
}
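Review note: `LoadQueueData` decomposes the old monolithic `Reg(Vec(size, LsqEntry))` into per-field modules so each field gets exactly the ports and CAM operations it needs: three read ports (two writeback, one uncache), two address-indexed write ports, plus an extra data write port for uncache responses. Note that `io.debug` is left `DontCare` here, so the per-entry debug reads used for logging in LoadQueue come back undefined. A hedged elaboration smoke test (assuming the global Parameters are initialized as in the test harnesses elsewhere in the repo; 64 entries chosen as an example):

```scala
// Emits Verilog for the new module with an example LoadQueue configuration.
object LoadQueueDataGen extends App {
  (new chisel3.stage.ChiselStage).emitVerilog(
    new LoadQueueData(64, wbNumRead = 2, wbNumWrite = 2)
  )
}
```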
......@@ -132,7 +132,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
writebacked(stWbIndex) := hasWritebacked
pending(stWbIndex) := !hasWritebacked // valid mmio require
val storeWbData = Wire(new LsqEntry)
val storeWbData = Wire(new SQDataEntry)
storeWbData := DontCare
storeWbData.paddr := io.storeIn(i).bits.paddr
storeWbData.mask := io.storeIn(i).bits.mask
......@@ -264,6 +264,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
io.mmioStout.bits.redirect := DontCare
io.mmioStout.bits.brUpdate := DontCare
io.mmioStout.bits.debug.isMMIO := true.B
io.mmioStout.bits.debug.isPerfCnt := false.B
io.mmioStout.bits.fflags := DontCare
when (io.mmioStout.fire()) {
writebacked(deqPtr) := true.B
......@@ -322,7 +323,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
}
// Read vaddr for mem exception
io.exceptionAddr.vaddr := exceptionModule.io.rdata(0)
io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)
// misprediction recovery / exception redirect
// invalidate sq term using robIdx
......
......@@ -281,6 +281,7 @@ class LoadUnit extends XSModule with HasLoadHelper {
intHitLoadOut.bits.redirect := DontCare
intHitLoadOut.bits.brUpdate := DontCare
intHitLoadOut.bits.debug.isMMIO := load_s2.io.out.bits.mmio
intHitLoadOut.bits.debug.isPerfCnt := false.B
intHitLoadOut.bits.fflags := DontCare
load_s2.io.out.ready := true.B
......
......@@ -115,6 +115,7 @@ class StoreUnit_S2 extends XSModule {
io.stout.bits.redirect := DontCare
io.stout.bits.brUpdate := DontCare
io.stout.bits.debug.isMMIO := io.in.bits.mmio
io.stout.bits.debug.isPerfCnt := false.B
io.stout.bits.fflags := DontCare
}
......
......@@ -13,6 +13,10 @@ trait HasSbufferCst extends HasXSParameter {
def s_prepare = 2.U(2.W)
def s_inflight = 3.U(2.W)
val evictCycle = 8192
require(isPow2(evictCycle))
val countBits = 1 + log2Up(evictCycle)
val SbufferIndexWidth: Int = log2Up(StoreBufferSize)
// paddr = tag + offset
val CacheLineBytes: Int = CacheLineSize / 8
......@@ -37,7 +41,6 @@ class SbufferLine extends SbufferBundle {
class ChooseReplace(nWay: Int) extends XSModule {
val io = IO(new Bundle{
val mask = Vec(nWay, Input(Bool()))
val fire = Input(Bool())
val way = Output(UInt(nWay.W))
val flush = Input(Bool())
})
......@@ -49,12 +52,9 @@ class ChooseReplace(nWay: Int) extends XSModule {
val nextWay = PriorityEncoder(Cat(stateMask, loMask))(log2Up(nWay)-1, 0)
XSDebug(p"nextWay[${nextWay}]\n")
wayReg := nextWay
io.way := wayReg
when(io.fire){
wayReg := nextWay
}
when(io.flush){
wayReg := 0.U
}
......@@ -116,11 +116,11 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val buffer = Mem(StoreBufferSize, new SbufferLine)
val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(s_invalid)))
val cohCount = Reg(Vec(StoreBufferSize, UInt(countBits.W)))
/*
idle --[flush]--> drain_sbuffer --[buf empty]--> idle
--[buf full]--> replace --[dcache resp]--> idle
*/
*/
val x_idle :: x_drain_sbuffer :: x_replace :: Nil = Enum(3)
val sbuffer_state = RegInit(x_idle)
......@@ -150,7 +150,6 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val invalidCount = RegInit(StoreBufferSize.U((log2Up(StoreBufferSize) + 1).W))
val validCount = RegInit(0.U((log2Up(StoreBufferSize) + 1).W))
val full = invalidCount === 0.U
// val oneSpace = invalidCount === 1.U
val bufferRead = VecInit((0 until StoreBufferSize).map(i => buffer(i)))
val stateRead = VecInit((0 until StoreBufferSize).map(i => stateVec(i)))
......@@ -171,8 +170,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
val lru = Module(new ChooseReplace(StoreBufferSize))
val evictionIdx = lru.io.way
lru.io.fire := false.B
lru.io.mask := stateRead.map(_ === s_valid)
val tags = io.in.map(in => getTag(in.bits.addr))
......@@ -212,6 +210,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
def wordReqToBufLine(req: DCacheWordReq, tag: UInt, insertIdx: UInt, wordOffset: UInt, flushMask: Bool): Unit = {
stateUpdate(insertIdx) := s_valid
tagUpdate(insertIdx) := tag
cohCount(insertIdx) := 0.U
when(flushMask){
for(j <- 0 until CacheLineWords){
......@@ -230,6 +229,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
}
def mergeWordReq(req: DCacheWordReq, mergeIdx:UInt, wordOffset:UInt): Unit = {
cohCount(mergeIdx) := 0.U
for(i <- 0 until DataBytes){
when(req.mask(i)){
maskUpdate(mergeIdx)(wordOffset)(i) := true.B
......@@ -267,7 +267,7 @@ class NewSbuffer extends XSModule with HasSbufferCst {
for(i <- 0 until StoreBufferSize){
XSDebug(stateVec(i)=/=s_invalid,
p"[$i] state:${stateVec(i)} buf:${bufferRead(i)}\n"
p"[$i] timeout:${cohCount(i)(countBits-1)} state:${stateVec(i)} buf:${bufferRead(i)}\n"
)
}
......@@ -338,16 +338,14 @@ class NewSbuffer extends XSModule with HasSbufferCst {
//
// evictionEntry.bits := evictionIdx
val prepareValid = ((do_eviction && sbuffer_state === x_replace)|| (sbuffer_state === x_drain_sbuffer)) &&
val prepareValid = ((do_eviction && sbuffer_state === x_replace) || (sbuffer_state === x_drain_sbuffer)) &&
stateVec(evictionIdx)===s_valid &&
noSameBlockInflight(evictionIdx)
when(prepareValid){
stateVec(evictionIdx) := s_prepare
lru.io.fire := true.B
}
val prepareMask = stateVec.map(s => s === s_prepare)
val (prepareIdx, prepareEn) = PriorityEncoderWithFlag(prepareMask)
......@@ -383,6 +381,21 @@ class NewSbuffer extends XSModule with HasSbufferCst {
XSDebug(p"needSpace[$needSpace] invalidCount[$invalidCount] validCount[$validCount]\n")
//-------------------------cohCount-----------------------------
// insert and merge: cohCount := 0
// every cycle: cohCount := cohCount + 1
// when cohCount(countBits-1) === 1, evict the line
for(i <- 0 until StoreBufferSize){
when(stateVec(i) === s_valid){
when(cohCount(i)(countBits-1)){
assert(stateVec(i) === s_valid)
stateUpdate(i) := s_prepare
}
cohCount(i) := cohCount(i)+1.U
}
}
// ---------------------- Load Data Forward ---------------------
for ((forward, i) <- io.forward.zipWithIndex) {
......
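Review note: `countBits = 1 + log2Up(evictCycle)` gives the age counter one bit more than needed to count to evictCycle, so for a line reset to 0 on insert or merge the MSB first becomes 1 exactly evictCycle (= 8192) cycles later, at which point the line is moved to s_prepare for write-back. A self-contained sketch of the pattern (the module name is illustrative):

```scala
import chisel3._
import chisel3.util._

class TimeoutCounter(evictCycle: Int = 8192) extends Module {
  require(isPow2(evictCycle))
  val countBits = 1 + log2Up(evictCycle)
  val io = IO(new Bundle {
    val touch   = Input(Bool())   // insert/merge resets the age to 0
    val timeout = Output(Bool())  // set evictCycle cycles after the last touch
  })
  val cnt = RegInit(0.U(countBits.W))
  cnt := Mux(io.touch, 0.U, cnt + 1.U)
  // In the Sbuffer the line leaves s_valid once this fires, so the counter
  // never wraps back below evictCycle while the line still looks young.
  io.timeout := cnt(countBits - 1)
}
```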
......@@ -44,9 +44,87 @@ class SbufferTest extends AnyFlatSpec
top.Parameters.set(top.Parameters.debugParameters)
it should "random req" in {
// it should "random req" in {
// test(new SbufferWapper{AddSinks()}){ c =>
//
// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={
// (0 until StorePipelineWidth).map { i =>
// c.io.in(i).valid.poke(true.B)
// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
// _.mask -> mask(i),
// _.addr -> addr(i),
// _.data -> data(i)
// ))
// }
// c.clock.step(1)
// for (in <- c.io.in){ in.valid.poke(false.B)}
// }
//
// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
// (0 until LoadPipelineWidth).map{ i =>
// c.io.forward(i).paddr.poke(addr(i))
// c.io.forward(i).mask.poke(mask(i))
// if(c.io.in(i).ready.peek() == true.B) {
// (0 until 8).map { j =>
// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
// }
// }
// }
// }
//
// val TEST_SIZE = 100
// for(i <- 0 until TEST_SIZE) {
// val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size
// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
// val mask = Seq.fill(StorePipelineWidth)(0xff.U)
// store_enq(addr, data, mask)
// forward_req_and_resp(addr, data, mask)
// }
// }
// }
//
// it should "sequence req" in {
// test(new SbufferWapper{AddSinks()}){ c =>
//
// def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) = {
// (0 until StorePipelineWidth).map { i =>
// c.io.in(i).valid.poke(true.B)
// c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
// _.mask -> mask(i),
// _.addr -> addr(i),
// _.data -> data(i)
// ))
// }
// c.clock.step(1)
// for (in <- c.io.in){ in.valid.poke(false.B)}
// }
//
// def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
// (0 until LoadPipelineWidth).map{ i =>
// c.io.forward(i).paddr.poke(addr(i))
// c.io.forward(i).mask.poke(mask(i))
// if(c.io.in(i).ready.peek() == true.B) {
// (0 until 8).map { j =>
// c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
// }
// }
// }
// }
//
// val TEST_SIZE = 100
// val start_addr = Random.nextLong() & 0x7ffffffff8L
// for(i <- 0 until TEST_SIZE) {
// val addr = Seq(((i<<4) + start_addr).U,((i<<4)+8+start_addr).U)
// val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
// val mask = Seq.fill(StorePipelineWidth)(0xff.U)
// store_enq(addr, data, mask)
// forward_req_and_resp(addr, data, mask)
// }
// }
// }
it should "sbuffer coherence" in {
test(new SbufferWapper{AddSinks()}){ c =>
def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) ={
(0 until StorePipelineWidth).map { i =>
c.io.in(i).valid.poke(true.B)
......@@ -59,7 +137,6 @@ class SbufferTest extends AnyFlatSpec
c.clock.step(1)
for (in <- c.io.in){ in.valid.poke(false.B)}
}
def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
(0 until LoadPipelineWidth).map{ i =>
c.io.forward(i).paddr.poke(addr(i))
......@@ -71,55 +148,16 @@ class SbufferTest extends AnyFlatSpec
}
}
}
val TEST_SIZE = 100
val TEST_SIZE = 10
for(i <- 0 until TEST_SIZE) {
val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size
val addr = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7ffffffff8L).U)// align to block size
val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
val mask = Seq.fill(StorePipelineWidth)(0xff.U)
store_enq(addr, data, mask)
forward_req_and_resp(addr, data, mask)
}
}
}
it should "sequence req" in {
test(new SbufferWapper{AddSinks()}){ c =>
def store_enq(addr: Seq[UInt], data: Seq[UInt], mask: Seq[UInt]) = {
(0 until StorePipelineWidth).map { i =>
c.io.in(i).valid.poke(true.B)
c.io.in(i).bits.pokePartial(chiselTypeOf(c.io.in(i).bits).Lit(
_.mask -> mask(i),
_.addr -> addr(i),
_.data -> data(i)
))
}
c.clock.step(1)
for (in <- c.io.in){ in.valid.poke(false.B)}
}
def forward_req_and_resp(addr: Seq[UInt], data: Seq[UInt], mask:Seq[UInt]) = {
(0 until LoadPipelineWidth).map{ i =>
c.io.forward(i).paddr.poke(addr(i))
c.io.forward(i).mask.poke(mask(i))
if(c.io.in(i).ready.peek() == true.B) {
(0 until 8).map { j =>
c.io.forward(i).forwardData(j).expect(data(i)(j * 8 + 7, j * 8))
}
}
}
}
val TEST_SIZE = 100
val start_addr = Random.nextLong() & 0x7ffffffff8L
for(i <- 0 until TEST_SIZE) {
val addr = Seq(((i<<4) + start_addr).U,((i<<4)+8+start_addr).U)
val data = Seq.fill(StorePipelineWidth)((Random.nextLong() & 0x7fffffffffffffffL).U)
val mask = Seq.fill(StorePipelineWidth)(0xff.U)
store_enq(addr, data, mask)
forward_req_and_resp(addr, data, mask)
}
c.clock.step(512 + 10)
}
}
}