未验证 提交 ca18a0b4 编写于 作者: W William Wang 提交者: GitHub

mem: add Zicbom and Zicboz support (#1145)

Now we merge them for timing optimization; unit tests will be added later.
上级 1af89150
......@@ -35,7 +35,7 @@ abstract trait DecodeConstants {
def Y = BitPat("b1")
def decodeDefault: List[BitPat] = // illegal instruction
// srcType(0) srcType(1) srcType(2) fuType fuOpType rfWen
// srcType(0) srcType(1) srcType(2) fuType fuOpType rfWen
// | | | | | | fpWen
// | | | | | | | isXSTrap
// | | | | | | | | noSpecExec
......@@ -389,6 +389,18 @@ object FDivSqrtDecode extends DecodeConstants {
)
}
/**
 * CBO decode
 *
 * Decode rows for the cache-block operations CBO_ZERO, CBO_CLEAN, CBO_FLUSH
 * and CBO_INVAL. The four rows are identical except for the fuOpType column:
 * the first source is a register, the other two sources are don't-care, the
 * op is routed to FuType.stu, every Y/N control flag is N and the immediate
 * selector is SelImm.IMM_S.
 */
object CBODecode extends DecodeConstants {
  // Shared row layout; only the LSU op type differs between the CBO instructions.
  private def cboRow(op: BitPat): List[BitPat] =
    List(SrcType.reg, SrcType.DC, SrcType.DC, FuType.stu, op, N, N, N, N, N, N, N, SelImm.IMM_S)

  val table: Array[(BitPat, List[BitPat])] = Array(
    CBO_ZERO  -> cboRow(LSUOpType.cbo_zero),
    CBO_CLEAN -> cboRow(LSUOpType.cbo_clean),
    CBO_FLUSH -> cboRow(LSUOpType.cbo_flush),
    CBO_INVAL -> cboRow(LSUOpType.cbo_inval)
  )
}
/**
* XiangShan Trap Decode constants
*/
......@@ -522,7 +534,7 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
ctrl_flow := io.enq.ctrl_flow
val decode_table = XDecode.table ++ FDecode.table ++ FDivSqrtDecode.table ++ X64Decode.table ++ XSTrapDecode.table ++ BDecode.table
val decode_table = XDecode.table ++ FDecode.table ++ FDivSqrtDecode.table ++ X64Decode.table ++ XSTrapDecode.table ++ BDecode.table ++ CBODecode.table
// output
cf_ctrl.cf := ctrl_flow
......
......@@ -73,6 +73,10 @@ object Instructions {
def SH = BitPat("b?????????????????001?????0100011")
def SW = BitPat("b?????????????????010?????0100011")
def SD = BitPat("b?????????????????011?????0100011")
def CBO_ZERO = BitPat("b000000000100?????010000000001111")
def CBO_CLEAN = BitPat("b000000000001?????010000000001111")
def CBO_FLUSH = BitPat("b000000000010?????010000000001111")
def CBO_INVAL = BitPat("b000000000000?????010000000001111")
def FENCE = BitPat("b?????????????????000?????0001111")
def FENCE_I = BitPat("b?????????????????001?????0001111")
def MUL = BitPat("b0000001??????????000?????0110011")
......
......@@ -74,7 +74,7 @@ case class DCacheParameters
// | Above index | Set | Bank | Offset |
// --------------------------------------
// | | | |
// | | | DCacheWordOffset
// | | | 0
// | | DCacheBankOffset
// | DCacheSetOffset
// DCacheAboveIndexOffset
......@@ -114,19 +114,23 @@ trait HasDCacheParameters extends HasL1CacheParameters {
val DCacheWays = cacheParams.nWays
val DCacheBanks = 8
val DCacheSRAMRowBits = 64 // hardcoded
val DCacheWordBits = 64 // hardcoded
val DCacheWordBytes = DCacheWordBits / 8
val DCacheLineBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
val DCacheLineBytes = DCacheLineBits / 8
val DCacheLineWords = DCacheLineBits / 64 // TODO
val DCacheSizeBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
val DCacheSizeBytes = DCacheSizeBits / 8
val DCacheSizeWords = DCacheSizeBits / 64 // TODO
val DCacheSameVPAddrLength = 12
val DCacheSRAMRowBytes = DCacheSRAMRowBits / 8
val DCacheWordOffset = 0
val DCacheBankOffset = DCacheWordOffset + log2Up(DCacheSRAMRowBytes)
val DCacheWordOffset = log2Up(DCacheWordBytes)
val DCacheBankOffset = log2Up(DCacheSRAMRowBytes)
val DCacheSetOffset = DCacheBankOffset + log2Up(DCacheBanks)
val DCacheAboveIndexOffset = DCacheSetOffset + log2Up(DCacheSets)
val DCacheTagOffset = DCacheAboveIndexOffset min DCacheSameVPAddrLength
val DCacheLineOffset = DCacheSetOffset
val DCacheIndexOffset = DCacheBankOffset
def addr_to_dcache_bank(addr: UInt) = {
......@@ -206,6 +210,7 @@ class DCacheLineReq(implicit p: Parameters) extends DCacheBundle
// A DCacheWordReq extended with two extra fields: the virtual address of the
// access and a write-line flag.
class DCacheWordReqWithVaddr(implicit p: Parameters) extends DCacheWordReq {
// Virtual address, VAddrBits wide.
val vaddr = UInt(VAddrBits.W)
// NOTE(review): presumably set when the request writes a whole cache line
// (e.g. for cbo.zero) rather than a single word -- confirm against the
// store queue / sbuffer code that drives and consumes this flag.
val wline = Bool()
}
class DCacheWordResp(implicit p: Parameters) extends DCacheBundle
......
......@@ -55,6 +55,7 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle {
val mask = UInt(8.W)
val data = UInt((XLEN+1).W)
val uop = new MicroOp
val wlineflag = Bool() // store write the whole cache line
val miss = Bool()
val tlbMiss = Bool()
......
......@@ -223,10 +223,12 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
paddrModule.io.waddr(i) := stWbIndex
paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr
paddrModule.io.wlineflag(i) := io.storeIn(i).bits.wlineflag
paddrModule.io.wen(i) := true.B
vaddrModule.io.waddr(i) := stWbIndex
vaddrModule.io.wdata(i) := io.storeIn(i).bits.vaddr
vaddrModule.io.wlineflag(i) := io.storeIn(i).bits.wlineflag
vaddrModule.io.wen(i) := true.B
debug_paddr(paddrModule.io.waddr(i)) := paddrModule.io.wdata(i)
......@@ -258,7 +260,10 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
datavalid(stWbIndex) := true.B
dataModule.io.data.waddr(i) := stWbIndex
dataModule.io.data.wdata(i) := genWdata(io.storeDataIn(i).bits.data, io.storeDataIn(i).bits.uop.ctrl.fuOpType(1,0))
dataModule.io.data.wdata(i) := Mux(io.storeDataIn(i).bits.uop.ctrl.fuOpType === LSUOpType.cbo_zero,
0.U,
genWdata(io.storeDataIn(i).bits.data, io.storeDataIn(i).bits.uop.ctrl.fuOpType(1,0))
)
dataModule.io.data.wen(i) := true.B
debug_data(dataModule.io.data.waddr(i)) := dataModule.io.data.wdata(i)
......@@ -393,6 +398,17 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
io.uncache.req.bits.data := dataModule.io.rdata(0).data
io.uncache.req.bits.mask := dataModule.io.rdata(0).mask
// CBO op type check can be delayed for 1 cycle,
// as uncache op will not start in s_idle
val cbo_mmio_addr = paddrModule.io.rdata(0) >> 2 << 2 // clear lowest 2 bits for op
val cbo_mmio_op = 0.U //TODO
// NOTE(review): cbo_mmio_data is computed here but is not used by the
// statements below; the request data is driven with the raw
// paddrModule.io.rdata(0) (low 2 bits not cleared) -- confirm which one
// is intended once cbo_mmio_op is implemented.
val cbo_mmio_data = cbo_mmio_addr | cbo_mmio_op
// When the op type at deqPtr (registered for one cycle) is a CBO op,
// re-drive the addr/data/mask fields of the uncache request.
// LSUOpType.isCbo only checks op(3, 0) === "b1111", so it is true for
// cbo_zero as well as cbo_clean / cbo_flush / cbo_inval.
when(RegNext(LSUOpType.isCbo(uop(deqPtr).ctrl.fuOpType))){
io.uncache.req.bits.addr := DontCare // TODO
// the physical address read from paddrModule is sent in the data field
io.uncache.req.bits.data := paddrModule.io.rdata(0)
io.uncache.req.bits.mask := DontCare // TODO
}
io.uncache.req.bits.id := DontCare
io.uncache.req.bits.instrtype := DontCare
......@@ -463,11 +479,14 @@ class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParamete
io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
// Note that store data/addr should both be valid after store's commit
assert(!io.sbuffer(i).valid || allvalid(ptr))
// Write line request should have all 1 mask
assert(!(io.sbuffer(i).valid && io.sbuffer(i).bits.wline && !io.sbuffer(i).bits.mask.andR))
io.sbuffer(i).bits.cmd := MemoryOpConstants.M_XWR
io.sbuffer(i).bits.addr := paddrModule.io.rdata(i)
io.sbuffer(i).bits.vaddr := vaddrModule.io.rdata(i)
io.sbuffer(i).bits.data := dataModule.io.rdata(i).data
io.sbuffer(i).bits.mask := dataModule.io.rdata(i).mask
io.sbuffer(i).bits.wline := paddrModule.io.rlineflag(i)
io.sbuffer(i).bits.id := DontCare
io.sbuffer(i).bits.instrtype := DontCare
......
......@@ -33,33 +33,42 @@ class SQAddrModule(dataWidth: Int, numEntries: Int, numRead: Int, numWrite: Int,
val io = IO(new Bundle {
val raddr = Input(Vec(numRead, UInt(log2Up(numEntries).W)))
val rdata = Output(Vec(numRead, UInt(dataWidth.W)))
val rlineflag = Output(Vec(numRead, Bool()))
val wen = Input(Vec(numWrite, Bool()))
val waddr = Input(Vec(numWrite, UInt(log2Up(numEntries).W)))
val wdata = Input(Vec(numWrite, UInt(dataWidth.W)))
val wlineflag = Input(Vec(numWrite, Bool()))
val forwardMdata = Input(Vec(numForward, UInt(dataWidth.W)))
val forwardMmask = Output(Vec(numForward, Vec(numEntries, Bool())))
val debug_data = Output(Vec(numEntries, UInt(dataWidth.W)))
})
val data = Reg(Vec(numEntries, UInt(dataWidth.W)))
val lineflag = Reg(Vec(numEntries, Bool())) // cache line match flag
// if lineflag == true, this address points to a whole cacheline
io.debug_data := data
// read ports
for (i <- 0 until numRead) {
io.rdata(i) := data(RegNext(io.raddr(i)))
io.rlineflag(i) := lineflag(RegNext(io.raddr(i)))
}
// below is the write ports (with priorities)
for (i <- 0 until numWrite) {
when (io.wen(i)) {
data(io.waddr(i)) := io.wdata(i)
lineflag(io.waddr(i)) := io.wlineflag(i)
}
}
// content addressed match
for (i <- 0 until numForward) {
for (j <- 0 until numEntries) {
io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
// io.forwardMmask(i)(j) := io.forwardMdata(i)(dataWidth-1, 3) === data(j)(dataWidth-1, 3)
val linehit = io.forwardMdata(i)(dataWidth-1, DCacheLineOffset) === data(j)(dataWidth-1, DCacheLineOffset)
val wordhit = io.forwardMdata(i)(DCacheLineOffset-1, DCacheWordOffset) === data(j)(DCacheLineOffset-1, DCacheWordOffset)
io.forwardMmask(i)(j) := linehit && (wordhit || lineflag(j))
}
}
......
......@@ -66,6 +66,7 @@ class StoreUnit_S0(implicit p: Parameters) extends XSModule {
io.out.bits.rsIdx := io.rsIdx
io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.bits.isFirstIssue := io.isFirstIssue
io.out.bits.wlineflag := io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_zero
io.out.valid := io.in.valid
io.in.ready := io.out.ready
......@@ -98,9 +99,14 @@ class StoreUnit_S1(implicit p: Parameters) extends XSModule {
val rsFeedback = ValidIO(new RSFeedback)
})
// mmio cbo decoder
val is_mmio_cbo = io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_clean ||
io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_flush ||
io.in.bits.uop.ctrl.fuOpType === LSUOpType.cbo_inval
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = io.dtlbResp.bits.mmio
val s1_mmio = io.dtlbResp.bits.mmio || is_mmio_cbo
val s1_exception = selectStore(io.out.bits.uop.cf.exceptionVec, false).asUInt.orR
io.in.ready := true.B
......
......@@ -67,6 +67,7 @@ class DataWriteReq(implicit p: Parameters) extends SbufferBundle {
val mask = UInt((DataBits/8).W)
val data = UInt(DataBits.W)
val wordOffset = UInt(WordOffsetWidth.W)
val wline = Bool()
}
class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst {
......@@ -81,9 +82,14 @@ class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst
for(i <- 0 until StorePipelineWidth) {
when(req(i).valid){
for(j <- 0 until DataBytes){
when(req(i).bits.mask(j)){
data(req(i).bits.idx)(req(i).bits.wordOffset)(j) := req(i).bits.data(j*8+7, j*8)
for(word <- 0 until CacheLineWords){
for(byte <- 0 until DataBytes){
when(
req(i).bits.mask(byte) && (req(i).bits.wordOffset(WordsWidth-1, 0) === word.U) ||
req(i).bits.wline
){
data(req(i).bits.idx)(word)(byte) := req(i).bits.data(byte*8+7, byte*8)
}
}
}
}
......@@ -272,6 +278,7 @@ class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst
writeReq(i).bits.wordOffset := wordOffset
writeReq(i).bits.mask := in.bits.mask
writeReq(i).bits.data := in.bits.data
writeReq(i).bits.wline := in.bits.wline
val insertIdx = if(i == 0) firstInsertIdx else secondInsertIdx
val flushMask = if(i == 0) true.B else !sameTag
accessIdx(i).valid := RegNext(in.fire())
......
......@@ -351,20 +351,27 @@ package object xiangshan {
object LSUOpType {
// normal load/store
// bit(1, 0) are size
def lb = "b000000".U
def lh = "b000001".U
def lw = "b000010".U
def ld = "b000011".U
def lbu = "b000100".U
def lhu = "b000101".U
def lwu = "b000110".U
def sb = "b001000".U
def sh = "b001001".U
def sw = "b001010".U
def sd = "b001011".U
def lb = "b000000".U
def lh = "b000001".U
def lw = "b000010".U
def ld = "b000011".U
def lbu = "b000100".U
def lhu = "b000101".U
def lwu = "b000110".U
def sb = "b001000".U
def sh = "b001001".U
def sw = "b001010".U
def sd = "b001011".U
def cbo_zero = "b001111".U // l1 cache op
def cbo_clean = "b011111".U // llc op
def cbo_flush = "b101111".U // llc op
def cbo_inval = "b111111".U // llc op
def isLoad(op: UInt): Bool = !op(3)
def isStore(op: UInt): Bool = op(3)
def isCbo(op: UInt): Bool = op(3, 0) === "b1111".U
// atomics
// bit(1, 0) are size
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册