Commit 20d4a8bf authored by linjiawei

Merge remote-tracking branch 'origin/master' into l2cache

......@@ -261,8 +261,10 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// store pipe and store miss queue
storeMissQueue.io.lsu <> io.lsu.store
/*
assert(!(storeMissQueue.io.replay.req.fire() && !storeMissQueue.io.replay.req.bits.meta.replay),
"StoreMissQueue should replay requests")
*/
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.replay),
"Sbuffer should not replay requests")
assert(!(io.lsu.store.req.fire() && io.lsu.store.req.bits.meta.mmio),
......
......@@ -11,10 +11,8 @@ class StoreMissEntry extends DCacheModule
val io = IO(new Bundle {
val id = Input(UInt())
val req_pri_val = Input(Bool())
val req_pri_rdy = Output(Bool())
val req = Input(new DCacheLineReq )
val replay = DecoupledIO(new DCacheLineReq )
val lsu = Flipped(new DCacheStoreIO)
val replay = new DCacheStoreIO
val miss_req = DecoupledIO(new MissReq)
val miss_resp = Flipped(ValidIO(new MissResp))
......@@ -24,23 +22,25 @@ class StoreMissEntry extends DCacheModule
val tag = Output(Valid(UInt()))
})
val s_invalid :: s_miss_req :: s_miss_resp :: s_drain_rpq :: s_replay_resp :: s_miss_finish :: Nil = Enum(6)
val s_invalid :: s_replay_req :: s_replay_resp :: s_resp :: s_miss_req :: s_miss_resp :: s_miss_finish :: Nil = Enum(7)
val state = RegInit(s_invalid)
val req = Reg(new DCacheLineReq )
val resp = Reg(new DCacheResp)
val req_idx = get_idx(req.addr)
val req_tag = get_tag(req.addr)
val req_block_addr = get_block_addr(req.addr)
val reg_miss_resp = Reg(new MissResp)
// assign default values to output signals
io.req_pri_rdy := state === s_invalid
when (io.req_pri_val && io.req_pri_rdy) {
assert(req.cmd === M_XWR)
}
io.lsu.req.ready := state === s_invalid
io.lsu.resp.valid := false.B
io.lsu.resp.bits := DontCare
io.replay.valid := false.B
io.replay.bits := DontCare
io.replay.req.valid := false.B
io.replay.req.bits := DontCare
io.replay.resp.ready := false.B
io.miss_req.valid := false.B
io.miss_req.bits := DontCare
......@@ -57,9 +57,43 @@ class StoreMissEntry extends DCacheModule
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.req_pri_val && io.req_pri_rdy) {
req := io.req
state := s_miss_req
when (io.lsu.req.fire()) {
assert(io.lsu.req.bits.cmd === M_XWR)
assert(!io.lsu.req.bits.meta.replay)
req := io.lsu.req.bits
state := s_replay_req
}
}
// --------------------------------------------
// replay
when (state === s_replay_req) {
io.replay.req.valid := true.B
io.replay.req.bits := req
when (io.replay.req.fire()) {
state := s_replay_resp
}
}
when (state === s_replay_resp) {
io.replay.resp.ready := true.B
when (io.replay.resp.fire()) {
when (io.replay.resp.bits.miss) {
// replayed reqs should not miss
assert(!req.meta.replay)
when (!req.meta.replay) {
state := s_miss_req
}
} .otherwise {
resp := io.replay.resp.bits
when (!req.meta.replay) {
state := s_resp
} .otherwise {
state := s_miss_finish
}
}
assert(!io.replay.resp.bits.nack)
}
}
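// A sketch of the entry's new state machine, paraphrased from the
// transitions in this diff (the labels are editorial, not from the patch):
//
//   s_invalid --lsu.req fires--> s_replay_req --> s_replay_resp
//   s_replay_resp --miss (first try)--> s_miss_req --> s_miss_resp
//   s_miss_resp --> s_replay_req           (req.meta.replay set to true)
//   s_replay_resp --hit, !meta.replay--> s_resp --> s_invalid
//   s_replay_resp --hit,  meta.replay--> s_miss_finish --> s_resp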
......@@ -77,34 +111,10 @@ class StoreMissEntry extends DCacheModule
when (state === s_miss_resp) {
when (io.miss_resp.fire()) {
reg_miss_resp := io.miss_resp.bits
state := s_drain_rpq
}
}
// --------------------------------------------
// replay
val storePipelineLatency = 2
val replay_resp_ctr = Reg(UInt(log2Up(storePipelineLatency).W))
when (state === s_drain_rpq) {
io.replay.valid := true.B
io.replay.bits := req
io.replay.bits.meta.replay := true.B
when (io.replay.fire()) {
replay_resp_ctr := 0.U
state := s_replay_resp
}
}
//
// we must wait for the response here:
// if we do not wait for the response here,
// this entry may be freed before its response comes back
//
when (state === s_replay_resp) {
replay_resp_ctr := replay_resp_ctr + 1.U
when (replay_resp_ctr === (storePipelineLatency - 1).U) {
state := s_miss_finish
reg_miss_resp := io.miss_resp.bits
// mark req as replayed req
req.meta.replay := true.B
state := s_replay_req
}
}
......@@ -113,6 +123,16 @@ class StoreMissEntry extends DCacheModule
io.miss_finish.bits.client_id := io.id
io.miss_finish.bits.entry_id := reg_miss_resp.entry_id
when (io.miss_finish.fire()) {
state := s_resp
}
}
// --------------------------------------------
when (state === s_resp) {
io.lsu.resp.valid := true.B
io.lsu.resp.bits := resp
when (io.lsu.resp.fire()) {
state := s_invalid
}
}
......@@ -130,9 +150,10 @@ class StoreMissQueue extends DCacheModule
val miss_finish = DecoupledIO(new MissFinish)
})
val miss_req_arb = Module(new Arbiter(new MissReq, cfg.nStoreMissEntries))
val miss_finish_arb = Module(new Arbiter(new MissFinish, cfg.nStoreMissEntries))
val replay_arb = Module(new Arbiter(new DCacheLineReq, cfg.nStoreMissEntries))
val resp_arb = Module(new Arbiter(new DCacheResp, cfg.nStoreMissEntries))
val idx_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
val tag_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
......@@ -150,38 +171,58 @@ class StoreMissQueue extends DCacheModule
// if the same block is being handled by dcache
// assert(!(req.valid && tag_match))
io.replay.resp.ready := false.B
val entry_id_MSB = reqIdWidth - 1
val entry_id_LSB = reqIdWidth - storeMissQueueEntryIdWidth
val entries = (0 until cfg.nStoreMissEntries) map { i =>
val entry = Module(new StoreMissEntry)
entry.io.id := i.U(log2Up(cfg.nStoreMissEntries).W)
entry.io.id := i.U(storeMissQueueEntryIdWidth.W)
idx_matches(i) := entry.io.idx.valid && entry.io.idx.bits === get_idx(req.bits.addr)
tag_matches(i) := entry.io.tag.valid && entry.io.tag.bits === get_tag(req.bits.addr)
// entry req
entry.io.req_pri_val := (i.U === entry_alloc_idx) && pri_val
// lsu req and resp
val entry_lsu = entry.io.lsu
entry_lsu.req.valid := (i.U === entry_alloc_idx) && pri_val
when (i.U === entry_alloc_idx) {
pri_rdy := entry.io.req_pri_rdy
pri_rdy := entry_lsu.req.ready
}
entry_lsu.req.bits := req.bits
resp_arb.io.in(i) <> entry_lsu.resp
// replay req and resp
val entry_replay = entry.io.replay
replay_arb.io.in(i) <> entry_replay.req
replay_arb.io.in(i).bits.meta.id <> Cat(entry.io.id,
entry_replay.req.bits.meta.id(entry_id_LSB - 1, 0))
val resp_entry_id = io.replay.resp.bits.meta.id(entry_id_MSB, entry_id_LSB)
entry_replay.resp.valid := (i.U === resp_entry_id) && io.replay.resp.valid
entry_replay.resp.bits := io.replay.resp.bits
entry_replay.resp.bits.meta.id := Cat(0.U(storeMissQueueEntryIdWidth.W),
io.replay.resp.bits.meta.id(entry_id_LSB - 1, 0))
when (entry_replay.resp.valid) {
io.replay.resp.ready := entry_replay.resp.ready
}
entry.io.req := req.bits
replay_arb.io.in(i) <> entry.io.replay
miss_req_arb.io.in(i) <> entry.io.miss_req
entry.io.miss_resp.valid := (i.U === io.miss_resp.bits.client_id) && io.miss_resp.valid
entry.io.miss_resp.bits := io.miss_resp.bits
miss_finish_arb.io.in(i) <> entry.io.miss_finish
entry
}
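// A worked example of the id packing above, with illustrative widths
// (reqIdWidth = 8, storeMissQueueEntryIdWidth = 2, so entry_id_MSB = 7
// and entry_id_LSB = 6): entry 3 replaying a request whose low id bits
// are 0b010110 sends meta.id = Cat(3.U(2.W), 0b010110.U) = 0b11010110;
// the response is routed back via meta.id(7, 6) = 3, and the entry sees
// its original id restored as Cat(0.U(2.W), meta.id(5, 0)).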
entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.req_pri_rdy))
entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.lsu.req.ready))
// whenever index matches, do not let it in
req.ready := pri_rdy && !idx_match
io.lsu.resp <> resp_arb.io.out
io.replay.req <> replay_arb.io.out
io.lsu.resp <> io.replay.resp
io.miss_req <> miss_req_arb.io.out
io.miss_finish <> miss_finish_arb.io.out
......
......@@ -13,11 +13,7 @@ class LsRoqEntry extends XSBundle {
val mask = UInt(8.W)
val data = UInt(XLEN.W)
val exception = UInt(8.W)
// val miss = Bool()
val mmio = Bool()
// val store = Bool()
// val bwdMask = Vec(8, Bool()) // UInt(8.W)
// val bwdData = Vec(8, UInt(8.W))
val fwdMask = Vec(8, Bool())
val fwdData = Vec(8, UInt(8.W))
}
......@@ -56,14 +52,20 @@ class Lsroq extends XSModule {
val ringBufferTailExtended = RegInit(0.U(LsroqIdxWidth.W))
val ringBufferHead = ringBufferHeadExtended(InnerLsroqIdxWidth - 1, 0)
val ringBufferTail = ringBufferTailExtended(InnerLsroqIdxWidth - 1, 0)
val ringBufferEmpty = ringBufferHead === ringBufferTail && ringBufferHeadExtended(InnerLsroqIdxWidth) === ringBufferTailExtended(InnerLsroqIdxWidth)
val ringBufferFull = ringBufferHead === ringBufferTail && ringBufferHeadExtended(InnerLsroqIdxWidth) =/= ringBufferTailExtended(InnerLsroqIdxWidth)
val ringBufferSameFlag = ringBufferHeadExtended(InnerLsroqIdxWidth) === ringBufferTailExtended(InnerLsroqIdxWidth)
val ringBufferEmpty = ringBufferHead === ringBufferTail && ringBufferSameFlag
val ringBufferFull = ringBufferHead === ringBufferTail && !ringBufferSameFlag
val ringBufferAllowin = !ringBufferFull
val storeCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.STORE)
val loadCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.LOAD)
val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.lsroqIdx(InnerLsroqIdxWidth-1,0))
val tailMask = (((1.U((LsroqSize + 1).W)) << ringBufferTail).asUInt - 1.U)(LsroqSize - 1, 0)
val headMask = (((1.U((LsroqSize + 1).W)) << ringBufferHead).asUInt - 1.U)(LsroqSize - 1, 0)
val enqDeqMask1 = tailMask ^ headMask
val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)
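// A worked example of the masks above, assuming LsroqSize = 8
// (illustrative): tail = 2 gives tailMask = 0b00000011 and head = 5
// gives headMask = 0b00011111, so enqDeqMask1 = 0b00011100 marks
// entries 2..4, i.e. the window [tail, head) when both pointers carry
// the same flag bit; with different flags the window wraps around,
// hence the ~enqDeqMask1 arm of the Mux.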
// TODO: misc arbiter
// Enqueue at dispatch
......@@ -75,7 +77,6 @@ class Lsroq extends XSModule {
val index = lsroqIdx(InnerLsroqIdxWidth - 1, 0)
when(io.dp1Req(i).fire()) {
uop(index) := io.dp1Req(i).bits
uop(index).lsroqIdx := lsroqIdx // NOTE: add by zhangzifei, need check by others
allocated(index) := true.B
valid(index) := false.B
writebacked(index) := false.B
......@@ -175,7 +176,7 @@ class Lsroq extends XSModule {
val missRefillSelVec = VecInit(
(0 until LsroqSize).map(i => allocated(i) && miss(i))
)
val missRefillSel = getFirstOne(missRefillSelVec, ringBufferTail)
val missRefillSel = getFirstOne(missRefillSelVec, tailMask)
io.dcache.req.valid := missRefillSelVec.asUInt.orR
io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
io.dcache.req.bits.addr := data(missRefillSel).paddr
......@@ -209,32 +210,6 @@ class Lsroq extends XSModule {
XSDebug("miss resp: pc:0x%x roqIdx:%d lsroqIdx:%d (p)addr:0x%x data %x\n", io.dcache.resp.bits.meta.uop.cf.pc, io.dcache.resp.bits.meta.uop.roqIdx, io.dcache.resp.bits.meta.uop.lsroqIdx, io.dcache.resp.bits.meta.paddr, io.dcache.resp.bits.data)
}
// get load result from refill resp
// Refill a line in 1 cycle
// def refillDataSel(data: UInt, offset: UInt): UInt = {
// Mux1H((0 until 8).map(p => (data(5, 3) === p.U, data(64 * (p + 1) - 1, 64 * p))))
// }
// def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
// val res = Wire(Vec(8, UInt(8.W)))
// (0 until 8).foreach(i => {
// res(i) := Mux(fwdMask(i), fwd(8 * (i + 1) - 1, 8 * i), refill(8 * (i + 1) - 1, 8 * i))
// })
// res.asUInt
// }
// (0 until LsroqSize).map(i => {
// val addrMatch = data(i).paddr(PAddrBits - 1, 6) === io.refill.bits.meta.paddr
// when(allocated(i) && listening(i) && addrMatch && io.dcache.resp.fire()) {
// // TODO: merge data
// // val refillData = refillDataSel(io.refill.bits.data, data(i).paddr(5, 0))
// // data(i).data := mergeRefillData(refillData, data(i).data, data(i).mask)
// data(i).data := refillDataSel(io.refill.bits.data, data(i).paddr(5, 0)) // TODO: forward refill data
// valid(i) := true.B
// listening(i) := false.B
// }
// })
// Refill 64 bit in a cycle
// Refill data comes back from io.dcache.resp
def mergeRefillData(refill: UInt, fwd: UInt, fwdMask: UInt): UInt = {
......@@ -311,18 +286,18 @@ class Lsroq extends XSModule {
})
// writeback up to 2 store insts to CDB
// just randomly pick 2 stores, write them back to cdb
// choose the first two valid store requests from deqPtr
val storeWbSelVec = VecInit((0 until LsroqSize).map(i => {
allocated(i) && valid(i) && !writebacked(i) && store(i)
})).asUInt()
}))
val storeWbSel = Wire(Vec(StorePipelineWidth, UInt(log2Up(LsroqSize).W)))
val storeWbValid = Wire(Vec(StorePipelineWidth, Bool()))
val sselvec0 = PriorityEncoderOH(storeWbSelVec)
val sselvec1 = PriorityEncoderOH(storeWbSelVec & (~sselvec0).asUInt)
storeWbSel(0) := OHToUInt(sselvec0)
storeWbSel(1) := OHToUInt(sselvec1)
storeWbValid(0) := sselvec0.orR
storeWbValid(1) := sselvec1.orR
storeWbSel(0) := getFirstOne(storeWbSelVec, tailMask)
val firstSelMask = UIntToOH(storeWbSel(0))
val secondWbSelVec = VecInit((0 until LsroqSize).map(i => storeWbSelVec(i) && !firstSelMask(i)))
storeWbSel(1) := getFirstOne(secondWbSelVec, tailMask)
storeWbValid(0) := Cat(storeWbSelVec).orR
storeWbValid(1) := Cat(secondWbSelVec).orR
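// A worked example of the two-store selection above, assuming
// LsroqSize = 8 and tail = 2 (illustrative): with
// storeWbSelVec = 0b10000101, getFirstOne ignores entry 0 (below the
// tail mask) and picks entry 2; firstSelMask = 0b00000100 removes that
// entry, so the second pick over secondWbSelVec = 0b10000001 returns
// entry 7. Both picks respect age order relative to the dequeue pointer.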
(0 until StorePipelineWidth).map(i => {
io.stout(i).bits.uop := uop(storeWbSel(i))
......@@ -340,21 +315,12 @@ class Lsroq extends XSModule {
// remove retired insts from lsroq, add retired store to sbuffer
// move tailPtr
// FIXME: opt size using OH -> Mask
val dequeueMask = Wire(Vec(LsroqSize * 2, Bool()))
(0 until LsroqSize * 2).foreach(i => {
val ptr = i.U(InnerLsroqIdxWidth - 1, 0)
if (i == 0) {
dequeueMask(i) := ringBufferTail === i.U && !ringBufferEmpty && !allocated(ptr) // beginning of dequeuemask
} else {
dequeueMask(i) := (
dequeueMask(i - 1) && !allocated(ptr) && ringBufferHead =/= i.U(InnerLsroqIdxWidth - 1, 0) ||
ringBufferTail === i.U && !ringBufferEmpty && !allocated(ptr) // beginning of dequeuemask
// TODO: opt timing
)
}
})
ringBufferTailExtended := ringBufferTailExtended + PopCount(dequeueMask.asUInt)
// allocatedMask: the dequeue pointer can advance up to the first 1 bit
val allocatedMask = VecInit((0 until LsroqSize).map(i => allocated(i) || !enqDeqMask(i)))
// find the first one from deqPtr (ringBufferTail)
val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, ringBufferTailExtended(InnerLsroqIdxWidth))
val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, ringBufferHeadExtended)
ringBufferTailExtended := nextTail
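// A worked example of the pointer update above, assuming LsroqSize = 8
// (illustrative): tail = 6 (flag 0), head = 2 (flag 1), entries 6 and 7
// just freed, entries 0 and 1 still allocated. enqDeqMask marks the
// window {6, 7, 0, 1}, so allocatedMask = 0b00111111; no bit survives
// at or above the tail, so getFirstOneWithFlag wraps, returning index 0
// with the flag flipped to 1, i.e. the tail skips both freed slots.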
// send committed store inst to sbuffer
// select up to 2 writebacked store insts
......@@ -422,113 +388,75 @@ class Lsroq extends XSModule {
})
// load forward query
// check over all lsroq entries and forward data from the first matched store
(0 until LoadPipelineWidth).map(i => {
io.forward(i).forwardMask := 0.U(8.W).asBools
io.forward(i).forwardData := DontCare
// Just for functional simulation
// forward
val needForward1 = WireInit(VecInit((0 until LsroqSize).map(j => {
io.forward(i).lsroqIdx(InnerLsroqIdxWidth - 1, 0) > j.U &&
(
ringBufferTail <= j.U ||
ringBufferTailExtended(InnerLsroqIdxWidth) =/= io.forward(i).lsroqIdx(InnerLsroqIdxWidth)
)
})))
val needForward2 = WireInit(VecInit((0 until LsroqSize).map(j => {
ringBufferTail <= j.U &&
ringBufferTailExtended(InnerLsroqIdxWidth) =/= io.forward(i).lsroqIdx(InnerLsroqIdxWidth)
})))
// Comparing ringBufferTail (deqPtr) with forward.lsroqIdx, there are two cases:
// (1) if they have the same flag, we need to check range(tail, lsroqIdx)
// (2) if they have different flags, we need to check range(tail, lsroqSize) and range(0, lsroqIdx)
// Forward1: Mux(same_flag, range(tail, lsroqIdx), range(tail, lsroqSize))
// Forward2: Mux(same_flag, 0.U, range(0, lsroqIdx) )
// i.e. forward1 covers the entries whose flag matches deqPtr's, and forward2 covers the rest
val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B)))
val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
// forward lookup vec2
(0 until LsroqSize).map(j => {
when(
needForward2(j) &&
valid(j) && allocated(j) && store(j) &&
io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
) {
(0 until 8).map(k => {
when(data(j).mask(k)) {
forwardMask2(k) := true.B
forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
XSDebug("forwarding " + k + "th byte %x from ptr %d pc %x\n",
data(j).data(8 * (k + 1) - 1, 8 * k), j.U, uop(j).cf.pc
)
}
})
}
})
// forward lookup vec1
(0 until LsroqSize).map(j => {
when(
needForward1(j) &&
valid(j) && allocated(j) && store(j) &&
io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
) {
(0 until 8).map(k => {
when(data(j).mask(k)) {
val differentFlag = ringBufferTailExtended(InnerLsroqIdxWidth) =/= io.forward(i).lsroqIdx(InnerLsroqIdxWidth)
val forwardMask = ((1.U((LsroqSize + 1).W)) << io.forward(i).lsroqIdx(InnerLsroqIdxWidth - 1, 0)).asUInt - 1.U
val needForward1 = Mux(differentFlag, ~tailMask, tailMask ^ forwardMask)
val needForward2 = Mux(differentFlag, forwardMask, 0.U(LsroqSize.W))
// entries with larger indices have higher priority since their data is younger
for (j <- 0 until LsroqSize) {
val needCheck = valid(j) && allocated(j) && store(j) &&
io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
(0 until XLEN / 8).foreach(k => {
when (needCheck && data(j).mask(k)) {
when (needForward1(j)) {
forwardMask1(k) := true.B
forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
XSDebug("forwarding " + k + "th byte %x from ptr %d pc %x\n",
data(j).data(8 * (k + 1) - 1, 8 * k), j.U, uop(j).cf.pc
)
}
})
}
})
when (needForward2(j)) {
forwardMask2(k) := true.B
forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
}
XSDebug(needForward1(j) || needForward2(j),
p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " +
p"from ptr $j pc ${Hexadecimal(uop(j).cf.pc)}\n")
}
})
}
// merge forward lookup results
(0 until 8).map(k => {
// forward2 is younger than forward1 and should have higher priority
(0 until XLEN / 8).map(k => {
io.forward(i).forwardMask(k) := forwardMask1(k) || forwardMask2(k)
io.forward(i).forwardData(k) := Mux(forwardMask1(k), forwardData1(k), forwardData2(k))
io.forward(i).forwardData(k) := Mux(forwardMask2(k), forwardData2(k), forwardData1(k))
})
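// A worked example of the merge above (illustrative): if an older store
// (range 1) wrote bytes 0..3 (forwardMask1 = 0b00001111) and a younger
// store (range 2) rewrote bytes 0..1 (forwardMask2 = 0b00000011), the
// load receives bytes 0..1 from forwardData2 and bytes 2..3 from
// forwardData1; the Mux gives the younger range-2 data priority.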
// (1 until LsroqSize).map(j => {
// val ptr = io.forward(i).lsroqIdx - j.U
// when(
// lsroqIdxOlderThan(ptr, io.forward(i).lsroqIdx) &&
// valid(ptr) && allocated(ptr) && store(ptr) &&
// io.forward(i).paddr(PAddrBits-1, 3) === data(ptr).paddr(PAddrBits-1, 3)
// ){
// (0 until 8).map(k => {
// // when(data(ptr).mask(k) && io.forward(i).mask(k)){
// when(data(ptr).mask(k)){
// io.forward(i).forwardMask(k) := true.B
// io.forward(i).forwardData(k) := data(ptr).data(8*(k+1)-1, 8*k)
// XSDebug("forwarding "+k+"th byte %x from ptr %d pc %x\n",
// io.forward(i).forwardData(k), ptr, uop(ptr).cf.pc
// )
// }
// })
// }
// })
// backward
// (0 until 8).map(k => {
// when(data(io.forward(i).lsroqIdx).bwdMask(k)) {
// io.forward(i).forwardMask(k) := true.B
// io.forward(i).forwardData(k) := data(io.forward(i).lsroqIdx).bwdData(k)
// XSDebug("backwarding " + k + "th byte %x, idx %d pc %x\n",
// io.forward(i).forwardData(k), io.forward(i).lsroqIdx(InnerLsroqIdxWidth - 1, 0), uop(io.forward(i).lsroqIdx).cf.pc
// )
// }
// })
})
// rollback check
val rollback = Wire(Vec(StorePipelineWidth, Valid(new Redirect)))
def getFirstOne(mask: Vec[Bool], start: UInt) = {
def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
val length = mask.length
val lowMask = (1.U((length + 1).W) << start).asUInt() - 1.U
val highBits = (0 until length).map(i => mask(i) & ~lowMask(i))
val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
val highBitsUint = Cat(highBits.reverse)
PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
}
def getFirstOneWithFlag(mask: Vec[Bool], startMask: UInt, startFlag: UInt) = {
val length = mask.length
val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
val highBitsUint = Cat(highBits.reverse)
val changeDirection = !highBitsUint.orR()
val index = PriorityEncoder(Mux(!changeDirection, highBitsUint, mask.asUInt))
Cat(startFlag ^ changeDirection, index)
}
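// A worked example for the helpers above, assuming an 8-entry mask
// (illustrative): with mask = 0b10000010 and startMask = 0b00001111
// (tail = 4), the bits below the start are masked off, leaving bit 7,
// so getFirstOne returns 7. With mask = 0b00000010 nothing survives the
// mask, so the search wraps around to PriorityEncoder(mask) = 1;
// getFirstOneWithFlag additionally flips the returned flag bit to
// record that the search wrapped past the end of the buffer.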
def getOldestInTwo(valid: Seq[Bool], uop: Seq[MicroOp]) = {
assert(valid.length == uop.length)
assert(valid.length == 2)
......@@ -563,7 +491,10 @@ class Lsroq extends XSModule {
when(io.storeIn(i).valid) {
val startIndex = io.storeIn(i).bits.uop.lsroqIdx(InnerLsroqIdxWidth - 1, 0)
val toEnqPtrMask = rangeMask(io.storeIn(i).bits.uop.lsroqIdx, ringBufferHeadExtended)
val lsroqIdxMask = ((1.U((LsroqSize + 1).W) << startIndex).asUInt - 1.U)(LsroqSize - 1, 0)
val xorMask = lsroqIdxMask ^ headMask
val sameFlag = io.storeIn(i).bits.uop.lsroqIdx(InnerLsroqIdxWidth) === ringBufferHeadExtended(InnerLsroqIdxWidth)
val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask)
val lsroqViolationVec = VecInit((0 until LsroqSize).map(j => {
val addrMatch = allocated(j) &&
io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
......@@ -573,7 +504,7 @@ class Lsroq extends XSModule {
Cat(violationVec).orR() && entryNeedCheck
}))
val lsroqViolation = lsroqViolationVec.asUInt().orR()
val lsroqViolationIndex = getFirstOne(lsroqViolationVec, startIndex)
val lsroqViolationIndex = getFirstOne(lsroqViolationVec, lsroqIdxMask)
val lsroqViolationUop = uop(lsroqViolationIndex)
XSDebug(lsroqViolation, p"${Binary(Cat(lsroqViolationVec))}, $startIndex, $lsroqViolationIndex\n")
......
......@@ -129,6 +129,8 @@ class Memend extends XSModule {
val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
val lsroq = Module(new Lsroq)
val sbuffer = Module(new Sbuffer)
// if you want to stress test dcache store, use FakeSbuffer
// val sbuffer = Module(new FakeSbuffer)
val loadUnitToDcacheVec = Wire(Vec(exuParameters.LduCnt, new DCacheLoadIO))
......
......@@ -371,3 +371,85 @@ class Sbuffer extends XSModule with HasSBufferConst {
XSDebug(line.valid, "[#%d line] Tag: %x, data: %x, mask: %x\n", i.U, line.tag, line.data.asUInt(), line.mask.asUInt())
}}
}
// Fake Store buffer for XiangShan Out of Order LSU
class FakeSbuffer extends XSModule {
val io = IO(new Bundle() {
val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReq)))
val dcache = new DCacheStoreIO
val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
})
assert(!(io.in(1).valid && !io.in(0).valid))
// assign default values to signals
io.in(1).ready := false.B
io.dcache.req.valid := false.B
io.dcache.req.bits := DontCare
io.dcache.resp.ready := false.B
val s_invalid :: s_req :: s_resp :: Nil = Enum(3)
val state = RegInit(s_invalid)
val req = Reg(new DCacheWordReq)
XSDebug("state: %d\n", state)
io.in(0).ready := state === s_invalid
def word_addr(addr: UInt) = (addr >> 3) << 3
def block_addr(addr: UInt) = (addr >> 6) << 6
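// A quick sanity check of the helpers above (illustrative values):
// word_addr(0x1234) = 0x1230 (8-byte aligned), and
// block_addr(0x1234) = 0x1200 (64-byte cache-line aligned).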
// --------------------------------------------
// s_invalid: receive requests
when (state === s_invalid) {
when (io.in(0).fire()) {
req := io.in(0).bits
state := s_req
}
}
val wdataVec = WireInit(VecInit(Seq.fill(8)(0.U(64.W))))
val wmaskVec = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
wdataVec(req.addr(5,3)) := req.data
wmaskVec(req.addr(5,3)) := req.mask
when (state === s_req) {
val dcache_req = io.dcache.req
dcache_req.valid := true.B
dcache_req.bits.cmd := MemoryOpConstants.M_XWR
dcache_req.bits.addr := block_addr(req.addr)
dcache_req.bits.data := wdataVec.asUInt
dcache_req.bits.mask := wmaskVec.asUInt
dcache_req.bits.meta := DontCare
when (dcache_req.fire()) {
state := s_resp
}
}
when (state === s_resp) {
io.dcache.resp.ready := true.B
when (io.dcache.resp.fire()) {
state := s_invalid
}
}
// do forwarding here
for (i <- 0 until LoadPipelineWidth) {
val addr_match = word_addr(io.forward(i).paddr) === word_addr(req.addr)
val mask = io.forward(i).mask & req.mask(7, 0)
val mask_match = mask =/= 0.U
val need_forward = state =/= s_invalid && addr_match && mask_match
io.forward(i).forwardMask := Mux(need_forward, VecInit(mask.asBools),
VecInit(0.U(8.W).asBools))
io.forward(i).forwardData := VecInit((0 until 8) map {i => req.data((i + 1) * 8 - 1, i * 8)})
}
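// A worked example of the forwarding above (illustrative): with a
// buffered store at req.addr = 0x1008, req.mask = 0b00001111, a load
// query at paddr = 0x100C with mask = 0b00111100 hits the same word
// (word_addr is 0x1008 for both); mask & req.mask(7, 0) = 0b00001100,
// so only bytes 2..3 are forwarded from req.data and the dcache must
// still supply the rest.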
XSInfo(io.in(0).fire(), "ensbuffer addr 0x%x wdata 0x%x mask %b\n", io.in(0).bits.addr, io.in(0).bits.data, io.in(0).bits.mask)
XSInfo(io.in(1).fire(), "ensbuffer addr 0x%x wdata 0x%x mask %b\n", io.in(1).bits.addr, io.in(1).bits.data, io.in(1).bits.mask)
XSInfo(io.dcache.req.fire(), "desbuffer addr 0x%x wdata 0x%x mask %b\n", io.dcache.req.bits.addr, io.dcache.req.bits.data, io.dcache.req.bits.mask)
}