Unverified commit 6140f0d4, authored by Yinan Xu, committed by GitHub

Merge pull request #320 from RISCVERS/opt-memblock

dtlb: respond at the next cycle after req.fire
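For reference, a minimal Chisel sketch of the timing this title describes (illustrative names only, not the actual XiangShan TLB): the lookup happens in the cycle of req.fire() and the result is registered, so the response is visible one cycle later.

```scala
import chisel3._
import chisel3.util._

// Illustrative sketch: a lookup whose result is registered, so the
// response arrives one cycle after req.fire(). The translation itself
// is stubbed; only the request/response timing is the point.
class SketchTlb(vpnBits: Int = 27, ppnBits: Int = 22) extends Module {
  val io = IO(new Bundle {
    val req  = Flipped(Decoupled(UInt(vpnBits.W)))
    val resp = ValidIO(UInt(ppnBits.W))
  })
  io.req.ready := true.B
  // cycle 0: combinational lookup, stubbed here as a truncation of the vpn
  val hitPpn = io.req.bits(ppnBits - 1, 0)
  // cycle 1: the registered response, seen the cycle after req.fire()
  io.resp.valid := RegNext(io.req.fire(), false.B)
  io.resp.bits  := RegNext(hitPpn)
}
```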
......@@ -229,49 +229,67 @@ class MemBlock
assert(!(fenceFlush && atomicsFlush))
sbuffer.io.flush.valid := fenceFlush || atomicsFlush
// TODO: make 0/1 configurable
// AtomicsUnit
// AtomicsUnit will override other control signals,
// AtomicsUnit: AtomicsUnit will override other control signals,
// as atomics insts (LR/SC/AMO) will block the pipeline
val st0_atomics = reservationStations(2).io.deq.valid && reservationStations(2).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(3).io.deq.valid && reservationStations(3).io.deq.bits.uop.ctrl.fuType === FuType.mou
// amo should always go through store issue queue 0
assert(!st1_atomics)
val s_normal :: s_atomics_0 :: s_atomics_1 :: Nil = Enum(3)
val state = RegInit(s_normal)
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
val atomic_rs0 = exuParameters.LduCnt + 0
val atomic_rs1 = exuParameters.LduCnt + 1
val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType === FuType.mou
val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType === FuType.mou
// dispatch 0 takes priority
atomicsUnit.io.in.valid := st0_atomics
atomicsUnit.io.in.bits := reservationStations(2).io.deq.bits
when (st0_atomics) {
reservationStations(0).io.deq.ready := atomicsUnit.io.in.ready
reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
}
when(atomicsUnit.io.dtlb.req.valid) {
dtlb.io.requestor(0) <> atomicsUnit.io.dtlb
// take load unit 0's tlb port
// make sure not to disturb loadUnit
assert(!loadUnits(0).io.dtlb.req.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
state := s_atomics_0
assert(!st1_atomics)
}
when (st1_atomics) {
reservationStations(atomic_rs1).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(1).io.stin.valid := false.B
when(atomicsUnit.io.tlbFeedback.valid) {
assert(!storeUnits(0).io.tlbFeedback.valid)
atomicsUnit.io.tlbFeedback <> reservationStations(exuParameters.LduCnt + 0).io.feedback
state := s_atomics_1
assert(!st0_atomics)
}
when (atomicsUnit.io.out.valid) {
assert(state === s_atomics_0 || state === s_atomics_1)
state := s_normal
}
atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
atomicsUnit.io.dtlb.resp.valid := false.B
atomicsUnit.io.dtlb.resp.bits := DontCare
atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
atomicsUnit.io.dcache <> io.dcache.atomics
atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
// for atomicsUnit, it uses loadUnit(0)'s TLB port
when (state === s_atomics_0 || state === s_atomics_1) {
atomicsUnit.io.dtlb <> dtlb.io.requestor(0)
when(atomicsUnit.io.out.valid){
// take load unit 0's write back port
assert(!loadUnits(0).io.ldout.valid)
loadUnits(0).io.dtlb.resp.valid := false.B
loadUnits(0).io.ldout.ready := false.B
// make sure there are no in-flight uops in the load unit
assert(!loadUnits(0).io.dtlb.req.valid)
assert(!loadUnits(0).io.ldout.valid)
}
when (state === s_atomics_0) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs0).io.feedback
assert(!storeUnits(0).io.tlbFeedback.valid)
}
when (state === s_atomics_1) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs1).io.feedback
assert(!storeUnits(1).io.tlbFeedback.valid)
}
lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx
......
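The state machine added above serializes atomics from either store reservation station onto the shared TLB port and feedback paths. A compact sketch of the same arbitration idea, with assumed port names (issue0/issue1/done), just to make the state transitions explicit:

```scala
import chisel3._
import chisel3.util._

// Sketch only: track which store RS issued the atomic op, so the right
// tlbFeedback port is driven while the AtomicsUnit borrows shared resources.
class AtomicsArbiterSketch extends Module {
  val io = IO(new Bundle {
    val issue0 = Input(Bool())  // like st0_atomics: atomic leaving store RS 0
    val issue1 = Input(Bool())  // like st1_atomics: atomic leaving store RS 1
    val done   = Input(Bool())  // like atomicsUnit.io.out.valid
    val selRs1 = Output(Bool()) // which RS receives the tlbFeedback
    val busy   = Output(Bool()) // the shared TLB port is borrowed
  })
  val s_normal :: s_atomics_0 :: s_atomics_1 :: Nil = Enum(3)
  val state = RegInit(s_normal)
  assert(!(io.issue0 && io.issue1)) // at most one RS issues an atomic per cycle
  when(io.issue0) { state := s_atomics_0 }
  when(io.issue1) { state := s_atomics_1 }
  when(io.done)   { state := s_normal }
  io.selRs1 := state === s_atomics_1
  io.busy   := state =/= s_normal
}
```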
......@@ -273,65 +273,81 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val entry = Reg(Vec(TlbEntrySize, new TlbEntry))
val g = VecInit(entry.map(_.perm.g)).asUInt // TODO: need to check whether reversal is needed
val entryHitVec = widthMapSeq{i => VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/))) }
val hitVec = widthMapSeq{ i => (v.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
val pfHitVec = widthMapSeq{ i => (pf.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
val pfArray = widthMap{ i => ParallelOR(pfHitVec(i)).asBool && valid(i) && vmEnable }
val hit = widthMap{ i => ParallelOR(hitVec(i)).asBool && valid(i) && vmEnable && ~pfArray(i) }
val miss = widthMap{ i => !hit(i) && valid(i) && vmEnable && ~pfArray(i) }
val hitppn = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.ppn)) }
val hitPerm = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.perm)) }
val hitLevel= widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.level)) }
val multiHit = {
val hitSum = widthMap{ i => PopCount(hitVec(i)) }
val pfHitSum = widthMap{ i => PopCount(pfHitVec(i)) }
ParallelOR(widthMap{ i => !(hitSum(i)===0.U || hitSum(i)===1.U) || !(pfHitSum(i)===0.U || pfHitSum(i)===1.U)})
}
def TLBRead(i: Int) = {
val entryHitVec = VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
val reqAddrReg = if (isDtlb) RegNext(reqAddr(i)) else reqAddr(i)
val cmdReg = if (isDtlb) RegNext(cmd(i)) else cmd(i)
val validReg = if (isDtlb) RegNext(valid(i)) else valid(i)
val entryHitVecReg = if (isDtlb) RegNext(entryHitVec) else entryHitVec
val hitVec = (v.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfHitVec = (pf.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
val pfArray = ParallelOR(pfHitVec).asBool && validReg && vmEnable
val hit = ParallelOR(hitVec).asBool && validReg && vmEnable && ~pfArray
val miss = !hit && validReg && vmEnable && ~pfArray
val hitppn = ParallelMux(hitVec zip entry.map(_.ppn))
val hitPerm = ParallelMux(hitVec zip entry.map(_.perm))
val hitLevel= ParallelMux(hitVec zip entry.map(_.level))
val multiHit = {
val hitSum = PopCount(hitVec)
val pfHitSum = PopCount(pfHitVec)
!(hitSum===0.U || hitSum===1.U) || !(pfHitSum===0.U || pfHitSum===1.U)
}
// resp // TODO: A/D bits have not been handled yet
for(i <- 0 until Width) {
val paddr = LookupTreeDefault(hitLevel(i), Cat(hitppn(i), reqAddr(i).off), List(
0.U -> Cat(hitppn(i)(ppnLen - 1, 2*vpnnLen), reqAddr(i).vpn(2*vpnnLen - 1, 0), reqAddr(i).off),
1.U -> Cat(hitppn(i)(ppnLen - 1, vpnnLen), reqAddr(i).vpn(vpnnLen - 1, 0), reqAddr(i).off),
2.U -> Cat(hitppn(i), reqAddr(i).off)
// resp // TODO: A/D bits have not been handled yet
val paddr = LookupTreeDefault(hitLevel, Cat(hitppn, reqAddrReg.off), List(
0.U -> Cat(hitppn(ppnLen - 1, 2*vpnnLen), reqAddrReg.vpn(2*vpnnLen - 1, 0), reqAddrReg.off),
1.U -> Cat(hitppn(ppnLen - 1, vpnnLen), reqAddrReg.vpn(vpnnLen - 1, 0), reqAddrReg.off),
2.U -> Cat(hitppn, reqAddrReg.off)
))
val vaddr = SignExt(req(i).bits.vaddr, PAddrBits)
req(i).ready := resp(i).ready
resp(i).valid := valid(i)
resp(i).bits.paddr := Mux(vmEnable, paddr, SignExt(req(i).bits.vaddr, PAddrBits))
resp(i).bits.miss := miss(i)
resp(i).valid := validReg
resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr)
resp(i).bits.miss := miss
val perm = hitPerm(i) // NOTE: given the excp, the outer module chooses one to use?
val update = false.B && hit(i) && (!hitPerm(i).a || !hitPerm(i).d && TlbCmd.isWrite(cmd(i))) // update A/D through exception
val perm = hitPerm // NOTE: given the excp, the outer module chooses one to use?
val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception
val modeCheck = !(mode === ModeU && !perm.u || mode === ModeS && perm.u && (!priv.sum || ifecth))
val ldPf = (pfArray(i) && TlbCmd.isRead(cmd(i)) && true.B /*!isAMO*/) || hit(i) && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmd(i)) && true.B/*!isAMO*/) // TODO: handle isAMO
val stPf = (pfArray(i) && TlbCmd.isWrite(cmd(i)) || false.B /*isAMO*/ ) || hit(i) && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmd(i)) || false.B/*TODO isAMO. */)
val instrPf = (pfArray(i) && TlbCmd.isExec(cmd(i))) || hit(i) && !(modeCheck && perm.x) && TlbCmd.isExec(cmd(i))
val ldPf = (pfArray && TlbCmd.isRead(cmdReg) && true.B /*!isAMO*/) || hit && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmdReg) && true.B/*!isAMO*/) // TODO: handle isAMO
val stPf = (pfArray && TlbCmd.isWrite(cmdReg) || false.B /*isAMO*/ ) || hit && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmdReg) || false.B/*TODO isAMO. */)
val instrPf = (pfArray && TlbCmd.isExec(cmdReg)) || hit && !(modeCheck && perm.x) && TlbCmd.isExec(cmdReg)
resp(i).bits.excp.pf.ld := ldPf || update
resp(i).bits.excp.pf.st := stPf || update
resp(i).bits.excp.pf.instr := instrPf || update
(hit, miss, pfHitVec, multiHit)
}
val readResult = (0 until Width).map(TLBRead(_))
val hitVec = readResult.map(res => res._1)
val missVec = readResult.map(res => res._2)
val pfHitVecVec = readResult.map(res => res._3)
val multiHitVec = readResult.map(res => res._4)
val hasMissReq = Cat(missVec).orR
// ptw
val state_idle :: state_wait :: Nil = Enum(2)
val state = RegInit(state_idle)
ptw <> DontCare // TODO: needs checking
ptw.req.valid := ParallelOR(miss).asBool && state===state_idle && !sfence.valid
ptw.req.valid := hasMissReq && state===state_idle && !sfence.valid
ptw.resp.ready := state===state_wait
// val ptwReqSeq = Wire(Seq.fill(Width)(new comBundle()))
val ptwReqSeq = Seq.fill(Width)(Wire(new comBundle()))
for (i <- 0 until Width) {
ptwReqSeq(i).valid := valid(i) && miss(i)
ptwReqSeq(i).roqIdx := req(i).bits.roqIdx
ptwReqSeq(i).bits.vpn := reqAddr(i).vpn
ptwReqSeq(i).valid := ((if (isDtlb) RegNext(valid(i)) else valid(i)) && missVec(i))
ptwReqSeq(i).roqIdx := (if (isDtlb) RegNext(req(i).bits.roqIdx) else req(i).bits.roqIdx)
ptwReqSeq(i).bits.vpn := (if (isDtlb) RegNext(reqAddr(i).vpn) else reqAddr(i).vpn)
}
ptw.req.bits := Compare(ptwReqSeq).bits
switch (state) {
is (state_idle) {
when (ParallelOR(miss).asBool && ptw.req.fire()) {
when (hasMissReq && ptw.req.fire()) {
state := state_wait
}
assert(!ptw.resp.valid)
......@@ -345,7 +361,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
}
// reset pf when pf hit
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVec(i)).asUInt, 0.U) })
val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVecVec(i)).asUInt, 0.U) })
val pfHitRefill = ParallelOR(pfHitReset.asBools)
// refill
......@@ -409,15 +425,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
ExcitingUtils.addSource(valid(1)/* && vmEnable*/, "perfCntDtlbReqCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/, "perfCntDtlbReqCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/, "perfCntDtlbReqCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && miss(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && miss(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && miss(3), "perfCntDtlbMissCnt3", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntDtlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && missVec(1), "perfCntDtlbMissCnt1", Perf)
ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && missVec(2), "perfCntDtlbMissCnt2", Perf)
ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && missVec(3), "perfCntDtlbMissCnt3", Perf)
}
if (!env.FPGAPlatform && !isDtlb) {
ExcitingUtils.addSource(valid(0)/* && vmEnable*/, "perfCntItlbReqCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntItlbMissCnt0", Perf)
ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntItlbMissCnt0", Perf)
}
// Log
......@@ -428,7 +444,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
XSDebug(sfence.valid, p"Sfence: ${sfence}\n")
XSDebug(ParallelOR(valid)|| ptw.resp.valid, p"CSR: ${csr}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hit).asUInt)} miss:${Binary(VecInit(miss).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n")
XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n")
......@@ -437,7 +453,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// assert((hit(i)&pfArray(i))===false.B, "hit(%d):%d pfArray(%d):%d v:0x%x pf:0x%x", i.U, hit(i), i.U, pfArray(i), v, pf)
// }
// for(i <- 0 until Width) {
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVec:0x${Hexadecimal(VecInit(pfHitVec(i)).asUInt)}\n")
// XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVecVec:0x${Hexadecimal(VecInit(pfHitVecVec(i)).asUInt)}\n")
// }
// for(i <- 0 until TlbEntrySize) {
// XSDebug(multiHit, p"entry(${i.U}): v:${v(i)} ${entry(i)}\n")
......
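The recurring `if (isDtlb) RegNext(x) else x` pattern in TLBRead above is an elaboration-time choice: the DTLB registers its request-side signals once so they line up with the now one-cycle-later response, while the ITLB path stays combinational. A self-contained sketch of the helper this could be factored into (the helper itself is not in the PR):

```scala
import chisel3._

// Hypothetical helper, not part of the PR: add one pipeline stage only when
// elaborated as a DTLB. Because isDtlb is a Scala Boolean, the choice is
// made at elaboration time and costs no hardware when false.
class StageIfDtlb(isDtlb: Boolean) extends Module {
  val io = IO(new Bundle {
    val in  = Input(UInt(27.W))  // e.g. a vpn
    val out = Output(UInt(27.W))
  })
  private def stage[T <: Data](x: T): T = if (isDtlb) RegNext(x) else x
  io.out := stage(io.in)
}
```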
......@@ -41,7 +41,7 @@ trait HasICacheParameters extends HasL1CacheParameters {
// icache Queue
val groupAlign = log2Up(cacheParams.blockBytes)
def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
//ECC encoding
def encRowBits = cacheParams.dataCode.width(rowBits)
def encTagBits = cacheParams.tagCode.width(tagBits)
......@@ -178,7 +178,7 @@ class ICacheMetaArray extends ICachArray
val metaArray = Module(new SRAMTemplate(UInt(encTagBits.W), set=nSets, way=nWays, shouldReset = true))
//read
//read
metaArray.io.r.req.valid := io.read.valid
io.read.ready := metaArray.io.r.req.ready
io.write.ready := DontCare
......@@ -206,7 +206,7 @@ class ICacheDataArray extends ICachArray
val dataArray = List.fill(blockWords){ Module(new SRAMTemplate(UInt(encRowBits.W), set=nSets, way = nWays))}
//read
//read
//do ECC decoding after way selection
for(b <- 0 until blockWords){
dataArray(b).io.r.req.valid := io.read.valid
......@@ -225,8 +225,8 @@ class ICacheDataArray extends ICachArray
for(b <- 0 until blockWords){
dataArray(b).io.w.req.valid := io.write.valid
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
data=write_data_encoded(b),
dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
data=write_data_encoded(b),
waymask=write.waymask)
}
......@@ -273,7 +273,7 @@ class ICache extends ICacheModule
val metaArray = Module(new ICacheMetaArray)
val dataArray = Module(new ICacheDataArray)
// 256-bit valid
val validArray = RegInit(0.U((nSets * nWays).W))
val validArray = RegInit(0.U((nSets * nWays).W))
//----------------------------
// Stage 1
......@@ -283,7 +283,7 @@ class ICache extends ICacheModule
s1_req_mask := io.req.bits.mask
s2_ready := WireInit(false.B)
s1_fire := s1_valid && (s2_ready || io.flush(0))
// SRAM(Meta and Data) read request
val s1_idx = get_idx(s1_req_pc)
......@@ -294,8 +294,8 @@ class ICache extends ICacheModule
XSDebug("[Stage 1] v : r : f (%d %d %d) request pc: 0x%x mask: %b\n",s1_valid,s2_ready,s1_fire,s1_req_pc,s1_req_mask)
XSDebug("[Stage 1] index: %d\n",s1_idx)
//----------------------------
// Stage 2
//----------------------------
......@@ -325,9 +325,9 @@ class ICache extends ICacheModule
val invalidVec = ~validMeta
val hasInvalidWay = invalidVec.orR
val refillInvalidWaymask = PriorityMask(invalidVec)
val waymask = Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))
s2_hit := ParallelOR(hitVec) || s2_tlb_resp.excp.pf.instr || s2_access_fault
s2_ready := s2_fire || !s2_valid || io.flush(0)
......@@ -336,8 +336,8 @@ class ICache extends ICacheModule
XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
XSDebug("[Stage 2] tag: %x hit:%d\n",s2_tag,s2_hit)
XSDebug("[Stage 2] validMeta: %b victimWayMaks:%b invalidVec:%b hitVec:%b waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
//----------------------------
// Stage 3
//----------------------------
......@@ -351,16 +351,16 @@ class ICache extends ICacheModule
val s3_access_fault = RegEnable(s2_access_fault,init=false.B,enable=s2_fire)
when(io.flush(1)) { s3_valid := false.B }
.elsewhen(s2_fire) { s3_valid := s2_valid }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
.elsewhen(io.resp.fire()) { s3_valid := false.B }
val refillDataReg = Reg(Vec(refillCycles,UInt(beatBits.W)))
// icache hit
// icache hit
// data ECC encoding
// simply cut the hit cacheline
val dataHitWay = VecInit(s3_data.map(b => Mux1H(s3_wayMask,b).asUInt))
val outPacket = Wire(UInt((FetchWidth * 32).W))
val dataHitWayDecoded = VecInit(
(0 until blockWords).map{r =>
val dataHitWayDecoded = VecInit(
(0 until blockWords).map{r =>
val row = dataHitWay.asTypeOf(Vec(blockWords,UInt(encRowBits.W)))(r)
val decodedRow = cacheParams.dataCode.decode(row)
assert(!(s3_valid && s3_hit && decodedRow.uncorrectable))
......@@ -368,7 +368,7 @@ class ICache extends ICacheModule
}
)
outPacket := cutHelper(dataHitWay,s3_req_pc(5,1).asUInt,s3_req_mask.asUInt)
//ICache MissQueue
val icacheMissQueue = Module(new IcacheMissQueue)
val blocking = RegInit(false.B)
......@@ -394,9 +394,9 @@ class ICache extends ICacheModule
//refill write
val metaWriteReq = icacheMissQueue.io.meta_write.bits
icacheMissQueue.io.meta_write.ready := true.B
metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag,
idx=metaWriteReq.meta_write_idx,
metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag,
idx=metaWriteReq.meta_write_idx,
waymask=metaWriteReq.meta_write_waymask)
val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool())))
......@@ -408,7 +408,7 @@ class ICache extends ICacheModule
//data
icacheMissQueue.io.refill.ready := true.B
val refillReq = icacheMissQueue.io.refill.bits
dataArray.io.write.valid := icacheMissQueue.io.refill.valid
dataArray.io.write.valid := icacheMissQueue.io.refill.valid
dataArray.io.write.bits.apply(data=refillReq.refill_data,
idx=refillReq.refill_idx,
waymask=refillReq.refill_waymask)
......@@ -440,7 +440,7 @@ class ICache extends ICacheModule
//----------------------------
//icache request
io.req.ready := metaArray.io.read.ready && dataArray.io.read.ready && s2_ready
//icache response: to pre-decoder
io.resp.valid := s3_valid && (s3_hit || icacheMissQueue.io.resp.valid)
io.resp.bits.data := Mux((s3_valid && s3_hit),outPacket,refillDataOut)
......@@ -456,7 +456,7 @@ class ICache extends ICacheModule
io.tlb.req.bits.cmd := TlbCmd.exec
io.tlb.req.bits.roqIdx := DontCare
io.tlb.req.bits.debug.pc := s2_req_pc
//To L1 plus
io.mem_acquire <> icacheMissQueue.io.mem_acquire
icacheMissQueue.io.mem_grant <> io.mem_grant
......
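One detail worth calling out from the ICache data path above: ECC decoding is done after way selection (Mux1H on the waymask picks one encoded row, then a single decode), rather than decoding all ways in parallel. A sketch of that ordering, with the decoder stubbed out since the real dataCode comes from the rocket-chip ECC library:

```scala
import chisel3._
import chisel3.util._

// Sketch of "select first, decode once": one mux tree over the encoded
// ways instead of one ECC decoder per way.
class WaySelectThenDecode(nWays: Int = 4, encRowBits: Int = 72) extends Module {
  val io = IO(new Bundle {
    val waymask = Input(UInt(nWays.W))           // one-hot hit way
    val rows    = Input(Vec(nWays, UInt(encRowBits.W)))
    val decoded = Output(UInt(encRowBits.W))
  })
  // Way selection on the still-encoded rows.
  val hitRow = Mux1H(io.waymask, io.rows)
  // Decode once; stubbed as a pass-through here. The real code calls
  // cacheParams.dataCode.decode(hitRow) and takes .corrected.
  io.decoded := hitRow
}
```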
......@@ -128,7 +128,7 @@ class LoadPipe extends DCacheModule
val s2_data_word = s2_data_words(s2_word_idx)
val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
val s2_data_word_decoded = s2_decoded.corrected
assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
val resp = Wire(ValidIO(new DCacheWordResp))
......
......@@ -121,7 +121,7 @@ class IFU extends XSModule with HasIFUConst
// val if2_newPtr, if3_newPtr, if4_newPtr = Wire(UInt(log2Up(ExtHistoryLength).W))
val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory)
val final_gh = RegInit(0.U.asTypeOf(new GlobalHistory))
......@@ -149,7 +149,7 @@ class IFU extends XSModule with HasIFUConst
}
val if2_bp = bpu.io.out(0)
// val if2_GHInfo = wrapGHInfo(if2_bp, if2_predHist)
// if taken, bp_redirect should be true
// when taken on half RVI, we suppress this redirect signal
......@@ -184,9 +184,9 @@ class IFU extends XSModule with HasIFUConst
// val if4_prevHalfInstr = Wire(new PrevHalfInstr)
// 32-bit instr crosses 2 pages, and the higher 16-bit triggers page fault
val crossPageIPF = WireInit(false.B)
val if3_pendingPrevHalfInstr = if3_prevHalfInstr.valid
// the previous half of RVI instruction waits until it meets its last half
val if3_prevHalfInstrMet = if3_pendingPrevHalfInstr && (if3_prevHalfInstr.pc + 2.U) === if3_pc && if3_valid
// set to invalid once consumed or redirect from backend
......@@ -222,7 +222,7 @@ class IFU extends XSModule with HasIFUConst
// GHInfo from last pred does not correspond with this packet
// if3_ghInfoNotIdenticalRedirect
)
val if3_target = WireInit(snpc(if3_pc))
/* when (prevHalfMetRedirect) {
......@@ -255,14 +255,14 @@ class IFU extends XSModule with HasIFUConst
val if4_mask = RegEnable(icacheResp.mask, if3_fire)
val if4_snpc = Mux(inLoop, if4_pc + (PopCount(if4_mask) << 1), snpc(if4_pc))
val if4_predHist = RegEnable(if3_predHist, enable=if3_fire)
// wait until prevHalfInstr is written into the reg
if4_ready := (if4_fire && !hasPrevHalfInstrReq || !if4_valid || if4_flush) && GTimer() > 500.U
when (if4_flush) { if4_valid := false.B }
.elsewhen (if3_fire) { if4_valid := true.B }
.elsewhen (if4_fire) { if4_valid := false.B }
val if4_bp = Wire(new BranchPrediction)
if4_bp := bpu.io.out(2)
if4_bp.takens := bpu.io.out(2).takens & if4_mask
......@@ -286,7 +286,7 @@ class IFU extends XSModule with HasIFUConst
if4_bp.targets(i) := if4_jal_tgts(i)
}
}
// we need this to tell BPU the prediction of prev half
// because the prediction is with the start of each inst
val if4_prevHalfInstr = RegInit(0.U.asTypeOf(new PrevHalfInstr))
......
......@@ -64,7 +64,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{
// we send feedback right after we receive the request
// also, we always treat amo as a tlb hit
// since we will keep polling the tlb all by ourselves
io.tlbFeedback.valid := RegNext(io.in.fire())
io.tlbFeedback.valid := RegNext(RegNext(io.in.valid))
io.tlbFeedback.bits.hit := true.B
io.tlbFeedback.bits.roqIdx := in.uop.roqIdx
......
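The change above replaces RegNext(io.in.fire()) with a double RegNext of io.in.valid, i.e. a fixed two-cycle delay from issue to feedback, consistent with the DTLB response now arriving one cycle later. A minimal sketch of that timing, under assumed names:

```scala
import chisel3._

// Sketch only: feedback valid asserted exactly two cycles after issue.
class DelayedFeedback extends Module {
  val io = IO(new Bundle {
    val issueValid    = Input(Bool())  // like io.in.valid in the AtomicsUnit
    val feedbackValid = Output(Bool()) // like io.tlbFeedback.valid
  })
  // Fixed two-cycle delay from issue to feedback.
  io.feedbackValid := RegNext(RegNext(io.issueValid, false.B), false.B)
}
```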
......@@ -21,35 +21,23 @@ class LoadUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcacheReq = DecoupledIO(new DCacheLoadReq)
})
val s0_uop = io.in.bits.uop
val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm
val s0_paddr = io.dtlbResp.bits.paddr
val s0_tlb_miss = io.dtlbResp.bits.miss
val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0))
// query DTLB
io.dtlbReq.valid := io.out.valid
io.dtlbReq.valid := io.in.valid
io.dtlbReq.bits.vaddr := s0_vaddr
io.dtlbReq.bits.cmd := TlbCmd.read
io.dtlbReq.bits.roqIdx := s0_uop.roqIdx
io.dtlbReq.bits.debug.pc := s0_uop.cf.pc
io.dtlbResp.ready := io.out.ready // TODO: check it: io.out.fire()?
// feedback tlb result to RS
// Note: can be moved to s1
io.tlbFeedback.valid := io.out.valid
io.tlbFeedback.bits.hit := !s0_tlb_miss
io.tlbFeedback.bits.roqIdx := s0_uop.roqIdx
// query DCache
io.dcacheReq.valid := io.in.valid && !s0_uop.roqIdx.needFlush(io.redirect)
io.dcacheReq.valid := io.in.valid
io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD
io.dcacheReq.bits.addr := s0_vaddr
io.dcacheReq.bits.mask := s0_mask
......@@ -72,21 +60,18 @@ class LoadUnit_S0 extends XSModule {
"b11".U -> (s0_vaddr(2, 0) === 0.U) //d
))
io.out.valid := io.dcacheReq.fire() && // dcache may not accept load request
!io.in.bits.uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && io.dcacheReq.ready
io.out.bits := DontCare
io.out.bits.vaddr := s0_vaddr
io.out.bits.paddr := s0_paddr
io.out.bits.tlbMiss := io.dtlbResp.bits.miss
io.out.bits.mask := s0_mask
io.out.bits.uop := s0_uop
io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.in.ready := io.out.fire()
io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready)
XSDebug(io.dcacheReq.fire(), "[DCACHE LOAD REQ] pc %x vaddr %x paddr will be %x\n",
s0_uop.cf.pc, s0_vaddr, s0_paddr
XSDebug(io.dcacheReq.fire(),
p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, vaddr ${Hexadecimal(s0_vaddr)}\n"
)
}
......@@ -97,20 +82,28 @@ class LoadUnit_S1 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val s1_paddr = Output(UInt(PAddrBits.W))
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val dcachePAddr = Output(UInt(PAddrBits.W))
val sbuffer = new LoadForwardQueryIO
val lsq = new LoadForwardQueryIO
})
val s1_uop = io.in.bits.uop
val s1_paddr = io.in.bits.paddr
val s1_tlb_miss = io.in.bits.tlbMiss
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !io.out.bits.uop.cf.exceptionVec.asUInt.orR
val s1_mask = io.in.bits.mask
io.out.bits := io.in.bits // forwardXX field will be updated in s1
io.s1_paddr := s1_paddr
io.dtlbResp.ready := true.B
// feedback tlb result to RS
io.tlbFeedback.valid := io.in.valid
io.tlbFeedback.bits.hit := !s1_tlb_miss
io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx
io.dcachePAddr := s1_paddr
// load forward query datapath
io.sbuffer.valid := io.in.valid
......@@ -130,12 +123,13 @@ class LoadUnit_S1 extends XSModule {
io.out.bits.forwardMask := io.sbuffer.forwardMask
io.out.bits.forwardData := io.sbuffer.forwardData
io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid && !s1_tlb_miss
io.out.bits.paddr := s1_paddr
io.out.bits.mmio := s1_mmio
io.out.bits.tlbMiss := s1_tlb_miss
io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld
io.in.ready := io.out.ready || !io.in.valid
io.in.ready := !io.in.valid || io.out.ready
}
......@@ -146,7 +140,6 @@ class LoadUnit_S2 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp))
val lsq = new LoadForwardQueryIO
})
......@@ -197,7 +190,7 @@ class LoadUnit_S2 extends XSModule {
// TODO: ECC check
io.out.valid := io.in.valid // && !s2_uop.needFlush(io.redirect) will cause comb. loop
io.out.valid := io.in.valid
// Inst will be canceled in store queue / lsq,
// so we do not need to care about flush in load / store unit's out.valid
io.out.bits := io.in.bits
......@@ -224,18 +217,6 @@ class LoadUnit_S2 extends XSModule {
}
// class LoadUnit_S3 extends XSModule {
// val io = IO(new Bundle() {
// val in = Flipped(Decoupled(new LsPipelineBundle))
// val out = Decoupled(new LsPipelineBundle)
// val redirect = Flipped(ValidIO(new Redirect))
// })
// io.in.ready := true.B
// io.out.bits := io.in.bits
// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
// }
class LoadUnit extends XSModule {
val io = IO(new Bundle() {
val ldin = Flipped(Decoupled(new ExuInput))
......@@ -251,33 +232,26 @@ class LoadUnit extends XSModule {
val load_s0 = Module(new LoadUnit_S0)
val load_s1 = Module(new LoadUnit_S1)
val load_s2 = Module(new LoadUnit_S2)
// val load_s3 = Module(new LoadUnit_S3)
load_s0.io.in <> io.ldin
load_s0.io.redirect <> io.redirect
load_s0.io.dtlbReq <> io.dtlb.req
load_s0.io.dtlbResp <> io.dtlb.resp
load_s0.io.dcacheReq <> io.dcache.req
load_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, false.B)
PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
io.dcache.s1_paddr := load_s1.io.out.bits.paddr
load_s1.io.redirect <> io.redirect
load_s1.io.dtlbResp <> io.dtlb.resp
load_s1.io.tlbFeedback <> io.tlbFeedback
io.dcache.s1_paddr <> load_s1.io.dcachePAddr
io.dcache.s1_kill := DontCare // FIXME
io.sbuffer <> load_s1.io.sbuffer
io.lsq.forward <> load_s1.io.lsq
load_s1.io.sbuffer <> io.sbuffer
load_s1.io.lsq <> io.lsq.forward
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, false.B)
PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect))
load_s2.io.redirect <> io.redirect
load_s2.io.dcacheResp <> io.dcache.resp
load_s2.io.lsq := DontCare
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
// PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B)
// load_s3.io.redirect <> io.redirect
load_s2.io.lsq := DontCare
load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData
load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask
XSDebug(load_s0.io.out.valid,
p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " +
......
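The shape of the refactor across the load pipeline (and, below, the store pipeline) is the same: S0 only issues the TLB request; S1 consumes the response one cycle later, reports hit/miss back to the reservation station, and lets only TLB hits proceed. A reduced sketch of the S1 side, with assumed port names:

```scala
import chisel3._

// Sketch of the S1 TLB handling common to LoadUnit_S1 and StoreUnit_S1.
class S1TlbSideSketch extends Module {
  val io = IO(new Bundle {
    val s1Valid  = Input(Bool())
    val tlbMiss  = Input(Bool())  // like dtlbResp.bits.miss, available in S1
    val fbValid  = Output(Bool()) // like tlbFeedback.valid
    val fbHit    = Output(Bool()) // like tlbFeedback.bits.hit
    val outValid = Output(Bool()) // to the next stage
  })
  io.fbValid  := io.s1Valid                 // feedback as soon as the resp is known
  io.fbHit    := !io.tlbMiss
  io.outValid := io.s1Valid && !io.tlbMiss  // only TLB hits proceed
}
```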
......@@ -12,10 +12,7 @@ class StoreUnit_S0 extends XSModule {
val io = IO(new Bundle() {
val in = Flipped(Decoupled(new ExuInput))
val out = Decoupled(new LsPipelineBundle)
val redirect = Flipped(ValidIO(new Redirect))
val dtlbReq = DecoupledIO(new TlbReq)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
})
// send req to dtlb
......@@ -26,16 +23,15 @@ class StoreUnit_S0 extends XSModule {
io.dtlbReq.bits.cmd := TlbCmd.write
io.dtlbReq.bits.roqIdx := io.in.bits.uop.roqIdx
io.dtlbReq.bits.debug.pc := io.in.bits.uop.cf.pc
io.dtlbResp.ready := true.B // TODO: why does dtlbResp need a ready?
io.out.bits := DontCare
io.out.bits.vaddr := saddr
io.out.bits.paddr := io.dtlbResp.bits.paddr
io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.bits.uop := io.in.bits.uop
io.out.bits.miss := io.dtlbResp.bits.miss
io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0))
io.out.valid := io.in.valid && !io.dtlbResp.bits.miss && !io.out.bits.uop.roqIdx.needFlush(io.redirect)
io.out.valid := io.in.valid
io.in.ready := io.out.ready
// exception check
......@@ -46,18 +42,7 @@ class StoreUnit_S0 extends XSModule {
"b11".U -> (io.out.bits.vaddr(2,0) === 0.U) //d
))
io.out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned
io.out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
// Send TLB feedback to store issue queue
// TODO: should be moved to S1
io.tlbFeedback.valid := RegNext(io.in.valid && io.out.ready)
io.tlbFeedback.bits.hit := RegNext(!io.out.bits.miss)
io.tlbFeedback.bits.roqIdx := RegNext(io.out.bits.uop.roqIdx)
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
}
// Load Pipeline Stage 1
......@@ -67,21 +52,41 @@ class StoreUnit_S1 extends XSModule {
val in = Flipped(Decoupled(new LsPipelineBundle))
val out = Decoupled(new LsPipelineBundle)
// val fp_out = Decoupled(new LsPipelineBundle)
val dtlbResp = Flipped(DecoupledIO(new TlbResp))
val tlbFeedback = ValidIO(new TlbFeedback)
val stout = DecoupledIO(new ExuOutput) // writeback store
val redirect = Flipped(ValidIO(new Redirect))
})
val s1_paddr = io.dtlbResp.bits.paddr
val s1_tlb_miss = io.dtlbResp.bits.miss
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
// writeback to LSQ
io.in.ready := true.B
io.dtlbResp.ready := true.B // TODO: why does dtlbResp need a ready?
// Send TLB feedback to store issue queue
io.tlbFeedback.valid := RegNext(io.in.valid && io.out.ready)
io.tlbFeedback.bits.hit := RegNext(!io.out.bits.miss)
io.tlbFeedback.bits.roqIdx := RegNext(io.out.bits.uop.roqIdx)
XSDebug(io.tlbFeedback.valid,
"S1 Store: tlbHit: %d roqIdx: %d\n",
io.tlbFeedback.bits.hit,
io.tlbFeedback.bits.roqIdx.asUInt
)
io.out.bits := io.in.bits
io.out.bits.miss := false.B
io.out.bits.mmio := AddressSpace.isMMIO(io.in.bits.paddr)
io.out.valid := io.in.fire() // TODO: && ! FP
io.out.bits.mmio := AddressSpace.isMMIO(s1_paddr)
io.out.bits.paddr := s1_paddr
io.out.bits.miss := s1_tlb_miss
io.out.valid := io.in.valid && !s1_tlb_miss// TODO: && ! FP
io.out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
io.stout.bits.uop := io.in.bits.uop
// io.stout.bits.uop.cf.exceptionVec := // TODO: update according to TLB result
io.stout.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st
io.stout.bits.data := DontCare
io.stout.bits.redirectValid := false.B
io.stout.bits.redirect := DontCare
......@@ -90,7 +95,8 @@ class StoreUnit_S1 extends XSModule {
io.stout.bits.fflags := DontCare
val hasException = io.out.bits.uop.cf.exceptionVec.asUInt.orR
io.stout.valid := io.in.fire() && (!io.out.bits.mmio || hasException) // mmio inst will be writebacked immediately
// mmio insts with exceptions are written back immediately
io.stout.valid := io.in.valid && (!io.out.bits.mmio || hasException) && !s1_tlb_miss
// if fp
// io.fp_out.valid := ...
......@@ -125,22 +131,20 @@ class StoreUnit extends XSModule {
// val store_s2 = Module(new StoreUnit_S2)
store_s0.io.in <> io.stin
store_s0.io.redirect <> io.redirect
store_s0.io.dtlbReq <> io.dtlb.req
store_s0.io.dtlbResp <> io.dtlb.resp
store_s0.io.tlbFeedback <> io.tlbFeedback
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, false.B)
PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, store_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect))
// PipelineConnect(store_s1.io.fp_out, store_s2.io.in, true.B, false.B)
store_s1.io.redirect <> io.redirect
store_s1.io.stout <> io.stout
store_s1.io.dtlbResp <> io.dtlb.resp
store_s1.io.tlbFeedback <> io.tlbFeedback
// send result to sq
io.lsq.valid := store_s1.io.out.valid
io.lsq.bits := store_s1.io.out.bits
store_s1.io.out.ready := true.B
private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = {
XSDebug(cond,
p"$name" + p" pc ${Hexadecimal(pipeline.uop.cf.pc)} " +
......
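Both units now pass roqIdx.needFlush(io.redirect) as the flush argument of PipelineConnect instead of wiring redirect into each stage. For readers without the XiangShan utils at hand, a minimal stand-in with the same interface (the real implementation may differ in details):

```scala
import chisel3._
import chisel3.util._

// Minimal stand-in for XiangShan's PipelineConnect helper: a one-entry
// pipeline register between two Decoupled interfaces whose content is
// dropped when `flush` is high. Last connection wins, so flush overrides.
object PipelineConnectSketch {
  def apply[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T],
                       rightOutFire: Bool, flush: Bool): Unit = {
    val valid = RegInit(false.B)
    when(rightOutFire)              { valid := false.B } // consumed downstream
    when(left.valid && right.ready) { valid := true.B }  // new beat latched
    when(flush)                     { valid := false.B } // dropped on flush
    left.ready  := right.ready
    right.bits  := RegEnable(left.bits, left.valid && right.ready)
    right.valid := valid
  }
}
```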