diff --git a/.github/workflows/emu.yml b/.github/workflows/emu.yml
index 505103ff32b260ce6730c644cbd8807a9c4da52c..326ccff73b2c0af2d4c0eace27494b52764e4869 100644
--- a/.github/workflows/emu.yml
+++ b/.github/workflows/emu.yml
@@ -49,3 +49,6 @@ jobs:
     - name: Run microbench
       run: |
         make -C $AM_HOME/apps/microbench ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME mainargs=test run 2> /dev/null
+    - name: Run coremark
+      run: |
+        make -C $AM_HOME/apps/coremark ARCH=riscv64-noop AM_HOME=$AM_HOME NEMU_HOME=$NEMU_HOME NOOP_HOME=$NOOP_HOME run 2> /dev/null
diff --git a/src/main/scala/xiangshan/XSCore.scala b/src/main/scala/xiangshan/XSCore.scala
index 757f19f56342bf59b37542a53f8dbd7063be8dc2..84b922ac69602daf45860306890ca76e3c9e440e 100644
--- a/src/main/scala/xiangshan/XSCore.scala
+++ b/src/main/scala/xiangshan/XSCore.scala
@@ -316,20 +316,16 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
   val uncache = outer.uncache.module
   val l1pluscache = outer.l1pluscache.module
   val ptw = outer.ptw.module
-  val icache = Module(new ICache)
+
   frontend.io.backend <> ctrlBlock.io.frontend
-  frontend.io.icacheResp <> icache.io.resp
-  frontend.io.icacheToTlb <> icache.io.tlb
-  icache.io.req <> frontend.io.icacheReq
-  icache.io.flush <> frontend.io.icacheFlush
   frontend.io.sfence <> integerBlock.io.fenceio.sfence
   frontend.io.tlbCsr <> integerBlock.io.csrio.tlb
 
-  icache.io.mem_acquire <> l1pluscache.io.req
-  l1pluscache.io.resp <> icache.io.mem_grant
-  l1pluscache.io.flush := icache.io.l1plusflush
-  icache.io.fencei := integerBlock.io.fenceio.fencei
+  frontend.io.icacheMemAcq <> l1pluscache.io.req
+  l1pluscache.io.resp <> frontend.io.icacheMemGrant
+  l1pluscache.io.flush := frontend.io.l1plusFlush
+  frontend.io.fencei := integerBlock.io.fenceio.fencei
 
   ctrlBlock.io.fromIntBlock <> integerBlock.io.toCtrlBlock
   ctrlBlock.io.fromFpBlock <> floatBlock.io.toCtrlBlock
diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala
index 2f0148ca3d8dcb1bc56e0c19e9b17dda6e24aa06..950f7ad09130aabedb628a5bcfe988ba402fe464 100644
--- a/src/main/scala/xiangshan/backend/MemBlock.scala
+++ b/src/main/scala/xiangshan/backend/MemBlock.scala
@@ -229,49 +229,67 @@ class MemBlock
   assert(!(fenceFlush && atomicsFlush))
   sbuffer.io.flush.valid := fenceFlush || atomicsFlush
 
-  // TODO: make 0/1 configurable
-  // AtomicsUnit
-  // AtomicsUnit will override other control signials,
+  // AtomicsUnit: AtomicsUnit will override other control signals,
   // as atomics insts (LR/SC/AMO) will block the pipeline
-  val st0_atomics = reservationStations(2).io.deq.valid && reservationStations(2).io.deq.bits.uop.ctrl.fuType === FuType.mou
-  val st1_atomics = reservationStations(3).io.deq.valid && reservationStations(3).io.deq.bits.uop.ctrl.fuType === FuType.mou
-  // amo should always go through store issue queue 0
-  assert(!st1_atomics)
+  val s_normal :: s_atomics_0 :: s_atomics_1 :: Nil = Enum(3)
+  val state = RegInit(s_normal)
 
-  atomicsUnit.io.dtlb.resp.valid := false.B
-  atomicsUnit.io.dtlb.resp.bits := DontCare
-  atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
+  val atomic_rs0 = exuParameters.LduCnt + 0
+  val atomic_rs1 = exuParameters.LduCnt + 1
+  val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType === FuType.mou
+  val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType === FuType.mou
 
-  // dispatch 0 takes priority
-  atomicsUnit.io.in.valid := st0_atomics
-  atomicsUnit.io.in.bits := reservationStations(2).io.deq.bits
   when (st0_atomics) {
-    reservationStations(0).io.deq.ready := atomicsUnit.io.in.ready
+    reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
     storeUnits(0).io.stin.valid := false.B
-  }
-  when(atomicsUnit.io.dtlb.req.valid) {
-    dtlb.io.requestor(0) <> atomicsUnit.io.dtlb
-    // take load unit 0's tlb port
-    // make sure not to disturb loadUnit
-    assert(!loadUnits(0).io.dtlb.req.valid)
-    loadUnits(0).io.dtlb.resp.valid := false.B
+
+    state := s_atomics_0
+    assert(!st1_atomics)
   }
+  when (st1_atomics) {
+    reservationStations(atomic_rs1).io.deq.ready := atomicsUnit.io.in.ready
+    storeUnits(1).io.stin.valid := false.B
 
-  when(atomicsUnit.io.tlbFeedback.valid) {
-    assert(!storeUnits(0).io.tlbFeedback.valid)
-    atomicsUnit.io.tlbFeedback <> reservationStations(exuParameters.LduCnt + 0).io.feedback
+    state := s_atomics_1
+    assert(!st0_atomics)
   }
+  when (atomicsUnit.io.out.valid) {
+    assert(state === s_atomics_0 || state === s_atomics_1)
+    state := s_normal
+  }
+
+  atomicsUnit.io.in.valid := st0_atomics || st1_atomics
+  atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
+  atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
+
+  atomicsUnit.io.dtlb.resp.valid := false.B
+  atomicsUnit.io.dtlb.resp.bits := DontCare
+  atomicsUnit.io.dtlb.req.ready := dtlb.io.requestor(0).req.ready
 
   atomicsUnit.io.dcache <> io.dcache.atomics
   atomicsUnit.io.flush_sbuffer.empty := sbuffer.io.flush.empty
 
-  atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
+  // for atomicsUnit, it uses loadUnit(0)'s TLB port
+  when (state === s_atomics_0 || state === s_atomics_1) {
+    atomicsUnit.io.dtlb <> dtlb.io.requestor(0)
 
-  when(atomicsUnit.io.out.valid){
-    // take load unit 0's write back port
-    assert(!loadUnits(0).io.ldout.valid)
+    loadUnits(0).io.dtlb.resp.valid := false.B
     loadUnits(0).io.ldout.ready := false.B
+
+    // make sure there are no in-flight uops in load unit
+    assert(!loadUnits(0).io.dtlb.req.valid)
+    assert(!loadUnits(0).io.ldout.valid)
+  }
+
+  when (state === s_atomics_0) {
+    atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs0).io.feedback
+
+    assert(!storeUnits(0).io.tlbFeedback.valid)
+  }
+  when (state === s_atomics_1) {
+    atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs1).io.feedback
+
+    assert(!storeUnits(1).io.tlbFeedback.valid)
   }
 
   lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx
diff --git a/src/main/scala/xiangshan/cache/dtlb.scala b/src/main/scala/xiangshan/cache/dtlb.scala
index 0494e2ee82751137c4718aff3d869ac9b6b651cf..7239c3af5f9ada64b2b5324382c74be20c930516 100644
--- a/src/main/scala/xiangshan/cache/dtlb.scala
+++ b/src/main/scala/xiangshan/cache/dtlb.scala
@@ -273,65 +273,81 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
   val entry = Reg(Vec(TlbEntrySize, new TlbEntry))
   val g = VecInit(entry.map(_.perm.g)).asUInt // TODO: need check if reverse is needed
 
-  val entryHitVec = widthMapSeq{i => VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/))) }
-  val hitVec = widthMapSeq{ i => (v.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
-  val pfHitVec = widthMapSeq{ i => (pf.asBools zip entryHitVec(i)).map{ case (a,b) => a&b } }
-  val pfArray = widthMap{ i => ParallelOR(pfHitVec(i)).asBool && valid(i) && vmEnable }
-  val hit = widthMap{ i => ParallelOR(hitVec(i)).asBool && valid(i) && vmEnable && ~pfArray(i) }
-  val miss = widthMap{ i => !hit(i) && valid(i) && vmEnable && ~pfArray(i) }
-  val hitppn = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.ppn)) }
-  val hitPerm = widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.perm)) }
-  val hitLevel= widthMap{ i => ParallelMux(hitVec(i) zip entry.map(_.level)) }
-  val multiHit = {
-    val hitSum = widthMap{ i => PopCount(hitVec(i)) }
-    val pfHitSum = widthMap{ i => PopCount(pfHitVec(i)) }
-    ParallelOR(widthMap{ i => !(hitSum(i)===0.U || hitSum(i)===1.U) || !(pfHitSum(i)===0.U || pfHitSum(i)===1.U)})
-  }
+  def TLBRead(i: Int) = {
+    val entryHitVec = VecInit(entry.map(_.hit(reqAddr(i).vpn/*, satp.asid*/)))
+
+    val reqAddrReg = if (isDtlb) RegNext(reqAddr(i)) else reqAddr(i)
+    val cmdReg = if (isDtlb) RegNext(cmd(i)) else cmd(i)
+    val validReg = if (isDtlb) RegNext(valid(i)) else valid(i)
+    val entryHitVecReg = if (isDtlb) RegNext(entryHitVec) else entryHitVec
+
+    val hitVec = (v.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
+    val pfHitVec = (pf.asBools zip entryHitVecReg).map{ case (a,b) => a&b }
+    val pfArray = ParallelOR(pfHitVec).asBool && validReg && vmEnable
+    val hit = ParallelOR(hitVec).asBool && validReg && vmEnable && ~pfArray
+    val miss = !hit && validReg && vmEnable && ~pfArray
+    val hitppn = ParallelMux(hitVec zip entry.map(_.ppn))
+    val hitPerm = ParallelMux(hitVec zip entry.map(_.perm))
+    val hitLevel= ParallelMux(hitVec zip entry.map(_.level))
+    val multiHit = {
+      val hitSum = PopCount(hitVec)
+      val pfHitSum = PopCount(pfHitVec)
+      !(hitSum===0.U || hitSum===1.U) || !(pfHitSum===0.U || pfHitSum===1.U)
+    }
 
-  // resp  // TODO: A/D has not being concerned
-  for(i <- 0 until Width) {
-    val paddr = LookupTreeDefault(hitLevel(i), Cat(hitppn(i), reqAddr(i).off), List(
-      0.U -> Cat(hitppn(i)(ppnLen - 1, 2*vpnnLen), reqAddr(i).vpn(2*vpnnLen - 1, 0), reqAddr(i).off),
-      1.U -> Cat(hitppn(i)(ppnLen - 1, vpnnLen), reqAddr(i).vpn(vpnnLen - 1, 0), reqAddr(i).off),
-      2.U -> Cat(hitppn(i), reqAddr(i).off)
+    // resp  // TODO: A/D has not been considered
+    val paddr = LookupTreeDefault(hitLevel, Cat(hitppn, reqAddrReg.off), List(
+      0.U -> Cat(hitppn(ppnLen - 1, 2*vpnnLen), reqAddrReg.vpn(2*vpnnLen - 1, 0), reqAddrReg.off),
+      1.U -> Cat(hitppn(ppnLen - 1, vpnnLen), reqAddrReg.vpn(vpnnLen - 1, 0), reqAddrReg.off),
+      2.U -> Cat(hitppn, reqAddrReg.off)
     ))
+    val vaddr = SignExt(req(i).bits.vaddr, PAddrBits)
 
     req(i).ready := resp(i).ready
-    resp(i).valid := valid(i)
-    resp(i).bits.paddr := Mux(vmEnable, paddr, SignExt(req(i).bits.vaddr, PAddrBits))
-    resp(i).bits.miss := miss(i)
+    resp(i).valid := validReg
+    resp(i).bits.paddr := Mux(vmEnable, paddr, if (isDtlb) RegNext(vaddr) else vaddr)
+    resp(i).bits.miss := miss
 
-    val perm = hitPerm(i) // NOTE: given the excp, the out module choose one to use?
-    val update = false.B && hit(i) && (!hitPerm(i).a || !hitPerm(i).d && TlbCmd.isWrite(cmd(i))) // update A/D through exception
+    val perm = hitPerm // NOTE: given the excp, the out module choose one to use?
+    val update = false.B && hit && (!hitPerm.a || !hitPerm.d && TlbCmd.isWrite(cmdReg)) // update A/D through exception
     val modeCheck = !(mode === ModeU && !perm.u || mode === ModeS && perm.u && (!priv.sum || ifecth))
-    val ldPf = (pfArray(i) && TlbCmd.isRead(cmd(i)) && true.B /*!isAMO*/) || hit(i) && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmd(i)) && true.B/*!isAMO*/) // TODO: handle isAMO
-    val stPf = (pfArray(i) && TlbCmd.isWrite(cmd(i)) || false.B /*isAMO*/ ) || hit(i) && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmd(i)) || false.B/*TODO isAMO. */)
-    val instrPf = (pfArray(i) && TlbCmd.isExec(cmd(i))) || hit(i) && !(modeCheck && perm.x) && TlbCmd.isExec(cmd(i))
+    val ldPf = (pfArray && TlbCmd.isRead(cmdReg) && true.B /*!isAMO*/) || hit && !(modeCheck && (perm.r || priv.mxr && perm.x)) && (TlbCmd.isRead(cmdReg) && true.B/*!isAMO*/) // TODO: handle isAMO
+    val stPf = (pfArray && TlbCmd.isWrite(cmdReg) || false.B /*isAMO*/ ) || hit && !(modeCheck && perm.w) && (TlbCmd.isWrite(cmdReg) || false.B/*TODO isAMO. */)
+    val instrPf = (pfArray && TlbCmd.isExec(cmdReg)) || hit && !(modeCheck && perm.x) && TlbCmd.isExec(cmdReg)
     resp(i).bits.excp.pf.ld := ldPf || update
     resp(i).bits.excp.pf.st := stPf || update
     resp(i).bits.excp.pf.instr := instrPf || update
+
+    (hit, miss, pfHitVec, multiHit)
   }
 
+  val readResult = (0 until Width).map(TLBRead(_))
+  val hitVec = readResult.map(res => res._1)
+  val missVec = readResult.map(res => res._2)
+  val pfHitVecVec = readResult.map(res => res._3)
+  val multiHitVec = readResult.map(res => res._4)
+  val hasMissReq = Cat(missVec).orR
+
   // ptw
   val state_idle :: state_wait :: Nil = Enum(2)
   val state = RegInit(state_idle)
 
   ptw <> DontCare // TODO: need check it
-  ptw.req.valid := ParallelOR(miss).asBool && state===state_idle && !sfence.valid
+  ptw.req.valid := hasMissReq && state===state_idle && !sfence.valid
   ptw.resp.ready := state===state_wait
 
   // val ptwReqSeq = Wire(Seq.fill(Width)(new comBundle()))
   val ptwReqSeq = Seq.fill(Width)(Wire(new comBundle()))
   for (i <- 0 until Width) {
-    ptwReqSeq(i).valid := valid(i) && miss(i)
-    ptwReqSeq(i).roqIdx := req(i).bits.roqIdx
-    ptwReqSeq(i).bits.vpn := reqAddr(i).vpn
+    ptwReqSeq(i).valid := ((if (isDtlb) RegNext(valid(i)) else valid(i)) && missVec(i))
+    ptwReqSeq(i).roqIdx := (if (isDtlb) RegNext(req(i).bits.roqIdx) else req(i).bits.roqIdx)
+    ptwReqSeq(i).bits.vpn := (if (isDtlb) RegNext(reqAddr(i).vpn) else reqAddr(i).vpn)
   }
   ptw.req.bits := Compare(ptwReqSeq).bits
 
   switch (state) {
     is (state_idle) {
-      when (ParallelOR(miss).asBool && ptw.req.fire()) {
+      when (hasMissReq && ptw.req.fire()) {
         state := state_wait
       }
       assert(!ptw.resp.valid)
@@ -345,7 +361,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
   }
 
   // reset pf when pf hit
-  val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVec(i)).asUInt, 0.U) })
+  val pfHitReset = ParallelOR(widthMap{i => Mux(resp(i).fire(), VecInit(pfHitVecVec(i)).asUInt, 0.U) })
   val pfHitRefill = ParallelOR(pfHitReset.asBools)
 
   // refill
@@ -409,15 +425,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
     ExcitingUtils.addSource(valid(1)/* && vmEnable*/, "perfCntDtlbReqCnt1", Perf)
     ExcitingUtils.addSource(valid(2)/* && vmEnable*/, "perfCntDtlbReqCnt2", Perf)
     ExcitingUtils.addSource(valid(3)/* && vmEnable*/, "perfCntDtlbReqCnt3", Perf)
-    ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntDtlbMissCnt0", Perf)
-    ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && miss(1), "perfCntDtlbMissCnt1", Perf)
-    ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && miss(2), "perfCntDtlbMissCnt2", Perf)
-    ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && miss(3), "perfCntDtlbMissCnt3", Perf)
+    ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntDtlbMissCnt0", Perf)
+    ExcitingUtils.addSource(valid(1)/* && vmEnable*/ && missVec(1), "perfCntDtlbMissCnt1", Perf)
+    ExcitingUtils.addSource(valid(2)/* && vmEnable*/ && missVec(2), "perfCntDtlbMissCnt2", Perf)
+    ExcitingUtils.addSource(valid(3)/* && vmEnable*/ && missVec(3), "perfCntDtlbMissCnt3", Perf)
   }
 
   if (!env.FPGAPlatform && !isDtlb) {
     ExcitingUtils.addSource(valid(0)/* && vmEnable*/, "perfCntItlbReqCnt0", Perf)
-    ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && miss(0), "perfCntItlbMissCnt0", Perf)
+    ExcitingUtils.addSource(valid(0)/* && vmEnable*/ && missVec(0), "perfCntItlbMissCnt0", Perf)
   }
 
   // Log
@@ -428,7 +444,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
   XSDebug(sfence.valid, p"Sfence: ${sfence}\n")
   XSDebug(ParallelOR(valid)|| ptw.resp.valid, p"CSR: ${csr}\n")
-  XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hit).asUInt)} miss:${Binary(VecInit(miss).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
+  XSDebug(ParallelOR(valid) || ptw.resp.valid, p"vmEnable:${vmEnable} hit:${Binary(VecInit(hitVec).asUInt)} miss:${Binary(VecInit(missVec).asUInt)} v:${Hexadecimal(v)} pf:${Hexadecimal(pf)} state:${state}\n")
   XSDebug(ptw.req.fire(), p"PTW req:${ptw.req.bits}\n")
   XSDebug(ptw.resp.valid, p"PTW resp:${ptw.resp.bits} (v:${ptw.resp.valid}r:${ptw.resp.ready}) \n")
@@ -437,7 +453,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
   //   assert((hit(i)&pfArray(i))===false.B, "hit(%d):%d pfArray(%d):%d v:0x%x pf:0x%x", i.U, hit(i), i.U, pfArray(i), v, pf)
   // }
   // for(i <- 0 until Width) {
-  //   XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVec:0x${Hexadecimal(VecInit(pfHitVec(i)).asUInt)}\n")
+  //   XSDebug(multiHit, p"vpn:0x${Hexadecimal(reqAddr(i).vpn)} hitVec:0x${Hexadecimal(VecInit(hitVec(i)).asUInt)} pfHitVecVec:0x${Hexadecimal(VecInit(pfHitVecVec(i)).asUInt)}\n")
   // }
   // for(i <- 0 until TlbEntrySize) {
   //   XSDebug(multiHit, p"entry(${i.U}): v:${v(i)} ${entry(i)}\n")
diff --git a/src/main/scala/xiangshan/cache/icache.scala b/src/main/scala/xiangshan/cache/icache.scala
index 3637e12d89d71943c7f3f0f043ec396b69f1bdca..f20097119cbf083f0af6433c197200a4792ce375 100644
--- a/src/main/scala/xiangshan/cache/icache.scala
+++ b/src/main/scala/xiangshan/cache/icache.scala
@@ -41,7 +41,7 @@ trait HasICacheParameters extends HasL1CacheParameters {
   // icache Queue
   val groupAlign = log2Up(cacheParams.blockBytes)
   def groupPC(pc: UInt): UInt = Cat(pc(PAddrBits-1, groupAlign), 0.U(groupAlign.W))
-  
+
   //ECC encoding
   def encRowBits = cacheParams.dataCode.width(rowBits)
   def encTagBits = cacheParams.tagCode.width(tagBits)
@@ -178,7 +178,7 @@ class ICacheMetaArray extends ICachArray
 
   val metaArray = Module(new SRAMTemplate(UInt(encTagBits.W), set=nSets, way=nWays, shouldReset = true))
 
-  //read 
+  //read
   metaArray.io.r.req.valid := io.read.valid
   io.read.ready := metaArray.io.r.req.ready
   io.write.ready := DontCare
@@ -206,7 +206,7 @@ class ICacheDataArray extends ICachArray
 
   val dataArray = List.fill(blockWords){ Module(new SRAMTemplate(UInt(encRowBits.W), set=nSets, way = nWays))}
 
-  //read 
+  //read
   //do ECC decoding after way choose
   for(b <- 0 until blockWords){
     dataArray(b).io.r.req.valid := io.read.valid
@@ -225,8 +225,8 @@ class ICacheDataArray extends ICachArray
 
   for(b <- 0 until blockWords){
     dataArray(b).io.w.req.valid := io.write.valid
-    dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx, 
-                                      data=write_data_encoded(b), 
+    dataArray(b).io.w.req.bits.apply( setIdx=write.virIdx,
+                                      data=write_data_encoded(b),
                                       waymask=write.waymask)
   }
 
@@ -273,7 +273,7 @@ class ICache extends ICacheModule
   val metaArray = Module(new ICacheMetaArray)
   val dataArray = Module(new ICacheDataArray)
   // 256-bit valid
-  val validArray = RegInit(0.U((nSets * nWays).W)) 
+  val validArray = RegInit(0.U((nSets * nWays).W))
 
   //----------------------------
   //    Stage 1
   //----------------------------
@@ -283,7 +283,7 @@ class ICache extends ICacheModule
   s1_req_mask := io.req.bits.mask
   s2_ready := WireInit(false.B)
   s1_fire := s1_valid && (s2_ready || io.flush(0))
-  
+
   // SRAM(Meta and Data) read request
   val s1_idx = get_idx(s1_req_pc)
 
@@ -294,8 +294,8 @@ class ICache extends ICacheModule
   XSDebug("[Stage 1] v : r : f  (%d  %d  %d)  request pc: 0x%x  mask: %b\n",s1_valid,s2_ready,s1_fire,s1_req_pc,s1_req_mask)
   XSDebug("[Stage 1] index: %d\n",s1_idx)
-  
-  
+
+
   //----------------------------
   //    Stage 2
   //----------------------------
@@ -325,9 +325,9 @@ class ICache extends ICacheModule
   val invalidVec = ~validMeta
   val hasInvalidWay = invalidVec.orR
   val refillInvalidWaymask = PriorityMask(invalidVec)
-  
+
   val waymask = Mux(s2_hit, hitVec.asUInt, Mux(hasInvalidWay, refillInvalidWaymask, victimWayMask))
-  
+
   s2_hit := ParallelOR(hitVec) || s2_tlb_resp.excp.pf.instr || s2_access_fault
   s2_ready := s2_fire || !s2_valid || io.flush(0)
 
@@ -336,8 +336,8 @@ class ICache extends ICacheModule
   XSDebug(p"[Stage 2] tlb resp: v ${io.tlb.resp.valid} r ${io.tlb.resp.ready} ${s2_tlb_resp}\n")
   XSDebug("[Stage 2] tag: %x  hit:%d\n",s2_tag,s2_hit)
   XSDebug("[Stage 2] validMeta: %b  victimWayMaks:%b   invalidVec:%b    hitVec:%b    waymask:%b \n",validMeta,victimWayMask,invalidVec.asUInt,hitVec.asUInt,waymask.asUInt)
-  
-  
+
+
   //----------------------------
   //    Stage 3
   //----------------------------
@@ -351,16 +351,16 @@ class ICache extends ICacheModule
   val s3_access_fault = RegEnable(s2_access_fault,init=false.B,enable=s2_fire)
   when(io.flush(1)) { s3_valid := false.B }
   .elsewhen(s2_fire) { s3_valid := s2_valid }
-  .elsewhen(io.resp.fire()) { s3_valid := false.B } 
+  .elsewhen(io.resp.fire()) { s3_valid := false.B }
   val refillDataReg = Reg(Vec(refillCycles,UInt(beatBits.W)))
 
-  // icache hit 
+  // icache hit
   // data ECC encoding
   // simply cut the hit cacheline
   val dataHitWay = VecInit(s3_data.map(b => Mux1H(s3_wayMask,b).asUInt))
   val outPacket =  Wire(UInt((FetchWidth * 32).W))
-  val dataHitWayDecoded = VecInit( 
-    (0 until blockWords).map{r => 
+  val dataHitWayDecoded = VecInit(
+    (0 until blockWords).map{r =>
       val row = dataHitWay.asTypeOf(Vec(blockWords,UInt(encRowBits.W)))(r)
       val decodedRow = cacheParams.dataCode.decode(row)
       assert(!(s3_valid && s3_hit && decodedRow.uncorrectable))
@@ -368,7 +368,7 @@ class ICache extends ICacheModule
     }
   )
   outPacket := cutHelper(dataHitWay,s3_req_pc(5,1).asUInt,s3_req_mask.asUInt)
-  
+
   //ICache MissQueue
   val icacheMissQueue = Module(new IcacheMissQueue)
   val blocking = RegInit(false.B)
@@ -394,9 +394,9 @@ class ICache extends ICacheModule
   //refill write
   val metaWriteReq = icacheMissQueue.io.meta_write.bits
   icacheMissQueue.io.meta_write.ready := true.B
-  metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid 
-  metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag, 
-                                idx=metaWriteReq.meta_write_idx, 
+  metaArray.io.write.valid := icacheMissQueue.io.meta_write.valid
+  metaArray.io.write.bits.apply(tag=metaWriteReq.meta_write_tag,
+                                idx=metaWriteReq.meta_write_idx,
                                 waymask=metaWriteReq.meta_write_waymask)
 
   val wayNum = OHToUInt(metaWriteReq.meta_write_waymask.asTypeOf(Vec(nWays,Bool())))
@@ -408,7 +408,7 @@ class ICache extends ICacheModule
   //data
   icacheMissQueue.io.refill.ready := true.B
   val refillReq = icacheMissQueue.io.refill.bits
-  dataArray.io.write.valid := icacheMissQueue.io.refill.valid 
+  dataArray.io.write.valid := icacheMissQueue.io.refill.valid
   dataArray.io.write.bits.apply(data=refillReq.refill_data,
                                 idx=refillReq.refill_idx,
                                 waymask=refillReq.refill_waymask)
@@ -440,7 +440,7 @@ class ICache extends ICacheModule
   //----------------------------
   //icache request
   io.req.ready := metaArray.io.read.ready && dataArray.io.read.ready && s2_ready
-  
+
   //icache response: to pre-decoder
   io.resp.valid := s3_valid && (s3_hit || icacheMissQueue.io.resp.valid)
   io.resp.bits.data := Mux((s3_valid && s3_hit),outPacket,refillDataOut)
@@ -456,7 +456,7 @@ class ICache extends ICacheModule
   io.tlb.req.bits.cmd := TlbCmd.exec
   io.tlb.req.bits.roqIdx := DontCare
   io.tlb.req.bits.debug.pc := s2_req_pc
-  
+
   //To L1 plus
   io.mem_acquire <> icacheMissQueue.io.mem_acquire
   icacheMissQueue.io.mem_grant <> io.mem_grant
diff --git a/src/main/scala/xiangshan/cache/ldu.scala b/src/main/scala/xiangshan/cache/ldu.scala
index 341b067764aa276eb440defb3dea534f82b0cbcb..2182b557448e568c0fd92b035f8cd417a71c85de 100644
--- a/src/main/scala/xiangshan/cache/ldu.scala
+++ b/src/main/scala/xiangshan/cache/ldu.scala
@@ -128,7 +128,7 @@ class LoadPipe extends DCacheModule
   val s2_data_word = s2_data_words(s2_word_idx)
   val s2_decoded = cacheParams.dataCode.decode(s2_data_word)
   val s2_data_word_decoded = s2_decoded.corrected
-  assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
+  // assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
 
   val resp = Wire(ValidIO(new DCacheWordResp))
 
diff --git a/src/main/scala/xiangshan/frontend/BPU.scala b/src/main/scala/xiangshan/frontend/BPU.scala
index 9b2cbeea3da25beec503812fd6f71da6d4ade729..e9fec342b21388942cf3fadc793a34acb1b9ddb0 100644
--- a/src/main/scala/xiangshan/frontend/BPU.scala
+++ b/src/main/scala/xiangshan/frontend/BPU.scala
@@ -11,7 +11,7 @@ import chisel3.experimental.chiselName
 trait HasBPUParameter extends HasXSParameter {
   val BPUDebug = true
   val EnableCFICommitLog = true
-  val EnbaleCFIPredLog = false
+  val EnbaleCFIPredLog = true
   val EnableBPUTimeRecord = EnableCFICommitLog || EnbaleCFIPredLog
 }
 
@@ -368,6 +368,9 @@ class BPUStage3 extends BPUStage {
       XSDebug(io.inFire && s3IO.predecode.mask(i), "predecode(%d): brType:%d, br:%d, jal:%d, jalr:%d, call:%d, ret:%d, RVC:%d, excType:%d\n",
         i.U, p.brType, p.isBr, p.isJal, p.isJalr, p.isCall, p.isRet, p.isRVC, p.excType)
     }
+    XSDebug(p"brs:${Binary(brs)} jals:${Binary(jals)} jalrs:${Binary(jalrs)} calls:${Binary(calls)} rets:${Binary(rets)} rvcs:${Binary(RVCs)}\n")
+    XSDebug(p"callIdx:${callIdx} retIdx:${retIdx}\n")
+    XSDebug(p"brPred:${Binary(brPred)} loopRes:${Binary(loopRes)} prevHalfTaken:${prevHalfTaken} brTakens:${Binary(brTakens)}\n")
   }
 
   if (EnbaleCFIPredLog) {
@@ -560,14 +563,6 @@ class BPU extends BaseBPU {
   s2.io.debug_hist := s2_hist
   s3.io.debug_hist := s3_hist
 
-  // val s1_histPtr = RegEnable(io.in.histPtr, enable=s1_fire)
-  // val s2_histPtr = RegEnable(s1_histPtr, enable=s2_fire)
-  // val s3_histPtr = RegEnable(s2_histPtr, enable=s3_fire)
-
-  // s1.io.debug_histPtr := s1_histPtr
-  // s2.io.debug_histPtr := s2_histPtr
-  // s3.io.debug_histPtr := s3_histPtr
-
   //**********************Stage 2****************************//
   tage.io.flush := io.flush(1) // TODO: fix this
   tage.io.pc.valid := s2_fire
diff --git a/src/main/scala/xiangshan/frontend/Bim.scala b/src/main/scala/xiangshan/frontend/Bim.scala
index d449f146b0daa98d153dbfb9b603352abbcb763f..f9a1461028c2eec6153c9df1bef827e6c80cfffe 100644
--- a/src/main/scala/xiangshan/frontend/Bim.scala
+++ b/src/main/scala/xiangshan/frontend/Bim.scala
@@ -35,8 +35,8 @@ class BIM extends BasePredictor with BimParams {
   val bimAddr = new TableAddr(log2Up(BimSize), BimBanks)
 
-  val bankAlignedPC = bankAligned(io.pc.bits)
-  val pcLatch = RegEnable(bankAlignedPC, io.pc.valid)
+  val if1_bankAlignedPC = bankAligned(io.pc.bits)
+  val if2_pc = RegEnable(if1_bankAlignedPC, io.pc.valid)
 
   val bim = List.fill(BimBanks) {
     Module(new SRAMTemplate(UInt(2.W), set = nRows, shouldReset = false, holdRead = true))
@@ -48,34 +48,34 @@ class BIM extends BasePredictor with BimParams {
   when (resetRow === (nRows-1).U) { doing_reset := false.B }
 
   // this bank means cache bank
-  val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
+  val if1_startsAtOddBank = bankInGroup(if1_bankAlignedPC)(0)
 
-  val realMask = Mux(startsAtOddBank,
+  val if1_realMask = Mux(if1_startsAtOddBank,
                      Cat(io.inMask(bankWidth-1,0), io.inMask(PredictWidth-1, bankWidth)),
                      io.inMask)
 
-  val isInNextRow = VecInit((0 until BimBanks).map(i => Mux(startsAtOddBank, (i < bankWidth).B, false.B)))
+  val if1_isInNextRow = VecInit((0 until BimBanks).map(i => Mux(if1_startsAtOddBank, (i < bankWidth).B, false.B)))
 
-  val baseRow = bimAddr.getBankIdx(bankAlignedPC)
+  val if1_baseRow = bimAddr.getBankIdx(if1_bankAlignedPC)
 
-  val realRow = VecInit((0 until BimBanks).map(b => Mux(isInNextRow(b), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow)))
+  val if1_realRow = VecInit((0 until BimBanks).map(b => Mux(if1_isInNextRow(b), (if1_baseRow+1.U)(log2Up(nRows)-1, 0), if1_baseRow)))
 
-  val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid)))
+  val if2_realRow = VecInit(if1_realRow.map(RegEnable(_, enable=io.pc.valid)))
 
   for (b <- 0 until BimBanks) {
-    bim(b).io.r.req.valid := realMask(b) && io.pc.valid
-    bim(b).io.r.req.bits.setIdx := realRow(b)
+    bim(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
+    bim(b).io.r.req.bits.setIdx := if1_realRow(b)
   }
 
-  val bimRead = VecInit(bim.map(_.io.r.resp.data(0)))
+  val if2_bimRead = VecInit(bim.map(_.io.r.resp.data(0)))
 
-  val startsAtOddBankLatch = bankInGroup(pcLatch)(0)
+  val if2_startsAtOddBank = bankInGroup(if2_pc)(0)
 
   for (b <- 0 until BimBanks) {
-    val realBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
-                    else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
-    val ctr = bimRead(realBank)
+    val realBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, (b+bankWidth).U, b.U)
+                    else Mux(if2_startsAtOddBank, (b-bankWidth).U, b.U))
+    val ctr = if2_bimRead(realBank)
     io.resp.ctrs(b)  := ctr
     io.meta.ctrs(b)  := ctr
   }
diff --git a/src/main/scala/xiangshan/frontend/Btb.scala b/src/main/scala/xiangshan/frontend/Btb.scala
index 2eb400ec4bb991f30784e8bb63ea7ef8e7fc4e0c..1ae94b05f53ba5ed12c86c844179c3441a1a92ea 100644
--- a/src/main/scala/xiangshan/frontend/Btb.scala
+++ b/src/main/scala/xiangshan/frontend/Btb.scala
@@ -72,9 +72,9 @@ class BTB extends BasePredictor with BTBParams{
   override val io = IO(new BTBIO)
   val btbAddr = new TableAddr(log2Up(BtbSize/BtbWays), BtbBanks)
 
-  val bankAlignedPC = bankAligned(io.pc.bits)
+  val if1_bankAlignedPC = bankAligned(io.pc.bits)
 
-  val pcLatch = RegEnable(bankAlignedPC, io.pc.valid)
+  val if2_pc = RegEnable(if1_bankAlignedPC, io.pc.valid)
 
   val data = List.fill(BtbWays) {
     List.fill(BtbBanks) {
@@ -91,61 +91,61 @@ class BTB extends BasePredictor with BTBParams{
   // BTB read requests
 
   // this bank means cache bank
-  val startsAtOddBank = bankInGroup(bankAlignedPC)(0)
+  val if1_startsAtOddBank = bankInGroup(if1_bankAlignedPC)(0)
 
-  val baseBank = btbAddr.getBank(bankAlignedPC)
+  val if1_baseBank = btbAddr.getBank(if1_bankAlignedPC)
 
-  val realMask = Mux(startsAtOddBank,
+  val if1_realMask = Mux(if1_startsAtOddBank,
                      Cat(io.inMask(bankWidth-1,0), io.inMask(PredictWidth-1, bankWidth)),
                      io.inMask)
 
-  val realMaskLatch = RegEnable(realMask, io.pc.valid)
+  val if2_realMask = RegEnable(if1_realMask, io.pc.valid)
 
-  val isInNextRow = VecInit((0 until BtbBanks).map(i => Mux(startsAtOddBank, (i < bankWidth).B, false.B)))
+  val if1_isInNextRow = VecInit((0 until BtbBanks).map(i => Mux(if1_startsAtOddBank, (i < bankWidth).B, false.B)))
 
-  val baseRow = btbAddr.getBankIdx(bankAlignedPC)
+  val if1_baseRow = btbAddr.getBankIdx(if1_bankAlignedPC)
 
-  val nextRowStartsUp = baseRow.andR
+  val if1_nextRowStartsUp = if1_baseRow.andR
 
-  val realRow = VecInit((0 until BtbBanks).map(b => Mux(isInNextRow(b), (baseRow+1.U)(log2Up(nRows)-1, 0), baseRow)))
+  val if1_realRow = VecInit((0 until BtbBanks).map(b => Mux(if1_isInNextRow(b), (if1_baseRow+1.U)(log2Up(nRows)-1, 0), if1_baseRow)))
 
-  val realRowLatch = VecInit(realRow.map(RegEnable(_, enable=io.pc.valid)))
+  val if2_realRow = VecInit(if1_realRow.map(RegEnable(_, enable=io.pc.valid)))
 
   for (w <- 0 until BtbWays) {
     for (b <- 0 until BtbBanks) {
-      meta(w)(b).io.r.req.valid := realMask(b) && io.pc.valid
-      meta(w)(b).io.r.req.bits.setIdx := realRow(b)
-      data(w)(b).io.r.req.valid := realMask(b) && io.pc.valid
-      data(w)(b).io.r.req.bits.setIdx := realRow(b)
+      meta(w)(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
+      meta(w)(b).io.r.req.bits.setIdx := if1_realRow(b)
+      data(w)(b).io.r.req.valid := if1_realMask(b) && io.pc.valid
+      data(w)(b).io.r.req.bits.setIdx := if1_realRow(b)
     }
   }
 
   for (b <- 0 to 1) {
     edata(b).io.r.req.valid := io.pc.valid
-    val row = if (b == 0) { Mux(startsAtOddBank, realRow(bankWidth), realRow(0)) }
-              else { Mux(startsAtOddBank, realRow(0), realRow(bankWidth))}
+    val row = if (b == 0) { Mux(if1_startsAtOddBank, if1_realRow(bankWidth), if1_realRow(0)) }
+              else { Mux(if1_startsAtOddBank, if1_realRow(0), if1_realRow(bankWidth))}
     edata(b).io.r.req.bits.setIdx := row
   }
 
   // Entries read from SRAM
-  val metaRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => meta(w)(b).io.r.resp.data(0)))))
-  val dataRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => data(w)(b).io.r.resp.data(0)))))
-  val edataRead = VecInit((0 to 1).map(i => edata(i).io.r.resp.data(0)))
+  val if2_metaRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => meta(w)(b).io.r.resp.data(0)))))
+  val if2_dataRead = VecInit((0 until BtbWays).map(w => VecInit((0 until BtbBanks).map( b => data(w)(b).io.r.resp.data(0)))))
+  val if2_edataRead = VecInit((0 to 1).map(i => edata(i).io.r.resp.data(0)))
 
-  val baseBankLatch = btbAddr.getBank(pcLatch)
-  val startsAtOddBankLatch = bankInGroup(pcLatch)(0)
-  val baseTag = btbAddr.getTag(pcLatch)
+  val if2_baseBank = btbAddr.getBank(if2_pc)
+  val if2_startsAtOddBank = bankInGroup(if2_pc)(0)
+  val if2_baseTag = btbAddr.getTag(if2_pc)
 
-  val tagIncremented = VecInit((0 until BtbBanks).map(b => RegEnable(isInNextRow(b.U) && nextRowStartsUp, io.pc.valid)))
-  val realTags = VecInit((0 until BtbBanks).map(b => Mux(tagIncremented(b), baseTag + 1.U, baseTag)))
+  val if2_tagIncremented = VecInit((0 until BtbBanks).map(b => RegEnable(if1_isInNextRow(b.U) && if1_nextRowStartsUp, io.pc.valid)))
+  val if2_realTags = VecInit((0 until BtbBanks).map(b => Mux(if2_tagIncremented(b), if2_baseTag + 1.U, if2_baseTag)))
 
-  val totalHits = VecInit((0 until BtbBanks).map( b =>
+  val if2_totalHits = VecInit((0 until BtbBanks).map( b =>
     VecInit((0 until BtbWays).map( w =>
       // This should correspond to the real mask from last valid cycle!
-      metaRead(w)(b).tag === realTags(b) && metaRead(w)(b).valid && realMaskLatch(b)
+      if2_metaRead(w)(b).tag === if2_realTags(b) && if2_metaRead(w)(b).valid && if2_realMask(b)
     ))
   ))
-  val bankHits = VecInit(totalHits.map(_.reduce(_||_)))
-  val bankHitWays = VecInit(totalHits.map(PriorityEncoder(_)))
+  val if2_bankHits = VecInit(if2_totalHits.map(_.reduce(_||_)))
+  val if2_bankHitWays = VecInit(if2_totalHits.map(PriorityEncoder(_)))
 
 
   def allocWay(valids: UInt, meta_tags: UInt, req_tag: UInt) = {
@@ -167,30 +167,30 @@ class BTB extends BasePredictor with BTBParams{
     }
   }
   val allocWays = VecInit((0 until BtbBanks).map(b =>
-    allocWay(VecInit(metaRead.map(w => w(b).valid)).asUInt,
-             VecInit(metaRead.map(w => w(b).tag)).asUInt,
-             realTags(b))))
+    allocWay(VecInit(if2_metaRead.map(w => w(b).valid)).asUInt,
+             VecInit(if2_metaRead.map(w => w(b).tag)).asUInt,
+             if2_realTags(b))))
 
   val writeWay = VecInit((0 until BtbBanks).map(
-    b => Mux(bankHits(b), bankHitWays(b), allocWays(b))
+    b => Mux(if2_bankHits(b), if2_bankHitWays(b), allocWays(b))
   ))
 
   for (b <- 0 until BtbBanks) {
-    val realBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
-                    else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
-    val meta_entry = metaRead(bankHitWays(realBank))(realBank)
-    val data_entry = dataRead(bankHitWays(realBank))(realBank)
-    val edataBank = (if (b < bankWidth) Mux(startsAtOddBankLatch, 1.U, 0.U)
-                     else Mux(startsAtOddBankLatch, 0.U, 1.U))
+    val realBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, (b+bankWidth).U, b.U)
+                    else Mux(if2_startsAtOddBank, (b-bankWidth).U, b.U))
+    val meta_entry = if2_metaRead(if2_bankHitWays(realBank))(realBank)
+    val data_entry = if2_dataRead(if2_bankHitWays(realBank))(realBank)
+    val edataBank = (if (b < bankWidth) Mux(if2_startsAtOddBank, 1.U, 0.U)
+                     else Mux(if2_startsAtOddBank, 0.U, 1.U))
     // Use real pc to calculate the target
-    io.resp.targets(b) := Mux(data_entry.extended, edataRead(edataBank), (pcLatch.asSInt + (b << 1).S + data_entry.offset).asUInt)
-    io.resp.hits(b)  := bankHits(realBank)
+    io.resp.targets(b) := Mux(data_entry.extended, if2_edataRead(edataBank), (if2_pc.asSInt + (b << 1).S + data_entry.offset).asUInt)
+    io.resp.hits(b)  := if2_bankHits(realBank)
     io.resp.types(b) := meta_entry.btbType
     io.resp.isRVC(b) := meta_entry.isRVC
     io.meta.writeWay(b) := writeWay(realBank)
-    io.meta.hitJal(b)   := bankHits(realBank) && meta_entry.btbType === BTBtype.J
+    io.meta.hitJal(b)   := if2_bankHits(realBank) && meta_entry.btbType === BTBtype.J
   }
 
   def pdInfoToBTBtype(pd: PreDecodeInfo) = {
@@ -244,35 +244,35 @@ class BTB extends BasePredictor with BTBParams{
 
   XSDebug("isInNextRow: ")
   (0 until BtbBanks).foreach(i => {
-    XSDebug(false, true.B, "%d ", isInNextRow(i))
+    XSDebug(false, true.B, "%d ", if1_isInNextRow(i))
     if (i == BtbBanks-1) { XSDebug(false, true.B, "\n") }
   })
 
   val validLatch = RegNext(io.pc.valid)
-  XSDebug(io.pc.valid, "read: pc=0x%x, baseBank=%d, realMask=%b\n", bankAlignedPC, baseBank, realMask)
+  XSDebug(io.pc.valid, "read: pc=0x%x, baseBank=%d, realMask=%b\n", if1_bankAlignedPC, if1_baseBank, if1_realMask)
   XSDebug(validLatch, "read_resp: pc=0x%x, readIdx=%d-------------------------------\n",
-    pcLatch, btbAddr.getIdx(pcLatch))
+    if2_pc, btbAddr.getIdx(if2_pc))
   if (debug_verbose) {
     for (i <- 0 until BtbBanks){
       for (j <- 0 until BtbWays) {
         XSDebug(validLatch, "read_resp[w=%d][b=%d][r=%d] is valid(%d) mask(%d), tag=0x%x, offset=0x%x, type=%d, isExtend=%d, isRVC=%d\n",
-          j.U, i.U, realRowLatch(i), metaRead(j)(i).valid, realMaskLatch(i), metaRead(j)(i).tag, dataRead(j)(i).offset, metaRead(j)(i).btbType, dataRead(j)(i).extended, metaRead(j)(i).isRVC)
+          j.U, i.U, if2_realRow(i), if2_metaRead(j)(i).valid, if2_realMask(i), if2_metaRead(j)(i).tag, if2_dataRead(j)(i).offset, if2_metaRead(j)(i).btbType, if2_dataRead(j)(i).extended, if2_metaRead(j)(i).isRVC)
       }
     }
   }
 
   // e.g: baseBank == 5 => (5, 6,..., 15, 0, 1, 2, 3, 4)
-  val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (baseBankLatch +& b.U)(log2Up(BtbBanks)-1,0)))
+  val bankIdxInOrder = VecInit((0 until BtbBanks).map(b => (if2_baseBank +& b.U)(log2Up(BtbBanks)-1,0)))
 
   for (i <- 0 until BtbBanks) {
     val idx = bankIdxInOrder(i)
-    XSDebug(validLatch && bankHits(bankIdxInOrder(i)), "resp(%d): bank(%d) hits, tgt=%x, isRVC=%d, type=%d\n",
+    XSDebug(validLatch && if2_bankHits(bankIdxInOrder(i)), "resp(%d): bank(%d) hits, tgt=%x, isRVC=%d, type=%d\n",
       i.U, idx, io.resp.targets(i), io.resp.isRVC(i), io.resp.types(i))
   }
 
   XSDebug(updateValid, "update_req: cycle=%d, pc=0x%x, target=0x%x, misPred=%d, offset=%x, extended=%d, way=%d, bank=%d, row=0x%x\n",
     u.brInfo.debug_btb_cycle, u.pc, new_target, u.isMisPred, new_offset, new_extended, updateWay, updateBankIdx, updateRow)
   for (i <- 0 until BtbBanks) {
     // Conflict when not hit and allocating a valid entry
-    val conflict = metaRead(allocWays(i))(i).valid && !bankHits(i)
+    val conflict = if2_metaRead(allocWays(i))(i).valid && !if2_bankHits(i)
     XSDebug(conflict, "bank(%d) is trying to allocate a valid way(%d)\n", i.U, allocWays(i))
     // There is another circumstance when a branch is on its way to update while another
     // branch chose the same way to udpate, then after the first branch is wrote in,
diff --git a/src/main/scala/xiangshan/frontend/Frontend.scala b/src/main/scala/xiangshan/frontend/Frontend.scala
index bfc58954129409b4922c52e6dd952b4f01c6654f..bae1c479d4e82a81492a35b1e80e6700daaf3d85 100644
--- a/src/main/scala/xiangshan/frontend/Frontend.scala
+++ b/src/main/scala/xiangshan/frontend/Frontend.scala
@@ -9,10 +9,10 @@ import xiangshan.cache._
 
 class Frontend extends XSModule {
   val io = IO(new Bundle() {
-    val icacheReq = DecoupledIO(new ICacheReq)
-    val icacheResp = Flipped(DecoupledIO(new ICacheResp))
-    val icacheFlush = Output(UInt(2.W))
-    val icacheToTlb = Flipped(new BlockTlbRequestIO)
+    val icacheMemAcq = DecoupledIO(new L1plusCacheReq)
+    val icacheMemGrant = Flipped(DecoupledIO(new L1plusCacheResp))
+    val l1plusFlush = Output(Bool())
+    val fencei = Input(Bool())
     val ptw = new TlbPtwIO
     val backend = new FrontendToBackendIO
     val sfence = Input(new SfenceBundle)
@@ -21,6 +21,7 @@ class Frontend extends XSModule {
 
   val ifu = Module(new IFU)
   val ibuffer = Module(new Ibuffer)
+  val icache = Module(new ICache)
 
   val needFlush = io.backend.redirect.valid
 
@@ -29,12 +30,16 @@ class Frontend extends XSModule {
   ifu.io.inOrderBrInfo <> io.backend.inOrderBrInfo
   ifu.io.outOfOrderBrInfo <> io.backend.outOfOrderBrInfo
   //icache
-  io.icacheReq <> ifu.io.icacheReq
-  io.icacheFlush <> ifu.io.icacheFlush
-  ifu.io.icacheResp <> io.icacheResp
+  ifu.io.icacheResp <> icache.io.resp
+  icache.io.req <> ifu.io.icacheReq
+  icache.io.flush <> ifu.io.icacheFlush
+  icache.io.fencei := io.fencei
+  io.l1plusFlush := icache.io.l1plusflush
+  io.icacheMemAcq <> icache.io.mem_acquire
+  icache.io.mem_grant <> io.icacheMemGrant
   //itlb to ptw
   io.ptw <> TLB(
-    in = Seq(io.icacheToTlb),
+    in = Seq(icache.io.tlb),
     sfence = io.sfence,
     csr = io.tlbCsr,
     width = 1,
diff --git a/src/main/scala/xiangshan/frontend/IFU.scala b/src/main/scala/xiangshan/frontend/IFU.scala
index 75f9b526eeaff688f67f49221f6fa89a48728c5e..5924c593afcbb3388c538d71ca7772788a41d861 100644
--- a/src/main/scala/xiangshan/frontend/IFU.scala
+++ b/src/main/scala/xiangshan/frontend/IFU.scala
@@ -121,7 +121,7 @@ class IFU extends XSModule with HasIFUConst
 
   // val if2_newPtr, if3_newPtr, if4_newPtr = Wire(UInt(log2Up(ExtHistoryLength).W))
-  
+
   val if1_gh, if2_gh, if3_gh, if4_gh = Wire(new GlobalHistory)
   val if2_predicted_gh, if3_predicted_gh, if4_predicted_gh = Wire(new GlobalHistory)
   val final_gh = RegInit(0.U.asTypeOf(new GlobalHistory))
@@ -149,7 +149,7 @@ class IFU extends XSModule with HasIFUConst
   }
 
   val if2_bp = bpu.io.out(0)
-  
+
   // val if2_GHInfo = wrapGHInfo(if2_bp, if2_predHist)
   // if taken, bp_redirect should be true
   // when taken on half RVI, we suppress this redirect signal
@@ -159,14 +159,6 @@ class IFU extends XSModule with HasIFUConst
   }
   if2_predicted_gh := if2_gh.update(if2_bp.hasNotTakenBrs, if2_bp.takenOnBr)
 
-  // when (if2_fire && if2_GHInfo.shifted) {
-  //   val if2_newPtr = if2_GHInfo.newPtr()
-  //   updatePtr := true.B
-  //   newPtr := if2_newPtr
-  //   extHist(if2_newPtr) := if2_GHInfo.takenOnBr.asUInt
-  // }
-
-
   //********************** IF3 ****************************//
   val if3_valid = RegInit(init = false.B)
@@ -192,9 +184,9 @@ class IFU extends XSModule with HasIFUConst
   // val if4_prevHalfInstr = Wire(new PrevHalfInstr)
   // 32-bit instr crosses 2 pages, and the higher 16-bit triggers page fault
   val crossPageIPF = WireInit(false.B)
-  
+
   val if3_pendingPrevHalfInstr = if3_prevHalfInstr.valid
-  
+
   // the previous half of RVI instruction waits until it meets its last half
   val if3_prevHalfInstrMet = if3_pendingPrevHalfInstr && (if3_prevHalfInstr.pc + 2.U) === if3_pc && if3_valid
   // set to invalid once consumed or redirect from backend
@@ -230,7 +222,7 @@ class IFU extends XSModule with HasIFUConst
     // GHInfo from last pred does not corresponds with this packet
     // if3_ghInfoNotIdenticalRedirect
   )
-  
+
   val if3_target = WireInit(snpc(if3_pc))
 
   /*
   when (prevHalfMetRedirect) {
@@ -263,14 +255,14 @@ class IFU extends XSModule with HasIFUConst
   val if4_mask = RegEnable(icacheResp.mask, if3_fire)
   val if4_snpc = Mux(inLoop, if4_pc + (PopCount(if4_mask) << 1), snpc(if4_pc))
-  
+
   val if4_predHist = RegEnable(if3_predHist, enable=if3_fire)
   // wait until prevHalfInstr written into reg
   if4_ready := (if4_fire && !hasPrevHalfInstrReq || !if4_valid || if4_flush) && GTimer() > 500.U
   when (if4_flush)     { if4_valid := false.B }
   .elsewhen (if3_fire) { if4_valid := true.B }
   .elsewhen (if4_fire) { if4_valid := false.B }
-  
+
   val if4_bp = Wire(new BranchPrediction)
   if4_bp := bpu.io.out(2)
   if4_bp.takens := bpu.io.out(2).takens & if4_mask
@@ -294,7 +286,7 @@ class IFU extends XSModule with HasIFUConst
       if4_bp.targets(i) := if4_jal_tgts(i)
     }
   }
-  
+
   // we need this to tell BPU the prediction of prev half
   // because the prediction is with the start of each inst
   val if4_prevHalfInstr = RegInit(0.U.asTypeOf(new PrevHalfInstr))
@@ -361,11 +353,6 @@ class IFU extends XSModule with HasIFUConst
   when (if4_redirect) {
     if1_npc := if4_target
   }
-    // val if4_newPtr = if4_GHInfo.newPtr()
-    // updatePtr := true.B
-    // newPtr := if4_newPtr
-    // extHist(if4_newPtr) := if4_GHInfo.takenOnBr.asUInt
-  // }
 
   when (if4_fire) {
     final_gh := if4_predicted_gh
@@ -442,7 +429,7 @@ class IFU extends XSModule with HasIFUConst
   bpu.io.predecode.mask := if4_pd.mask
   bpu.io.predecode.lastHalf := if4_pd.lastHalf
   bpu.io.predecode.pd := if4_pd.pd
-  bpu.io.predecode.hasLastHalfRVI := if4_pc =/= if4_pd.pc(0)
+  bpu.io.predecode.hasLastHalfRVI := if4_prevHalfInstrMet
   bpu.io.realMask := if4_mask
   bpu.io.prevHalf := if4_prevHalfInstr
 
diff --git a/src/main/scala/xiangshan/frontend/RAS.scala b/src/main/scala/xiangshan/frontend/RAS.scala
index 8fcec24880aa70d7c65ad79687794be2683f88b4..e0185e9f711e01915e4337e4d1823e781e411a6a 100644
--- a/src/main/scala/xiangshan/frontend/RAS.scala
+++ b/src/main/scala/xiangshan/frontend/RAS.scala
@@ -50,6 +50,7 @@ class RAS extends BasePredictor
   }
 
   override val io = IO(new RASIO)
+  override val debug = true
 
   @chiselName
   class RASStack(val rasSize: Int) extends XSModule {
@@ -66,6 +67,11 @@ class RAS extends BasePredictor
       val copy_out_mem  = Output(Vec(rasSize, rasEntry()))
       val copy_out_sp   = Output(UInt(log2Up(rasSize).W))
     })
+    val debugIO = IO(new Bundle{
+      val write_entry = Output(rasEntry())
+      val alloc_new = Output(Bool())
+      val sp = Output(UInt(log2Up(rasSize).W))
+    })
 
     @chiselName
     class Stack(val size: Int) extends XSModule {
       val io = IO(new Bundle {
@@ -98,9 +104,13 @@ class RAS extends BasePredictor
     val alloc_new = io.new_addr =/= top_addr
     stack.wen := io.push_valid || io.pop_valid && top_ctr =/= 1.U
     stack.wIdx := Mux(io.pop_valid && top_ctr =/= 1.U, sp - 1.U, Mux(alloc_new, sp, sp - 1.U))
-    stack.wdata := Mux(io.pop_valid && top_ctr =/= 1.U,
-                       RASEntry(top_addr, top_ctr - 1.U),
-                       Mux(alloc_new, RASEntry(io.new_addr, 1.U), RASEntry(top_addr, top_ctr + 1.U)))
+    val write_addr = Mux(io.pop_valid && top_ctr =/= 1.U, top_addr, io.new_addr)
+    val write_ctr = Mux(io.pop_valid && top_ctr =/= 1.U, top_ctr - 1.U, Mux(alloc_new, 1.U, top_ctr + 1.U))
+    val write_entry = RASEntry(write_addr, write_ctr)
+    stack.wdata := write_entry
+    debugIO.write_entry := write_entry
+    debugIO.alloc_new := alloc_new
+    debugIO.sp := sp
 
     when (io.push_valid && alloc_new) {
       sp := sp + 1.U
@@ -138,7 +148,9 @@ class RAS extends BasePredictor
   // val commit_ras = Reg(Vec(RasSize, rasEntry()))
   // val commit_sp = RegInit(0.U(log2Up(RasSize).W))
 
-  val spec_ras = Module(new RASStack(RasSize)).io
+  val spec = Module(new RASStack(RasSize))
+  val spec_ras = spec.io
+
   val spec_push = WireInit(false.B)
   val spec_pop = WireInit(false.B)
@@ -153,7 +165,8 @@ class RAS extends BasePredictor
   spec_push := !spec_is_full && io.callIdx.valid && io.pc.valid
   spec_pop  := !spec_is_empty && io.is_ret && io.pc.valid
 
-  val commit_ras = Module(new RASStack(RasSize)).io
+  val commit = Module(new RASStack(RasSize))
+  val commit_ras = commit.io
+
   val commit_push = WireInit(false.B)
   val commit_pop = WireInit(false.B)
@@ -179,7 +192,7 @@ class RAS extends BasePredictor
   spec_ras.copy_valid := copy_next
   spec_ras.copy_in_mem := commit_ras.copy_out_mem
   spec_ras.copy_in_sp := commit_ras.copy_out_sp
-  commit_ras.copy_valid := DontCare
+  commit_ras.copy_valid := false.B
   commit_ras.copy_in_mem := DontCare
   commit_ras.copy_in_sp := DontCare
 
@@ -189,26 +202,28 @@ class RAS extends BasePredictor
   io.branchInfo.rasToqAddr := DontCare
 
   if (BPUDebug && debug) {
-    // XSDebug("----------------RAS(spec)----------------\n")
-    // XSDebug("  index       addr           ctr \n")
-    // for(i <- 0 until RasSize){
-    //   XSDebug("  (%d)   0x%x      %d",i.U,spec_ras(i).retAddr,spec_ras(i).ctr)
-    //   when(i.U === spec_sp){XSDebug(false,true.B,"  <----sp")}
-    //   XSDebug(false,true.B,"\n")
-    // }
-    // XSDebug("----------------RAS(commit)----------------\n")
-    // XSDebug("  index       addr           ctr \n")
-    // for(i <- 0 until RasSize){
-    //   XSDebug("  (%d)   0x%x      %d",i.U,commit_ras(i).retAddr,commit_ras(i).ctr)
-    //   when(i.U === commit_sp){XSDebug(false,true.B,"  <----sp")}
-    //   XSDebug(false,true.B,"\n")
-    // }
-
-    // XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_ras_write.retAddr,spec_ras_write.ctr,sepc_alloc_new,spec_sp.asUInt)
"(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_ras_write.retAddr,spec_ras_write.ctr,sepc_alloc_new,spec_sp.asUInt) - // XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target) - // XSDebug(commit_push, "(commit_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",commit_ras_write.retAddr,commit_ras_write.ctr,sepc_alloc_new,commit_sp.asUInt) - // XSDebug(commit_pop, "(commit_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target) - // XSDebug("copyValid:%d copyNext:%d \n",copy_valid,copy_next) + val spec_debug = spec.debugIO + val commit_debug = commit.debugIO + XSDebug("----------------RAS(spec)----------------\n") + XSDebug(" index addr ctr \n") + for(i <- 0 until RasSize){ + XSDebug(" (%d) 0x%x %d",i.U,spec_ras.copy_out_mem(i).retAddr,spec_ras.copy_out_mem(i).ctr) + when(i.U === spec_ras.copy_out_sp){XSDebug(false,true.B," <----sp")} + XSDebug(false,true.B,"\n") + } + XSDebug("----------------RAS(commit)----------------\n") + XSDebug(" index addr ctr \n") + for(i <- 0 until RasSize){ + XSDebug(" (%d) 0x%x %d",i.U,commit_ras.copy_out_mem(i).retAddr,commit_ras.copy_out_mem(i).ctr) + when(i.U === commit_ras.copy_out_sp){XSDebug(false,true.B," <----sp")} + XSDebug(false,true.B,"\n") + } + + XSDebug(spec_push, "(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",spec_new_addr,spec_debug.write_entry.ctr,spec_debug.alloc_new,spec_debug.sp.asUInt) + XSDebug(spec_pop, "(spec_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target) + XSDebug(commit_push, "(commit_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n",commit_new_addr,commit_debug.write_entry.ctr,commit_debug.alloc_new,commit_debug.sp.asUInt) + XSDebug(commit_pop, "(commit_ras)pop outValid:%d outAddr: 0x%x \n",io.out.valid,io.out.bits.target) + XSDebug("copyValid:%d copyNext:%d \n",copy_valid,copy_next) } diff --git a/src/main/scala/xiangshan/frontend/Tage.scala b/src/main/scala/xiangshan/frontend/Tage.scala index 1a4f21f38c6a231d7ab087f7bb60e8664d98e32d..89837258d5510fd20e9c937a472434583310092f 100644 --- a/src/main/scala/xiangshan/frontend/Tage.scala +++ b/src/main/scala/xiangshan/frontend/Tage.scala @@ -121,26 +121,26 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio val tageEntrySz = 1 + tagLen + TageCtrBits - val bankAlignedPC = bankAligned(io.req.bits.pc) + val if2_bankAlignedPC = bankAligned(io.req.bits.pc) // this bank means cache bank - val startsAtOddBank = bankInGroup(bankAlignedPC)(0) + val if2_startsAtOddBank = bankInGroup(if2_bankAlignedPC)(0) // use real address to index // val unhashed_idxes = VecInit((0 until TageBanks).map(b => ((io.req.bits.pc >> 1.U) + b.U) >> log2Up(TageBanks).U)) - val unhashed_idx = Wire(Vec(2, UInt((log2Ceil(nRows)+tagLen).W))) + val if2_unhashed_idx = Wire(Vec(2, UInt((log2Ceil(nRows)+tagLen).W))) // the first bank idx always correspond with pc - unhashed_idx(0) := io.req.bits.pc >> (1+log2Ceil(TageBanks)) + if2_unhashed_idx(0) := io.req.bits.pc >> (1+log2Ceil(TageBanks)) // when pc is at odd bank, the second bank is at the next idx - unhashed_idx(1) := unhashed_idx(0) + startsAtOddBank + if2_unhashed_idx(1) := if2_unhashed_idx(0) + if2_startsAtOddBank - // val idxes_and_tags = (0 until TageBanks).map(b => compute_tag_and_hash(unhashed_idxes(b.U), io.req.bits.hist)) - // val (idx, tag) = compute_tag_and_hash(unhashed_idx, io.req.bits.hist) - val idxes_and_tags = unhashed_idx.map(compute_tag_and_hash(_, io.req.bits.hist)) - // 
-  // val tags = VecInit(idxes_and_tags.map(_._2))
+  // val idxes_and_tags = (0 until TageBanks).map(b => compute_tag_and_hash(if2_unhashed_idxes(b.U), io.req.bits.hist))
+  // val (idx, tag) = compute_tag_and_hash(if2_unhashed_idx, io.req.bits.hist)
+  val if2_idxes_and_tags = if2_unhashed_idx.map(compute_tag_and_hash(_, io.req.bits.hist))
+  // val idxes = VecInit(if2_idxes_and_tags.map(_._1))
+  // val tags = VecInit(if2_idxes_and_tags.map(_._2))
 
-  val idxes_latch = RegEnable(VecInit(idxes_and_tags.map(_._1)), io.req.valid)
-  val tags_latch = RegEnable(VecInit(idxes_and_tags.map(_._2)), io.req.valid)
-  // and_tags_latch = RegEnable(idxes_and_tags, enable=io.req.valid)
+  val if3_idxes = RegEnable(VecInit(if2_idxes_and_tags.map(_._1)), io.req.valid)
+  val if3_tags = RegEnable(VecInit(if2_idxes_and_tags.map(_._2)), io.req.valid)
+  // and_if3_tags = RegEnable(if2_idxes_and_tags, enable=io.req.valid)
 
   // val idxLatch = RegEnable(idx, enable=io.req.valid)
   // val tagLatch = RegEnable(tag, enable=io.req.valid)
@@ -175,59 +175,59 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
   val lo_us = List.fill(TageBanks)(Module(new HL_Bank(nRows)))
   val table = List.fill(TageBanks)(Module(new SRAMTemplate(new TageEntry, set=nRows, shouldReset=false, holdRead=true, singlePort=false)))
 
-  val hi_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
-  val lo_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
-  val table_r = WireInit(0.U.asTypeOf(Vec(TageBanks, new TageEntry)))
+  val if3_hi_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
+  val if3_lo_us_r = WireInit(0.U.asTypeOf(Vec(TageBanks, Bool())))
+  val if3_table_r = WireInit(0.U.asTypeOf(Vec(TageBanks, new TageEntry)))
 
-  val baseBank = io.req.bits.pc(log2Up(TageBanks), 1)
-  val baseBankLatch = RegEnable(baseBank, enable=io.req.valid)
+  val if2_baseBank = io.req.bits.pc(log2Up(TageBanks), 1)
+  val if3_baseBank = RegEnable(if2_baseBank, enable=io.req.valid)
 
-  val bankIdxInOrder = VecInit((0 until TageBanks).map(b => (baseBankLatch +& b.U)(log2Up(TageBanks)-1, 0)))
+  val if3_bankIdxInOrder = VecInit((0 until TageBanks).map(b => (if3_baseBank +& b.U)(log2Up(TageBanks)-1, 0)))
 
-  val realMask = Mux(startsAtOddBank,
+  val if2_realMask = Mux(if2_startsAtOddBank,
                      Cat(io.req.bits.mask(bankWidth-1,0), io.req.bits.mask(PredictWidth-1, bankWidth)),
                      io.req.bits.mask)
-  val maskLatch = RegEnable(realMask, enable=io.req.valid)
+  val if3_realMask = RegEnable(if2_realMask, enable=io.req.valid)
 
   (0 until TageBanks).map(
     b => {
-      val idxes = VecInit(idxes_and_tags.map(_._1))
-      val idx = (if (b < bankWidth) Mux(startsAtOddBank, idxes(1), idxes(0))
-                 else Mux(startsAtOddBank, idxes(0), idxes(1)))
-      hi_us(b).io.r.req.valid := io.req.valid && realMask(b)
+      val idxes = VecInit(if2_idxes_and_tags.map(_._1))
+      val idx = (if (b < bankWidth) Mux(if2_startsAtOddBank, idxes(1), idxes(0))
+                 else Mux(if2_startsAtOddBank, idxes(0), idxes(1)))
+      hi_us(b).io.r.req.valid := io.req.valid && if2_realMask(b)
       hi_us(b).io.r.req.bits.setIdx := idx
-      lo_us(b).io.r.req.valid := io.req.valid && realMask(b)
+      lo_us(b).io.r.req.valid := io.req.valid && if2_realMask(b)
       lo_us(b).io.r.req.bits.setIdx := idx
       table(b).reset := reset.asBool
-      table(b).io.r.req.valid := io.req.valid && realMask(b)
+      table(b).io.r.req.valid := io.req.valid && if2_realMask(b)
       table(b).io.r.req.bits.setIdx := idx
 
-      hi_us_r(b) := hi_us(b).io.r.resp.data
-      lo_us_r(b) := lo_us(b).io.r.resp.data
-      table_r(b) := table(b).io.r.resp.data(0)
+      if3_hi_us_r(b) := hi_us(b).io.r.resp.data
+      if3_lo_us_r(b) := lo_us(b).io.r.resp.data
+      if3_table_r(b) := table(b).io.r.resp.data(0)
     }
   )
 
-  val startsAtOddBankLatch = RegEnable(startsAtOddBank, io.req.valid)
+  val if3_startsAtOddBank = RegEnable(if2_startsAtOddBank, io.req.valid)
 
-  val req_rhits = VecInit((0 until TageBanks).map(b => {
-    val tag = (if (b < bankWidth) Mux(startsAtOddBank, tags_latch(1), tags_latch(0))
-               else Mux(startsAtOddBank, tags_latch(0), tags_latch(1)))
-    val bank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
-                else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
-    table_r(bank).valid && table_r(bank).tag === tag
+  val if3_req_rhits = VecInit((0 until TageBanks).map(b => {
+    val tag = (if (b < bankWidth) Mux(if3_startsAtOddBank, if3_tags(1), if3_tags(0))
+               else Mux(if3_startsAtOddBank, if3_tags(0), if3_tags(1)))
+    val bank = (if (b < bankWidth) Mux(if3_startsAtOddBank, (b+bankWidth).U, b.U)
+                else Mux(if3_startsAtOddBank, (b-bankWidth).U, b.U))
+    if3_table_r(bank).valid && if3_table_r(bank).tag === tag
   }))
 
   (0 until TageBanks).map(b => {
-    val bank = (if (b < bankWidth) Mux(startsAtOddBankLatch, (b+bankWidth).U, b.U)
-                else Mux(startsAtOddBankLatch, (b-bankWidth).U, b.U))
-    io.resp(b).valid := req_rhits(b) && maskLatch(b)
-    io.resp(b).bits.ctr := table_r(bank).ctr
-    io.resp(b).bits.u := Cat(hi_us_r(bank),lo_us_r(bank))
+    val bank = (if (b < bankWidth) Mux(if3_startsAtOddBank, (b+bankWidth).U, b.U)
+                else Mux(if3_startsAtOddBank, (b-bankWidth).U, b.U))
+    io.resp(b).valid := if3_req_rhits(b) && if3_realMask(b)
+    io.resp(b).bits.ctr := if3_table_r(bank).ctr
+    io.resp(b).bits.u := Cat(if3_hi_us_r(bank),if3_lo_us_r(bank))
   })
 
@@ -292,7 +292,7 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
   // when (RegNext(wrbypass_rhit)) {
   //   for (b <- 0 until TageBanks) {
   //     when (RegNext(wrbypass_rctr_hits(b.U + baseBank))) {
-  //       io.resp(b).bits.ctr := rhit_ctrs(bankIdxInOrder(b))
+  //       io.resp(b).bits.ctr := rhit_ctrs(if3_bankIdxInOrder(b))
   //     }
   //   }
   // }
@@ -335,17 +335,17 @@ class TageTable(val nRows: Int, val histLen: Int, val tagLen: Int, val uBitPerio
     val u = io.update
     val b = PriorityEncoder(u.mask)
     val ub = PriorityEncoder(u.uMask)
-    val idx = idxes_and_tags.map(_._1)
-    val tag = idxes_and_tags.map(_._2)
+    val idx = if2_idxes_and_tags.map(_._1)
+    val tag = if2_idxes_and_tags.map(_._2)
     XSDebug(io.req.valid, "tableReq: pc=0x%x, hist=%x, idx=(%d,%d), tag=(%x,%x), baseBank=%d, mask=%b, realMask=%b\n",
-      io.req.bits.pc, io.req.bits.hist, idx(0), idx(1), tag(0), tag(1), baseBank, io.req.bits.mask, realMask)
+      io.req.bits.pc, io.req.bits.hist, idx(0), idx(1), tag(0), tag(1), if2_baseBank, io.req.bits.mask, if2_realMask)
     for (i <- 0 until TageBanks) {
-      XSDebug(RegNext(io.req.valid) && req_rhits(i), "TageTableResp[%d]: idx=(%d,%d), hit:%d, ctr:%d, u:%d\n",
-        i.U, idxes_latch(0), idxes_latch(1), req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
+      XSDebug(RegNext(io.req.valid) && if3_req_rhits(i), "TageTableResp[%d]: idx=(%d,%d), hit:%d, ctr:%d, u:%d\n",
+        i.U, if3_idxes(0), if3_idxes(1), if3_req_rhits(i), io.resp(i).bits.ctr, io.resp(i).bits.u)
     }
-    XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", req_rhits.asUInt, maskLatch)
-    XSDebug(RegNext(io.req.valid) && !req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
+    XSDebug(RegNext(io.req.valid), "TageTableResp: hits:%b, maskLatch is %b\n", if3_req_rhits.asUInt, if3_realMask)
+    XSDebug(RegNext(io.req.valid) && !if3_req_rhits.reduce(_||_), "TageTableResp: no hits!\n")
"TageTableResp: no hits!\n") XSDebug(io.update.mask.reduce(_||_), "update Table: pc:%x, fetchIdx:%d, hist:%x, bank:%d, taken:%d, alloc:%d, oldCtr:%d\n", u.pc, u.fetchIdx, u.hist, b, u.taken(b), u.alloc(b), u.oldCtr(b)) @@ -435,12 +435,12 @@ class Tage extends BaseTage { override val debug = true // Keep the table responses to process in s3 - val resps = VecInit(tables.map(t => RegEnable(t.io.resp, enable=io.s3Fire))) - val scResps = VecInit(scTables.map(t => RegEnable(t.io.resp, enable=io.s3Fire))) + val if4_resps = RegEnable(VecInit(tables.map(t => t.io.resp)), enable=io.s3Fire) + val if4_scResps = RegEnable(VecInit(scTables.map(t => t.io.resp)), enable=io.s3Fire) // val flushLatch = RegNext(io.flush) - val s2_bim = RegEnable(io.bim, enable=io.pc.valid) // actually it is s2Fire - val s3_bim = RegEnable(s2_bim, enable=io.s3Fire) + val if3_bim = RegEnable(io.bim, enable=io.pc.valid) // actually it is s2Fire + val if4_bim = RegEnable(if3_bim, enable=io.s3Fire) val debug_pc_s2 = RegEnable(io.pc.bits, enable=io.pc.valid) val debug_pc_s3 = RegEnable(debug_pc_s2, enable=io.s3Fire) @@ -482,37 +482,37 @@ class Tage extends BaseTage { // access tag tables and output meta info for (w <- 0 until TageBanks) { - val tageTaken = WireInit(s3_bim.ctrs(w)(1).asBool) - var altPred = s3_bim.ctrs(w)(1) - val finalAltPred = WireInit(s3_bim.ctrs(w)(1)) - var provided = false.B - var provider = 0.U - io.resp.takens(w) := s3_bim.ctrs(w)(1) + val if4_tageTaken = WireInit(if4_bim.ctrs(w)(1).asBool) + var if4_altPred = if4_bim.ctrs(w)(1) + val if4_finalAltPred = WireInit(if4_bim.ctrs(w)(1)) + var if4_provided = false.B + var if4_provider = 0.U + io.resp.takens(w) := if4_bim.ctrs(w)(1) for (i <- 0 until TageNTables) { - val hit = resps(i)(w).valid - val ctr = resps(i)(w).bits.ctr + val hit = if4_resps(i)(w).valid + val ctr = if4_resps(i)(w).bits.ctr when (hit) { - io.resp.takens(w) := Mux(ctr === 3.U || ctr === 4.U, altPred, ctr(2)) // Use altpred on weak taken - tageTaken := Mux(ctr === 3.U || ctr === 4.U, altPred, ctr(2)) - finalAltPred := altPred + io.resp.takens(w) := Mux(ctr === 3.U || ctr === 4.U, if4_altPred, ctr(2)) // Use altpred on weak taken + if4_tageTaken := Mux(ctr === 3.U || ctr === 4.U, if4_altPred, ctr(2)) + if4_finalAltPred := if4_altPred } - provided = provided || hit // Once hit then provide - provider = Mux(hit, i.U, provider) // Use the last hit as provider - altPred = Mux(hit, ctr(2), altPred) // Save current pred as potential altpred + if4_provided = if4_provided || hit // Once hit then provide + if4_provider = Mux(hit, i.U, if4_provider) // Use the last hit as provider + if4_altPred = Mux(hit, ctr(2), if4_altPred) // Save current pred as potential altpred } - io.resp.hits(w) := provided - io.meta(w).provider.valid := provided - io.meta(w).provider.bits := provider - io.meta(w).altDiffers := finalAltPred =/= io.resp.takens(w) - io.meta(w).providerU := resps(provider)(w).bits.u - io.meta(w).providerCtr := resps(provider)(w).bits.ctr - io.meta(w).taken := tageTaken + io.resp.hits(w) := if4_provided + io.meta(w).provider.valid := if4_provided + io.meta(w).provider.bits := if4_provider + io.meta(w).altDiffers := if4_finalAltPred =/= io.resp.takens(w) + io.meta(w).providerU := if4_resps(if4_provider)(w).bits.u + io.meta(w).providerCtr := if4_resps(if4_provider)(w).bits.ctr + io.meta(w).taken := if4_tageTaken // Create a mask fo tables which did not hit our query, and also contain useless entries // and also uses a longer history than the provider - val allocatableSlots = (VecInit(resps.map(r => 
!r(w).valid && r(w).bits.u === 0.U)).asUInt & - ~(LowerMask(UIntToOH(provider), TageNTables) & Fill(TageNTables, provided.asUInt)) + val allocatableSlots = (VecInit(if4_resps.map(r => !r(w).valid && r(w).bits.u === 0.U)).asUInt & + ~(LowerMask(UIntToOH(if4_provider), TageNTables) & Fill(TageNTables, if4_provided.asUInt)) ) val allocLFSR = LFSR64()(TageNTables - 1, 0) val firstEntry = PriorityEncoder(allocatableSlots) @@ -525,12 +525,12 @@ class Tage extends BaseTage { scMeta := DontCare val scTableSums = VecInit( (0 to 1) map { i => { - // val providerCtr = resps(provider)(w).bits.ctr.zext() + // val providerCtr = if4_resps(if4_provider)(w).bits.ctr.zext() // val pvdrCtrCentered = (((providerCtr - 4.S) << 1) + 1.S) << 3 // sum += pvdrCtrCentered if (EnableSC) { (0 until SCNTables) map { j => - scTables(j).getCenteredValue(scResps(j)(w).ctr(i)) + scTables(j).getCenteredValue(if4_scResps(j)(w).ctr(i)) } reduce (_+_) // TODO: rewrite with adder tree } else 0.S @@ -539,21 +539,21 @@ class Tage extends BaseTage { ) if (EnableSC) { - scMeta.tageTaken := tageTaken - scMeta.scUsed := provided - scMeta.scPred := tageTaken + scMeta.tageTaken := if4_tageTaken + scMeta.scUsed := if4_provided + scMeta.scPred := if4_tageTaken scMeta.sumAbs := 0.U - when (provided) { - val providerCtr = resps(provider)(w).bits.ctr.zext() + when (if4_provided) { + val providerCtr = if4_resps(if4_provider)(w).bits.ctr.zext() val pvdrCtrCentered = ((((providerCtr - 4.S) << 1).asSInt + 1.S) << 3).asSInt - val totalSum = scTableSums(tageTaken.asUInt) + pvdrCtrCentered + val totalSum = scTableSums(if4_tageTaken.asUInt) + pvdrCtrCentered val sumAbs = totalSum.abs().asUInt val sumBelowThreshold = totalSum.abs.asUInt < useThreshold val scPred = totalSum >= 0.S scMeta.sumAbs := sumAbs - scMeta.ctrs := VecInit(scResps.map(r => r(w).ctr(tageTaken.asUInt))) + scMeta.ctrs := VecInit(if4_scResps.map(r => r(w).ctr(if4_tageTaken.asUInt))) for (i <- 0 until SCNTables) { - XSDebug(RegNext(io.s3Fire), p"SCTable(${i.U})(${w.U}): ctr:(${scResps(i)(w).ctr(0)},${scResps(i)(w).ctr(1)})\n") + XSDebug(RegNext(io.s3Fire), p"SCTable(${i.U})(${w.U}): ctr:(${if4_scResps(i)(w).ctr(0)},${if4_scResps(i)(w).ctr(1)})\n") } XSDebug(RegNext(io.s3Fire), p"SC(${w.U}): pvdCtr(${providerCtr}), pvdCentred(${pvdrCtrCentered}), totalSum(${totalSum}), abs(${sumAbs}) useThres(${useThreshold}), scPred(${scPred})\n") // Use prediction from Statistical Corrector @@ -664,7 +664,7 @@ class Tage extends BaseTage { XSDebug(RegNext(io.s3Fire), "s3FireOnLastCycle: resp: pc=%x, hist=%x, hits=%b, takens=%b\n", debug_pc_s3, debug_hist_s3, io.resp.hits.asUInt, io.resp.takens.asUInt) for (i <- 0 until TageNTables) { - XSDebug(RegNext(io.s3Fire), "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b\n", i.U, VecInit(resps(i).map(_.valid)).asUInt, Cat(resps(i).map(_.bits.ctr)), Cat(resps(i).map(_.bits.u))) + XSDebug(RegNext(io.s3Fire), "TageTable(%d): valids:%b, resp_ctrs:%b, resp_us:%b\n", i.U, VecInit(if4_resps(i).map(_.valid)).asUInt, Cat(if4_resps(i).map(_.bits.ctr)), Cat(if4_resps(i).map(_.bits.u))) } XSDebug(io.update.valid, "update: pc=%x, fetchpc=%x, cycle=%d, hist=%x, taken:%d, misPred:%d, bimctr:%d, pvdr(%d):%d, altDiff:%d, pvdrU:%d, pvdrCtr:%d, alloc(%d):%d\n", u.pc, u.pc - (bri.fetchIdx << 1.U), bri.debug_tage_cycle, updateHist, u.taken, u.isMisPred, bri.bimCtr, m.provider.valid, m.provider.bits, m.altDiffers, m.providerU, m.providerCtr, m.allocate.valid, m.allocate.bits) diff --git a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala 
b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala index 016f01c6aca467187e09be2e1c836398372d14ed..fe2ec389daf3ff32312376c662544868a387898c 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala @@ -70,9 +70,9 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP val loadCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.LOAD) val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).lqIdx.value) - val tailMask = (((1.U((LoadQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0) - val headMask = (((1.U((LoadQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(LoadQueueSize - 1, 0) - val enqDeqMask1 = tailMask ^ headMask + val deqMask = UIntToMask(deqPtr, LoadQueueSize) + val enqMask = UIntToMask(enqPtr, LoadQueueSize) + val enqDeqMask1 = deqMask ^ enqMask val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1) // Enqueue at dispatch @@ -172,7 +172,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP allocated(i) && miss(i) && !inflight }) - val missRefillSel = getFirstOne(missRefillSelVec, tailMask) + val missRefillSel = getFirstOne(missRefillSelVec, deqMask) val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr) io.dcache.req.valid := missRefillSelVec.asUInt.orR io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD @@ -307,7 +307,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP // allocatedMask: dequeuePtr can go to the next 1-bit val allocatedMask = VecInit((0 until LoadQueueSize).map(i => allocated(i) || !enqDeqMask(i))) // find the first one from deqPtr (deqPtr) - val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, deqPtrExt.flag) + val nextTail1 = getFirstOneWithFlag(allocatedMask, deqMask, deqPtrExt.flag) val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, enqPtrExt) deqPtrExt := nextTail @@ -319,9 +319,6 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP } }) - // rollback check - val rollback = Wire(Vec(StorePipelineWidth, Valid(new Redirect))) - def getFirstOne(mask: Vec[Bool], startMask: UInt) = { val length = mask.length val highBits = (0 until length).map(i => mask(i) & ~startMask(i)) @@ -372,91 +369,88 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP }) // store backward query and rollback - // val needCheck = Seq.fill(8)(WireInit(true.B)) - (0 until StorePipelineWidth).foreach(i => { - rollback(i) := DontCare - - when(io.storeIn(i).valid) { - val startIndex = io.storeIn(i).bits.uop.lqIdx.value - val lqIdxMask = ((1.U((LoadQueueSize + 1).W) << startIndex).asUInt - 1.U)(LoadQueueSize - 1, 0) - val xorMask = lqIdxMask ^ headMask - val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag - val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask) - - // check if load already in lq needs to be rolledback - val lqViolationVec = VecInit((0 until LoadQueueSize).map(j => { - val addrMatch = allocated(j) && - io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3) - val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j)) - // TODO: update refilled data - val violationVec = (0 until 8).map(k => dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k)) - Cat(violationVec).orR() && entryNeedCheck - })) - val lqViolation = 
lqViolationVec.asUInt().orR() - val lqViolationIndex = getFirstOne(lqViolationVec, lqIdxMask) - val lqViolationUop = uop(lqViolationIndex) - XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n") - - // when l/s writeback to roq together, check if rollback is needed - val wbViolationVec = VecInit((0 until LoadPipelineWidth).map(j => { - io.loadIn(j).valid && - isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) && - io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) && - (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR - })) - val wbViolation = wbViolationVec.asUInt().orR() - val wbViolationUop = getOldestInTwo(wbViolationVec, io.loadIn.map(_.bits.uop)) - XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n") - - // check if rollback is needed for load in l1 - val l1ViolationVec = VecInit((0 until LoadPipelineWidth).map(j => { - io.forward(j).valid && // L4 valid\ - isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) && - io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) && - (io.storeIn(i).bits.mask & io.forward(j).mask).orR - })) - val l1Violation = l1ViolationVec.asUInt().orR() - val l1ViolationUop = getOldestInTwo(l1ViolationVec, io.forward.map(_.uop)) - XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n") - - val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation) - val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop) - rollback(i).valid := Cat(rollbackValidVec).orR - val mask = getAfterMask(rollbackValidVec, rollbackUopVec) - val oneAfterZero = mask(1)(0) - val rollbackUop = Mux(oneAfterZero && mask(2)(0), - rollbackUopVec(0), - Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2))) - rollback(i).bits.roqIdx := rollbackUop.roqIdx - 1.U - - rollback(i).bits.isReplay := true.B - rollback(i).bits.isMisPred := false.B - rollback(i).bits.isException := false.B - rollback(i).bits.isFlushPipe := false.B - rollback(i).bits.target := rollbackUop.cf.pc - rollback(i).bits.brTag := rollbackUop.brTag - - XSDebug( - l1Violation, - "need rollback (l4 load) pc %x roqidx %d target %x\n", - io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt - ) - XSDebug( - lqViolation, - "need rollback (ld wb before store) pc %x roqidx %d target %x\n", - io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt - ) - XSDebug( - wbViolation, - "need rollback (ld/st wb together) pc %x roqidx %d target %x\n", - io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt - ) - }.otherwise { - rollback(i).valid := false.B - } - }) + def detectRollback(i: Int) = { + val startIndex = io.storeIn(i).bits.uop.lqIdx.value + val lqIdxMask = UIntToMask(startIndex, LoadQueueSize) + val xorMask = lqIdxMask ^ enqMask + val sameFlag = io.storeIn(i).bits.uop.lqIdx.flag === enqPtrExt.flag + val toEnqPtrMask = Mux(sameFlag, xorMask, ~xorMask) + + // check if load already in lq needs to be rolled back + val lqViolationVec = RegNext(VecInit((0 until LoadQueueSize).map(j => { + val addrMatch = allocated(j) && + io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === dataModule.io.rdata(j).paddr(PAddrBits - 1, 3) + val entryNeedCheck = toEnqPtrMask(j) && addrMatch && (datavalid(j) || listening(j) || miss(j)) + // TODO: update refilled data + val violationVec = (0 until 8).map(k => 
dataModule.io.rdata(j).mask(k) && io.storeIn(i).bits.mask(k)) + Cat(violationVec).orR() && entryNeedCheck + }))) + val lqViolation = lqViolationVec.asUInt().orR() + val lqViolationIndex = getFirstOne(lqViolationVec, RegNext(lqIdxMask)) + val lqViolationUop = uop(lqViolationIndex) + // lqViolationUop.lqIdx.flag := deqMask(lqViolationIndex) ^ deqPtrExt.flag + // lqViolationUop.lqIdx.value := lqViolationIndex + XSDebug(lqViolation, p"${Binary(Cat(lqViolationVec))}, $startIndex, $lqViolationIndex\n") + + // when l/s write back to roq together, check if rollback is needed + val wbViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => { + io.loadIn(j).valid && + isAfter(io.loadIn(j).bits.uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) && + io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.loadIn(j).bits.paddr(PAddrBits - 1, 3) && + (io.storeIn(i).bits.mask & io.loadIn(j).bits.mask).orR + }))) + val wbViolation = wbViolationVec.asUInt().orR() + val wbViolationUop = getOldestInTwo(wbViolationVec, RegNext(VecInit(io.loadIn.map(_.bits.uop)))) + XSDebug(wbViolation, p"${Binary(Cat(wbViolationVec))}, $wbViolationUop\n") + + // check if rollback is needed for load in l1 + val l1ViolationVec = RegNext(VecInit((0 until LoadPipelineWidth).map(j => { + io.forward(j).valid && // L1 valid + isAfter(io.forward(j).uop.roqIdx, io.storeIn(i).bits.uop.roqIdx) && + io.storeIn(i).bits.paddr(PAddrBits - 1, 3) === io.forward(j).paddr(PAddrBits - 1, 3) && + (io.storeIn(i).bits.mask & io.forward(j).mask).orR + }))) + val l1Violation = l1ViolationVec.asUInt().orR() + val l1ViolationUop = getOldestInTwo(l1ViolationVec, RegNext(VecInit(io.forward.map(_.uop)))) + XSDebug(l1Violation, p"${Binary(Cat(l1ViolationVec))}, $l1ViolationUop\n") + + val rollbackValidVec = Seq(lqViolation, wbViolation, l1Violation) + val rollbackUopVec = Seq(lqViolationUop, wbViolationUop, l1ViolationUop) + + val mask = getAfterMask(rollbackValidVec, rollbackUopVec) + val oneAfterZero = mask(1)(0) + val rollbackUop = Mux(oneAfterZero && mask(2)(0), + rollbackUopVec(0), + Mux(!oneAfterZero && mask(2)(1), rollbackUopVec(1), rollbackUopVec(2))) + + XSDebug( + l1Violation, + "need rollback (l1 load) pc %x roqidx %d target %x\n", + io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, l1ViolationUop.roqIdx.asUInt + ) + XSDebug( + lqViolation, + "need rollback (ld wb before store) pc %x roqidx %d target %x\n", + io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, lqViolationUop.roqIdx.asUInt + ) + XSDebug( + wbViolation, + "need rollback (ld/st wb together) pc %x roqidx %d target %x\n", + io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.roqIdx.asUInt, wbViolationUop.roqIdx.asUInt + ) + + (RegNext(io.storeIn(i).valid) && Cat(rollbackValidVec).orR, rollbackUop) + } - def rollbackSel(a: Valid[Redirect], b: Valid[Redirect]): ValidIO[Redirect] = { + // rollback check + val rollback = Wire(Vec(StorePipelineWidth, Valid(new MicroOp))) + for (i <- 0 until StorePipelineWidth) { + val detectedRollback = detectRollback(i) + rollback(i).valid := detectedRollback._1 + rollback(i).bits := detectedRollback._2 + } + + def rollbackSel(a: Valid[MicroOp], b: Valid[MicroOp]): ValidIO[MicroOp] = { Mux( a.valid, Mux( @@ -468,7 +462,21 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP ) } - io.rollback := ParallelOperation(rollback, rollbackSel) + val rollbackSelected = ParallelOperation(rollback, rollbackSel) + val lastCycleRedirect = RegNext(io.brqRedirect) + + io.rollback := DontCare + // Note that we 
use roqIdx - 1.U to flush the load instruction itself. + // Thus, here if last cycle's roqIdx equals this cycle's roqIdx, it still triggers the redirect. + io.rollback.valid := rollbackSelected.valid && (!lastCycleRedirect.valid || !isAfter(rollbackSelected.bits.roqIdx, lastCycleRedirect.bits.roqIdx)) + + io.rollback.bits.roqIdx := rollbackSelected.bits.roqIdx - 1.U + io.rollback.bits.isReplay := true.B + io.rollback.bits.isMisPred := false.B + io.rollback.bits.isException := false.B + io.rollback.bits.isFlushPipe := false.B + io.rollback.bits.target := rollbackSelected.bits.cf.pc + io.rollback.bits.brTag := rollbackSelected.bits.brTag // Memory mapped IO / other uncached operations @@ -496,7 +504,7 @@ class LoadQueue extends XSModule with HasDCacheParameters with HasCircularQueueP io.uncache.resp.ready := true.B - when(io.uncache.req.fire()){ + when (io.uncache.req.fire()) { pending(deqPtr) := false.B } diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala index e5007dcb3c36e8347cd28fff277645c54e3e288a..6dca78ebe8c15de737e90b3246a01d05d14111c6 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala @@ -63,8 +63,8 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue val storeCommit = (0 until CommitWidth).map(i => io.commits.valid(i) && !io.commits.isWalk && io.commits.uop(i).ctrl.commitType === CommitType.STORE) val mcommitIdx = (0 until CommitWidth).map(i => io.commits.uop(i).sqIdx.value) - val tailMask = (((1.U((StoreQueueSize + 1).W)) << deqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0) - val headMask = (((1.U((StoreQueueSize + 1).W)) << enqPtr).asUInt - 1.U)(StoreQueueSize - 1, 0) + val tailMask = UIntToMask(deqPtr, StoreQueueSize) + val headMask = UIntToMask(enqPtr, StoreQueueSize) val enqDeqMask1 = tailMask ^ headMask val enqDeqMask = Mux(sameFlag, enqDeqMask1, ~enqDeqMask1) @@ -228,7 +228,7 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue // i.e. 
forward1 is the target entries with the same flag bits and forward2 otherwise val differentFlag = deqPtrExt.flag =/= io.forward(i).sqIdx.flag - val forwardMask = ((1.U((StoreQueueSize + 1).W)) << io.forward(i).sqIdx.value).asUInt - 1.U + val forwardMask = UIntToMask(io.forward(i).sqIdx.value, StoreQueueSize) val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B))) for (j <- 0 until StoreQueueSize) { storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked diff --git a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala index ad39f03c5bf71260a5134a6fdc28ebf20f5d5202..573e99bfbeea507369da24ac3dc6f75963ad22be 100644 --- a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala @@ -64,7 +64,7 @@ class AtomicsUnit extends XSModule with MemoryOpConstants{ // we send feedback right after we receive a request // also, we always treat amo as tlb hit // since we will continue polling tlb all by ourselves - io.tlbFeedback.valid := RegNext(io.in.fire()) + io.tlbFeedback.valid := RegNext(RegNext(io.in.valid)) io.tlbFeedback.bits.hit := true.B io.tlbFeedback.bits.roqIdx := in.uop.roqIdx diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index b8b14c7e942df632a1da3c2b07c878827a50291c..19e25fabb3ef1101fed4153ed0f5f5074368c36d 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -21,35 +21,23 @@ class LoadUnit_S0 extends XSModule { val io = IO(new Bundle() { val in = Flipped(Decoupled(new ExuInput)) val out = Decoupled(new LsPipelineBundle) - val redirect = Flipped(ValidIO(new Redirect)) val dtlbReq = DecoupledIO(new TlbReq) - val dtlbResp = Flipped(DecoupledIO(new TlbResp)) - val tlbFeedback = ValidIO(new TlbFeedback) val dcacheReq = DecoupledIO(new DCacheLoadReq) }) val s0_uop = io.in.bits.uop val s0_vaddr = io.in.bits.src1 + s0_uop.ctrl.imm - val s0_paddr = io.dtlbResp.bits.paddr - val s0_tlb_miss = io.dtlbResp.bits.miss val s0_mask = genWmask(s0_vaddr, s0_uop.ctrl.fuOpType(1,0)) // query DTLB - io.dtlbReq.valid := io.out.valid + io.dtlbReq.valid := io.in.valid io.dtlbReq.bits.vaddr := s0_vaddr io.dtlbReq.bits.cmd := TlbCmd.read io.dtlbReq.bits.roqIdx := s0_uop.roqIdx io.dtlbReq.bits.debug.pc := s0_uop.cf.pc - io.dtlbResp.ready := io.out.ready // TODO: check it: io.out.fire()? 
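// Aside: a sketch of the UIntToMask helper adopted in the LoadQueue/StoreQueue
// hunks above. This assumes the usual "all bits strictly below the pointer"
// semantics, i.e. UIntToMask(ptr, len) behaves like ((1.U << ptr) - 1.U)(len - 1, 0);
// the project's actual utils.UIntToMask may be implemented differently.
import chisel3._
object UIntToMaskSketch {
  // bit i is set iff i < ptr, e.g. ptr = 3, length = 8 yields "b00000111".U
  def apply(ptr: UInt, length: Int): UInt =
    VecInit((0 until length).map(i => ptr > i.U)).asUInt
}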
- - // feedback tlb result to RS - // Note: can be moved to s1 - io.tlbFeedback.valid := io.out.valid - io.tlbFeedback.bits.hit := !s0_tlb_miss - io.tlbFeedback.bits.roqIdx := s0_uop.roqIdx // query DCache - io.dcacheReq.valid := io.in.valid && !s0_uop.roqIdx.needFlush(io.redirect) + io.dcacheReq.valid := io.in.valid io.dcacheReq.bits.cmd := MemoryOpConstants.M_XRD io.dcacheReq.bits.addr := s0_vaddr io.dcacheReq.bits.mask := s0_mask @@ -72,21 +60,18 @@ class LoadUnit_S0 extends XSModule { "b11".U -> (s0_vaddr(2, 0) === 0.U) //d )) - io.out.valid := io.dcacheReq.fire() && // dcache may not accept load request - !io.in.bits.uop.roqIdx.needFlush(io.redirect) + io.out.valid := io.in.valid && io.dcacheReq.ready + io.out.bits := DontCare io.out.bits.vaddr := s0_vaddr - io.out.bits.paddr := s0_paddr - io.out.bits.tlbMiss := io.dtlbResp.bits.miss io.out.bits.mask := s0_mask io.out.bits.uop := s0_uop io.out.bits.uop.cf.exceptionVec(loadAddrMisaligned) := !addrAligned - io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld - io.in.ready := io.out.fire() + io.in.ready := !io.in.valid || (io.out.ready && io.dcacheReq.ready) - XSDebug(io.dcacheReq.fire(), "[DCACHE LOAD REQ] pc %x vaddr %x paddr will be %x\n", - s0_uop.cf.pc, s0_vaddr, s0_paddr + XSDebug(io.dcacheReq.fire(), + p"[DCACHE LOAD REQ] pc ${Hexadecimal(s0_uop.cf.pc)}, vaddr ${Hexadecimal(s0_vaddr)}\n" ) } @@ -97,20 +82,28 @@ class LoadUnit_S1 extends XSModule { val io = IO(new Bundle() { val in = Flipped(Decoupled(new LsPipelineBundle)) val out = Decoupled(new LsPipelineBundle) - val redirect = Flipped(ValidIO(new Redirect)) - val s1_paddr = Output(UInt(PAddrBits.W)) + val dtlbResp = Flipped(DecoupledIO(new TlbResp)) + val tlbFeedback = ValidIO(new TlbFeedback) + val dcachePAddr = Output(UInt(PAddrBits.W)) val sbuffer = new LoadForwardQueryIO val lsq = new LoadForwardQueryIO }) val s1_uop = io.in.bits.uop - val s1_paddr = io.in.bits.paddr - val s1_tlb_miss = io.in.bits.tlbMiss + val s1_paddr = io.dtlbResp.bits.paddr + val s1_tlb_miss = io.dtlbResp.bits.miss val s1_mmio = !s1_tlb_miss && AddressSpace.isMMIO(s1_paddr) && !io.out.bits.uop.cf.exceptionVec.asUInt.orR val s1_mask = io.in.bits.mask io.out.bits := io.in.bits // forwardXX field will be updated in s1 - io.s1_paddr := s1_paddr + + io.dtlbResp.ready := true.B + // feedback tlb result to RS + io.tlbFeedback.valid := io.in.valid + io.tlbFeedback.bits.hit := !s1_tlb_miss + io.tlbFeedback.bits.roqIdx := s1_uop.roqIdx + + io.dcachePAddr := s1_paddr // load forward query datapath io.sbuffer.valid := io.in.valid @@ -127,15 +120,13 @@ class LoadUnit_S1 extends XSModule { io.lsq.mask := s1_mask io.lsq.pc := s1_uop.cf.pc // FIXME: remove it - io.out.bits.forwardMask := io.sbuffer.forwardMask - io.out.bits.forwardData := io.sbuffer.forwardData - - io.out.valid := io.in.valid && !s1_tlb_miss && !s1_uop.roqIdx.needFlush(io.redirect) + io.out.valid := io.in.valid && !s1_tlb_miss io.out.bits.paddr := s1_paddr io.out.bits.mmio := s1_mmio io.out.bits.tlbMiss := s1_tlb_miss + io.out.bits.uop.cf.exceptionVec(loadPageFault) := io.dtlbResp.bits.excp.pf.ld - io.in.ready := io.out.ready || !io.in.valid + io.in.ready := !io.in.valid || io.out.ready } @@ -146,9 +137,9 @@ class LoadUnit_S2 extends XSModule { val io = IO(new Bundle() { val in = Flipped(Decoupled(new LsPipelineBundle)) val out = Decoupled(new LsPipelineBundle) - val redirect = Flipped(ValidIO(new Redirect)) val dcacheResp = Flipped(DecoupledIO(new DCacheWordResp)) val lsq = new LoadForwardQueryIO + val sbuffer = new 
LoadForwardQueryIO }) val s2_uop = io.in.bits.uop @@ -197,7 +188,7 @@ class LoadUnit_S2 extends XSModule { // TODO: ECC check - io.out.valid := io.in.valid // && !s2_uop.needFlush(io.redirect) will cause comb. loop + io.out.valid := io.in.valid // Inst will be canceled in store queue / lsq, // so we do not need to care about flush in load / store unit's out.valid io.out.bits := io.in.bits @@ -208,10 +199,16 @@ class LoadUnit_S2 extends XSModule { io.in.ready := io.out.ready || !io.in.valid // merge forward result + // lsq has higher priority than sbuffer io.lsq := DontCare + io.sbuffer := DontCare // generate XLEN/8 Muxs for (i <- 0 until XLEN / 8) { - when(io.lsq.forwardMask(i)) { + when (io.sbuffer.forwardMask(i)) { + io.out.bits.forwardMask(i) := true.B + io.out.bits.forwardData(i) := io.sbuffer.forwardData(i) + } + when (io.lsq.forwardMask(i)) { io.out.bits.forwardMask(i) := true.B io.out.bits.forwardData(i) := io.lsq.forwardData(i) } @@ -224,18 +221,6 @@ class LoadUnit_S2 extends XSModule { } -// class LoadUnit_S3 extends XSModule { -// val io = IO(new Bundle() { -// val in = Flipped(Decoupled(new LsPipelineBundle)) -// val out = Decoupled(new LsPipelineBundle) -// val redirect = Flipped(ValidIO(new Redirect)) -// }) - -// io.in.ready := true.B -// io.out.bits := io.in.bits -// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect) -// } - class LoadUnit extends XSModule { val io = IO(new Bundle() { val ldin = Flipped(Decoupled(new ExuInput)) @@ -251,33 +236,27 @@ class LoadUnit extends XSModule { val load_s0 = Module(new LoadUnit_S0) val load_s1 = Module(new LoadUnit_S1) val load_s2 = Module(new LoadUnit_S2) - // val load_s3 = Module(new LoadUnit_S3) load_s0.io.in <> io.ldin - load_s0.io.redirect <> io.redirect load_s0.io.dtlbReq <> io.dtlb.req - load_s0.io.dtlbResp <> io.dtlb.resp load_s0.io.dcacheReq <> io.dcache.req - load_s0.io.tlbFeedback <> io.tlbFeedback - PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, false.B) + PipelineConnect(load_s0.io.out, load_s1.io.in, true.B, load_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect)) - io.dcache.s1_paddr := load_s1.io.out.bits.paddr - load_s1.io.redirect <> io.redirect + load_s1.io.dtlbResp <> io.dtlb.resp + load_s1.io.tlbFeedback <> io.tlbFeedback + io.dcache.s1_paddr <> load_s1.io.dcachePAddr io.dcache.s1_kill := DontCare // FIXME - io.sbuffer <> load_s1.io.sbuffer - io.lsq.forward <> load_s1.io.lsq + load_s1.io.sbuffer <> io.sbuffer + load_s1.io.lsq <> io.lsq.forward - PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, false.B) + PipelineConnect(load_s1.io.out, load_s2.io.in, true.B, load_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect)) - load_s2.io.redirect <> io.redirect load_s2.io.dcacheResp <> io.dcache.resp - load_s2.io.lsq := DontCare - load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData - load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask - - // PipelineConnect(load_s2.io.fp_out, load_s3.io.in, true.B, false.B) - // load_s3.io.redirect <> io.redirect + load_s2.io.lsq.forwardData <> io.lsq.forward.forwardData + load_s2.io.lsq.forwardMask <> io.lsq.forward.forwardMask + load_s2.io.sbuffer.forwardData <> io.sbuffer.forwardData + load_s2.io.sbuffer.forwardMask <> io.sbuffer.forwardMask XSDebug(load_s0.io.out.valid, p"S0: pc ${Hexadecimal(load_s0.io.out.bits.uop.cf.pc)}, lId ${Hexadecimal(load_s0.io.out.bits.uop.lqIdx.asUInt)}, " + diff --git a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala index 
d807375bd7cf457727b93c3148b25d3b587cb3f6..90b232ab4dd5fdb783a1108214b2374448837150 100644 --- a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala @@ -12,10 +12,7 @@ class StoreUnit_S0 extends XSModule { val io = IO(new Bundle() { val in = Flipped(Decoupled(new ExuInput)) val out = Decoupled(new LsPipelineBundle) - val redirect = Flipped(ValidIO(new Redirect)) val dtlbReq = DecoupledIO(new TlbReq) - val dtlbResp = Flipped(DecoupledIO(new TlbResp)) - val tlbFeedback = ValidIO(new TlbFeedback) }) // send req to dtlb @@ -26,16 +23,15 @@ class StoreUnit_S0 extends XSModule { io.dtlbReq.bits.cmd := TlbCmd.write io.dtlbReq.bits.roqIdx := io.in.bits.uop.roqIdx io.dtlbReq.bits.debug.pc := io.in.bits.uop.cf.pc - io.dtlbResp.ready := true.B // TODO: why dtlbResp needs a ready? io.out.bits := DontCare io.out.bits.vaddr := saddr - io.out.bits.paddr := io.dtlbResp.bits.paddr + io.out.bits.data := genWdata(io.in.bits.src2, io.in.bits.uop.ctrl.fuOpType(1,0)) io.out.bits.uop := io.in.bits.uop - io.out.bits.miss := io.dtlbResp.bits.miss + io.out.bits.miss := DontCare io.out.bits.mask := genWmask(io.out.bits.vaddr, io.in.bits.uop.ctrl.fuOpType(1,0)) - io.out.valid := io.in.valid && !io.dtlbResp.bits.miss && !io.out.bits.uop.roqIdx.needFlush(io.redirect) + io.out.valid := io.in.valid io.in.ready := io.out.ready // exception check @@ -46,18 +42,7 @@ class StoreUnit_S0 extends XSModule { "b11".U -> (io.out.bits.vaddr(2,0) === 0.U) //d )) io.out.bits.uop.cf.exceptionVec(storeAddrMisaligned) := !addrAligned - io.out.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st - // Send TLB feedback to store issue queue - // TODO: should be moved to S1 - io.tlbFeedback.valid := RegNext(io.in.valid && io.out.ready) - io.tlbFeedback.bits.hit := RegNext(!io.out.bits.miss) - io.tlbFeedback.bits.roqIdx := RegNext(io.out.bits.uop.roqIdx) - XSDebug(io.tlbFeedback.valid, - "S1 Store: tlbHit: %d roqIdx: %d\n", - io.tlbFeedback.bits.hit, - io.tlbFeedback.bits.roqIdx.asUInt - ) } // Load Pipeline Stage 1 @@ -67,30 +52,41 @@ class StoreUnit_S1 extends XSModule { val in = Flipped(Decoupled(new LsPipelineBundle)) val out = Decoupled(new LsPipelineBundle) // val fp_out = Decoupled(new LsPipelineBundle) - val stout = DecoupledIO(new ExuOutput) // writeback store - val redirect = Flipped(ValidIO(new Redirect)) + val lsq = ValidIO(new LsPipelineBundle) + val dtlbResp = Flipped(DecoupledIO(new TlbResp)) + val tlbFeedback = ValidIO(new TlbFeedback) }) - // get paddr from dtlb, check if rollback is needed - // writeback store inst to lsq - // writeback to LSQ + val s1_paddr = io.dtlbResp.bits.paddr + val s1_tlb_miss = io.dtlbResp.bits.miss + io.in.ready := true.B - io.out.bits := io.in.bits - io.out.bits.miss := false.B - io.out.bits.mmio := AddressSpace.isMMIO(io.in.bits.paddr) - io.out.valid := io.in.fire() // TODO: && ! FP - io.stout.bits.uop := io.in.bits.uop - // io.stout.bits.uop.cf.exceptionVec := // TODO: update according to TLB result - io.stout.bits.data := DontCare - io.stout.bits.redirectValid := false.B - io.stout.bits.redirect := DontCare - io.stout.bits.brUpdate := DontCare - io.stout.bits.debug.isMMIO := io.out.bits.mmio - io.stout.bits.fflags := DontCare + io.dtlbResp.ready := true.B // TODO: why dtlbResp needs a ready? 
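// Aside: a sketch of the PipelineConnect helper as used in the stage wiring
// below (and in LoadUnit above), inferred from its call sites
// (left, right, rightOutFire, isFlush); the project's actual utils.PipelineConnect
// may differ. It is a one-entry pipeline register whose valid bit is killed when
// isFlush fires, which is how the roqIdx.needFlush(io.redirect) conditions now
// cancel in-flight uops between stages.
import chisel3._
import chisel3.util._
object PipelineConnectSketch {
  def apply[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T],
                       rightOutFire: Bool, isFlush: Bool): Unit = {
    val valid = RegInit(false.B)
    when (rightOutFire)              { valid := false.B } // beat consumed downstream
    when (left.valid && right.ready) { valid := true.B }  // new beat latched
    when (isFlush)                   { valid := false.B } // flush wins (last connect)
    left.ready  := right.ready
    right.bits  := RegEnable(left.bits, left.valid && right.ready)
    right.valid := valid
  }
}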
+ // Send TLB feedback to store issue queue + io.tlbFeedback.valid := io.in.valid + io.tlbFeedback.bits.hit := !s1_tlb_miss + io.tlbFeedback.bits.roqIdx := io.in.bits.uop.roqIdx + XSDebug(io.tlbFeedback.valid, + "S1 Store: tlbHit: %d roqIdx: %d\n", + io.tlbFeedback.bits.hit, + io.tlbFeedback.bits.roqIdx.asUInt + ) + + // get paddr from dtlb, check if rollback is needed + // writeback store inst to lsq + io.lsq.valid := io.in.valid // TODO: && ! FP + io.lsq.bits := io.in.bits + io.lsq.bits.paddr := s1_paddr + io.lsq.bits.miss := false.B + io.lsq.bits.mmio := AddressSpace.isMMIO(s1_paddr) + io.lsq.bits.uop.cf.exceptionVec(storePageFault) := io.dtlbResp.bits.excp.pf.st + + // mmio inst with exception will be written back immediately val hasException = io.out.bits.uop.cf.exceptionVec.asUInt.orR - io.stout.valid := io.in.fire() && (!io.out.bits.mmio || hasException) // mmio inst will be writebacked immediately + io.out.valid := io.in.valid && (!io.out.bits.mmio || hasException) && !s1_tlb_miss + io.out.bits := io.lsq.bits // if fp // io.fp_out.valid := ... @@ -98,17 +94,24 @@ } -// class StoreUnit_S2 extends XSModule { -// val io = IO(new Bundle() { -// val in = Flipped(Decoupled(new LsPipelineBundle)) -// val out = Decoupled(new LsPipelineBundle) -// val redirect = Flipped(ValidIO(new Redirect)) -// }) +class StoreUnit_S2 extends XSModule { + val io = IO(new Bundle() { + val in = Flipped(Decoupled(new LsPipelineBundle)) + val stout = DecoupledIO(new ExuOutput) // writeback store + }) + + io.in.ready := true.B -// io.in.ready := true.B -// io.out.bits := io.in.bits -// io.out.valid := io.in.valid && !io.out.bits.uop.roqIdx.needFlush(io.redirect) -// } + io.stout.valid := io.in.valid + io.stout.bits.uop := io.in.bits.uop + io.stout.bits.data := DontCare + io.stout.bits.redirectValid := false.B + io.stout.bits.redirect := DontCare + io.stout.bits.brUpdate := DontCare + io.stout.bits.debug.isMMIO := io.in.bits.mmio + io.stout.bits.fflags := DontCare + +} class StoreUnit extends XSModule { val io = IO(new Bundle() { @@ -122,25 +125,21 @@ val store_s0 = Module(new StoreUnit_S0) val store_s1 = Module(new StoreUnit_S1) - // val store_s2 = Module(new StoreUnit_S2) + val store_s2 = Module(new StoreUnit_S2) store_s0.io.in <> io.stin - store_s0.io.redirect <> io.redirect store_s0.io.dtlbReq <> io.dtlb.req - store_s0.io.dtlbResp <> io.dtlb.resp - store_s0.io.tlbFeedback <> io.tlbFeedback - PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, false.B) - // PipelineConnect(store_s1.io.fp_out, store_s2.io.in, true.B, false.B) + PipelineConnect(store_s0.io.out, store_s1.io.in, true.B, store_s0.io.out.bits.uop.roqIdx.needFlush(io.redirect)) - store_s1.io.redirect <> io.redirect - store_s1.io.stout <> io.stout - // send result to sq - io.lsq.valid := store_s1.io.out.valid - io.lsq.bits := store_s1.io.out.bits + store_s1.io.lsq <> io.lsq // send result to sq + store_s1.io.dtlbResp <> io.dtlb.resp + store_s1.io.tlbFeedback <> io.tlbFeedback + + PipelineConnect(store_s1.io.out, store_s2.io.in, true.B, store_s1.io.out.bits.uop.roqIdx.needFlush(io.redirect)) + + store_s2.io.stout <> io.stout - store_s1.io.out.ready := true.B - private def printPipeLine(pipeline: LsPipelineBundle, cond: Bool, name: String): Unit = { XSDebug(cond, p"$name" + p" pc ${Hexadecimal(pipeline.uop.cf.pc)} " + @@ -154,4 +153,4 @@ class StoreUnit extends XSModule { printPipeLine(store_s0.io.out.bits, store_s0.io.out.valid, "S0") printPipeLine(store_s1.io.out.bits, 
store_s1.io.out.valid, "S1") -} \ No newline at end of file +} diff --git a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala index b3b0143bcf3b7da169a53725bcb219bf5be08acd..372482c8dddb96a21d131e7dacba3f5f8d00f720 100644 --- a/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala +++ b/src/main/scala/xiangshan/mem/sbuffer/NewSbuffer.scala @@ -104,7 +104,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { def isOneOf(key: UInt, seq: Seq[UInt]): Bool = if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR() - def witdhMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f + def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f def maskData(mask: UInt, data: UInt): UInt = { @@ -160,7 +160,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { def stateCanMerge(s: UInt): Bool = isOneOf(s, Seq(s_valid, s_inflight_req)) - val mergeMask = witdhMap(i => + val mergeMask = widthMap(i => req.valid && stateCanMerge(state_old(i)) && getTag(req.bits.addr)===mem_old(i).tag ) val canMerge = Cat(mergeMask).orR() @@ -184,7 +184,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { state_new.zip(mem_new) } - val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer.read(i.U))) + val bufferRead = VecInit((0 until StoreBufferSize) map (i => buffer(i))) val initialSbuffer = stateVec.zip(bufferRead) val updatedSbuffer = io.in.zipWithIndex.foldLeft[Seq[SbufferEntry]](initialSbuffer)(enqSbuffer) val updatedState = updatedSbuffer.map(_._1) @@ -205,8 +205,8 @@ class NewSbuffer extends XSModule with HasSbufferCst { XSDebug(req.fire(), p"accept req [$i]: " + p"addr:${Hexadecimal(req.bits.addr)} " + - p"mask:${Binary(req.bits.mask)} " + - p"data:${Hexadecimal(req.bits.data)}\n" + p"mask:${Binary(req.bits.mask)} " + + p"data:${Hexadecimal(req.bits.data)}\n" ) XSDebug(req.valid && !req.ready, p"req [$i] blocked by sbuffer\n" @@ -257,7 +257,7 @@ class NewSbuffer extends XSModule with HasSbufferCst { def noSameBlockInflight(idx: UInt): Bool = { val tag = updatedSbufferLine(idx).tag - !Cat(witdhMap(i => { + !Cat(widthMap(i => { // stateVec(idx) itself must not be s_inflight* isOneOf(stateVec(i), Seq(s_inflight_req, s_inflight_resp)) && tag===updatedSbufferLine(i).tag @@ -316,62 +316,42 @@ class NewSbuffer extends XSModule with HasSbufferCst { // ---------------------- Load Data Forward --------------------- - // (buff, do_forward) - // pass 'do_forward' here to avoid duplicated tag compare - type ForwardBuf = (SbufferLine, Bool) - - def forwardQuery(forward: LoadForwardQueryIO, buff: ForwardBuf): LoadForwardQueryIO = { - val bufLine = buff._1 - val do_forward = buff._2 - val forwardWire = WireInit(forward) - val forwardMask = forwardWire.forwardMask - val forwardData = forwardWire.forwardData - val dataVec = VecInit((0 until CacheLineBytes).map(i => - bufLine.data(i*8+7, i*8) - )) - when(do_forward){ - (0 until DataBytes).map(i => { - val lineOffset = Cat(getWordOffset(forward.paddr), i.U(3.W)) - when(bufLine.mask(lineOffset) && forward.mask(i)){ - forwardMask(i) := true.B - forwardData(i) := dataVec(lineOffset) - } - }) - } - forwardWire - } - - for((forward, i) <- io.forward.zipWithIndex){ - val tag_matches = witdhMap(i => bufferRead(i).tag===getTag(forward.paddr)) - val valid_tag_matches = witdhMap(i => tag_matches(i) && stateVec(i)===s_valid) - val inflight_tag_matches = witdhMap(i => + for ((forward, i) <- io.forward.zipWithIndex) { + val tag_matches = widthMap(i => 
bufferRead(i).tag===getTag(forward.paddr)) + val valid_tag_matches = widthMap(i => tag_matches(i) && stateVec(i)===s_valid) + val inflight_tag_matches = widthMap(i => tag_matches(i) && (stateVec(i)===s_inflight_req || stateVec(i)===s_inflight_resp) ) - val (valid_forward_idx, valid_tag_match) = PriorityEncoderWithFlag(valid_tag_matches) - val (inflight_forwad_idx, inflight_tag_match) = PriorityEncoderWithFlag(inflight_tag_matches) + val line_offset_mask = UIntToOH(getWordOffset(forward.paddr)) - val valid_line = bufferRead(valid_forward_idx) - val inflight_line = bufferRead(inflight_forwad_idx) + val valid_tag_match_reg = valid_tag_matches.map(RegNext(_)) + val inflight_tag_match_reg = inflight_tag_matches.map(RegNext(_)) + val line_offset_reg = RegNext(line_offset_mask) - val initialForward = WireInit(forward) - initialForward.forwardMask := 0.U.asTypeOf(Vec(DataBytes, Bool())) - initialForward.forwardData := DontCare + val selectedValidLine = Mux1H(valid_tag_match_reg, bufferRead) + val selectedValidMask = Mux1H(line_offset_reg, selectedValidLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) + val selectedValidData = Mux1H(line_offset_reg, selectedValidLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) - val forwardResult = Seq( - (inflight_line, inflight_tag_match), - (valid_line, valid_tag_match) - ).foldLeft(initialForward)(forwardQuery) + val selectedInflightLine = Mux1H(inflight_tag_match_reg, bufferRead) + val selectedInflightMask = Mux1H(line_offset_reg, selectedInflightLine.mask.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) + val selectedInflightData = Mux1H(line_offset_reg, selectedInflightLine.data.asTypeOf(Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) - forward.forwardMask := forwardResult.forwardMask - forward.forwardData := forwardResult.forwardData + for (j <- 0 until DataBytes) { + forward.forwardMask(j) := false.B + forward.forwardData(j) := DontCare - XSDebug(inflight_tag_match, - p"inflight tag match: forward [$i] <> buf[$inflight_forwad_idx]\n" - ) - XSDebug(valid_tag_match, - p"valid tag match: forward [$i] <> buf[$valid_forward_idx]\n" - ) - XSDebug(inflight_tag_match || valid_tag_match, + // valid entries have higher priority than inflight entries + when (selectedInflightMask(j)) { + forward.forwardMask(j) := true.B + forward.forwardData(j) := selectedInflightData(j) + } + when (selectedValidMask(j)) { + forward.forwardMask(j) := true.B + forward.forwardData(j) := selectedValidData(j) + } + } + + XSDebug(Cat(inflight_tag_matches).orR || Cat(valid_tag_matches).orR, p"[$i] forward paddr:${Hexadecimal(forward.paddr)}\n" ) }
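The per-byte priority in the forwarding loop above relies on Chisel's last-connect semantics: the valid-entry `when` is written after the inflight-entry `when`, so committed (s_valid) data overrides in-flight data byte by byte. A minimal sketch of the idiom, with hypothetical module and signal names (not part of the patch):
import chisel3._
import chisel3.util._
class BytePriorityMerge extends Module {
  val io = IO(new Bundle {
    val inflight  = Input(Valid(UInt(8.W)))  // lower-priority byte source
    val committed = Input(Valid(UInt(8.W)))  // higher-priority byte source
    val out       = Output(Valid(UInt(8.W)))
  })
  // default: forward nothing
  io.out.valid := false.B
  io.out.bits  := 0.U
  // earlier connect: lower priority
  when (io.inflight.valid) {
    io.out.valid := true.B
    io.out.bits  := io.inflight.bits
  }
  // later connect wins: higher priority
  when (io.committed.valid) {
    io.out.valid := true.B
    io.out.bits  := io.committed.bits
  }
}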