未验证 提交 3ff0763b 编写于 作者: L ljw 提交者: GitHub

Merge branch 'master' into srt4-divider

Subproject commit 5ca43398ac8b1b293291bd4e6e8c233be6c66968
Subproject commit 37d27a8f3b7d288c4663eebd8571018357fd827a
......@@ -129,8 +129,9 @@ trait HasPipelineReg { this: FunctionUnit =>
object FunctionUnit extends HasXSParameter {
def multiplier = new ArrayMultiplier(XLEN+1)
def divider = new SRT4Divider(XLEN)
def multiplier = new ArrayMultiplier(XLEN+1, Seq(0, 2))
def alu = new Alu
def jmp = new Jump
......
......@@ -4,8 +4,7 @@ import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend._
import xiangshan.backend.fu.FunctionUnit._
import xiangshan.backend.fu.fpu.util.{C22, C32, C53}
class MulDivCtrl extends Bundle{
val sign = Bool()
......@@ -13,14 +12,17 @@ class MulDivCtrl extends Bundle{
val isHi = Bool() // return hi bits of result ?
}
class ArrayMultiplier(len: Int, latency: Int = 3)
extends FunctionUnit(
FuConfig(FuType.mul, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false, CertainLatency(latency)),
len
)
/**
  * Shared base class of the multiplier function units in this file.
  *
  * It registers the unit with a `FuType.mul` configuration that writes the integer
  * register file, raises no redirect and reports a fixed (`CertainLatency`) latency,
  * and it exposes the multiply/divide control bundle as an input port.
  *
  * @param len     width parameter forwarded unchanged to [[FunctionUnit]]
  * @param latency value wrapped in `CertainLatency` for the unit's configuration
  */
class AbstractMultiplier(len: Int, latency: Int = 2)
  extends FunctionUnit(
    FuConfig(
      FuType.mul,
      2,
      0,
      writeIntRf = true,
      writeFpRf = false,
      hasRedirect = false,
      CertainLatency(latency)
    ),
    len
  ) {

  // Control inputs (sign / isW / isHi ...) that accompany each operation.
  val ctrl = IO(Input(new MulDivCtrl))
}
class NaiveMultiplier(len: Int, latency: Int = 3)
extends AbstractMultiplier(len, latency)
with HasPipelineReg
{
val ctrl = IO(Input(new MulDivCtrl))
val (src1, src2) = (io.in.bits.src(0), io.in.bits.src(1))
......@@ -38,5 +40,134 @@ class ArrayMultiplier(len: Int, latency: Int = 3)
val res = Mux(ctrlVec.last.isHi, dataVec.last(2*xlen-1, xlen), dataVec.last(xlen-1,0))
io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31,0),xlen), res)
XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
}
/**
 * Pipelined array multiplier.
 *
 * Partial products are generated with radix-4 Booth recoding of `a` (overlapping 3-bit
 * windows, stepping two bits at a time), scattered into per-bit-weight columns, reduced
 * layer by layer with C22 / C32 / C53 cells until every column holds at most two bits,
 * and finally summed with a single adder.
 *
 * @param len   operand width in bits; 2*len result columns are built
 * @param doReg reduction-layer depths (0-based) after which the column bits are passed
 *              through a pipeline register; `doReg.size` is handed to
 *              [[AbstractMultiplier]] as the latency
 */
class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len, doReg.size) with HasPipelineReg {
// Ascending order: the position of a depth in this list (+1) is the PipelineReg stage index used below.
val doRegSorted = doReg.sortWith(_ < _)
// Elaboration-time debug output (printed once per instantiation, not part of the hardware).
println(doRegSorted)
// a is the operand scanned by the Booth windows, b is the operand that gets selected/shifted.
val (a, b) = (io.in.bits.src(0), io.in.bits.src(1))
// Candidate multiples of b, each held in a (len+1)-bit wire.
// neg_b / neg_bx2 are only the bitwise-complement forms; the missing +1 / +2 is added
// through the correction bits `t` attached to the following partial product.
val b_sext, bx2, neg_b, neg_bx2 = Wire(UInt((len+1).W))
b_sext := SignExt(b, len+1)
bx2 := b_sext << 1
neg_b := (~b_sext).asUInt()
neg_bx2 := neg_b << 1
// columns(j) collects every partial-product bit of weight 2^j.
val columns: Array[Seq[Bool]] = Array.fill(2*len)(Seq())
// Booth window of the previous iteration; determines the correction bits `t`.
var last_x = WireInit(0.U(3.W))
for(i <- Range(0, len, 2)){
// 3-bit Booth window {a(i+1), a(i), a(i-1)}: an implicit 0 below bit 0 for the first
// window, and a repeated top bit when bit i+1 does not exist.
val x = if(i==0) Cat(a(1,0), 0.U(1.W)) else if(i+1==len) SignExt(a(i, i-1), 3) else a(i+1, i-1)
// Window value -> selected multiple (0 and 7 fall through to the default 0).
val pp_temp = MuxLookup(x, 0.U, Seq(
1.U -> b_sext,
2.U -> b_sext,
3.U -> bx2,
4.U -> neg_bx2,
5.U -> neg_b,
6.U -> neg_b
))
// Top bit of the selected multiple.
val s = pp_temp(len)
// Correction for the PREVIOUS window's complemented multiple: +2 for neg_bx2, +1 for neg_b.
// NOTE(review): the last window's own correction is never added because no further row
// follows it; presumably callers guarantee that window never selects a negative multiple
// (e.g. via the extra extension bit of len = XLEN+1) - confirm.
val t = MuxLookup(last_x, 0.U(2.W), Seq(
4.U -> 2.U(2.W),
5.U -> 1.U(2.W),
6.U -> 1.U(2.W)
))
last_x = x
// Assemble the row and the weight of its bit 0. Rows after the first carry `t` in their two
// low bits, hence weight i-2.
// NOTE(review): the ~s / s,s / leading-1 prefixes look like the usual constants that replace
// full sign extension of each row - confirm against the intended scheme.
val (pp, weight) = i match {
case 0 =>
(Cat(~s, s, s, pp_temp), 0)
case n if (n==len-1) || (n==len-2) =>
(Cat(~s, pp_temp, t), i-2)
case _ =>
(Cat(1.U(1.W), ~s, pp_temp, t), i-2)
}
// Scatter the row into the columns; bits whose weight is >= 2*len have no column and are dropped.
for(j <- columns.indices){
if(j >= weight && j < (weight + pp.getWidth)){
columns(j) = columns(j) :+ pp(j-weight)
}
}
}
/**
 * Reduces the bits of one column by one layer.
 *
 * @param col bits of this column
 * @param cin carries produced by the previous column within the same layer
 * @return (sum, cout1, cout2): `sum` stays in this column for the next layer,
 *         `cout1` is fed to the next column as its `cin` within this layer,
 *         `cout2` is added to the next column's bits for the next layer
 */
def addOneColumn(col: Seq[Bool], cin: Seq[Bool]): (Seq[Bool], Seq[Bool], Seq[Bool]) = {
var sum = Seq[Bool]()
var cout1 = Seq[Bool]()
var cout2 = Seq[Bool]()
col.size match {
case 1 => // do nothing
sum = col ++ cin
case 2 =>
// Two bits go through a C22 cell; all incoming carries are kept as extra sum bits.
val c22 = Module(new C22)
c22.io.in := col
sum = c22.io.out(0).asBool() +: cin
cout2 = Seq(c22.io.out(1).asBool())
case 3 =>
// Three bits go through a C32 cell; all incoming carries are kept as extra sum bits.
val c32 = Module(new C32)
c32.io.in := col
sum = c32.io.out(0).asBool() +: cin
cout2 = Seq(c32.io.out(1).asBool())
case 4 =>
// Four bits plus the first incoming carry (or 0) go through a C53 cell;
// the remaining incoming carries are kept as extra sum bits.
val c53 = Module(new C53)
for((x, y) <- c53.io.in.take(4) zip col){
x := y
}
c53.io.in.last := (if(cin.nonEmpty) cin.head else 0.U)
sum = Seq(c53.io.out(0).asBool()) ++ (if(cin.nonEmpty) cin.drop(1) else Nil)
cout1 = Seq(c53.io.out(1).asBool())
cout2 = Seq(c53.io.out(2).asBool())
case n =>
// More than four bits: handle the first four and the rest separately and concatenate.
// NOTE(review): an empty column also lands here and would recurse without terminating
// (col take 4 stays empty); the callers in this class presumably never pass one.
val cin_1 = if(cin.nonEmpty) Seq(cin.head) else Nil
val cin_2 = if(cin.nonEmpty) cin.drop(1) else Nil
val (s_1, c_1_1, c_1_2) = addOneColumn(col take 4, cin_1)
val (s_2, c_2_1, c_2_2) = addOneColumn(col drop 4, cin_2)
sum = s_1 ++ s_2
cout1 = c_1_1 ++ c_2_1
cout2 = c_1_2 ++ c_2_2
}
(sum, cout1, cout2)
}
// Largest element of a non-empty collection.
def max(in: Iterable[Int]): Int = in.reduce((a, b) => if(a>b) a else b)
/**
 * Recursively reduces the columns until each holds at most two bits.
 *
 * @param cols  current column contents, index = bit weight
 * @param depth index of the current reduction layer, matched against `doRegSorted`
 * @return the two remaining rows as (sum, carry), to be added by the caller
 */
def addAll(cols: Array[Seq[Bool]], depth: Int): (UInt, UInt) = {
if(max(cols.map(_.size)) <= 2){
// First row: bit 0 of every column (reversed so that column 0 becomes the LSB).
val sum = Cat(cols.map(_(0)).reverse)
// k = index of the first column that does not hold exactly one bit.
// NOTE(review): this relies on at least one column holding two bits and on every column
// from k upward holding two bits; otherwise the indexing below fails at elaboration.
var k = 0
while(cols(k).size == 1) k = k+1
// Second row: bit 1 of the columns from k upward, zero-filled below k.
val carry = Cat(cols.drop(k).map(_(1)).reverse)
(sum, Cat(carry, 0.U(k.W)))
} else {
val columns_next = Array.fill(2*len)(Seq[Bool]())
// Carries travelling from column i to column i+1: cout1 within this layer, cout2 into the next layer.
var cout1, cout2 = Seq[Bool]()
for( i <- cols.indices){
val (s, c1, c2) = addOneColumn(cols(i), cout1)
columns_next(i) = s ++ cout2
cout1 = c1
cout2 = c2
}
// Optionally register every bit between this layer and the next one.
// PipelineReg is not defined in this class (presumably provided by HasPipelineReg).
val needReg = doRegSorted.contains(depth)
val toNextLayer = if(needReg)
columns_next.map(_.map(PipelineReg(doRegSorted.indexOf(depth) + 1)(_)))
else
columns_next
addAll(toNextLayer, depth+1)
}
}
val (sum, carry) = addAll(cols = columns, depth = 0)
// Final addition of the two remaining rows.
val result = sum + carry
// Delay the control bundle through PipelineReg(1) .. PipelineReg(latency) so that
// ctrlVec.last is the control that belongs to `result`.
var ctrlVec = Seq(ctrl)
for(i <- 1 to latency){
ctrlVec = ctrlVec :+ PipelineReg(i)(ctrlVec(i-1))
}
val xlen = io.out.bits.data.getWidth
// isHi selects the upper xlen bits of the product, otherwise the lower xlen bits.
val res = Mux(ctrlVec.last.isHi, result(2*xlen-1, xlen), result(xlen-1,0))
// isW: sign-extend the low 32 bits of the selected half to xlen.
io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31,0),xlen), res)
XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")
}
\ No newline at end of file
......@@ -54,7 +54,7 @@ class StoreMissEntry extends DCacheModule
when (state =/= s_invalid) {
XSDebug("entry: %d state: %d\n", io.id, state)
XSDebug("entry: %d state: %d idx: %x tag: %x\n", io.id, state, io.idx.bits, io.tag.bits)
}
// --------------------------------------------
......@@ -158,12 +158,13 @@ class StoreMissQueue extends DCacheModule
val replay_arb = Module(new Arbiter(new DCacheLineReq, cfg.nStoreMissEntries))
val resp_arb = Module(new Arbiter(new DCacheLineResp, cfg.nStoreMissEntries))
val idx_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
val tag_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
val idx_matches = Wire(Vec(cfg.nStoreMissEntries, Bool()))
val tag_matches = Wire(Vec(cfg.nStoreMissEntries, Bool()))
val tag_match = Mux1H(idx_matches, tag_matches)
val idx_match = idx_matches.reduce(_||_)
XSDebug("idx_match: %b tag_match: %b\n", idx_match, tag_match)
val req = io.lsu.req
val entry_alloc_idx = Wire(UInt())
......
......@@ -201,21 +201,24 @@ class L1plusCacheTest extends FlatSpec with ChiselScalatestTester with Matchers
}
// emulated queue
class IdPool(val nReqIds: Int) {
class IdPool(val nReqIds: Int, name: String) {
val freeIds = new Array[Boolean](nReqIds)
/**
  * Claims the lowest-numbered id that is currently marked free.
  *
  * Prints a trace line for both outcomes.
  *
  * @return the claimed id, or -1 when no id is free
  */
def allocate(): Int = {
  // indexWhere returns -1 when nothing matches, which is exactly this method's
  // "allocation failed" value. This replaces a `return` from inside the for-loop
  // closure (a nonlocal return implemented by throwing an exception).
  val i = freeIds.indexWhere(isFree => isFree)
  if (i >= 0) {
    println(f"$name allocate: $i")
    freeIds(i) = false
  } else {
    // no free id to allocate
    println(f"$name allocate failed")
  }
  i
}
/**
  * Hands `id` back to the pool.
  *
  * Prints a trace line first, then asserts that the id is not already marked free
  * before marking it free.
  *
  * @param id the id to release
  */
def free(id: Int): Unit = {
  println(f"$name free: $id")
  val alreadyFree = freeIds(id)
  assert(!alreadyFree)
  freeIds.update(id, true)
}
......@@ -248,7 +251,7 @@ case class QueueEntry(
class Queue(nEntries: Int, name: String) {
// Queue
// ---------------------------------------
val idPool = new IdPool(nEntries)
val idPool = new IdPool(nEntries, name + "IdPool")
val queue = new ArrayBuffer[QueueEntry]()
def enq(req: Req) = {
// for unissued reqs, they have id = -1
......@@ -313,48 +316,51 @@ class StoreQueue(nEntries: Int) extends Queue(nEntries, "StoreQueue") {
def sendReq(port: DCacheLineIO): Unit = {
val req = port.req
// has last cycle's req been fired?
if (reqWaiting && req.ready.peek().litToBoolean) {
reqWaiting = false
// no requests waiting on line
// reset valid signal
req.valid.poke(false.B)
}
// can we send a new request in this cycle
val reqIdx = select()
if (reqWaiting || reqIdx == -1) {
return
}
if (!reqWaiting) {
val reqIdx = select()
if (reqIdx == -1) {
// no more request to send
req.valid.poke(false.B)
return
}
val tId = idPool.allocate()
if (tId == -1) {
return
val tId = idPool.allocate()
if (tId == -1) {
// no more request to send
req.valid.poke(false.B)
return
}
// try sending a new request in this cycle
// select a req to issue
reqWaiting = true
issue(reqIdx, tId)
val CMD_WRITE = MemoryOpConstants.M_XWR
val FULL_MASK = BigInt("ffffffffffffffff", 16).U
val r = queue(reqIdx).req
req.valid.poke(true.B)
req.bits.cmd.poke(CMD_WRITE)
req.bits.addr.poke(r.addr.U)
req.bits.data.poke(r.data.U)
req.bits.mask.poke(FULL_MASK)
req.bits.meta.id.poke(tId.U)
req.bits.meta.vaddr.poke(r.addr.U)
req.bits.meta.paddr.poke(r.addr.U)
// req.bits.meta.uop.poke(0.U.asTypeOf(new MicroOp))
req.bits.meta.mmio.poke(false.B)
req.bits.meta.tlb_miss.poke(false.B)
req.bits.meta.mask.poke(FULL_MASK)
req.bits.meta.replay.poke(false.B)
}
// try sending a new request in this cycle
// select a req to issue
reqWaiting = true
issue(reqIdx, tId)
val CMD_WRITE = MemoryOpConstants.M_XWR
val FULL_MASK = BigInt("ffffffffffffffff", 16).U
val r = queue(reqIdx).req
req.valid.poke(true.B)
req.bits.cmd.poke(CMD_WRITE)
req.bits.addr.poke(r.addr.U)
req.bits.data.poke(r.data.U)
req.bits.mask.poke(FULL_MASK)
req.bits.meta.id.poke(tId.U)
req.bits.meta.vaddr.poke(r.addr.U)
req.bits.meta.paddr.poke(r.addr.U)
// req.bits.meta.uop.poke(0.U.asTypeOf(new MicroOp))
req.bits.meta.mmio.poke(false.B)
req.bits.meta.tlb_miss.poke(false.B)
req.bits.meta.mask.poke(FULL_MASK)
req.bits.meta.replay.poke(false.B)
if (req.valid.peek().litToBoolean && req.ready.peek().litToBoolean) {
reqWaiting = false
}
}
def handleResp(port: DCacheLineIO) = {
......@@ -380,37 +386,40 @@ class LoadQueue(nEntries: Int) extends Queue(nEntries, "LoadQueue") {
def sendReq(port: L1plusCacheIO): Unit = {
val req = port.req
// has last cycle's req been fired?
if (reqWaiting && req.ready.peek().litToBoolean) {
reqWaiting = false
// no requests waiting on line
// reset valid signal
req.valid.poke(false.B)
}
// can we send a new request in this cycle
val reqIdx = select()
if (reqWaiting || reqIdx == -1) {
return
}
if (!reqWaiting) {
val reqIdx = select()
if (reqIdx == -1) {
// no more request to send
req.valid.poke(false.B)
return
}
val tId = idPool.allocate()
if (tId == -1) {
return
}
val tId = idPool.allocate()
if (tId == -1) {
// no more request to send
req.valid.poke(false.B)
return
}
// try sending a new request in this cycle
// select a req to issue
// try sending a new request in this cycle
// select a req to issue
reqWaiting = true
issue(reqIdx, tId)
reqWaiting = true
issue(reqIdx, tId)
val CMD_READ = MemoryOpConstants.M_XRD
val CMD_READ = MemoryOpConstants.M_XRD
val r = queue(reqIdx).req
req.valid.poke(true.B)
req.bits.cmd.poke(CMD_READ)
req.bits.addr.poke(r.addr.U)
req.bits.id.poke(tId.U)
}
val r = queue(reqIdx).req
req.valid.poke(true.B)
req.bits.cmd.poke(CMD_READ)
req.bits.addr.poke(r.addr.U)
req.bits.id.poke(tId.U)
if (req.valid.peek().litToBoolean && req.ready.peek().litToBoolean) {
reqWaiting = false
}
}
def handleResp(port: L1plusCacheIO) = {
......
package cache
import scala.collection.mutable.ArrayBuffer
import chipsalliance.rocketchip.config.{Field, Parameters}
import chisel3._
import chisel3.util._
import chiseltest.experimental.TestOptionBuilder._
import chiseltest.internal.VerilatorBackendAnnotation
import chiseltest._
import chisel3.experimental.BundleLiterals._
import firrtl.stage.RunFirrtlTransformAnnotation
import chiseltest.ChiselScalatestTester
import device.AXI4RAM
import freechips.rocketchip.amba.axi4.AXI4UserYanker
import freechips.rocketchip.diplomacy.{AddressSet, LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink.{TLBuffer, TLCacheCork, TLToAXI4, TLXbar}
import org.scalatest.{FlatSpec, Matchers}
import sifive.blocks.inclusivecache.{CacheParameters, InclusiveCache, InclusiveCacheMicroParameters, InclusiveCacheControlParameters}
import utils.{DebugIdentityNode, HoldUnless, XSDebug}
import xiangshan.{HasXSLog, MicroOp}
import xiangshan.cache.{DCache, L1plusCache, Uncache, DCacheWordIO, DCacheLineIO, L1plusCacheIO, MemoryOpConstants}
import xiangshan.testutils.AddSinks
import xstransforms.PrintModuleName
import scala.util.Random
/**
  * Port bundle of the test harness; the harness instantiates it flipped so the
  * test bench acts as the requester on every port.
  */
class L2NonInclusiveGetTestTopIO extends Bundle {
  // connected to the L1plus cache's io
  val l1plus = new L1plusCacheIO

  // connected to the data cache's store port
  val dcacheStore = new DCacheLineIO

  // connected to the uncache unit, whose client node drives the L2 control node
  val l2Flush = new DCacheWordIO
}
/**
 * Test harness: DCache and L1plusCache share a TileLink crossbar in front of an
 * InclusiveCache L2, which is backed by an AXI4 RAM. An Uncache unit is attached
 * to the L2's control node so the test bench can reach the L2 control registers.
 */
class L2NonInclusiveGetTestTop()(implicit p: Parameters) extends LazyModule {
val uncache = LazyModule(new Uncache())
val dcache = LazyModule(new DCache())
val l1plusCache = LazyModule(new L1plusCache())
// Deliberately tiny L2: 4 ways, and the sets expression evaluates to 4.
val l2 = LazyModule(new InclusiveCache(
CacheParameters(
level = 2,
ways = 4,
sets = 4 * 1024 / (64 * 4 * 4),
blockBytes = 64,
beatBytes = 32,
cacheName = s"L2"
),
InclusiveCacheMicroParameters(
writeBytes = 8
),
// Control interface base address; the test bench uses the same value as its flush base address.
Some(InclusiveCacheControlParameters(
address = 0x8000000L,
beatBytes = 8))))
// 128 MiB of backing memory covering 0x0 - 0x7ffffff.
val ram = LazyModule(new AXI4RAM(
AddressSet(0x0L, 0x7ffffffL),
memByte = 128 * 1024 * 1024,
useBlackBox = false
))
val xbar = TLXbar()
// Both cache client nodes are bound to the crossbar, each through a TLBuffer and a DebugIdentityNode.
xbar := TLBuffer() := DebugIdentityNode() := dcache.clientNode
xbar := TLBuffer() := DebugIdentityNode() := l1plusCache.clientNode
l2.node := DebugIdentityNode() := xbar
// L2 -> TLCacheCork -> TLBuffer -> TLToAXI4 -> AXI4UserYanker -> RAM
ram.node :=
AXI4UserYanker() :=
TLToAXI4() :=
TLBuffer() :=
TLCacheCork() :=
DebugIdentityNode() :=
l2.node
// connect uncache access to l2 control node
l2.ctlnode.get := DebugIdentityNode() := uncache.clientNode
lazy val module = new LazyModuleImp(this) with HasXSLog {
// Flipped: the test bench drives the requests.
val io = IO(Flipped(new L2NonInclusiveGetTestTopIO))
AddSinks()
// Only the store port of the DCache is exercised; everything else on its io is left as DontCare.
dcache.module.io <> DontCare
dcache.module.io.lsu.store <> io.dcacheStore
l1plusCache.module.io <> io.l1plus
uncache.module.io.lsroq <> io.l2Flush
}
}
/**
  * Chiseltest bench for [[L2NonInclusiveGetTestTop]].
  *
  * Each round stores random data to `nblocks` cache lines through the DCache store port,
  * flushes those lines out of the L2 through its MMIO control interface, flushes L1plus and
  * loads every line back through L1plus; it then repeats store / L1plus flush / load without
  * the L2 flush. The expected data is handed to the `LoadQueue` together with each load
  * (the comparison itself is presumably done there; `LoadQueue` is defined outside this class).
  */
class L2NonInclusiveGetTest extends FlatSpec with ChiselScalatestTester with Matchers {
  behavior of "L2Cache"

  val mem_size = 128 * 1024 * 1024
  val block_size = 64
  val block_bits = log2Up(block_size)
  // val nblocks = mem_size / block_size
  val nblocks = 100

  // data structures
  // our golden version cache
  val cache_blocks = Array.fill(nblocks)(BigInt(0))

  // ----------------------------------------
  // useful request parameter values
  val CMD_READ = MemoryOpConstants.M_XRD
  val CMD_WRITE = MemoryOpConstants.M_XWR
  // 64bit full mask
  val FULL_MASK_64 = BigInt("ffffffffffffffff", 16).U

  // Same base address as the L2 control interface configured in the test top.
  val L2_FLUSH_BASE_ADDR = 0x8000000L
  val CONFIG_ADDR = L2_FLUSH_BASE_ADDR + 0x0
  val FLUSH64_ADDR = L2_FLUSH_BASE_ADDR + 0x200
  val FLUSH32_ADDR = L2_FLUSH_BASE_ADDR + 0x240

  val r = scala.util.Random

  top.Parameters.set(top.Parameters.debugParameters)

  val annos = Seq(
    VerilatorBackendAnnotation,
    RunFirrtlTransformAnnotation(new PrintModuleName)
  )

  it should "run" in {

    implicit val p = Parameters((site, up, here) => {
      case L1plusCacheTestKey => 0
    })

    test(LazyModule(new L2NonInclusiveGetTestTop()).module)
      .withAnnotations(annos){ c =>

        c.clock.step(100)

        val sq = new StoreQueue(8)
        val lq = new LoadQueue(8)

        // Resets both emulated queues and deasserts every valid/ready/flush input of the DUT.
        def init() = {
          sq.init()
          lq.init()

          // initialize DUT inputs
          c.io.dcacheStore.req.valid.poke(false.B)
          c.io.dcacheStore.resp.ready.poke(false.B)
          c.io.l1plus.req.valid.poke(false.B)
          c.io.l1plus.resp.ready.poke(false.B)
          c.io.l1plus.flush.poke(false.B)
          c.io.l2Flush.req.valid.poke(false.B)
          c.io.l2Flush.resp.ready.poke(false.B)
        }

        // Performs one blocking MMIO transaction on the l2Flush port: drives the request until
        // it is accepted, then waits for the response. Returns the response data observed in
        // the cycle the response became valid. Shared by mmio_read and mmio_write, which
        // differ only in the command and the request data.
        def mmio_access(is_write: Boolean, addr: BigInt, data: BigInt): BigInt = {
          // send req
          val req = c.io.l2Flush.req
          req.valid.poke(true.B)
          req.bits.cmd.poke(if (is_write) CMD_WRITE else CMD_READ)
          req.bits.addr.poke(addr.U)
          req.bits.data.poke(data.U)
          req.bits.mask.poke(FULL_MASK_64)
          req.bits.meta.id.poke(0.U)
          req.bits.meta.vaddr.poke(addr.U)
          req.bits.meta.paddr.poke(addr.U)
          // req.bits.meta.uop.poke(0.U.asTypeOf(new MicroOp))
          req.bits.meta.mmio.poke(true.B)
          req.bits.meta.tlb_miss.poke(false.B)
          req.bits.meta.mask.poke(FULL_MASK_64)
          req.bits.meta.replay.poke(false.B)

          while (!req.ready.peek().litToBoolean) {
            c.clock.step()
          }
          // actually send the req
          c.clock.step()

          // lower valid
          req.valid.poke(false.B)

          // recv resp
          val resp = c.io.l2Flush.resp
          resp.ready.poke(true.B)
          while (!resp.valid.peek().litToBoolean) {
            c.clock.step()
          }
          val resp_data = resp.bits.data.peek().litValue
          // actually recv the response
          c.clock.step()

          // lower ready
          resp.ready.poke(false.B)

          resp_data
        }

        // Reads one MMIO register and returns its value.
        def mmio_read(addr: BigInt): BigInt =
          mmio_access(is_write = false, addr = addr, data = BigInt(0))

        // Writes one MMIO register and waits for the acknowledging response.
        def mmio_write(addr: BigInt, data: BigInt) = {
          mmio_access(is_write = true, addr = addr, data = data)
          ()
        }

        // Reads the L2 configuration register and prints its fields:
        // bits [3:0] banks, [11:8] ways, [19:16] log2(sets), [27:24] log2(block size).
        def get_l2_configurations() = {
          val config = mmio_read(CONFIG_ADDR)
          val nBank = config & 0xf
          val nWay = config >> 8 & 0xf
          val nSet = 1 << (config.toInt >> 16 & 0xf)
          val nBlock = 1 << (config.toInt >> 24 & 0xf)
          println(f"L2 configuration: nBank: $nBank nWay: $nWay nSet: $nSet nBlock: $nBlock")
        }

        // Requests a flush of the line containing `addr` by writing it to the FLUSH64 register.
        def flush_l2_block(addr: BigInt) = {
          mmio_write(FLUSH64_ADDR, addr)
          println(f"L2 flush block: $addr%x")
        }

        // Holds the L1plus flush input high until L1plus reports empty.
        def flush_l1plus() = {
          c.io.l1plus.flush.poke(true.B)
          while (!c.io.l1plus.empty.peek().litToBoolean) {
            c.clock.step()
          }
          c.io.l1plus.flush.poke(false.B)
        }

        // Flushes every block in [begin, end); `begin` is rounded down to a block boundary
        // and `end` is exclusive.
        def flush_l2_range(begin: BigInt, end: BigInt) = {
          var addr = begin >> block_bits << block_bits
          while (addr < end) {
            flush_l2_block(addr)
            addr += block_size
          }
        }

        // Ticks both queues once per clock cycle until all enqueued requests are finished.
        def evaluate() = {
          while (!sq.isFinished() || !lq.isFinished()) {
            sq.tick(c.io.dcacheStore)
            lq.tick(c.io.l1plus)
            c.clock.step()
          }
        }

        get_l2_configurations()

        // ----------------------------------------
        // scan test
        // Stores a fresh random value to every block and records it in the golden copy.
        def populate_memory() = {
          println(s"scan test")
          init()
          // first, initialize every memory block with random numbers
          for (i <- 0 until nblocks) {
            val addr = i * block_size
            // eight 64-bit words (top bit of each cleared) make up one 64-byte block
            val words = (0 until 8) map { _ =>
              (BigInt(r.nextLong() & 0x7fffffffffffffffL))
            }

            val data = words.foldLeft(BigInt(0))((acc, word) => (acc << 64) | word)
            cache_blocks(i) = data
            println(f"enq store addr: $addr%x data: $data%x")
            sq.enq(Req(addr, data))
          }
          // execute reqs
          evaluate()
        }

        // Flushes all populated blocks out of the L2.
        def flush_memory() = {
          // flush_l2_range treats its end as exclusive, so the end must be one block past the
          // last populated block; passing (nblocks - 1) * block_size left the last block
          // unflushed.
          flush_l2_range(0, nblocks * block_size)
        }

        // Loads every block back, passing the golden value along with each request.
        def read_memory() = {
          // read them out
          for (i <- 0 until nblocks) {
            val addr = i * block_size
            val data = cache_blocks(i)

            println(f"enq load addr: $addr%x data: $data%x")
            lq.enq(Req(addr, data))
          }
          // execute reqs
          evaluate()
        }

        for (_ <- 0 until 10) {
          populate_memory()
          flush_memory()
          // these loads should cause get miss
          flush_l1plus()
          read_memory()

          populate_memory()
          // these loads should not miss
          flush_l1plus()
          read_memory()
        }
      }
  }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册