Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenXiangShan
XiangShan
提交
3726264a
X
XiangShan
项目概览
OpenXiangShan
/
XiangShan
大约 1 年 前同步成功
通知
1183
Star
3914
Fork
526
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
X
XiangShan
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3726264a
编写于
1月 12, 2021
作者:
Fa_wang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/master' into opt-sbuffer-timing
上级
6f687286
2d936b39
变更
24
隐藏空白更改
内联
并排
Showing
24 changed files
with
750 additions
and
903 deletions
+750
-903
src/main/scala/xiangshan/XSCore.scala
src/main/scala/xiangshan/XSCore.scala
+5
-6
src/main/scala/xiangshan/backend/CtrlBlock.scala
src/main/scala/xiangshan/backend/CtrlBlock.scala
+8
-4
src/main/scala/xiangshan/backend/FloatBlock.scala
src/main/scala/xiangshan/backend/FloatBlock.scala
+13
-1
src/main/scala/xiangshan/backend/IntegerBlock.scala
src/main/scala/xiangshan/backend/IntegerBlock.scala
+11
-3
src/main/scala/xiangshan/backend/MemBlock.scala
src/main/scala/xiangshan/backend/MemBlock.scala
+25
-13
src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
+11
-4
src/main/scala/xiangshan/backend/dispatch/Dispatch2Fp.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch2Fp.scala
+32
-27
src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala
+30
-27
src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala
+23
-24
src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala
...scala/xiangshan/backend/issue/ReservationStationNew.scala
+61
-8
src/main/scala/xiangshan/backend/rename/RenameTable.scala
src/main/scala/xiangshan/backend/rename/RenameTable.scala
+2
-2
src/main/scala/xiangshan/cache/atomics.scala
src/main/scala/xiangshan/cache/atomics.scala
+52
-17
src/main/scala/xiangshan/cache/atomicsMissQueue.scala
src/main/scala/xiangshan/cache/atomicsMissQueue.scala
+17
-29
src/main/scala/xiangshan/cache/dcache.scala
src/main/scala/xiangshan/cache/dcache.scala
+2
-1
src/main/scala/xiangshan/cache/dcacheWrapper.scala
src/main/scala/xiangshan/cache/dcacheWrapper.scala
+74
-107
src/main/scala/xiangshan/cache/ldu.scala
src/main/scala/xiangshan/cache/ldu.scala
+69
-24
src/main/scala/xiangshan/cache/loadMissQueue.scala
src/main/scala/xiangshan/cache/loadMissQueue.scala
+0
-262
src/main/scala/xiangshan/cache/missQueue.scala
src/main/scala/xiangshan/cache/missQueue.scala
+143
-203
src/main/scala/xiangshan/cache/storeMissQueue.scala
src/main/scala/xiangshan/cache/storeMissQueue.scala
+22
-32
src/main/scala/xiangshan/cache/stu.scala
src/main/scala/xiangshan/cache/stu.scala
+51
-11
src/main/scala/xiangshan/cache/uncache.scala
src/main/scala/xiangshan/cache/uncache.scala
+1
-1
src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
+3
-5
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
+69
-74
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
+26
-18
未找到文件。
src/main/scala/xiangshan/XSCore.scala
浏览文件 @
3726264a
...
...
@@ -345,10 +345,12 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
integerBlock
.
io
.
wakeUpIn
.
fastUops
<>
floatBlock
.
io
.
wakeUpIntOut
.
fastUops
integerBlock
.
io
.
wakeUpIn
.
fast
<>
floatBlock
.
io
.
wakeUpIntOut
.
fast
integerBlock
.
io
.
wakeUpIn
.
slow
<>
floatBlock
.
io
.
wakeUpIntOut
.
slow
++
memBlock
.
io
.
wakeUpIntOut
.
slow
integerBlock
.
io
.
toMemBlock
<>
memBlock
.
io
.
fromIntBlock
floatBlock
.
io
.
wakeUpIn
.
fastUops
<>
integerBlock
.
io
.
wakeUpFpOut
.
fastUops
floatBlock
.
io
.
wakeUpIn
.
fast
<>
integerBlock
.
io
.
wakeUpFpOut
.
fast
floatBlock
.
io
.
wakeUpIn
.
slow
<>
integerBlock
.
io
.
wakeUpFpOut
.
slow
++
memBlock
.
io
.
wakeUpFpOut
.
slow
floatBlock
.
io
.
toMemBlock
<>
memBlock
.
io
.
fromFpBlock
integerBlock
.
io
.
wakeUpIntOut
.
fast
.
map
(
_
.
ready
:=
true
.
B
)
...
...
@@ -395,13 +397,10 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ptw
.
io
.
tlb
(
0
)
<>
memBlock
.
io
.
ptw
ptw
.
io
.
tlb
(
1
)
<>
frontend
.
io
.
ptw
ptw
.
io
.
sfence
<>
integerBlock
.
io
.
fenceio
.
sfence
ptw
.
io
.
csr
<>
integerBlock
.
io
.
csrio
.
tlb
ptw
.
io
.
csr
<>
integerBlock
.
io
.
csrio
.
tlb
dcache
.
io
.
lsu
.
load
<>
memBlock
.
io
.
dcache
.
loadUnitToDcacheVec
dcache
.
io
.
lsu
.
lsq
<>
memBlock
.
io
.
dcache
.
loadMiss
dcache
.
io
.
lsu
.
atomics
<>
memBlock
.
io
.
dcache
.
atomics
dcache
.
io
.
lsu
.
store
<>
memBlock
.
io
.
dcache
.
sbufferToDcache
uncache
.
io
.
lsq
<>
memBlock
.
io
.
dcache
.
uncache
dcache
.
io
.
lsu
<>
memBlock
.
io
.
dcache
uncache
.
io
.
lsq
<>
memBlock
.
io
.
uncache
if
(!
env
.
FPGAPlatform
)
{
val
debugIntReg
,
debugFpReg
=
WireInit
(
VecInit
(
Seq
.
fill
(
32
)(
0.
U
(
XLEN
.
W
))))
...
...
src/main/scala/xiangshan/backend/CtrlBlock.scala
浏览文件 @
3726264a
...
...
@@ -16,21 +16,22 @@ import xiangshan.mem.LsqEnqIO
class
CtrlToIntBlockIO
extends
XSBundle
{
val
enqIqCtrl
=
Vec
(
exuParameters
.
IntExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIqData
=
Vec
(
exuParameters
.
IntExuCnt
,
Output
(
new
ExuInput
))
val
readRf
=
Vec
(
NRIntReadPorts
,
Flipped
(
new
RfReadPort
(
XLEN
)))
// int block only uses port 0~7
val
readPortIndex
=
Vec
(
exuParameters
.
IntExuCnt
,
Output
(
UInt
(
log2Ceil
(
8
/
2
).
W
)))
// TODO parameterize 8 here
val
redirect
=
ValidIO
(
new
Redirect
)
}
class
CtrlToFpBlockIO
extends
XSBundle
{
val
enqIqCtrl
=
Vec
(
exuParameters
.
FpExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIqData
=
Vec
(
exuParameters
.
FpExuCnt
,
Output
(
new
ExuInput
))
val
readRf
=
Vec
(
NRFpReadPorts
,
Flipped
(
new
RfReadPort
(
XLEN
+
1
)))
// fp block uses port 0~11
val
readPortIndex
=
Vec
(
exuParameters
.
FpExuCnt
,
Output
(
UInt
(
log2Ceil
((
NRFpReadPorts
-
exuParameters
.
StuCnt
)
/
3
).
W
)))
val
redirect
=
ValidIO
(
new
Redirect
)
}
class
CtrlToLsBlockIO
extends
XSBundle
{
val
enqIqCtrl
=
Vec
(
exuParameters
.
LsExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIqData
=
Vec
(
exuParameters
.
LsExuCnt
,
Output
(
new
ExuInput
))
val
enqLsq
=
Flipped
(
new
LsqEnqIO
)
val
redirect
=
ValidIO
(
new
Redirect
)
}
...
...
@@ -113,7 +114,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
}
dispatch
.
io
.
numExist
<>
io
.
fromIntBlock
.
numExist
++
io
.
fromFpBlock
.
numExist
++
io
.
fromLsBlock
.
numExist
dispatch
.
io
.
enqIQCtrl
<>
io
.
toIntBlock
.
enqIqCtrl
++
io
.
toFpBlock
.
enqIqCtrl
++
io
.
toLsBlock
.
enqIqCtrl
dispatch
.
io
.
enqIQData
<>
io
.
toIntBlock
.
enqIqData
++
io
.
toFpBlock
.
enqIqData
++
io
.
toLsBlock
.
enqIqData
//
dispatch.io.enqIQData <> io.toIntBlock.enqIqData ++ io.toFpBlock.enqIqData ++ io.toLsBlock.enqIqData
val
flush
=
redirectValid
&&
RedirectLevel
.
isUnconditional
(
redirect
.
level
)
...
...
@@ -150,6 +151,9 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
io
.
toLsBlock
.
redirect
.
valid
:=
redirectValid
io
.
toLsBlock
.
redirect
.
bits
:=
redirect
dispatch
.
io
.
readPortIndex
.
intIndex
<>
io
.
toIntBlock
.
readPortIndex
dispatch
.
io
.
readPortIndex
.
fpIndex
<>
io
.
toFpBlock
.
readPortIndex
// roq to int block
io
.
roqio
.
toCSR
<>
roq
.
io
.
csr
io
.
roqio
.
exception
.
valid
:=
roq
.
io
.
redirectOut
.
valid
&&
roq
.
io
.
redirectOut
.
bits
.
isException
()
...
...
src/main/scala/xiangshan/backend/FloatBlock.scala
浏览文件 @
3726264a
...
...
@@ -3,6 +3,7 @@ package xiangshan.backend
import
chisel3._
import
chisel3.util._
import
xiangshan._
import
utils._
import
xiangshan.backend.regfile.Regfile
import
xiangshan.backend.exu._
import
xiangshan.backend.issue.
{
ReservationStationCtrl
,
ReservationStationData
}
...
...
@@ -25,6 +26,7 @@ class FloatBlock
val
io
=
IO
(
new
Bundle
{
val
fromCtrlBlock
=
Flipped
(
new
CtrlToFpBlockIO
)
val
toCtrlBlock
=
new
FpBlockToCtrlIO
val
toMemBlock
=
new
FpBlockToMemBlockIO
val
wakeUpIn
=
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)
val
wakeUpFpOut
=
Flipped
(
new
WakeUpBundle
(
fastFpOut
.
size
,
slowFpOut
.
size
))
...
...
@@ -57,6 +59,7 @@ class FloatBlock
def
needData
(
a
:
ExuConfig
,
b
:
ExuConfig
)
:
Boolean
=
(
a
.
readIntRf
&&
b
.
writeIntRf
)
||
(
a
.
readFpRf
&&
b
.
writeFpRf
)
val
readPortIndex
=
RegNext
(
io
.
fromCtrlBlock
.
readPortIndex
)
val
reservedStations
=
exeUnits
.
map
(
_
.
config
).
zipWithIndex
.
map
({
case
(
cfg
,
i
)
=>
var
certainLatency
=
-
1
if
(
cfg
.
hasCertainLatency
)
{
...
...
@@ -85,7 +88,15 @@ class FloatBlock
rsCtrl
.
io
.
redirect
<>
redirect
// TODO: remove it
rsCtrl
.
io
.
numExist
<>
io
.
toCtrlBlock
.
numExist
(
i
)
rsCtrl
.
io
.
enqCtrl
<>
io
.
fromCtrlBlock
.
enqIqCtrl
(
i
)
rsData
.
io
.
enqData
<>
io
.
fromCtrlBlock
.
enqIqData
(
i
)
rsData
.
io
.
srcRegValue
:=
DontCare
val
src1Value
=
VecInit
((
0
until
4
).
map
(
i
=>
fpRf
.
io
.
readPorts
(
i
*
3
).
data
))
val
src2Value
=
VecInit
((
0
until
4
).
map
(
i
=>
fpRf
.
io
.
readPorts
(
i
*
3
+
1
).
data
))
val
src3Value
=
VecInit
((
0
until
4
).
map
(
i
=>
fpRf
.
io
.
readPorts
(
i
*
3
+
2
).
data
))
rsData
.
io
.
srcRegValue
(
0
)
:=
src1Value
(
readPortIndex
(
i
))
rsData
.
io
.
srcRegValue
(
1
)
:=
src2Value
(
readPortIndex
(
i
))
rsData
.
io
.
srcRegValue
(
2
)
:=
src3Value
(
readPortIndex
(
i
))
rsData
.
io
.
redirect
<>
redirect
rsData
.
io
.
writeBackedData
<>
writeBackData
...
...
@@ -142,6 +153,7 @@ class FloatBlock
// read fp rf from ctrl block
fpRf
.
io
.
readPorts
<>
io
.
fromCtrlBlock
.
readRf
(
0
until
exuParameters
.
StuCnt
).
foreach
(
i
=>
io
.
toMemBlock
.
readFpRf
(
i
).
data
:=
fpRf
.
io
.
readPorts
(
i
+
12
).
data
)
// write fp rf arbiter
val
fpWbArbiter
=
Module
(
new
Wb
(
(
exeUnits
.
map
(
_
.
config
)
++
fastWakeUpIn
++
slowWakeUpIn
).
map
(
_
.
wbFpPriority
),
...
...
src/main/scala/xiangshan/backend/IntegerBlock.scala
浏览文件 @
3726264a
...
...
@@ -3,8 +3,8 @@ package xiangshan.backend
import
chisel3._
import
chisel3.util._
import
xiangshan._
import
xiangshan.backend.exu.Exu.
{
jumpExeUnitCfg
,
ldExeUnitCfg
,
stExeUnitCfg
}
import
xiangshan.backend.exu.
{
AluExeUnit
,
ExuConfig
,
JumpExeUnit
,
MulDivExeUnit
,
Wb
}
import
xiangshan.backend.exu.Exu.
{
ldExeUnitCfg
,
stExeUnitCfg
}
import
xiangshan.backend.exu.
_
import
xiangshan.backend.fu.FenceToSbuffer
import
xiangshan.backend.issue.
{
ReservationStationCtrl
,
ReservationStationData
}
import
xiangshan.backend.regfile.Regfile
...
...
@@ -65,6 +65,7 @@ class IntegerBlock
val
io
=
IO
(
new
Bundle
{
val
fromCtrlBlock
=
Flipped
(
new
CtrlToIntBlockIO
)
val
toCtrlBlock
=
new
IntBlockToCtrlIO
val
toMemBlock
=
new
IntBlockToMemBlockIO
val
wakeUpIn
=
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)
val
wakeUpFpOut
=
Flipped
(
new
WakeUpBundle
(
fastFpOut
.
size
,
slowFpOut
.
size
))
...
...
@@ -110,6 +111,7 @@ class IntegerBlock
def
needData
(
a
:
ExuConfig
,
b
:
ExuConfig
)
:
Boolean
=
(
a
.
readIntRf
&&
b
.
writeIntRf
)
||
(
a
.
readFpRf
&&
b
.
writeFpRf
)
val
readPortIndex
=
RegNext
(
io
.
fromCtrlBlock
.
readPortIndex
)
val
reservationStations
=
exeUnits
.
map
(
_
.
config
).
zipWithIndex
.
map
({
case
(
cfg
,
i
)
=>
var
certainLatency
=
-
1
if
(
cfg
.
hasCertainLatency
)
{
...
...
@@ -140,7 +142,12 @@ class IntegerBlock
rsCtrl
.
io
.
redirect
<>
redirect
// TODO: remove it
rsCtrl
.
io
.
numExist
<>
io
.
toCtrlBlock
.
numExist
(
i
)
rsCtrl
.
io
.
enqCtrl
<>
io
.
fromCtrlBlock
.
enqIqCtrl
(
i
)
rsData
.
io
.
enqData
<>
io
.
fromCtrlBlock
.
enqIqData
(
i
)
rsData
.
io
.
srcRegValue
:=
DontCare
val
src1Value
=
VecInit
((
0
until
4
).
map
(
i
=>
intRf
.
io
.
readPorts
(
i
*
2
).
data
))
val
src2Value
=
VecInit
((
0
until
4
).
map
(
i
=>
intRf
.
io
.
readPorts
(
i
*
2
+
1
).
data
))
rsData
.
io
.
srcRegValue
(
0
)
:=
src1Value
(
readPortIndex
(
i
))
rsData
.
io
.
srcRegValue
(
1
)
:=
src2Value
(
readPortIndex
(
i
))
rsData
.
io
.
redirect
<>
redirect
rsData
.
io
.
writeBackedData
<>
writeBackData
...
...
@@ -208,6 +215,7 @@ class IntegerBlock
// read int rf from ctrl block
intRf
.
io
.
readPorts
<>
io
.
fromCtrlBlock
.
readRf
(
0
until
NRMemReadPorts
).
foreach
(
i
=>
io
.
toMemBlock
.
readIntRf
(
i
).
data
:=
intRf
.
io
.
readPorts
(
i
+
8
).
data
)
// write int rf arbiter
val
intWbArbiter
=
Module
(
new
Wb
(
(
exeUnits
.
map
(
_
.
config
)
++
fastWakeUpIn
++
slowWakeUpIn
).
map
(
_
.
wbIntPriority
),
...
...
src/main/scala/xiangshan/backend/MemBlock.scala
浏览文件 @
3726264a
...
...
@@ -10,7 +10,7 @@ import xiangshan.cache._
import
xiangshan.mem._
import
xiangshan.backend.fu.FenceToSbuffer
import
xiangshan.backend.issue.
{
ReservationStationCtrl
,
ReservationStationData
}
import
xiangshan.backend.
fu.FunctionUnit.
{
lduCfg
,
mouCfg
,
stuCfg
}
import
xiangshan.backend.
regfile.RfReadPort
class
LsBlockToCtrlIO
extends
XSBundle
{
val
stOut
=
Vec
(
exuParameters
.
StuCnt
,
ValidIO
(
new
ExuOutput
))
// write to roq
...
...
@@ -18,12 +18,12 @@ class LsBlockToCtrlIO extends XSBundle {
val
replay
=
ValidIO
(
new
Redirect
)
}
class
MemBlockToDcache
IO
extends
XSBundle
{
val
loadUnitToDcacheVec
=
Vec
(
exuParameters
.
LduCnt
,
new
DCacheLoadIO
)
val
loadMiss
=
new
DCacheLineIO
val
atomics
=
new
DCacheWordIO
val
sbufferToDcache
=
new
DCacheLineIO
val
uncache
=
new
DCacheWordIO
class
IntBlockToMemBlock
IO
extends
XSBundle
{
val
readIntRf
=
Vec
(
NRMemReadPorts
,
new
RfReadPort
(
XLEN
)
)
}
class
FpBlockToMemBlockIO
extends
XSBundle
{
val
readFpRf
=
Vec
(
exuParameters
.
StuCnt
,
new
RfReadPort
(
XLEN
+
1
))
}
class
MemBlock
...
...
@@ -38,6 +38,8 @@ class MemBlock
val
io
=
IO
(
new
Bundle
{
val
fromCtrlBlock
=
Flipped
(
new
CtrlToLsBlockIO
)
val
fromIntBlock
=
Flipped
(
new
IntBlockToMemBlockIO
)
val
fromFpBlock
=
Flipped
(
new
FpBlockToMemBlockIO
)
val
toCtrlBlock
=
new
LsBlockToCtrlIO
val
wakeUpIn
=
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)
...
...
@@ -46,7 +48,8 @@ class MemBlock
val
ptw
=
new
TlbPtwIO
// TODO: dcache should be inside MemBlock
val
dcache
=
new
MemBlockToDcacheIO
val
dcache
=
Flipped
(
new
DCacheToLsuIO
)
val
uncache
=
new
DCacheWordIO
val
sfence
=
Input
(
new
SfenceBundle
)
val
tlbCsr
=
Input
(
new
TlbCsrBundle
)
val
fenceToSbuffer
=
Flipped
(
new
FenceToSbuffer
)
...
...
@@ -76,6 +79,9 @@ class MemBlock
val
intExeWbReqs
=
ldOut0
+:
loadUnits
.
tail
.
map
(
_
.
io
.
ldout
)
val
fpExeWbReqs
=
loadUnits
.
map
(
_
.
io
.
fpout
)
val
readPortIndex
=
Seq
(
0
,
1
,
2
,
4
)
io
.
fromIntBlock
.
readIntRf
.
foreach
(
_
.
addr
:=
DontCare
)
io
.
fromFpBlock
.
readFpRf
.
foreach
(
_
.
addr
:=
DontCare
)
val
reservationStations
=
(
loadExuConfigs
++
storeExuConfigs
).
zipWithIndex
.
map
({
case
(
cfg
,
i
)
=>
var
certainLatency
=
-
1
if
(
cfg
.
hasCertainLatency
)
{
...
...
@@ -111,7 +117,13 @@ class MemBlock
rsCtrl
.
io
.
redirect
<>
redirect
// TODO: remove it
rsCtrl
.
io
.
numExist
<>
io
.
toCtrlBlock
.
numExist
(
i
)
rsCtrl
.
io
.
enqCtrl
<>
io
.
fromCtrlBlock
.
enqIqCtrl
(
i
)
rsData
.
io
.
enqData
<>
io
.
fromCtrlBlock
.
enqIqData
(
i
)
val
src2IsFp
=
RegNext
(
io
.
fromCtrlBlock
.
enqIqCtrl
(
i
).
bits
.
ctrl
.
src2Type
===
SrcType
.
fp
)
rsData
.
io
.
srcRegValue
:=
DontCare
rsData
.
io
.
srcRegValue
(
0
)
:=
io
.
fromIntBlock
.
readIntRf
(
readPortIndex
(
i
)).
data
if
(
i
>=
exuParameters
.
LduCnt
)
{
rsData
.
io
.
srcRegValue
(
1
)
:=
Mux
(
src2IsFp
,
io
.
fromFpBlock
.
readFpRf
(
i
-
exuParameters
.
LduCnt
).
data
,
io
.
fromIntBlock
.
readIntRf
(
readPortIndex
(
i
)
+
1
).
data
)
}
rsData
.
io
.
redirect
<>
redirect
rsData
.
io
.
writeBackedData
<>
writeBackData
...
...
@@ -166,7 +178,7 @@ class MemBlock
// get input from dispatch
loadUnits
(
i
).
io
.
ldin
<>
reservationStations
(
i
).
io
.
deq
// dcache access
loadUnits
(
i
).
io
.
dcache
<>
io
.
dcache
.
load
UnitToDcacheVec
(
i
)
loadUnits
(
i
).
io
.
dcache
<>
io
.
dcache
.
load
(
i
)
// forward
loadUnits
(
i
).
io
.
lsq
.
forward
<>
lsq
.
io
.
forward
(
i
)
loadUnits
(
i
).
io
.
sbuffer
<>
sbuffer
.
io
.
forward
(
i
)
...
...
@@ -210,14 +222,14 @@ class MemBlock
lsq
.
io
.
brqRedirect
:=
io
.
fromCtrlBlock
.
redirect
lsq
.
io
.
roqDeqPtr
:=
io
.
lsqio
.
roqDeqPtr
io
.
toCtrlBlock
.
replay
<>
lsq
.
io
.
rollback
lsq
.
io
.
dcache
<>
io
.
dcache
.
l
oadMiss
lsq
.
io
.
uncache
<>
io
.
dcache
.
uncache
lsq
.
io
.
dcache
<>
io
.
dcache
.
l
sq
lsq
.
io
.
uncache
<>
io
.
uncache
// LSQ to store buffer
lsq
.
io
.
sbuffer
<>
sbuffer
.
io
.
in
// Sbuffer
sbuffer
.
io
.
dcache
<>
io
.
dcache
.
s
bufferToDcach
e
sbuffer
.
io
.
dcache
<>
io
.
dcache
.
s
tor
e
// flush sbuffer
val
fenceFlush
=
io
.
fenceToSbuffer
.
flushSb
...
...
src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
浏览文件 @
3726264a
...
...
@@ -42,7 +42,12 @@ class Dispatch extends XSModule {
// to reservation stations
val
numExist
=
Input
(
Vec
(
exuParameters
.
ExuCnt
,
UInt
(
log2Ceil
(
IssQueSize
).
W
)))
val
enqIQCtrl
=
Vec
(
exuParameters
.
ExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIQData
=
Vec
(
exuParameters
.
ExuCnt
,
Output
(
new
ExuInput
))
// send reg file read port index to reservation stations
val
readPortIndex
=
new
Bundle
{
val
intIndex
=
Vec
(
exuParameters
.
IntExuCnt
,
Output
(
UInt
(
log2Ceil
(
8
/
2
).
W
)))
val
fpIndex
=
Vec
(
exuParameters
.
FpExuCnt
,
Output
(
UInt
(
log2Ceil
((
NRFpReadPorts
-
exuParameters
.
StuCnt
)
/
3
).
W
)))
// ls: hardwired to (0, 1, 2, 4)
}
})
val
dispatch1
=
Module
(
new
Dispatch1
)
...
...
@@ -80,7 +85,8 @@ class Dispatch extends XSModule {
intDispatch
.
io
.
regRdy
.
zipWithIndex
.
map
({
case
(
r
,
i
)
=>
r
<>
io
.
intPregRdy
(
i
)})
intDispatch
.
io
.
numExist
.
zipWithIndex
.
map
({
case
(
num
,
i
)
=>
num
:=
io
.
numExist
(
i
)})
intDispatch
.
io
.
enqIQCtrl
.
zipWithIndex
.
map
({
case
(
enq
,
i
)
=>
enq
<>
io
.
enqIQCtrl
(
i
)})
intDispatch
.
io
.
enqIQData
.
zipWithIndex
.
map
({
case
(
enq
,
i
)
=>
enq
<>
io
.
enqIQData
(
i
)})
// intDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i)})
intDispatch
.
io
.
readPortIndex
<>
io
.
readPortIndex
.
intIndex
// Fp dispatch queue to Fp reservation stations
val
fpDispatch
=
Module
(
new
Dispatch2Fp
)
...
...
@@ -89,7 +95,8 @@ class Dispatch extends XSModule {
fpDispatch
.
io
.
regRdy
.
zipWithIndex
.
map
({
case
(
r
,
i
)
=>
r
<>
io
.
fpPregRdy
(
i
)})
fpDispatch
.
io
.
numExist
.
zipWithIndex
.
map
({
case
(
num
,
i
)
=>
num
:=
io
.
numExist
(
i
+
exuParameters
.
IntExuCnt
)})
fpDispatch
.
io
.
enqIQCtrl
.
zipWithIndex
.
map
({
case
(
enq
,
i
)
=>
enq
<>
io
.
enqIQCtrl
(
i
+
exuParameters
.
IntExuCnt
)})
fpDispatch
.
io
.
enqIQData
.
zipWithIndex
.
map
({
case
(
enq
,
i
)
=>
enq
<>
io
.
enqIQData
(
i
+
exuParameters
.
IntExuCnt
)})
// fpDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(i + exuParameters.IntExuCnt)})
fpDispatch
.
io
.
readPortIndex
<>
io
.
readPortIndex
.
fpIndex
// Load/store dispatch queue to load/store issue queues
val
lsDispatch
=
Module
(
new
Dispatch2Ls
)
...
...
@@ -100,5 +107,5 @@ class Dispatch extends XSModule {
lsDispatch
.
io
.
fpRegRdy
.
zipWithIndex
.
map
({
case
(
r
,
i
)
=>
r
<>
io
.
fpPregRdy
(
i
+
12
)})
lsDispatch
.
io
.
numExist
.
zipWithIndex
.
map
({
case
(
num
,
i
)
=>
num
:=
io
.
numExist
(
exuParameters
.
IntExuCnt
+
exuParameters
.
FpExuCnt
+
i
)})
lsDispatch
.
io
.
enqIQCtrl
.
zipWithIndex
.
map
({
case
(
enq
,
i
)
=>
enq
<>
io
.
enqIQCtrl
(
exuParameters
.
IntExuCnt
+
exuParameters
.
FpExuCnt
+
i
)})
lsDispatch
.
io
.
enqIQData
.
zipWithIndex
.
map
({
case
(
enq
,
i
)
=>
enq
<>
io
.
enqIQData
(
exuParameters
.
IntExuCnt
+
exuParameters
.
FpExuCnt
+
i
)})
//
lsDispatch.io.enqIQData.zipWithIndex.map({case (enq, i) => enq <> io.enqIQData(exuParameters.IntExuCnt + exuParameters.FpExuCnt + i)})
}
src/main/scala/xiangshan/backend/dispatch/Dispatch2Fp.scala
浏览文件 @
3726264a
...
...
@@ -14,7 +14,7 @@ class Dispatch2Fp extends XSModule {
val
regRdy
=
Vec
(
NRFpReadPorts
-
exuParameters
.
StuCnt
,
Input
(
Bool
()))
val
numExist
=
Input
(
Vec
(
exuParameters
.
FpExuCnt
,
UInt
(
log2Ceil
(
IssQueSize
).
W
)))
val
enqIQCtrl
=
Vec
(
exuParameters
.
FpExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIQData
=
Vec
(
exuParameters
.
FpExuCnt
,
Output
(
new
ExuInput
))
val
readPortIndex
=
Vec
(
exuParameters
.
FpExuCnt
,
Output
(
UInt
(
log2Ceil
((
NRFpReadPorts
-
exuParameters
.
StuCnt
)
/
3
).
W
)
))
})
/**
...
...
@@ -55,9 +55,9 @@ class Dispatch2Fp extends XSModule {
io
.
readRf
(
3
*
i
+
1
).
addr
:=
io
.
fromDq
(
index
(
fpReadPortSrc
(
i
))).
bits
.
psrc2
io
.
readRf
(
3
*
i
+
2
).
addr
:=
io
.
fromDq
(
index
(
fpReadPortSrc
(
i
))).
bits
.
psrc3
}
val
readPortIndex
=
Wire
(
Vec
(
exuParameters
.
FpExuCnt
,
UInt
(
log2Ceil
(
NRFpReadPorts
-
exuParameters
.
StuCnt
)
.
W
)))
fpStaticIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
(
3
*
i
)
.
U
})
fpDynamicIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
3.
U
*
fpDynamicExuSrc
(
i
)})
val
readPortIndex
=
Wire
(
Vec
(
exuParameters
.
FpExuCnt
,
UInt
(
2
.
W
)))
fpStaticIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
i
.
U
})
fpDynamicIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
fpDynamicExuSrc
(
i
)})
/**
* Part 3: dispatch to reservation stations
...
...
@@ -73,9 +73,13 @@ class Dispatch2Fp extends XSModule {
enq
.
valid
:=
fmiscIndexGen
.
io
.
mapping
(
i
-
exuParameters
.
FmacCnt
).
valid
&&
fmiscReady
}
enq
.
bits
:=
io
.
fromDq
(
indexVec
(
i
)).
bits
enq
.
bits
.
src1State
:=
io
.
regRdy
(
readPortIndex
(
i
))
enq
.
bits
.
src2State
:=
io
.
regRdy
(
readPortIndex
(
i
)
+
1.
U
)
enq
.
bits
.
src3State
:=
io
.
regRdy
(
readPortIndex
(
i
)
+
2.
U
)
val
src1Ready
=
VecInit
((
0
until
4
).
map
(
i
=>
io
.
regRdy
(
i
*
3
)))
val
src2Ready
=
VecInit
((
0
until
4
).
map
(
i
=>
io
.
regRdy
(
i
*
3
+
1
)))
val
src3Ready
=
VecInit
((
0
until
4
).
map
(
i
=>
io
.
regRdy
(
i
*
3
+
2
)))
enq
.
bits
.
src1State
:=
src1Ready
(
readPortIndex
(
i
))
enq
.
bits
.
src2State
:=
src2Ready
(
readPortIndex
(
i
))
enq
.
bits
.
src3State
:=
src3Ready
(
readPortIndex
(
i
))
XSInfo
(
enq
.
fire
(),
p
"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} "
+
p
"srcState(${enq.bits.src1State} ${enq.bits.src2State} ${enq.bits.src3State}) "
+
...
...
@@ -99,25 +103,26 @@ class Dispatch2Fp extends XSModule {
XSError
(
PopCount
(
io
.
fromDq
.
map
(
_
.
fire
()))
=/=
PopCount
(
io
.
enqIQCtrl
.
map
(
_
.
fire
())),
"deq =/= enq\n"
)
/**
* Part 5:
the second stage of dispatch 2 (send data to reservation station)
* Part 5:
send read port index of register file to reservation station
*/
val
readPortIndexReg
=
Reg
(
Vec
(
exuParameters
.
FpExuCnt
,
UInt
(
log2Ceil
(
NRFpReadPorts
-
exuParameters
.
StuCnt
).
W
)))
val
uopReg
=
Reg
(
Vec
(
exuParameters
.
FpExuCnt
,
new
MicroOp
))
val
dataValidRegDebug
=
Reg
(
Vec
(
exuParameters
.
FpExuCnt
,
Bool
()))
for
(
i
<-
0
until
exuParameters
.
FpExuCnt
)
{
readPortIndexReg
(
i
)
:=
readPortIndex
(
i
)
uopReg
(
i
)
:=
io
.
enqIQCtrl
(
i
).
bits
dataValidRegDebug
(
i
)
:=
io
.
enqIQCtrl
(
i
).
fire
()
io
.
enqIQData
(
i
)
:=
DontCare
io
.
enqIQData
(
i
).
src1
:=
io
.
readRf
(
readPortIndexReg
(
i
)).
data
io
.
enqIQData
(
i
).
src2
:=
io
.
readRf
(
readPortIndexReg
(
i
)
+
1.
U
).
data
io
.
enqIQData
(
i
).
src3
:=
io
.
readRf
(
readPortIndexReg
(
i
)
+
2.
U
).
data
XSDebug
(
dataValidRegDebug
(
i
),
p
"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from "
+
p
"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), "
+
p
"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)}), "
+
p
"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n"
)
}
io
.
readPortIndex
:=
readPortIndex
// val readPortIndexReg = Reg(Vec(exuParameters.FpExuCnt, UInt(log2Ceil(NRFpReadPorts - exuParameters.StuCnt).W)))
// val uopReg = Reg(Vec(exuParameters.FpExuCnt, new MicroOp))
// val dataValidRegDebug = Reg(Vec(exuParameters.FpExuCnt, Bool()))
// for (i <- 0 until exuParameters.FpExuCnt) {
// readPortIndexReg(i) := readPortIndex(i)
// uopReg(i) := io.enqIQCtrl(i).bits
// dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
//
// io.enqIQData(i) := DontCare
// io.enqIQData(i).src1 := io.readRf(readPortIndexReg(i)).data
// io.enqIQData(i).src2 := io.readRf(readPortIndexReg(i) + 1.U).data
// io.enqIQData(i).src3 := io.readRf(readPortIndexReg(i) + 2.U).data
//
// XSDebug(dataValidRegDebug(i),
// p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
// p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)}), " +
// p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n")
// }
}
src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala
浏览文件 @
3726264a
...
...
@@ -6,7 +6,6 @@ import xiangshan._
import
utils._
import
xiangshan.backend.exu.Exu._
import
xiangshan.backend.regfile.RfReadPort
import
xiangshan.backend.exu._
class
Dispatch2Int
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
...
...
@@ -15,7 +14,7 @@ class Dispatch2Int extends XSModule {
val
regRdy
=
Vec
(
NRIntReadPorts
-
NRMemReadPorts
,
Input
(
Bool
()))
val
numExist
=
Input
(
Vec
(
exuParameters
.
IntExuCnt
,
UInt
(
log2Ceil
(
IssQueSize
).
W
)))
val
enqIQCtrl
=
Vec
(
exuParameters
.
IntExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIQData
=
Vec
(
exuParameters
.
IntExuCnt
,
Output
(
new
ExuInput
))
val
readPortIndex
=
Vec
(
exuParameters
.
IntExuCnt
,
Output
(
UInt
(
log2Ceil
(
8
/
2
).
W
)
))
})
/**
...
...
@@ -59,9 +58,9 @@ class Dispatch2Int extends XSModule {
io
.
readRf
(
2
*
i
).
addr
:=
io
.
fromDq
(
index
(
intReadPortSrc
(
i
))).
bits
.
psrc1
io
.
readRf
(
2
*
i
+
1
).
addr
:=
io
.
fromDq
(
index
(
intReadPortSrc
(
i
))).
bits
.
psrc2
}
val
readPortIndex
=
Wire
(
Vec
(
exuParameters
.
IntExuCnt
,
UInt
(
log2Ceil
(
NRIntReadPorts
)
.
W
)))
intStaticIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
(
2
*
i
)
.
U
})
intDynamicIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
2.
U
*
intDynamicExuSrc
(
i
)})
val
readPortIndex
=
Wire
(
Vec
(
exuParameters
.
IntExuCnt
,
UInt
(
2
.
W
)))
intStaticIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
i
.
U
})
intDynamicIndex
.
zipWithIndex
.
map
({
case
(
index
,
i
)
=>
readPortIndex
(
index
)
:=
intDynamicExuSrc
(
i
)})
/**
* Part 3: dispatch to reservation stations
...
...
@@ -81,8 +80,11 @@ class Dispatch2Int extends XSModule {
enq
.
valid
:=
mduIndexGen
.
io
.
mapping
(
i
-
(
exuParameters
.
JmpCnt
+
exuParameters
.
AluCnt
)).
valid
&&
mduReady
}
enq
.
bits
:=
io
.
fromDq
(
indexVec
(
i
)).
bits
enq
.
bits
.
src1State
:=
io
.
regRdy
(
readPortIndex
(
i
))
enq
.
bits
.
src2State
:=
io
.
regRdy
(
readPortIndex
(
i
)
+
1.
U
)
val
src1Ready
=
VecInit
((
0
until
4
).
map
(
i
=>
io
.
regRdy
(
i
*
2
)))
val
src2Ready
=
VecInit
((
0
until
4
).
map
(
i
=>
io
.
regRdy
(
i
*
2
+
1
)))
enq
.
bits
.
src1State
:=
src1Ready
(
readPortIndex
(
i
))
enq
.
bits
.
src2State
:=
src2Ready
(
readPortIndex
(
i
))
XSInfo
(
enq
.
fire
(),
p
"pc 0x${Hexadecimal(enq.bits.cf.pc)} with type ${enq.bits.ctrl.fuType} "
+
p
"srcState(${enq.bits.src1State} ${enq.bits.src2State}) "
+
...
...
@@ -107,25 +109,26 @@ class Dispatch2Int extends XSModule {
XSError
(
PopCount
(
io
.
fromDq
.
map
(
_
.
fire
()))
=/=
PopCount
(
io
.
enqIQCtrl
.
map
(
_
.
fire
())),
"deq =/= enq\n"
)
/**
* Part 5:
the second stage of dispatch 2 (send data to reservation station)
* Part 5:
send read port index of register file to reservation station
*/
val
readPortIndexReg
=
Reg
(
Vec
(
exuParameters
.
IntExuCnt
,
UInt
(
log2Ceil
(
NRIntReadPorts
).
W
)))
val
uopReg
=
Reg
(
Vec
(
exuParameters
.
IntExuCnt
,
new
MicroOp
))
val
dataValidRegDebug
=
Reg
(
Vec
(
exuParameters
.
IntExuCnt
,
Bool
()))
for
(
i
<-
0
until
exuParameters
.
IntExuCnt
)
{
readPortIndexReg
(
i
)
:=
readPortIndex
(
i
)
uopReg
(
i
)
:=
io
.
enqIQCtrl
(
i
).
bits
dataValidRegDebug
(
i
)
:=
io
.
enqIQCtrl
(
i
).
fire
()
io
.
enqIQData
(
i
)
:=
DontCare
io
.
enqIQData
(
i
).
src1
:=
Mux
(
uopReg
(
i
).
ctrl
.
src1Type
===
SrcType
.
pc
,
SignExt
(
uopReg
(
i
).
cf
.
pc
,
XLEN
),
io
.
readRf
(
readPortIndexReg
(
i
)).
data
)
io
.
enqIQData
(
i
).
src2
:=
Mux
(
uopReg
(
i
).
ctrl
.
src2Type
===
SrcType
.
imm
,
uopReg
(
i
).
ctrl
.
imm
,
io
.
readRf
(
readPortIndexReg
(
i
)
+
1.
U
).
data
)
XSDebug
(
dataValidRegDebug
(
i
),
p
"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from "
+
p
"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), "
+
p
"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n"
)
}
io
.
readPortIndex
:=
readPortIndex
// val readPortIndexReg = Reg(Vec(exuParameters.IntExuCnt, UInt(log2Ceil(NRIntReadPorts).W)))
// val uopReg = Reg(Vec(exuParameters.IntExuCnt, new MicroOp))
// val dataValidRegDebug = Reg(Vec(exuParameters.IntExuCnt, Bool()))
// for (i <- 0 until exuParameters.IntExuCnt) {
// readPortIndexReg(i) := readPortIndex(i)
// uopReg(i) := io.enqIQCtrl(i).bits
// dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
//
// io.enqIQData(i) := DontCare
// io.enqIQData(i).src1 := Mux(uopReg(i).ctrl.src1Type === SrcType.pc,
// SignExt(uopReg(i).cf.pc, XLEN), io.readRf(readPortIndexReg(i)).data)
// io.enqIQData(i).src2 := Mux(uopReg(i).ctrl.src2Type === SrcType.imm,
// uopReg(i).ctrl.imm, io.readRf(readPortIndexReg(i) + 1.U).data)
//
// XSDebug(dataValidRegDebug(i),
// p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
// p"(${readPortIndexReg(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
}
src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala
浏览文件 @
3726264a
...
...
@@ -18,7 +18,6 @@ class Dispatch2Ls extends XSModule {
val
fpRegRdy
=
Vec
(
exuParameters
.
StuCnt
,
Input
(
Bool
()))
val
numExist
=
Input
(
Vec
(
exuParameters
.
LsExuCnt
,
UInt
(
log2Ceil
(
IssQueSize
).
W
)))
val
enqIQCtrl
=
Vec
(
exuParameters
.
LsExuCnt
,
DecoupledIO
(
new
MicroOp
))
val
enqIQData
=
Vec
(
exuParameters
.
LsExuCnt
,
Output
(
new
ExuInput
))
})
/**
...
...
@@ -111,27 +110,27 @@ class Dispatch2Ls extends XSModule {
/**
* Part 5: the second stage of dispatch 2 (send data to reservation station)
*/
val
uopReg
=
Reg
(
Vec
(
exuParameters
.
LsExuCnt
,
new
MicroOp
))
val
dataValidRegDebug
=
Reg
(
Vec
(
exuParameters
.
LsExuCnt
,
Bool
()))
for
(
i
<-
0
until
exuParameters
.
LsExuCnt
)
{
uopReg
(
i
)
:=
io
.
enqIQCtrl
(
i
).
bits
dataValidRegDebug
(
i
)
:=
io
.
enqIQCtrl
(
i
).
fire
()
io
.
enqIQData
(
i
)
:=
DontCare
// assert(uopReg(i).ctrl.src1Type =/= SrcType.pc)
io
.
enqIQData
(
i
).
src1
:=
io
.
readIntRf
(
readPort
(
i
)).
data
if
(
i
>=
exuParameters
.
LduCnt
)
{
io
.
enqIQData
(
i
).
src2
:=
Mux
(
uopReg
(
i
).
ctrl
.
src2Type
===
SrcType
.
imm
,
uopReg
(
i
).
ctrl
.
imm
,
Mux
(
uopReg
(
i
).
ctrl
.
src2Type
===
SrcType
.
fp
,
io
.
readFpRf
(
i
-
exuParameters
.
LduCnt
).
data
,
io
.
readIntRf
(
readPort
(
i
)
+
1
).
data
))
}
XSDebug
(
dataValidRegDebug
(
i
),
p
"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from "
+
p
"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), "
+
p
"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n"
)
}
//
val uopReg = Reg(Vec(exuParameters.LsExuCnt, new MicroOp))
//
val dataValidRegDebug = Reg(Vec(exuParameters.LsExuCnt, Bool()))
//
for (i <- 0 until exuParameters.LsExuCnt) {
//
uopReg(i) := io.enqIQCtrl(i).bits
//
dataValidRegDebug(i) := io.enqIQCtrl(i).fire()
//
//
io.enqIQData(i) := DontCare
//
// assert(uopReg(i).ctrl.src1Type =/= SrcType.pc)
//
io.enqIQData(i).src1 := io.readIntRf(readPort(i)).data
//
if (i >= exuParameters.LduCnt) {
//
io.enqIQData(i).src2 := Mux(
//
uopReg(i).ctrl.src2Type === SrcType.imm,
//
uopReg(i).ctrl.imm,
//
Mux(uopReg(i).ctrl.src2Type === SrcType.fp,
//
io.readFpRf(i - exuParameters.LduCnt).data,
//
io.readIntRf(readPort(i) + 1).data))
//
}
//
//
XSDebug(dataValidRegDebug(i),
//
p"pc 0x${Hexadecimal(uopReg(i).cf.pc)} reads operands from " +
//
p"(${readPort(i) }, ${uopReg(i).psrc1}, ${Hexadecimal(io.enqIQData(i).src1)}), " +
//
p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
//
}
}
src/main/scala/xiangshan/backend/issue/ReservationStationNew.scala
浏览文件 @
3726264a
...
...
@@ -5,6 +5,7 @@ import chisel3.util._
import
xiangshan._
import
utils._
import
xiangshan.backend.exu.
{
Exu
,
ExuConfig
}
import
xiangshan.backend.regfile.RfReadPort
class
BypassQueue
(
number
:
Int
)
extends
XSModule
{
val
io
=
IO
(
new
Bundle
{
...
...
@@ -315,6 +316,24 @@ class ReservationStationData
srcNum
:
Int
=
3
)
extends
XSModule
{
/** Enumerates the three dispatch tags: Disp2Int, Disp2Fp and Disp2Ls. */
object DispatchType extends Enumeration {
  val Disp2Int, Disp2Fp, Disp2Ls = Value
}

/** Maps an execution-unit configuration to its [[DispatchType]] tag.
  *
  * Only the seven configurations listed below are recognised; there is no
  * default case, so any other configuration raises a `MatchError`.
  *
  * @param exuConfig the execution-unit configuration to classify
  * @return the dispatch tag associated with `exuConfig`
  */
def dispatchType(exuConfig: ExuConfig): DispatchType.Value = exuConfig match {
  case Exu.aluExeUnitCfg | Exu.jumpExeUnitCfg | Exu.mulDivExeUnitCfg =>
    DispatchType.Disp2Int
  case Exu.fmacExeUnitCfg | Exu.fmiscExeUnitCfg =>
    DispatchType.Disp2Fp
  case Exu.ldExeUnitCfg | Exu.stExeUnitCfg =>
    DispatchType.Disp2Ls
}
val
iqSize
=
IssQueSize
val
iqIdxWidth
=
log2Up
(
iqSize
)
val
fastWakeup
=
fixedDelay
>=
0
// NOTE: to disable fastWakeup (bypass), set fixedDelay to -1
...
...
@@ -324,15 +343,14 @@ class ReservationStationData
// flush
val
redirect
=
Flipped
(
ValidIO
(
new
Redirect
))
// enq Data at next cycle (regfile has 1 cycle latency)
val
enqData
=
Input
(
new
ExuInput
)
// send to exu
val
deq
=
DecoupledIO
(
new
ExuInput
)
// listen to RSCtrl
val
ctrl
=
Flipped
(
new
RSCtrlDataIO
)
// read src op value
val
srcRegValue
=
Vec
(
srcNum
,
Input
(
UInt
((
XLEN
+
1
).
W
)))
// broadcast selected uop to other issue queues
val
selectedUop
=
ValidIO
(
new
MicroOp
)
...
...
@@ -372,12 +390,47 @@ class ReservationStationData
p
" src2:${enqUop.psrc2}|${enqUop.src2State}|${enqUop.ctrl.src2Type} src3:${enqUop.psrc3}|"
+
p
"${enqUop.src3State}|${enqUop.ctrl.src3Type} pc:0x${Hexadecimal(enqUop.cf.pc)} roqIdx:${enqUop.roqIdx}\n"
)
}
when
(
enqEnReg
)
{
// TODO: turn to srcNum, not the 3
data
(
enqPtrReg
)(
0
)
:=
io
.
enqData
.
src1
data
(
enqPtrReg
)(
1
)
:=
io
.
enqData
.
src2
data
(
enqPtrReg
)(
2
)
:=
io
.
enqData
.
src3
XSDebug
(
p
"enqData: enqPtrReg:${enqPtrReg} src1:${Hexadecimal(io.enqData.src1)}"
+
p
" src2:${Hexadecimal(io.enqData.src2)} src3:${Hexadecimal(io.enqData.src2)}\n"
)
// Operand-select expressions shared by several cases below. They are `def`s,
// so a Mux is only built by the case that actually references it.
// src1: pc (sign-extended to XLEN) or register value
def enqSrc1PcOrReg = Mux(
  uop(enqPtrReg).ctrl.src1Type === SrcType.pc,
  SignExt(uop(enqPtrReg).cf.pc, XLEN),
  io.srcRegValue(0)
)
// src2: immediate or register value
def enqSrc2ImmOrReg = Mux(
  uop(enqPtrReg).ctrl.src2Type === SrcType.imm,
  uop(enqPtrReg).ctrl.imm,
  io.srcRegValue(1)
)

// Select how the entry at enqPtrReg is filled, depending on the exu config.
exuCfg match {
  case Exu.aluExeUnitCfg =>
    data(enqPtrReg)(0) := enqSrc1PcOrReg
    data(enqPtrReg)(1) := enqSrc2ImmOrReg
  case Exu.jumpExeUnitCfg =>
    data(enqPtrReg)(0) := enqSrc1PcOrReg
    // src2: always the immediate
    data(enqPtrReg)(1) := uop(enqPtrReg).ctrl.imm
  case Exu.mulDivExeUnitCfg =>
    // both operands come straight from the register values
    data(enqPtrReg)(0) := io.srcRegValue(0)
    data(enqPtrReg)(1) := io.srcRegValue(1)
  case Exu.fmacExeUnitCfg | Exu.fmiscExeUnitCfg =>
    // copy as many operands as the config declares via fpSrcCnt
    for (i <- 0 until exuCfg.fpSrcCnt) {
      data(enqPtrReg)(i) := io.srcRegValue(i)
    }
  case Exu.ldExeUnitCfg | Exu.stExeUnitCfg =>
    data(enqPtrReg)(0) := io.srcRegValue(0)
    data(enqPtrReg)(1) := enqSrc2ImmOrReg
  // default
  case _ =>
    XSDebug(false.B, "Unhandled exu-config")
}
XSDebug
(
p
"${exuCfg.name}: enqPtrReg:${enqPtrReg} pc: ${Hexadecimal(uop(enqPtrReg).cf.pc)}\n"
)
XSDebug
(
p
"[srcRegValue] src1: ${Hexadecimal(io.srcRegValue(0))} src2: ${Hexadecimal(io.srcRegValue(1))} src3: ${Hexadecimal(io.srcRegValue(2))}\n"
)
}
def
wbHit
(
uop
:
MicroOp
,
src
:
UInt
,
srctype
:
UInt
)
:
Bool
=
{
...
...
src/main/scala/xiangshan/backend/rename/RenameTable.scala
浏览文件 @
3726264a
...
...
@@ -6,13 +6,13 @@ import xiangshan._
class
RatReadPort
extends
XSBundle
{
val
addr
=
Input
(
UInt
(
5.
W
))
val
rdata
=
Output
(
UInt
(
XLEN
.
W
))
val
rdata
=
Output
(
UInt
(
PhyRegIdxWidth
.
W
))
}
class
RatWritePort
extends
XSBundle
{
val
wen
=
Input
(
Bool
())
val
addr
=
Input
(
UInt
(
5.
W
))
val
wdata
=
Input
(
UInt
(
XLEN
.
W
))
val
wdata
=
Input
(
UInt
(
PhyRegIdxWidth
.
W
))
}
class
RenameTable
(
float
:
Boolean
)
extends
XSModule
{
...
...
src/main/scala/xiangshan/cache/atomics.scala
浏览文件 @
3726264a
...
...
@@ -20,6 +20,9 @@ class AtomicsPipe extends DCacheModule
val
inflight_req_block_addrs
=
Output
(
Vec
(
3
,
Valid
(
UInt
())))
val
block_probe_addr
=
Output
(
Valid
(
UInt
()))
val
wb_invalidate_lrsc
=
Input
(
Valid
(
UInt
()))
// send miss request to miss queue
val
miss_req
=
DecoupledIO
(
new
MissReq
)
})
// LSU requests
...
...
@@ -63,6 +66,17 @@ class AtomicsPipe extends DCacheModule
def
wayMap
[
T
<:
Data
](
f
:
Int
=>
T
)
=
VecInit
((
0
until
nWays
).
map
(
f
))
val
s1_tag_eq_way
=
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
).
tag
===
(
get_tag
(
s1_addr
))).
asUInt
val
s1_tag_match_way
=
wayMap
((
w
:
Int
)
=>
s1_tag_eq_way
(
w
)
&&
meta_resp
(
w
).
coh
.
isValid
()).
asUInt
val
s1_tag_match
=
s1_tag_match_way
.
orR
val
s1_hit_meta
=
Mux1H
(
s1_tag_match_way
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
)))
val
s1_hit_state
=
s1_hit_meta
.
coh
// replacement policy
val
replacer
=
cacheParams
.
replacement
val
s1_repl_way_en
=
UIntToOH
(
replacer
.
way
)
val
s1_repl_meta
=
Mux1H
(
s1_repl_way_en
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
)))
when
(
io
.
miss_req
.
fire
())
{
replacer
.
miss
}
// ---------------------------------------
...
...
@@ -74,10 +88,18 @@ class AtomicsPipe extends DCacheModule
val
s2_tag_match_way
=
RegNext
(
s1_tag_match_way
)
val
s2_tag_match
=
s2_tag_match_way
.
orR
val
s2_hit_meta
=
RegNext
(
s1_hit_meta
)
val
s2_hit_state
=
Mux1H
(
s2_tag_match_way
,
wayMap
((
w
:
Int
)
=>
RegNext
(
meta_resp
(
w
).
coh
)))
val
s2_has_permission
=
s2_hit_state
.
onAccess
(
s2_req
.
cmd
).
_1
val
s2_new_hit_state
=
s2_hit_state
.
onAccess
(
s2_req
.
cmd
).
_3
val
s2_repl_meta
=
RegNext
(
s1_repl_meta
)
val
s2_repl_way_en
=
RegNext
(
s1_repl_way_en
)
val
s2_old_meta
=
Mux
(
s2_tag_match
,
s2_hit_meta
,
s2_repl_meta
)
val
s2_way_en
=
Mux
(
s2_tag_match
,
s2_tag_match_way
,
s2_repl_way_en
)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
...
...
@@ -89,24 +111,19 @@ class AtomicsPipe extends DCacheModule
// eg: write to exclusive but clean block
val
s2_hit
=
s2_tag_match
&&
s2_has_permission
&&
s2_hit_state
===
s2_new_hit_state
val
s2_nack
=
Wire
(
Bool
())
val
s2_data
=
Wire
(
Vec
(
nWays
,
UInt
(
encRowBits
.
W
)))
val
data_resp
=
io
.
data_resp
for
(
w
<-
0
until
nWays
)
{
s2_data
(
w
)
:=
data_resp
(
w
)(
get_row
(
s2_req
.
addr
))
}
val
s2_data_muxed
=
Mux1H
(
s2_tag_match_way
,
s2_data
)
// the index of word in a row, in case rowBits != wordBits
val
s2_word_idx
=
if
(
rowWords
==
1
)
0.
U
else
s2_req
.
addr
(
log2Up
(
rowWords
*
wordBytes
)-
1
,
log2Up
(
wordBytes
))
// when req got nacked, upper levels should replay this request
val
s2_nack_hit
=
RegNext
(
s1_nack
)
// Can't allocate MSHR for same set currently being written back
// the same set is busy
val
s2_nack_set_busy
=
s2_valid
&&
false
.
B
val
s2_nack_hit
=
RegNext
(
s1_nack
)
// cannot allocate an mshr for this miss (miss_req is valid but not accepted)
val
s2_nack_no_mshr
=
io
.
miss_req
.
valid
&&
!
io
.
miss_req
.
ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val
s2_nack_data
=
false
.
B
s2_nack
:=
s2_nack_hit
||
s2_nack_set_busy
||
s2_nack_data
s2_nack
:=
s2_nack_hit
||
s2_nack_no_mshr
||
s2_nack_data
// lr/sc
val
debug_sc_fail_addr
=
RegInit
(
0.
U
)
...
...
@@ -174,7 +191,8 @@ class AtomicsPipe extends DCacheModule
dump_pipeline_valids
(
"AtomicsPipe s2"
,
"s2_hit"
,
s2_valid
&&
s2_hit
)
dump_pipeline_valids
(
"AtomicsPipe s2"
,
"s2_nack"
,
s2_valid
&&
s2_nack
)
dump_pipeline_valids
(
"AtomicsPipe s2"
,
"s2_nack_hit"
,
s2_valid
&&
s2_nack_hit
)
dump_pipeline_valids
(
"AtomicsPipe s2"
,
"s2_nack_set_busy"
,
s2_valid
&&
s2_nack_set_busy
)
dump_pipeline_valids
(
"AtomicsPipe s2"
,
"s2_nack_no_mshr"
,
s2_valid
&&
s2_nack_no_mshr
)
dump_pipeline_valids
(
"AtomicsPipe s2"
,
"s2_nack_data"
,
s2_valid
&&
s2_nack_data
)
when
(
s2_valid
)
{
XSDebug
(
"lrsc_count: %d lrsc_valid: %b lrsc_addr: %x\n"
,
lrsc_count
,
lrsc_valid
,
lrsc_addr
)
...
...
@@ -185,6 +203,15 @@ class AtomicsPipe extends DCacheModule
}
// load data gen
val
s2_data
=
Wire
(
Vec
(
nWays
,
UInt
(
encRowBits
.
W
)))
val
data_resp
=
io
.
data_resp
for
(
w
<-
0
until
nWays
)
{
s2_data
(
w
)
:=
data_resp
(
w
)(
get_row
(
s2_req
.
addr
))
}
val
s2_data_muxed
=
Mux1H
(
s2_tag_match_way
,
s2_data
)
// the index of word in a row, in case rowBits != wordBits
val
s2_word_idx
=
if
(
rowWords
==
1
)
0.
U
else
s2_req
.
addr
(
log2Up
(
rowWords
*
wordBytes
)-
1
,
log2Up
(
wordBytes
))
val
s2_data_words
=
Wire
(
Vec
(
rowWords
,
UInt
(
encWordBits
.
W
)))
for
(
w
<-
0
until
rowWords
)
{
s2_data_words
(
w
)
:=
s2_data_muxed
(
encWordBits
*
(
w
+
1
)
-
1
,
encWordBits
*
w
)
...
...
@@ -195,6 +222,14 @@ class AtomicsPipe extends DCacheModule
assert
(!(
s2_valid
&&
s2_hit
&&
!
s2_nack
&&
s2_decoded
.
uncorrectable
))
// send load miss to miss queue
io
.
miss_req
.
valid
:=
s2_valid
&&
!
s2_nack_hit
&&
!
s2_nack_data
&&
!
s2_hit
io
.
miss_req
.
bits
.
cmd
:=
s2_req
.
cmd
io
.
miss_req
.
bits
.
addr
:=
get_block_addr
(
s2_req
.
addr
)
io
.
miss_req
.
bits
.
tag_match
:=
s2_tag_match
io
.
miss_req
.
bits
.
way_en
:=
s2_way_en
io
.
miss_req
.
bits
.
old_meta
:=
s2_old_meta
io
.
miss_req
.
bits
.
client_id
:=
s2_req
.
meta
.
id
val
resp
=
Wire
(
ValidIO
(
new
DCacheWordResp
))
resp
.
valid
:=
s2_valid
...
...
@@ -203,16 +238,16 @@ class AtomicsPipe extends DCacheModule
// reuse this field to pass lr sc valid to commit
// nemu use this to see whether lr sc counter is still valid
resp
.
bits
.
meta
.
id
:=
lrsc_valid
resp
.
bits
.
miss
:=
!
s2_hit
resp
.
bits
.
nack
:=
s2_nack
resp
.
bits
.
miss
:=
!
s2_hit
||
s2_nack
resp
.
bits
.
replay
:=
resp
.
bits
.
miss
&&
(!
io
.
miss_req
.
fire
()
||
s2_nack
)
io
.
lsu
.
resp
.
valid
:=
resp
.
valid
io
.
lsu
.
resp
.
bits
:=
resp
.
bits
assert
(!(
resp
.
valid
&&
!
io
.
lsu
.
resp
.
ready
))
when
(
resp
.
valid
)
{
XSDebug
(
s
"AtomicsPipe resp: data: %x id: %d replay
: %b miss: %b nack
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
nack
)
XSDebug
(
s
"AtomicsPipe resp: data: %x id: %d replay
ed_req: %b miss: %b need_replay
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
replay
)
}
...
...
src/main/scala/xiangshan/cache/atomicsMissQueue.scala
浏览文件 @
3726264a
...
...
@@ -12,12 +12,11 @@ class AtomicsMissQueue extends DCacheModule
val
io
=
IO
(
new
DCacheBundle
{
val
lsu
=
Flipped
(
new
DCacheWordIO
)
val
replay
=
new
DCacheWordIO
val
miss_req
=
DecoupledIO
(
new
MissReq
)
val
miss_resp
=
Flipped
(
ValidIO
(
new
MissResp
))
val
miss_finish
=
DecoupledIO
(
new
MissFinish
)
})
val
s_invalid
::
s_replay_req
::
s_replay_resp
::
s_resp
::
s_miss_re
q
::
s_miss_resp
::
s_miss_finish
::
Nil
=
Enum
(
7
)
val
s_invalid
::
s_replay_req
::
s_replay_resp
::
s_resp
::
s_miss_re
sp
::
s_miss_finish
::
Nil
=
Enum
(
6
)
val
state
=
RegInit
(
s_invalid
)
val
id
=
0.
U
...
...
@@ -35,12 +34,9 @@ class AtomicsMissQueue extends DCacheModule
io
.
replay
.
req
.
bits
:=
DontCare
io
.
replay
.
resp
.
ready
:=
false
.
B
io
.
miss_req
.
valid
:=
false
.
B
io
.
miss_req
.
bits
:=
DontCare
io
.
miss_finish
.
valid
:=
false
.
B
io
.
miss_finish
.
bits
:=
DontCare
when
(
state
=/=
s_invalid
)
{
XSDebug
(
"state: %d\n"
,
state
)
}
...
...
@@ -68,34 +64,30 @@ class AtomicsMissQueue extends DCacheModule
when
(
state
===
s_replay_resp
)
{
io
.
replay
.
resp
.
ready
:=
true
.
B
when
(
io
.
replay
.
resp
.
fire
())
{
// req missed
when
(
io
.
replay
.
resp
.
bits
.
miss
)
{
// replayed reqs should not miss
assert
(!
req
.
meta
.
replay
)
when
(!
req
.
meta
.
replay
)
{
state
:=
s_miss_req
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when
(
io
.
replay
.
resp
.
bits
.
replay
)
{
state
:=
s_replay_req
}
.
otherwise
{
// the req missed and enters mshr
// wait for miss response
state
:=
s_miss_resp
}
}
.
otherwise
{
// req hits, everything OK
resp
:=
io
.
replay
.
resp
.
bits
when
(!
req
.
meta
.
replay
)
{
state
:=
s_resp
}
.
otherwise
{
// if it's a replayed request
// we need to tell mshr, we are done
state
:=
s_miss_finish
}
}
assert
(!
io
.
replay
.
resp
.
bits
.
nack
)
}
}
// --------------------------------------------
when
(
state
===
s_miss_req
)
{
io
.
miss_req
.
valid
:=
true
.
B
io
.
miss_req
.
bits
.
cmd
:=
req
.
cmd
io
.
miss_req
.
bits
.
addr
:=
req_block_addr
io
.
miss_req
.
bits
.
client_id
:=
id
when
(
io
.
miss_req
.
fire
())
{
state
:=
s_miss_resp
}
}
...
...
@@ -129,25 +121,21 @@ class AtomicsMissQueue extends DCacheModule
// debug output
when
(
io
.
lsu
.
req
.
fire
())
{
XSDebug
(
s
"io.lsu.req cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n"
,
XSDebug
(
s
"io.lsu.req cmd: %x addr: %x data: %x mask: %x id: %d replay
ed_req
: %b\n"
,
io
.
lsu
.
req
.
bits
.
cmd
,
io
.
lsu
.
req
.
bits
.
addr
,
io
.
lsu
.
req
.
bits
.
data
,
io
.
lsu
.
req
.
bits
.
mask
,
io
.
lsu
.
req
.
bits
.
meta
.
id
,
io
.
lsu
.
req
.
bits
.
meta
.
replay
)
}
val
replay
=
io
.
replay
.
req
when
(
replay
.
fire
())
{
XSDebug
(
s
"replay cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n"
,
XSDebug
(
s
"replay cmd: %x addr: %x data: %x mask: %x id: %d replay
ed_req
: %b\n"
,
replay
.
bits
.
cmd
,
replay
.
bits
.
addr
,
replay
.
bits
.
data
,
replay
.
bits
.
mask
,
replay
.
bits
.
meta
.
id
,
replay
.
bits
.
meta
.
replay
)
}
when
(
io
.
lsu
.
resp
.
fire
())
{
XSDebug
(
s
"io.lsu.resp: data: %x id: %d replay
: %b miss: %b nack
: %b\n"
,
io
.
lsu
.
resp
.
bits
.
data
,
io
.
lsu
.
resp
.
bits
.
meta
.
id
,
io
.
lsu
.
resp
.
bits
.
meta
.
replay
,
io
.
lsu
.
resp
.
bits
.
miss
,
io
.
lsu
.
resp
.
bits
.
nack
)
XSDebug
(
s
"io.lsu.resp: data: %x id: %d replay
ed_req: %b miss: %b need_replay
: %b\n"
,
io
.
lsu
.
resp
.
bits
.
data
,
io
.
lsu
.
resp
.
bits
.
meta
.
id
,
io
.
lsu
.
resp
.
bits
.
meta
.
replay
,
io
.
lsu
.
resp
.
bits
.
miss
,
io
.
lsu
.
resp
.
bits
.
replay
)
}
val
miss_req
=
io
.
miss_req
XSDebug
(
miss_req
.
fire
(),
"miss_req cmd: %x addr: %x client_id: %d\n"
,
miss_req
.
bits
.
cmd
,
miss_req
.
bits
.
addr
,
miss_req
.
bits
.
client_id
)
val
miss_resp
=
io
.
miss_resp
XSDebug
(
miss_resp
.
fire
(),
"miss_resp client_id: %d entry_id: %d\n"
,
miss_resp
.
bits
.
client_id
,
miss_resp
.
bits
.
entry_id
)
...
...
src/main/scala/xiangshan/cache/dcache.scala
浏览文件 @
3726264a
...
...
@@ -57,7 +57,8 @@ trait HasDCacheParameters extends HasL1CacheParameters {
storeMissQueueEntryIdWidth
),
miscMissQueueEntryIdWidth
)
def
nClientMissQueues
=
3
// clients: ldu0, ldu1, stu, atomics
def
nClientMissQueues
=
4
def
clientIdWidth
=
log2Up
(
nClientMissQueues
)
def
missQueueClientIdWidth
=
clientIdWidth
+
clientMissQueueEntryIdWidth
def
clientIdMSB
=
missQueueClientIdWidth
-
1
...
...
src/main/scala/xiangshan/cache/dcacheWrapper.scala
浏览文件 @
3726264a
...
...
@@ -25,19 +25,7 @@ class DCacheMeta extends DCacheBundle {
val
replay
=
Bool
()
// whether it's a replayed request?
}
// for load from load unit
// cycle 0: vaddr
// cycle 1: paddr
class
DCacheLoadReq
extends
DCacheBundle
{
val
cmd
=
UInt
(
M_SZ
.
W
)
val
addr
=
UInt
(
VAddrBits
.
W
)
val
data
=
UInt
(
DataBits
.
W
)
val
mask
=
UInt
((
DataBits
/
8
).
W
)
val
meta
=
new
DCacheMeta
}
// special memory operations(lr/sc, atomics)
// memory request in word granularity(load, mmio, lr/sc, atomics)
class
DCacheWordReq
extends
DCacheBundle
{
val
cmd
=
UInt
(
M_SZ
.
W
)
...
...
@@ -47,7 +35,7 @@ class DCacheWordReq extends DCacheBundle
val
meta
=
new
DCacheMeta
}
//
ordinary store
//
memory request in line granularity(store)
class
DCacheLineReq
extends
DCacheBundle
{
val
cmd
=
UInt
(
M_SZ
.
W
)
...
...
@@ -57,16 +45,6 @@ class DCacheLineReq extends DCacheBundle
val
meta
=
new
DCacheMeta
}
class
DCacheLoadResp
extends
DCacheBundle
{
val
data
=
UInt
(
DataBits
.
W
)
val
meta
=
new
DCacheMeta
// cache req missed, send it to miss queue
val
miss
=
Bool
()
// cache req nacked, replay it later
val
nack
=
Bool
()
}
class
DCacheWordResp
extends
DCacheBundle
{
val
data
=
UInt
(
DataBits
.
W
)
...
...
@@ -74,7 +52,7 @@ class DCacheWordResp extends DCacheBundle
// cache req missed, send it to miss queue
val
miss
=
Bool
()
// cache req nacked, replay it later
val
nack
=
Bool
()
val
replay
=
Bool
()
}
class
DCacheLineResp
extends
DCacheBundle
...
...
@@ -84,16 +62,13 @@ class DCacheLineResp extends DCacheBundle
// cache req missed, send it to miss queue
val
miss
=
Bool
()
// cache req nacked, replay it later
val
nack
=
Bool
()
val
replay
=
Bool
()
}
class
DCacheLoadIO
extends
DCacheBundle
class
Refill
extends
DCacheBundle
{
val
req
=
DecoupledIO
(
new
DCacheWordReq
)
val
resp
=
Flipped
(
DecoupledIO
(
new
DCacheWordResp
))
// kill previous cycle's req
val
s1_kill
=
Output
(
Bool
())
val
s1_paddr
=
Output
(
UInt
(
PAddrBits
.
W
))
val
addr
=
UInt
(
PAddrBits
.
W
)
val
data
=
UInt
((
cfg
.
blockBytes
*
8
).
W
)
}
class
DCacheWordIO
extends
DCacheBundle
...
...
@@ -102,6 +77,16 @@ class DCacheWordIO extends DCacheBundle
val
resp
=
Flipped
(
DecoupledIO
(
new
DCacheWordResp
))
}
// used by load unit
class
DCacheLoadIO
extends
DCacheWordIO
{
// kill previous cycle's req
val
s1_kill
=
Output
(
Bool
())
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val
s1_paddr
=
Output
(
UInt
(
PAddrBits
.
W
))
}
class
DCacheLineIO
extends
DCacheBundle
{
val
req
=
DecoupledIO
(
new
DCacheLineReq
)
...
...
@@ -110,7 +95,7 @@ class DCacheLineIO extends DCacheBundle
class
DCacheToLsuIO
extends
DCacheBundle
{
val
load
=
Vec
(
LoadPipelineWidth
,
Flipped
(
new
DCacheLoadIO
))
// for speculative load
val
lsq
=
Flipped
(
new
DCacheLineIO
)
// lsq load/store
val
lsq
=
ValidIO
(
new
Refill
)
// refill to load queue, wake up load misses
val
store
=
Flipped
(
new
DCacheLineIO
)
// for sbuffer
val
atomics
=
Flipped
(
new
DCacheWordIO
)
// atomics reqs
}
...
...
@@ -156,7 +141,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val
ldu
=
Seq
.
fill
(
LoadPipelineWidth
)
{
Module
(
new
LoadPipe
)
}
val
stu
=
Module
(
new
StorePipe
)
val
atomics
=
Module
(
new
AtomicsPipe
)
val
loadMissQueue
=
Module
(
new
LoadMissQueue
)
val
storeMissQueue
=
Module
(
new
StoreMissQueue
)
val
atomicsMissQueue
=
Module
(
new
AtomicsMissQueue
)
val
missQueue
=
Module
(
new
MissQueue
(
edge
))
...
...
@@ -179,16 +163,14 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// To simplify port arbitration
// MissQueue, Prober and StorePipe all use port 0
// if contention got severe, considering load balancing on two ports?
val
MetaReadPortCount
=
5
val
MissQueueMetaReadPort
=
0
val
ProberMetaReadPort
=
1
val
StorePipeMetaReadPort
=
2
val
LoadPipeMetaReadPort
=
3
val
AtomicsPipeMetaReadPort
=
4
val
MetaReadPortCount
=
4
val
ProberMetaReadPort
=
0
val
StorePipeMetaReadPort
=
1
val
LoadPipeMetaReadPort
=
2
val
AtomicsPipeMetaReadPort
=
3
val
metaReadArb
=
Module
(
new
Arbiter
(
new
L1MetaReadReq
,
MetaReadPortCount
))
metaReadArb
.
io
.
in
(
MissQueueMetaReadPort
)
<>
missQueue
.
io
.
meta_read
metaReadArb
.
io
.
in
(
ProberMetaReadPort
)
<>
prober
.
io
.
meta_read
metaReadArb
.
io
.
in
(
StorePipeMetaReadPort
)
<>
stu
.
io
.
meta_read
metaReadArb
.
io
.
in
(
LoadPipeMetaReadPort
)
<>
ldu
(
0
).
io
.
meta_read
...
...
@@ -196,7 +178,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
metaArray
.
io
.
read
(
0
)
<>
metaReadArb
.
io
.
out
missQueue
.
io
.
meta_resp
<>
metaArray
.
io
.
resp
(
0
)
prober
.
io
.
meta_resp
<>
metaArray
.
io
.
resp
(
0
)
stu
.
io
.
meta_resp
<>
metaArray
.
io
.
resp
(
0
)
ldu
(
0
).
io
.
meta_resp
<>
metaArray
.
io
.
resp
(
0
)
...
...
@@ -217,19 +198,18 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val
dataWriteArb
=
Module
(
new
Arbiter
(
new
L1DataWriteReq
,
DataWritePortCount
))
dataWriteArb
.
io
.
in
(
StorePipeDataWritePort
)
<>
stu
.
io
.
data_write
dataWriteArb
.
io
.
in
(
MissQueueDataWritePort
)
<>
missQueue
.
io
.
refill
dataWriteArb
.
io
.
in
(
MissQueueDataWritePort
)
<>
missQueue
.
io
.
data_write
dataWriteArb
.
io
.
in
(
AtomicsPipeDataWritePort
)
<>
atomics
.
io
.
data_write
dataArray
.
io
.
write
<>
dataWriteArb
.
io
.
out
// To simplify port arbitration
// WritebackUnit and StorePipe use port 0
val
DataReadPortCount
=
5
val
DataReadPortCount
=
4
val
WritebackDataReadPort
=
0
val
StorePipeDataReadPort
=
1
val
LoadPipeDataReadPort
=
2
val
AtomicsPipeDataReadPort
=
3
val
LoadMissDataReadPort
=
4
val
dataReadArb
=
Module
(
new
Arbiter
(
new
L1DataReadReq
,
DataReadPortCount
))
...
...
@@ -237,14 +217,12 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
dataReadArb
.
io
.
in
(
StorePipeDataReadPort
)
<>
stu
.
io
.
data_read
dataReadArb
.
io
.
in
(
LoadPipeDataReadPort
)
<>
ldu
(
0
).
io
.
data_read
dataReadArb
.
io
.
in
(
AtomicsPipeDataReadPort
)
<>
atomics
.
io
.
data_read
dataReadArb
.
io
.
in
(
LoadMissDataReadPort
)
<>
loadMissQueue
.
io
.
data_req
dataArray
.
io
.
read
(
0
)
<>
dataReadArb
.
io
.
out
dataArray
.
io
.
resp
(
0
)
<>
wb
.
io
.
data_resp
dataArray
.
io
.
resp
(
0
)
<>
stu
.
io
.
data_resp
dataArray
.
io
.
resp
(
0
)
<>
atomics
.
io
.
data_resp
dataArray
.
io
.
resp
(
0
)
<>
ldu
(
0
).
io
.
data_resp
dataArray
.
io
.
resp
(
0
)
<>
loadMissQueue
.
io
.
data_resp
for
(
w
<-
1
until
LoadPipelineWidth
)
{
dataArray
.
io
.
read
(
w
)
<>
ldu
(
w
).
io
.
data_read
...
...
@@ -272,9 +250,6 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
assert
(!(
io
.
lsu
.
load
(
w
).
req
.
fire
()
&&
io
.
lsu
.
load
(
w
).
req
.
bits
.
meta
.
tlb_miss
),
"TLB missed requests should not go to cache"
)
}
// load miss queue
loadMissQueue
.
io
.
lsu
<>
io
.
lsu
.
lsq
//----------------------------------------
// store pipe and store miss queue
storeMissQueue
.
io
.
lsu
<>
io
.
lsu
.
store
...
...
@@ -322,34 +297,39 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// miss queue
val
loadMissQueueClientId
=
0.
U
(
clientIdWidth
.
W
)
val
storeMissQueueClientId
=
1.
U
(
clientIdWidth
.
W
)
val
atomicsMissQueueClientId
=
2.
U
(
clientIdWidth
.
W
)
require
(
LoadPipelineWidth
==
2
,
"We hard code the number of load misses"
)
val
loadMissQueueClientId_0
=
0.
U
(
clientIdWidth
.
W
)
val
loadMissQueueClientId_1
=
1.
U
(
clientIdWidth
.
W
)
val
storeMissQueueClientId
=
2.
U
(
clientIdWidth
.
W
)
val
atomicsMissQueueClientId
=
3.
U
(
clientIdWidth
.
W
)
// Request
val
missReqArb
=
Module
(
new
Arbiter
(
new
MissReq
,
3
))
val
missReqArb
=
Module
(
new
Arbiter
(
new
MissReq
,
nClientMissQueues
))
val
missReq
=
missQueue
.
io
.
req
val
loadMissReq
=
loadMissQueue
.
io
.
miss_req
val
storeMissReq
=
storeMissQueue
.
io
.
miss_req
val
atomicsMissReq
=
atomicsMissQueue
.
io
.
miss_req
missReqArb
.
io
.
in
(
0
).
valid
:=
loadMissReq
.
valid
loadMissReq
.
ready
:=
missReqArb
.
io
.
in
(
0
).
ready
missReqArb
.
io
.
in
(
0
).
bits
:=
loadMissReq
.
bits
missReqArb
.
io
.
in
(
0
).
bits
.
client_id
:=
Cat
(
loadMissQueueClientId
,
loadMissReq
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
missReqArb
.
io
.
in
(
1
).
valid
:=
storeMissReq
.
valid
storeMissReq
.
ready
:=
missReqArb
.
io
.
in
(
1
).
ready
missReqArb
.
io
.
in
(
1
).
bits
:=
storeMissReq
.
bits
missReqArb
.
io
.
in
(
1
).
bits
.
client_id
:=
Cat
(
storeMissQueueClientId
,
val
loadMissReq_0
=
ldu
(
0
).
io
.
miss_req
val
loadMissReq_1
=
ldu
(
1
).
io
.
miss_req
val
storeMissReq
=
stu
.
io
.
miss_req
val
atomicsMissReq
=
atomics
.
io
.
miss_req
// Load pipe 0 -> miss request arbiter port 0.
// After the bulk connection, client_id is overridden: the client tag is
// concatenated in front of the entry-id bits of the same request.
missReqArb.io.in(0) <> loadMissReq_0
missReqArb.io.in(0).bits.client_id := Cat(
  loadMissQueueClientId_0,
  loadMissReq_0.bits.client_id(entryIdMSB, entryIdLSB)
)

// Load pipe 1 -> miss request arbiter port 1.
// The entry-id bits must be taken from this port's own request
// (loadMissReq_1), not from load pipe 0.
missReqArb.io.in(1) <> loadMissReq_1
missReqArb.io.in(1).bits.client_id := Cat(
  loadMissQueueClientId_1,
  loadMissReq_1.bits.client_id(entryIdMSB, entryIdLSB)
)
missReqArb
.
io
.
in
(
2
).
valid
:=
storeMissReq
.
valid
storeMissReq
.
ready
:=
missReqArb
.
io
.
in
(
2
).
ready
missReqArb
.
io
.
in
(
2
).
bits
:=
storeMissReq
.
bits
missReqArb
.
io
.
in
(
2
).
bits
.
client_id
:=
Cat
(
storeMissQueueClientId
,
storeMissReq
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
missReqArb
.
io
.
in
(
2
).
valid
:=
atomicsMissReq
.
valid
atomicsMissReq
.
ready
:=
missReqArb
.
io
.
in
(
2
).
ready
missReqArb
.
io
.
in
(
2
).
bits
:=
atomicsMissReq
.
bits
missReqArb
.
io
.
in
(
2
).
bits
.
client_id
:=
Cat
(
atomicsMissQueueClientId
,
missReqArb
.
io
.
in
(
3
).
valid
:=
atomicsMissReq
.
valid
atomicsMissReq
.
ready
:=
missReqArb
.
io
.
in
(
3
).
ready
missReqArb
.
io
.
in
(
3
).
bits
:=
atomicsMissReq
.
bits
missReqArb
.
io
.
in
(
3
).
bits
.
client_id
:=
Cat
(
atomicsMissQueueClientId
,
atomicsMissReq
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
val
miss_block
=
block_miss
(
missReqArb
.
io
.
out
.
bits
.
addr
)
...
...
@@ -357,18 +337,13 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
XSDebug
(
miss_block
,
"MissQueue blocked\n"
)
// Response
// store and atomics wait for miss queue responses
val
missResp
=
missQueue
.
io
.
resp
val
loadMissResp
=
loadMissQueue
.
io
.
miss_resp
val
storeMissResp
=
storeMissQueue
.
io
.
miss_resp
val
atomicsMissResp
=
atomicsMissQueue
.
io
.
miss_resp
val
atomicsMissResp
=
atomicsMissQueue
.
io
.
miss_resp
val
clientId
=
missResp
.
bits
.
client_id
(
clientIdMSB
,
clientIdLSB
)
val
isLoadMissResp
=
clientId
===
loadMissQueueClientId
loadMissResp
.
valid
:=
missResp
.
valid
&&
isLoadMissResp
loadMissResp
.
bits
:=
missResp
.
bits
loadMissResp
.
bits
.
client_id
:=
missResp
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
)
val
isStoreMissResp
=
clientId
===
storeMissQueueClientId
storeMissResp
.
valid
:=
missResp
.
valid
&&
isStoreMissResp
storeMissResp
.
bits
:=
missResp
.
bits
...
...
@@ -381,31 +356,27 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
// Finish
val
missFinish
=
missQueue
.
io
.
finish
val
loadMissFinish
=
loadMissQueue
.
io
.
miss_finish
val
storeMissFinish
=
storeMissQueue
.
io
.
miss_finish
val
atomicsMissFinish
=
atomicsMissQueue
.
io
.
miss_finish
val
missFinishArb
=
Module
(
new
Arbiter
(
new
MissFinish
,
3
))
missFinishArb
.
io
.
in
(
0
).
valid
:=
loadMissFinish
.
valid
loadMissFinish
.
ready
:=
missFinishArb
.
io
.
in
(
0
).
ready
missFinishArb
.
io
.
in
(
0
).
bits
.
entry_id
:=
loadMissFinish
.
bits
.
entry_id
missFinishArb
.
io
.
in
(
0
).
bits
.
client_id
:=
Cat
(
loadMissQueueClientId
,
loadMissFinish
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
missFinishArb
.
io
.
in
(
1
).
valid
:=
storeMissFinish
.
valid
storeMissFinish
.
ready
:=
missFinishArb
.
io
.
in
(
1
).
ready
missFinishArb
.
io
.
in
(
1
).
bits
.
entry_id
:=
storeMissFinish
.
bits
.
entry_id
missFinishArb
.
io
.
in
(
1
).
bits
.
client_id
:=
Cat
(
storeMissQueueClientId
,
storeMissFinish
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
missFinishArb
.
io
.
in
(
2
).
valid
:=
atomicsMissFinish
.
valid
atomicsMissFinish
.
ready
:=
missFinishArb
.
io
.
in
(
2
).
ready
missFinishArb
.
io
.
in
(
2
).
bits
.
entry_id
:=
atomicsMissFinish
.
bits
.
entry_id
missFinishArb
.
io
.
in
(
2
).
bits
.
client_id
:=
Cat
(
atomicsMissQueueClientId
,
val
atomicsMissFinish
=
atomicsMissQueue
.
io
.
miss_finish
val
missFinishArb
=
Module
(
new
Arbiter
(
new
MissFinish
,
2
))
missFinishArb
.
io
.
in
(
0
).
valid
:=
storeMissFinish
.
valid
storeMissFinish
.
ready
:=
missFinishArb
.
io
.
in
(
0
).
ready
missFinishArb
.
io
.
in
(
0
).
bits
.
entry_id
:=
storeMissFinish
.
bits
.
entry_id
missFinishArb
.
io
.
in
(
0
).
bits
.
client_id
:=
Cat
(
storeMissQueueClientId
,
storeMissFinish
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
missFinishArb
.
io
.
in
(
1
).
valid
:=
atomicsMissFinish
.
valid
atomicsMissFinish
.
ready
:=
missFinishArb
.
io
.
in
(
1
).
ready
missFinishArb
.
io
.
in
(
1
).
bits
.
entry_id
:=
atomicsMissFinish
.
bits
.
entry_id
missFinishArb
.
io
.
in
(
1
).
bits
.
client_id
:=
Cat
(
atomicsMissQueueClientId
,
atomicsMissFinish
.
bits
.
client_id
(
entryIdMSB
,
entryIdLSB
))
missFinish
<>
missFinishArb
.
io
.
out
// refill to load queue
io
.
lsu
.
lsq
<>
missQueue
.
io
.
refill
// tilelink stuff
bus
.
a
<>
missQueue
.
io
.
mem_acquire
bus
.
e
<>
missQueue
.
io
.
mem_finish
...
...
@@ -464,10 +435,12 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val
atomics_addr_matches
=
VecInit
(
atomics
.
io
.
inflight_req_block_addrs
map
(
entry
=>
entry
.
valid
&&
entry
.
bits
===
get_block_addr
(
addr
)))
val
atomics_addr_match
=
atomics_addr_matches
.
reduce
(
_
||
_
)
val
prober_addr_match
=
prober
.
io
.
inflight_req_block_addr
.
valid
&&
prober
.
io
.
inflight_req_block_addr
.
bits
===
get_block_addr
(
addr
)
val
miss_idx_matches
=
VecInit
(
missQueue
.
io
.
inflight_req_idxes
map
(
entry
=>
entry
.
valid
&&
entry
.
bits
===
get_idx
(
addr
)))
val
miss_idx_match
=
miss_idx_matches
.
reduce
(
_
||
_
)
store_addr_match
||
atomics_addr_match
||
miss_idx_match
store_addr_match
||
atomics_addr_match
||
prober_addr_match
||
miss_idx_match
}
def
block_store
(
addr
:
UInt
)
=
{
...
...
@@ -487,18 +460,12 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
}
def
block_miss
(
addr
:
UInt
)
=
{
val
store_idx_matches
=
VecInit
(
stu
.
io
.
inflight_req_idxes
map
(
entry
=>
entry
.
valid
&&
entry
.
bits
===
get_idx
(
addr
)))
val
store_idx_match
=
store_idx_matches
.
reduce
(
_
||
_
)
val
atomics_idx_matches
=
VecInit
(
atomics
.
io
.
inflight_req_idxes
map
(
entry
=>
entry
.
valid
&&
entry
.
bits
===
get_idx
(
addr
)))
val
atomics_idx_match
=
atomics_idx_matches
.
reduce
(
_
||
_
)
val
prober_idx_match
=
prober
.
io
.
inflight_req_idx
.
valid
&&
prober
.
io
.
inflight_req_idx
.
bits
===
get_idx
(
addr
)
val
miss_idx_matches
=
VecInit
(
missQueue
.
io
.
inflight_req_idxes
map
(
entry
=>
entry
.
valid
&&
entry
.
bits
===
get_idx
(
addr
)))
val
miss_idx_match
=
miss_idx_matches
.
reduce
(
_
||
_
)
store_idx_match
||
atomics_idx_match
||
prober_idx_match
||
miss_idx_match
prober_idx_match
||
miss_idx_match
}
def
block_probe
(
addr
:
UInt
)
=
{
...
...
src/main/scala/xiangshan/cache/ldu.scala
浏览文件 @
3726264a
...
...
@@ -8,14 +8,19 @@ import utils.XSDebug
class
LoadPipe
extends
DCacheModule
{
val
io
=
IO
(
new
DCacheBundle
{
// incoming requests
val
lsu
=
Flipped
(
new
DCacheLoadIO
)
// req got nacked in stage 0?
val
nack
=
Input
(
Bool
())
// meta and data array read port
val
data_read
=
DecoupledIO
(
new
L1DataReadReq
)
val
data_resp
=
Input
(
Vec
(
nWays
,
Vec
(
blockRows
,
Bits
(
encRowBits
.
W
))))
val
meta_read
=
DecoupledIO
(
new
L1MetaReadReq
)
val
meta_resp
=
Input
(
Vec
(
nWays
,
new
L1Metadata
))
//
req got nacked in stage 0?
val
nack
=
Input
(
Bool
()
)
//
send miss request to miss queue
val
miss_req
=
DecoupledIO
(
new
MissReq
)
})
// LSU requests
...
...
@@ -67,6 +72,17 @@ class LoadPipe extends DCacheModule
def
wayMap
[
T
<:
Data
](
f
:
Int
=>
T
)
=
VecInit
((
0
until
nWays
).
map
(
f
))
val
s1_tag_eq_way
=
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
).
tag
===
(
get_tag
(
s1_addr
))).
asUInt
val
s1_tag_match_way
=
wayMap
((
w
:
Int
)
=>
s1_tag_eq_way
(
w
)
&&
meta_resp
(
w
).
coh
.
isValid
()).
asUInt
val
s1_tag_match
=
s1_tag_match_way
.
orR
val
s1_hit_meta
=
Mux1H
(
s1_tag_match_way
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
)))
val
s1_hit_state
=
s1_hit_meta
.
coh
// replacement policy
val
replacer
=
cacheParams
.
replacement
val
s1_repl_way_en
=
UIntToOH
(
replacer
.
way
)
val
s1_repl_meta
=
Mux1H
(
s1_repl_way_en
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
)))
when
(
io
.
miss_req
.
fire
())
{
replacer
.
miss
}
assert
(!(
s1_valid
&&
s1_req
.
meta
.
replay
&&
io
.
lsu
.
s1_kill
),
"lsq tried to kill an replayed request!"
)
...
...
@@ -79,11 +95,20 @@ class LoadPipe extends DCacheModule
val
s2_addr
=
RegNext
(
s1_addr
)
val
s2_tag_match_way
=
RegNext
(
s1_tag_match_way
)
val
s2_tag_match
=
s2_tag_match_way
.
orR
val
s2_hit_state
=
Mux1H
(
s2_tag_match_way
,
wayMap
((
w
:
Int
)
=>
RegNext
(
meta_resp
(
w
).
coh
)))
val
s2_tag_match
=
RegNext
(
s1_tag_match
)
val
s2_hit_meta
=
RegNext
(
s1_hit_meta
)
val
s2_hit_state
=
RegNext
(
s1_hit_state
)
val
s2_has_permission
=
s2_hit_state
.
onAccess
(
s2_req
.
cmd
).
_1
val
s2_new_hit_state
=
s2_hit_state
.
onAccess
(
s2_req
.
cmd
).
_3
val
s2_repl_meta
=
RegNext
(
s1_repl_meta
)
val
s2_repl_way_en
=
RegNext
(
s1_repl_way_en
)
val
s2_old_meta
=
Mux
(
s2_tag_match
,
s2_hit_meta
,
s2_repl_meta
)
val
s2_way_en
=
Mux
(
s2_tag_match
,
s2_tag_match_way
,
s2_repl_way_en
)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
...
...
@@ -94,6 +119,7 @@ class LoadPipe extends DCacheModule
// It's possible that we had permission but state changes on hit:
// eg: write to exclusive but clean block
val
s2_hit
=
s2_tag_match
&&
s2_has_permission
&&
s2_hit_state
===
s2_new_hit_state
// nacked or not
val
s2_nack
=
Wire
(
Bool
())
val
s2_data
=
Wire
(
Vec
(
nWays
,
UInt
(
encRowBits
.
W
)))
val
data_resp
=
io
.
data_resp
...
...
@@ -104,22 +130,6 @@ class LoadPipe extends DCacheModule
val
s2_data_muxed
=
Mux1H
(
s2_tag_match_way
,
s2_data
)
// the index of word in a row, in case rowBits != wordBits
val
s2_word_idx
=
if
(
rowWords
==
1
)
0.
U
else
s2_addr
(
log2Up
(
rowWords
*
wordBytes
)-
1
,
log2Up
(
wordBytes
))
val
s2_nack_hit
=
RegNext
(
s1_nack
)
// Can't allocate MSHR for same set currently being written back
// the same set is busy
val
s2_nack_set_busy
=
s2_valid
&&
false
.
B
// Bank conflict on data arrays
val
s2_nack_data
=
false
.
B
s2_nack
:=
s2_nack_hit
||
s2_nack_set_busy
||
s2_nack_data
// only dump these signals when they are actually valid
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_hit"
,
s2_valid
&&
s2_hit
)
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_nack"
,
s2_valid
&&
s2_nack
)
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_nack_hit"
,
s2_valid
&&
s2_nack_hit
)
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_nack_set_busy"
,
s2_valid
&&
s2_nack_set_busy
)
// load data gen
val
s2_data_words
=
Wire
(
Vec
(
rowWords
,
UInt
(
encWordBits
.
W
)))
for
(
w
<-
0
until
rowWords
)
{
...
...
@@ -128,23 +138,58 @@ class LoadPipe extends DCacheModule
val
s2_data_word
=
s2_data_words
(
s2_word_idx
)
val
s2_decoded
=
cacheParams
.
dataCode
.
decode
(
s2_data_word
)
val
s2_data_word_decoded
=
s2_decoded
.
corrected
// the assertion below is intentionally commented out:
// when TLB misses, s2_hit may still be true
// which may cause unnecessary assertion
// assert(!(s2_valid && s2_hit && !s2_nack && s2_decoded.uncorrectable))
// when req got nacked, upper levels should replay this request
// the same set is busy
val
s2_nack_hit
=
RegNext
(
s1_nack
)
// cannot allocate an MSHR for this load miss
val
s2_nack_no_mshr
=
io
.
miss_req
.
valid
&&
!
io
.
miss_req
.
ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val
s2_nack_data
=
false
.
B
s2_nack
:=
s2_nack_hit
||
s2_nack_no_mshr
||
s2_nack_data
// only dump these signals when they are actually valid
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_hit"
,
s2_valid
&&
s2_hit
)
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_nack"
,
s2_valid
&&
s2_nack
)
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_nack_hit"
,
s2_valid
&&
s2_nack_hit
)
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_nack_no_mshr"
,
s2_valid
&&
s2_nack_no_mshr
)
// send load miss to miss queue
io
.
miss_req
.
valid
:=
s2_valid
&&
!
s2_nack_hit
&&
!
s2_nack_data
&&
!
s2_hit
io
.
miss_req
.
bits
.
cmd
:=
s2_req
.
cmd
io
.
miss_req
.
bits
.
addr
:=
get_block_addr
(
s2_addr
)
io
.
miss_req
.
bits
.
tag_match
:=
s2_tag_match
io
.
miss_req
.
bits
.
way_en
:=
s2_way_en
io
.
miss_req
.
bits
.
old_meta
:=
s2_old_meta
io
.
miss_req
.
bits
.
client_id
:=
0.
U
// send back response
val
resp
=
Wire
(
ValidIO
(
new
DCacheWordResp
))
resp
.
valid
:=
s2_valid
resp
.
bits
.
data
:=
s2_data_word_decoded
resp
.
bits
.
meta
:=
s2_req
.
meta
resp
.
bits
.
miss
:=
!
s2_hit
resp
.
bits
.
nack
:=
s2_nack
// on miss or nack, upper level should replay request
// but if we successfully sent the request to miss queue
// upper level does not need to replay request
// they can sit in load queue and wait for refill
resp
.
bits
.
miss
:=
!
s2_hit
||
s2_nack
resp
.
bits
.
replay
:=
resp
.
bits
.
miss
&&
(!
io
.
miss_req
.
fire
()
||
s2_nack
)
io
.
lsu
.
resp
.
valid
:=
resp
.
valid
io
.
lsu
.
resp
.
bits
:=
resp
.
bits
assert
(!(
resp
.
valid
&&
!
io
.
lsu
.
resp
.
ready
))
when
(
resp
.
valid
)
{
XSDebug
(
s
"LoadPipe resp: data: %x id: %d replay
: %b miss: %b nack
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
nack
)
XSDebug
(
s
"LoadPipe resp: data: %x id: %d replay
ed_req: %b miss: %b need_replay
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
replay
)
}
// -------
...
...
src/main/scala/xiangshan/cache/loadMissQueue.scala
已删除
100644 → 0
浏览文件 @
6f687286
package
xiangshan.cache
import
chisel3._
import
chisel3.util._
import
utils.XSDebug
import
bus.tilelink._
/** One entry of the load miss queue.
  *
  * A small state machine that
  *   1. accepts a line request from the LSU (`io.lsu.req`),
  *   2. forwards it to the miss queue (`io.miss_req`) and waits for `io.miss_resp`,
  *   3. if the miss response carries no data, reads the line from the data array
  *      (`io.data_req` / `io.data_resp`) and decodes it,
  *   4. returns the line to the LSU (`io.lsu.resp`),
  *   5. reports completion through `io.miss_finish` and becomes free again.
  *
  * `io.idx` / `io.tag` expose the index and tag of the request held by this entry;
  * both are valid whenever the entry is not in `s_invalid`.
  */
class LoadMissEntry extends DCacheModule
{
  val io = IO(new Bundle {
    // identifier of this entry; used as client_id towards the miss queue
    val id = Input(UInt())

    // request/response channel with the LSU
    val lsu = Flipped(new DCacheLineIO)
    // request / response / finish handshake with the miss queue
    val miss_req = DecoupledIO(new MissReq)
    val miss_resp = Flipped(ValidIO(new MissResp))
    val miss_finish = DecoupledIO(new MissFinish)

    // data array read port
    val data_req = DecoupledIO(new L1DataReadReq)
    val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))

    // index and tag of the in-flight request (valid while the entry is busy)
    val idx = Output(Valid(UInt()))
    val tag = Output(Valid(UInt()))
  })

  // FSM states; the entry walks
  //   s_invalid -> s_miss_req -> s_miss_resp -> [s_data_read_req -> s_data_read_resp ->] s_resp -> s_miss_finish -> s_invalid
  val s_invalid :: s_miss_req :: s_miss_resp :: s_miss_finish :: s_data_read_req :: s_data_read_resp :: s_resp :: Nil = Enum(7)

  val state = RegInit(s_invalid)

  // latched request and the response being assembled for the LSU
  val req = Reg(new DCacheLineReq)
  val resp = Reg(new DCacheLineResp)

  val req_idx = get_idx(req.addr)
  val req_tag = get_tag(req.addr)
  val req_block_addr = get_block_addr(req.addr)
  // latched miss queue response (way_en and entry_id are read in later states)
  val reg_miss_resp = Reg(new MissResp)

  // assign default values to output signals
  // (the state-specific `when` blocks below override these via last-connect semantics)
  io.lsu.req.ready := state === s_invalid
  io.lsu.resp.valid := false.B
  io.lsu.resp.bits := DontCare

  io.miss_req.valid := false.B
  io.miss_req.bits := DontCare
  io.miss_finish.valid := false.B
  io.miss_finish.bits := DontCare

  io.data_req.valid := false.B
  io.data_req.bits := DontCare

  io.idx.valid := state =/= s_invalid
  io.tag.valid := state =/= s_invalid
  io.idx.bits := req_idx
  io.tag.bits := req_tag

  when (state =/= s_invalid) {
    XSDebug("entry: %d state: %d\n", io.id, state)
  }

  // --------------------------------------------
  // s_invalid: receive requests
  when (state === s_invalid) {
    when (io.lsu.req.fire()) {
      // only non-replayed reads (M_XRD) are expected here
      assert(io.lsu.req.bits.cmd === M_XRD)
      assert(!io.lsu.req.bits.meta.replay)
      req := io.lsu.req.bits
      // pre-fill the response: same meta, neither miss nor nack
      resp.meta := io.lsu.req.bits.meta
      resp.miss := false.B
      resp.nack := false.B
      state := s_miss_req
    }
  }

  // --------------------------------------------
  // s_miss_req: send the request to the miss queue, tagged with our id
  when (state === s_miss_req) {
    io.miss_req.valid := true.B
    io.miss_req.bits.cmd := req.cmd
    io.miss_req.bits.addr := req_block_addr
    io.miss_req.bits.client_id := io.id
    when (io.miss_req.fire()) {
      state := s_miss_resp
    }
  }

  // s_miss_resp: wait for the miss queue response
  when (state === s_miss_resp) {
    when (io.miss_resp.fire()) {
      reg_miss_resp := io.miss_resp.bits
      resp.data := io.miss_resp.bits.data
      when (io.miss_resp.bits.has_data) {
        // the response already carries the line: reply to the LSU directly
        state := s_resp
      } .otherwise {
        // miss queue says that data is already in dcache
        // so we need to read it
        state := s_data_read_req
      }
    }
  }

  // number of s_data_read_resp cycles counted before data_resp is sampled
  val dataArrayLatency = 2
  val data_array_ctr = Reg(UInt(log2Up(dataArrayLatency).W))

  // s_data_read_req: issue a read of all rows of the way reported by the miss queue
  when (state === s_data_read_req) {
    // Data read for new requests
    io.data_req.valid := true.B
    io.data_req.bits.addr := req_block_addr
    io.data_req.bits.way_en := reg_miss_resp.way_en
    io.data_req.bits.rmask := ~0.U(blockRows.W)

    when (io.data_req.fire()) {
      state := s_data_read_resp
      data_array_ctr := 0.U
    }
  }

  // s_data_read_resp: count cycles, then sample and decode the data array output
  when (state === s_data_read_resp) {
    data_array_ctr := data_array_ctr + 1.U
    when (data_array_ctr === (dataArrayLatency - 1).U) {
      val way_idx = OHToUInt(reg_miss_resp.way_en)
      // concatenate the decoded rows, with row 0 in the least significant bits
      resp.data := Cat((0 until blockRows).reverse map { i =>
        val row = io.data_resp(way_idx)(i)
        // decode each word in this row
        val row_decoded = Cat((0 until rowWords).reverse map { w =>
          val data_word = row(encWordBits * (w + 1) - 1, encWordBits * w)
          val decoded = cacheParams.dataCode.decode(data_word)
          val data_word_decoded = decoded.corrected
          // an uncorrectable error is treated as fatal
          assert(!decoded.uncorrectable)
          data_word_decoded
        })
        row_decoded
      })

      state := s_resp
    }
  }

  // --------------------------------------------
  // s_resp: hand the assembled response back to the LSU
  when (state === s_resp) {
    io.lsu.resp.valid := true.B
    io.lsu.resp.bits := resp

    when (io.lsu.resp.fire()) {
      state := s_miss_finish
    }
  }

  // s_miss_finish: send finish (our id + the miss queue entry id), then free the entry
  when (state === s_miss_finish) {
    io.miss_finish.valid := true.B
    io.miss_finish.bits.client_id := io.id
    io.miss_finish.bits.entry_id := reg_miss_resp.entry_id
    when (io.miss_finish.fire()) {
      state := s_invalid
    }
  }

  // debug output
  when (io.lsu.req.fire()) {
    XSDebug(s"LoadMissEntryTransaction req %d\n", io.id)
  }

  when (io.lsu.resp.fire()) {
    XSDebug(s"LoadMissEntryTransaction resp %d\n", io.id)
  }
}
/** Queue of `cfg.nLoadMissEntries` [[LoadMissEntry]] instances.
  *
  * An incoming LSU line request is offered to the entry selected by a priority
  * encoder over the entries' `req.ready` signals, and is refused while any busy
  * entry holds a request with the same index. Each entry's outgoing channels
  * (LSU response, miss request, miss finish, data array read) are merged by one
  * `Arbiter` per channel. Miss responses are routed to the entry whose number
  * equals `io.miss_resp.bits.client_id`.
  */
class LoadMissQueue extends DCacheModule
{
  val io = IO(new Bundle {
    // request/response channel with the LSU
    val lsu = Flipped(new DCacheLineIO)

    // request / response / finish handshake with the miss queue
    val miss_req = DecoupledIO(new MissReq)
    val miss_resp = Flipped(ValidIO(new MissResp))
    val miss_finish = DecoupledIO(new MissFinish)

    // data array read port, shared by all entries
    val data_req = DecoupledIO(new L1DataReadReq)
    val data_resp = Input(Vec(nWays, Vec(blockRows, Bits(encRowBits.W))))
  })

  // one arbiter per outgoing channel, with one input per entry
  val miss_req_arb = Module(new Arbiter(new MissReq, cfg.nLoadMissEntries))
  val miss_finish_arb = Module(new Arbiter(new MissFinish, cfg.nLoadMissEntries))
  val data_req_arb = Module(new Arbiter(new L1DataReadReq, cfg.nLoadMissEntries))
  val resp_arb = Module(new Arbiter(new DCacheLineResp, cfg.nLoadMissEntries))

  // per-entry comparison of the incoming request's index / tag with the entry's
  val idx_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))
  val tag_matches = Wire(Vec(cfg.nLoadMissEntries, Bool()))

  // NOTE(review): tag_match is not referenced anywhere else in this class
  val tag_match = Mux1H(idx_matches, tag_matches)
  // true when any busy entry holds a request with the same index
  val idx_match = idx_matches.reduce(_||_)

  val req = io.lsu.req
  val entry_alloc_idx = Wire(UInt())
  // ready of the entry chosen for allocation (false unless overridden in the loop below)
  val pri_rdy = WireInit(false.B)
  // the request is only presented to an entry when no index conflict exists
  val pri_val = req.valid && !idx_match
  // NOTE(review): entry_id_MSB / entry_id_LSB are not referenced anywhere else in this class
  val entry_id_MSB = reqIdWidth - 1
  val entry_id_LSB = reqIdWidth - loadMissQueueEntryIdWidth

  val entries = (0 until cfg.nLoadMissEntries) map { i =>
    val entry = Module(new LoadMissEntry)

    entry.io.id := i.U(loadMissQueueEntryIdWidth.W)

    idx_matches(i) := entry.io.idx.valid && entry.io.idx.bits === get_idx(req.bits.addr)
    tag_matches(i) := entry.io.tag.valid && entry.io.tag.bits === get_tag(req.bits.addr)

    // lsu req and resp
    val entry_lsu = entry.io.lsu
    // only the entry selected for allocation sees the request as valid
    entry_lsu.req.valid := (i.U === entry_alloc_idx) && pri_val
    when (i.U === entry_alloc_idx) {
      pri_rdy := entry_lsu.req.ready
    }
    entry_lsu.req.bits := req.bits

    resp_arb.io.in(i) <> entry_lsu.resp
    miss_req_arb.io.in(i) <> entry.io.miss_req
    data_req_arb.io.in(i) <> entry.io.data_req

    // route the miss response to the entry named by client_id
    entry.io.miss_resp.valid := (i.U === io.miss_resp.bits.client_id) && io.miss_resp.valid
    entry.io.miss_resp.bits := io.miss_resp.bits

    // every entry observes the same data array output
    entry.io.data_resp := io.data_resp

    miss_finish_arb.io.in(i) <> entry.io.miss_finish
    entry
  }

  // allocation target: index produced by a priority encoder over the entries' ready bits
  entry_alloc_idx := PriorityEncoder(entries.map(m=>m.io.lsu.req.ready))

  // whenever index matches, do not let it in
  req.ready := pri_rdy && !idx_match
  io.lsu.resp <> resp_arb.io.out
  io.miss_req <> miss_req_arb.io.out
  io.data_req <> data_req_arb.io.out
  io.miss_finish <> miss_finish_arb.io.out

  // debug output
  when (req.fire()) {
    XSDebug(s"req cmd: %x addr: %x data: %x mask: %x id: %d replay: %b\n",
      req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask, req.bits.meta.id, req.bits.meta.replay)
  }

  val resp = io.lsu.resp
  when (resp.fire()) {
    XSDebug(s"resp: data: %x id: %d replay: %b miss: %b nack: %b\n",
      resp.bits.data, resp.bits.meta.id, resp.bits.meta.replay, resp.bits.miss, resp.bits.nack)
  }

  // trace the miss queue handshakes
  val miss_req = io.miss_req
  XSDebug(miss_req.fire(), "miss_req cmd: %x addr: %x client_id: %d\n",
    miss_req.bits.cmd, miss_req.bits.addr, miss_req.bits.client_id)

  val miss_resp = io.miss_resp
  XSDebug(miss_resp.fire(), "miss_resp client_id: %d entry_id: %d has_data: %b data: %x\n",
    miss_resp.bits.client_id, miss_resp.bits.entry_id, miss_resp.bits.has_data, miss_resp.bits.data)

  val miss_finish = io.miss_finish
  XSDebug(miss_finish.fire(), "miss_finish client_id: %d entry_id: %d\n",
    miss_finish.bits.client_id, miss_finish.bits.entry_id)
}
src/main/scala/xiangshan/cache/missQueue.scala
浏览文件 @
3726264a
...
...
@@ -10,15 +10,15 @@ class MissReq extends DCacheBundle
val
cmd
=
UInt
(
M_SZ
.
W
)
val
addr
=
UInt
(
PAddrBits
.
W
)
val
client_id
=
UInt
(
missQueueClientIdWidth
.
W
)
val
tag_match
=
Bool
()
val
way_en
=
Bits
(
nWays
.
W
)
val
old_meta
=
new
L1Metadata
}
class
MissResp
extends
DCacheBundle
{
val
client_id
=
UInt
(
missQueueClientIdWidth
.
W
)
val
entry_id
=
UInt
(
missQueueEntryIdWidth
.
W
)
val
way_en
=
Bits
(
nWays
.
W
)
val
has_data
=
Bool
()
val
data
=
UInt
(
blockBits
.
W
)
}
class
MissFinish
extends
DCacheBundle
...
...
@@ -39,52 +39,56 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
val
req
=
Flipped
(
DecoupledIO
(
new
MissReq
))
val
resp
=
DecoupledIO
(
new
MissResp
)
val
finish
=
Flipped
(
DecoupledIO
(
new
MissFinish
))
// refill to load queue to wake up missed requests
val
refill
=
ValidIO
(
new
Refill
)
val
block_idx
=
Output
(
Valid
(
UInt
()))
val
block_addr
=
Output
(
Valid
(
UInt
()))
val
block_probe_idx
=
Output
(
Valid
(
UInt
()))
val
block_probe_addr
=
Output
(
Valid
(
UInt
()))
// bus
val
mem_acquire
=
DecoupledIO
(
new
TLBundleA
(
edge
.
bundle
))
val
mem_grant
=
Flipped
(
DecoupledIO
(
new
TLBundleD
(
edge
.
bundle
)))
val
mem_finish
=
DecoupledIO
(
new
TLBundleE
(
edge
.
bundle
))
val
meta_read
=
DecoupledIO
(
new
L1MetaReadReq
)
val
meta_resp
=
Input
(
Vec
(
nWays
,
new
L1Metadata
))
val
meta_write
=
DecoupledIO
(
new
L1MetaWriteReq
)
val
refill
=
DecoupledIO
(
new
L1DataWriteReq
)
// write back
val
wb_req
=
DecoupledIO
(
new
WritebackReq
(
edge
.
bundle
.
sourceBits
))
val
wb_resp
=
Input
(
Bool
())
// write meta and data
val
meta_write
=
DecoupledIO
(
new
L1MetaWriteReq
)
val
data_write
=
DecoupledIO
(
new
L1DataWriteReq
)
// for synchronization
val
block_idx
=
Output
(
Valid
(
UInt
()))
val
block_addr
=
Output
(
Valid
(
UInt
()))
val
block_probe_idx
=
Output
(
Valid
(
UInt
()))
val
block_probe_addr
=
Output
(
Valid
(
UInt
()))
// watch prober's write back requests
val
probe_wb_req
=
Flipped
(
ValidIO
(
new
WritebackReq
(
edge
.
bundle
.
sourceBits
)))
val
probe_active
=
Flipped
(
ValidIO
(
UInt
()))
})
// MSHR:
// 1. get req
// 2. re
ad meta data and make replacement decisions
// 3.
do writeback/refill
when necessary
// 4.
send response back to client
// 5.
wait for client's finish
// 6.
update meta data
// 2. re
fill when necessary
// 3.
writeback
when necessary
// 4.
update meta data
// 5.
send response back to client
// 6.
wait for client's finish
// 7. done
val
s_invalid
::
s_
meta_read_req
::
s_meta_read_resp
::
s_decide_next_state
::
s_refill_req
::
s_refill_resp
::
s_mem_finish
::
s_wait_probe_exit
::
s_send_resp
::
s_wb_req
::
s_wb_resp
::
s_data_write_req
::
s_meta_write_req
::
s_client_finish
::
Nil
=
Enum
(
14
)
val
s_invalid
::
s_
refill_req
::
s_refill_resp
::
s_mem_finish
::
s_wait_probe_exit
::
s_wb_req
::
s_wb_resp
::
s_data_write_req
::
s_meta_write_req
::
s_send_resp
::
s_client_finish
::
Nil
=
Enum
(
11
)
val
state
=
RegInit
(
s_invalid
)
val
req
=
Reg
(
new
MissReq
)
val
req_reg
=
Reg
(
new
MissReq
)
val
req
=
Mux
(
io
.
req
.
fire
(),
io
.
req
.
bits
,
req_reg
)
val
req_idx
=
get_idx
(
req
.
addr
)
val
req_tag
=
get_tag
(
req
.
addr
)
val
req_block_addr
=
get_block_addr
(
req
.
addr
)
// meta read results
val
req_tag_match
=
Reg
(
Bool
())
val
req_old_meta
=
Reg
(
new
L1Metadata
)
val
req_way_en
=
Reg
(
UInt
(
nWays
.
W
))
val
req_tag_match
=
req
.
tag_match
val
req_old_meta
=
req
.
old_meta
val
req_way_en
=
req
.
way_en
// what permission to release for the old block?
val
(
_
,
shrink_param
,
coh_on_clear
)
=
req_old_meta
.
coh
.
onCacheControl
(
M_FLUSH
)
...
...
@@ -101,24 +105,14 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
val
should_refill_data
=
Reg
(
Bool
())
val
needs_writeback
=
Reg
(
Bool
())
// for read, to shorten latency
// we send back response as soon as possible
// for read, we do not need to replay requests
// just refill data to load queue, and then, we can exit
// no need to walk through send_resp and client_finish state
//
// for store and amo
// we send back response when we have finished everything
// inform clients to replay requests
val
early_response
=
Reg
(
Bool
())
io
.
block_idx
.
valid
:=
state
=/=
s_invalid
io
.
block_addr
.
valid
:=
state
=/=
s_invalid
io
.
block_idx
.
bits
:=
req_idx
io
.
block_addr
.
bits
:=
req_block_addr
// to preserve forward progress, we allow probe when we are dealing with acquire/grant
io
.
block_probe_idx
.
valid
:=
state
=/=
s_invalid
&&
state
=/=
s_refill_req
&&
state
=/=
s_refill_resp
io
.
block_probe_addr
.
valid
:=
state
=/=
s_invalid
&&
state
=/=
s_refill_req
&&
state
=/=
s_refill_resp
io
.
block_probe_idx
.
bits
:=
req_idx
io
.
block_probe_addr
.
bits
:=
req_block_addr
val
no_replay
=
Reg
(
Bool
())
// assign default values to output signals
io
.
req
.
ready
:=
false
.
B
...
...
@@ -126,25 +120,35 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io
.
resp
.
bits
:=
DontCare
io
.
finish
.
ready
:=
false
.
B
io
.
refill
.
valid
:=
false
.
B
io
.
refill
.
bits
:=
DontCare
io
.
mem_acquire
.
valid
:=
false
.
B
io
.
mem_acquire
.
bits
:=
DontCare
io
.
mem_grant
.
ready
:=
false
.
B
io
.
mem_finish
.
valid
:=
false
.
B
io
.
mem_finish
.
bits
:=
DontCare
io
.
meta_read
.
valid
:=
false
.
B
io
.
meta_read
.
bits
:=
DontCare
io
.
wb_req
.
valid
:=
false
.
B
io
.
wb_req
.
bits
:=
DontCare
io
.
meta_write
.
valid
:=
false
.
B
io
.
meta_write
.
bits
:=
DontCare
io
.
refill
.
valid
:=
false
.
B
io
.
refill
.
bits
:=
DontCare
io
.
data_write
.
valid
:=
false
.
B
io
.
data_write
.
bits
:=
DontCare
io
.
wb_req
.
valid
:=
false
.
B
io
.
wb_req
.
bits
:=
DontCare
io
.
block_idx
.
valid
:=
state
=/=
s_invalid
io
.
block_addr
.
valid
:=
state
=/=
s_invalid
// break combinational loop
io
.
block_idx
.
bits
:=
get_idx
(
req_reg
.
addr
)
io
.
block_addr
.
bits
:=
get_block_addr
(
req_reg
.
addr
)
// to preserve forward progress, we allow probe when we are dealing with acquire/grant
io
.
block_probe_idx
.
valid
:=
state
=/=
s_invalid
&&
state
=/=
s_refill_req
&&
state
=/=
s_refill_resp
io
.
block_probe_addr
.
valid
:=
state
=/=
s_invalid
&&
state
=/=
s_refill_req
&&
state
=/=
s_refill_resp
io
.
block_probe_idx
.
bits
:=
get_idx
(
req_reg
.
addr
)
io
.
block_probe_addr
.
bits
:=
get_block_addr
(
req_reg
.
addr
)
when
(
state
=/=
s_invalid
)
{
XSDebug
(
"entry: %d state: %d\n"
,
io
.
id
,
state
)
...
...
@@ -154,89 +158,25 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io
.
id
,
io
.
block_probe_idx
.
valid
,
io
.
block_probe_idx
.
bits
,
io
.
block_probe_addr
.
valid
,
io
.
block_probe_addr
.
bits
)
}
// --------------------------------------------
// s_invalid: receive requests
when
(
state
===
s_invalid
)
{
io
.
req
.
ready
:=
true
.
B
when
(
io
.
req
.
fire
())
{
grantack
.
valid
:=
false
.
B
refill_ctr
:=
0.
U
should_refill_data
:=
false
.
B
needs_writeback
:=
false
.
B
early_response
:=
false
.
B
req
:=
io
.
req
.
bits
state
:=
s_meta_read_req
}
}
// --------------------------------------------
// s_meta_read_req: read meta data
when
(
state
===
s_meta_read_req
)
{
io
.
meta_read
.
valid
:=
true
.
B
val
meta_read
=
io
.
meta_read
.
bits
meta_read
.
idx
:=
req_idx
meta_read
.
way_en
:=
~
0.
U
(
nWays
.
W
)
meta_read
.
tag
:=
DontCare
when
(
io
.
meta_read
.
fire
())
{
state
:=
s_meta_read_resp
}
}
// s_meta_read_resp: handle meta read response
// check hit, miss
when
(
state
===
s_meta_read_resp
)
{
// tag check
def
wayMap
[
T
<:
Data
](
f
:
Int
=>
T
)
=
VecInit
((
0
until
nWays
).
map
(
f
))
val
tag_eq_way
=
wayMap
((
w
:
Int
)
=>
io
.
meta_resp
(
w
).
tag
===
(
req_tag
)).
asUInt
val
tag_match_way
=
wayMap
((
w
:
Int
)
=>
tag_eq_way
(
w
)
&&
io
.
meta_resp
(
w
).
coh
.
isValid
()).
asUInt
val
tag_match
=
tag_match_way
.
orR
val
hit_meta
=
Mux1H
(
tag_match_way
,
wayMap
((
w
:
Int
)
=>
io
.
meta_resp
(
w
)))
val
hit_state
=
hit_meta
.
coh
val
has_permission
=
hit_state
.
onAccess
(
req
.
cmd
).
_1
val
new_hit_state
=
hit_state
.
onAccess
(
req
.
cmd
).
_3
val
hit
=
tag_match
&&
has_permission
&&
hit_state
===
new_hit_state
// replacement policy
val
replacer
=
cacheParams
.
replacement
val
replaced_way_en
=
UIntToOH
(
replacer
.
way
)
val
repl_meta
=
Mux1H
(
replaced_way_en
,
wayMap
((
w
:
Int
)
=>
io
.
meta_resp
(
w
)))
req_tag_match
:=
tag_match
req_old_meta
:=
Mux
(
tag_match
,
hit_meta
,
repl_meta
)
req_way_en
:=
Mux
(
tag_match
,
tag_match_way
,
replaced_way_en
)
replacer
.
miss
state
:=
s_decide_next_state
}
// decision making
def
decide_next_state
()
:
UInt
=
{
val
new_state
=
WireInit
(
s_invalid
)
val
old_coh
=
req_old_meta
.
coh
val
needs_wb
=
old_coh
.
onCacheControl
(
M_FLUSH
).
_1
// does the line we are evicting need to be written back
early_response
:=
req
.
cmd
===
M_XRD
no_replay
:=
req
.
cmd
===
M_XRD
when
(
req_tag_match
)
{
val
(
is_hit
,
_
,
coh_on_hit
)
=
old_coh
.
onAccess
(
req
.
cmd
)
when
(
is_hit
)
{
// set dirty bit
// we do not need to assert write any more
// read may go here as well
// eg: when several load miss on the same block
when
(
req
.
cmd
===
M_XRD
)
{
// normal read
// read hit, no need to update meta
new_coh
:=
old_coh
new_state
:=
s_send_resp
}
.
otherwise
{
assert
(
isWrite
(
req
.
cmd
))
new_coh
:=
coh_on_hit
new_state
:=
s_meta_write_req
}
// read should never go here
// we get here only when we need to set dirty bit
assert
(
isWrite
(
req
.
cmd
))
// go update meta
new_coh
:=
coh_on_hit
new_state
:=
s_meta_write_req
}
.
otherwise
{
// upgrade permissions
new_coh
:=
old_coh
new_state
:=
s_refill_req
...
...
@@ -251,30 +191,17 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
new_state
}
// this state is unnecessary, we can make decisions in s_meta_read_resp
when
(
state
===
s_decide_next_state
)
{
state
:=
decide_next_state
()
}
// --------------------------------------------
// write back
when
(
state
===
s_wb_req
)
{
io
.
wb_req
.
valid
:=
true
.
B
io
.
wb_req
.
bits
.
tag
:=
req_old_meta
.
tag
io
.
wb_req
.
bits
.
idx
:=
req_idx
io
.
wb_req
.
bits
.
param
:=
shrink_param
io
.
wb_req
.
bits
.
way_en
:=
req_way_en
io
.
wb_req
.
bits
.
source
:=
io
.
id
io
.
wb_req
.
bits
.
voluntary
:=
true
.
B
when
(
io
.
wb_req
.
fire
())
{
state
:=
s_wb_resp
}
}
when
(
state
===
s_invalid
)
{
io
.
req
.
ready
:=
true
.
B
when
(
state
===
s_wb_resp
)
{
when
(
io
.
wb_resp
)
{
state
:=
s_data_write_req
when
(
io
.
req
.
fire
())
{
grantack
.
valid
:=
false
.
B
refill_ctr
:=
0.
U
should_refill_data
:=
false
.
B
needs_writeback
:=
false
.
B
no_replay
:=
false
.
B
req_reg
:=
io
.
req
.
bits
state
:=
decide_next_state
()
}
}
...
...
@@ -294,8 +221,9 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// ecc-encoded data
val
refill_data
=
Reg
(
Vec
(
blockRows
,
UInt
(
encRowBits
.
W
)))
//
not encoded
data
//
raw
data
val
refill_data_raw
=
Reg
(
Vec
(
blockRows
,
UInt
(
rowBits
.
W
)))
when
(
state
===
s_refill_resp
)
{
io
.
mem_grant
.
ready
:=
true
.
B
...
...
@@ -334,6 +262,12 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// refill data to load queue
io
.
refill
.
valid
:=
RegNext
(
state
===
s_refill_resp
&&
refill_done
&&
should_refill_data
&&
no_replay
)
io
.
refill
.
bits
.
addr
:=
req_block_addr
io
.
refill
.
bits
.
data
:=
refill_data_raw
.
asUInt
when
(
state
===
s_mem_finish
)
{
io
.
mem_finish
.
valid
:=
grantack
.
valid
io
.
mem_finish
.
bits
:=
grantack
.
bits
...
...
@@ -344,31 +278,23 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
}
// --------------------------------------------
// sync with probe
when
(
state
===
s_wait_probe_exit
)
{
// we only wait for probe, when prober is manipulating our set
val
should_wait_for_probe_exit
=
io
.
probe_active
.
valid
&&
io
.
probe_active
.
bits
===
req_idx
when
(!
should_wait_for_probe_exit
)
{
// no data
when
(
early_response
)
{
// load miss respond right after finishing tilelink transactions
assert
(
should_refill_data
)
state
:=
s_send_resp
when
(
needs_writeback
)
{
// write back data
state
:=
s_wb_req
}
.
otherwise
{
// if we do not do early respond
// we must be a write
when
(
needs_writeback
)
{
// write back data
assert
(
should_refill_data
)
state
:=
s_wb_req
}
.
otherwise
{
// no need to write back
when
(
should_refill_data
)
{
// fill data into dcache
state
:=
s_data_write_req
}
otherwise
{
// just got permission, no need to fill data into dcache
state
:=
s_meta_write_req
}
// no need to write back
when
(
should_refill_data
)
{
// fill data into dcache
state
:=
s_data_write_req
}
otherwise
{
// permission update only
state
:=
s_meta_write_req
}
}
}
...
...
@@ -397,20 +323,42 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
}
when
(
prober_writeback_our_block
)
{
req_old_meta
.
coh
:=
onShrink
(
io
.
probe_wb_req
.
bits
.
param
)
req_reg
.
old_meta
.
coh
:=
onShrink
(
io
.
probe_wb_req
.
bits
.
param
)
}
// --------------------------------------------
// write back
when
(
state
===
s_wb_req
)
{
io
.
wb_req
.
valid
:=
true
.
B
io
.
wb_req
.
bits
.
tag
:=
req_old_meta
.
tag
io
.
wb_req
.
bits
.
idx
:=
req_idx
io
.
wb_req
.
bits
.
param
:=
shrink_param
io
.
wb_req
.
bits
.
way_en
:=
req_way_en
io
.
wb_req
.
bits
.
source
:=
io
.
id
io
.
wb_req
.
bits
.
voluntary
:=
true
.
B
when
(
io
.
wb_req
.
fire
())
{
state
:=
s_wb_resp
}
}
when
(
state
===
s_wb_resp
)
{
when
(
io
.
wb_resp
)
{
state
:=
s_data_write_req
}
}
// --------------------------------------------
// data write
when
(
state
===
s_data_write_req
)
{
io
.
refill
.
valid
:=
true
.
B
io
.
refill
.
bits
.
addr
:=
req_block_addr
io
.
refill
.
bits
.
way_en
:=
req_way_en
io
.
refill
.
bits
.
wmask
:=
VecInit
((
0
until
blockRows
)
map
(
i
=>
~
0.
U
(
rowWords
.
W
)))
io
.
refill
.
bits
.
rmask
:=
DontCare
io
.
refill
.
bits
.
data
:=
refill_data
when
(
io
.
refill
.
fire
())
{
io
.
data_write
.
valid
:=
true
.
B
io
.
data_write
.
bits
.
addr
:=
req_block_addr
io
.
data_write
.
bits
.
way_en
:=
req_way_en
io
.
data_write
.
bits
.
wmask
:=
VecInit
((
0
until
blockRows
)
map
(
i
=>
~
0.
U
(
rowWords
.
W
)))
io
.
data_write
.
bits
.
rmask
:=
DontCare
io
.
data_write
.
bits
.
data
:=
refill_data
when
(
io
.
data_write
.
fire
())
{
state
:=
s_meta_write_req
}
}
...
...
@@ -425,8 +373,9 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io
.
meta_write
.
bits
.
way_en
:=
req_way_en
when
(
io
.
meta_write
.
fire
())
{
when
(
early_response
)
{
state
:=
s_client_finish
when
(
no_replay
)
{
// no need to replay, exit now
state
:=
s_invalid
}
.
otherwise
{
state
:=
s_send_resp
}
...
...
@@ -438,9 +387,6 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
io
.
resp
.
valid
:=
true
.
B
io
.
resp
.
bits
.
client_id
:=
req
.
client_id
io
.
resp
.
bits
.
entry_id
:=
io
.
id
io
.
resp
.
bits
.
way_en
:=
req_way_en
io
.
resp
.
bits
.
has_data
:=
should_refill_data
io
.
resp
.
bits
.
data
:=
refill_data_raw
.
asUInt
when
(
io
.
resp
.
fire
())
{
// additional assertion
...
...
@@ -448,18 +394,7 @@ class MissEntry(edge: TLEdgeOut) extends DCacheModule
assert
(
is_hit
,
"We still don't have permissions for this block"
)
assert
(
new_coh
===
coh_on_hit
,
"Incorrect coherence meta data"
)
// read miss
when
(
early_response
&&
should_refill_data
)
{
when
(
needs_writeback
)
{
// write back data later
state
:=
s_wb_req
}
.
otherwise
{
// for read, we will write data later
state
:=
s_data_write_req
}
}
.
otherwise
{
state
:=
s_client_finish
}
state
:=
s_client_finish
}
}
...
...
@@ -478,19 +413,18 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
val
req
=
Flipped
(
DecoupledIO
(
new
MissReq
))
val
resp
=
ValidIO
(
new
MissResp
)
val
finish
=
Flipped
(
DecoupledIO
(
new
MissFinish
))
val
refill
=
ValidIO
(
new
Refill
)
val
mem_acquire
=
Decoupled
(
new
TLBundleA
(
edge
.
bundle
))
val
mem_grant
=
Flipped
(
Decoupled
(
new
TLBundleD
(
edge
.
bundle
)))
val
mem_finish
=
Decoupled
(
new
TLBundleE
(
edge
.
bundle
))
val
meta_read
=
Decoupled
(
new
L1MetaReadReq
)
val
meta_resp
=
Input
(
Vec
(
nWays
,
new
L1Metadata
))
val
meta_write
=
Decoupled
(
new
L1MetaWriteReq
)
val
refill
=
Decoupled
(
new
L1DataWriteReq
)
val
wb_req
=
Decoupled
(
new
WritebackReq
(
edge
.
bundle
.
sourceBits
))
val
wb_resp
=
Input
(
Bool
())
val
meta_write
=
Decoupled
(
new
L1MetaWriteReq
)
val
data_write
=
Decoupled
(
new
L1DataWriteReq
)
val
probe_wb_req
=
Flipped
(
ValidIO
(
new
WritebackReq
(
edge
.
bundle
.
sourceBits
)))
val
probe_active
=
Flipped
(
ValidIO
(
UInt
()))
...
...
@@ -502,9 +436,9 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
})
val
resp_arb
=
Module
(
new
Arbiter
(
new
MissResp
,
cfg
.
nMissEntries
))
val
meta_read_arb
=
Module
(
new
Arbiter
(
new
L1MetaReadReq
,
cfg
.
nMissEntries
))
val
refill_arb
=
Module
(
new
Arbiter
(
new
Refill
,
cfg
.
nMissEntries
))
val
meta_write_arb
=
Module
(
new
Arbiter
(
new
L1MetaWriteReq
,
cfg
.
nMissEntries
))
val
refill_arb
=
Module
(
new
Arbiter
(
new
L1DataWriteReq
,
cfg
.
nMissEntries
))
val
data_write_arb
=
Module
(
new
Arbiter
(
new
L1DataWriteReq
,
cfg
.
nMissEntries
))
val
wb_req_arb
=
Module
(
new
Arbiter
(
new
WritebackReq
(
edge
.
bundle
.
sourceBits
),
cfg
.
nMissEntries
))
// assign default values to output signals
...
...
@@ -528,6 +462,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
// entry resp
resp_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
resp
refill_arb
.
io
.
in
(
i
).
valid
:=
entry
.
io
.
refill
.
valid
refill_arb
.
io
.
in
(
i
).
bits
:=
entry
.
io
.
refill
.
bits
// entry finish
entry
.
io
.
finish
.
valid
:=
(
i
.
U
===
io
.
finish
.
bits
.
entry_id
)
&&
io
.
finish
.
valid
...
...
@@ -536,11 +472,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io
.
finish
.
ready
:=
entry
.
io
.
finish
.
ready
}
meta_read_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
meta_read
entry
.
io
.
meta_resp
:=
io
.
meta_resp
meta_write_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
meta_write
refill_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
refill
data_write_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
data_write
wb_req_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
wb_req
entry
.
io
.
wb_resp
:=
io
.
wb_resp
...
...
@@ -568,9 +501,16 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io
.
resp
.
bits
:=
resp_arb
.
io
.
out
.
bits
resp_arb
.
io
.
out
.
ready
:=
true
.
B
io
.
meta_read
<>
meta_read_arb
.
io
.
out
io
.
refill
.
valid
:=
refill_arb
.
io
.
out
.
valid
io
.
refill
.
bits
:=
refill_arb
.
io
.
out
.
bits
refill_arb
.
io
.
out
.
ready
:=
true
.
B
// one refill at a time
val
refill_vec
=
refill_arb
.
io
.
in
.
map
(
c
=>
c
.
valid
)
assert
(
PopCount
(
refill_vec
)
===
0.
U
||
PopCount
(
refill_vec
)
===
1.
U
)
io
.
meta_write
<>
meta_write_arb
.
io
.
out
io
.
refill
<>
refill
_arb
.
io
.
out
io
.
data_write
<>
data_write
_arb
.
io
.
out
io
.
wb_req
<>
wb_req_arb
.
io
.
out
TLArbiter
.
lowestFromSeq
(
edge
,
io
.
mem_acquire
,
entries
.
map
(
_
.
io
.
mem_acquire
))
...
...
@@ -592,8 +532,8 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
XSDebug
(
finish
.
fire
(),
"finish client_id: %d entry_id: %d\n"
,
finish
.
bits
.
client_id
,
finish
.
bits
.
entry_id
)
// print
refill
XSDebug
(
io
.
refill
.
fire
(),
"refill addr %x\n"
,
io
.
refill
.
bits
.
addr
)
// print
data_write
XSDebug
(
io
.
data_write
.
fire
(),
"refill addr %x\n"
,
io
.
data_write
.
bits
.
addr
)
// print meta_write
XSDebug
(
io
.
meta_write
.
fire
(),
"meta_write idx %x way_en: %x old_tag: %x new_coh: %d new_tag: %x\n"
,
...
...
src/main/scala/xiangshan/cache/storeMissQueue.scala
浏览文件 @
3726264a
...
...
@@ -14,7 +14,6 @@ class StoreMissEntry extends DCacheModule
val
lsu
=
Flipped
(
new
DCacheLineIO
)
val
replay
=
new
DCacheLineIO
val
miss_req
=
DecoupledIO
(
new
MissReq
)
val
miss_resp
=
Flipped
(
ValidIO
(
new
MissResp
))
val
miss_finish
=
DecoupledIO
(
new
MissFinish
)
...
...
@@ -22,7 +21,7 @@ class StoreMissEntry extends DCacheModule
val
tag
=
Output
(
Valid
(
UInt
()))
})
val
s_invalid
::
s_replay_req
::
s_replay_resp
::
s_resp
::
s_miss_re
q
::
s_miss_resp
::
s_miss_finish
::
Nil
=
Enum
(
7
)
val
s_invalid
::
s_replay_req
::
s_replay_resp
::
s_resp
::
s_miss_re
sp
::
s_miss_finish
::
Nil
=
Enum
(
6
)
val
state
=
RegInit
(
s_invalid
)
val
req
=
Reg
(
new
DCacheLineReq
)
...
...
@@ -42,10 +41,8 @@ class StoreMissEntry extends DCacheModule
io
.
replay
.
req
.
bits
:=
DontCare
io
.
replay
.
resp
.
ready
:=
false
.
B
io
.
miss_req
.
valid
:=
false
.
B
io
.
miss_req
.
bits
:=
DontCare
io
.
miss_finish
.
valid
:=
false
.
B
io
.
miss_finish
.
bits
:=
DontCare
io
.
miss_finish
.
valid
:=
false
.
B
io
.
miss_finish
.
bits
:=
DontCare
io
.
idx
.
valid
:=
state
=/=
s_invalid
io
.
tag
.
valid
:=
state
=/=
s_invalid
...
...
@@ -73,6 +70,9 @@ class StoreMissEntry extends DCacheModule
when
(
state
===
s_replay_req
)
{
io
.
replay
.
req
.
valid
:=
true
.
B
io
.
replay
.
req
.
bits
:=
req
// use our own storeMissEntryId
// miss resp are routed by this id
io
.
replay
.
req
.
bits
.
meta
.
id
:=
io
.
id
when
(
io
.
replay
.
req
.
fire
())
{
state
:=
s_replay_resp
}
...
...
@@ -81,34 +81,30 @@ class StoreMissEntry extends DCacheModule
when
(
state
===
s_replay_resp
)
{
io
.
replay
.
resp
.
ready
:=
true
.
B
when
(
io
.
replay
.
resp
.
fire
())
{
// req missed
when
(
io
.
replay
.
resp
.
bits
.
miss
)
{
// replayed reqs should not miss
assert
(!
req
.
meta
.
replay
)
when
(!
req
.
meta
.
replay
)
{
state
:=
s_miss_req
// the req missed and did not enter mshr
// so replay it until it hits or enters mshr
when
(
io
.
replay
.
resp
.
bits
.
replay
)
{
state
:=
s_replay_req
}
.
otherwise
{
// the req missed and enters mshr
// wait for miss response
state
:=
s_miss_resp
}
}
.
otherwise
{
// req hits, everything OK
resp
:=
io
.
replay
.
resp
.
bits
when
(!
req
.
meta
.
replay
)
{
state
:=
s_resp
}
.
otherwise
{
// if it's a replayed request
// we need to tell mshr, we are done
state
:=
s_miss_finish
}
}
assert
(!
io
.
replay
.
resp
.
bits
.
nack
)
}
}
// --------------------------------------------
when
(
state
===
s_miss_req
)
{
io
.
miss_req
.
valid
:=
true
.
B
io
.
miss_req
.
bits
.
cmd
:=
req
.
cmd
io
.
miss_req
.
bits
.
addr
:=
req_block_addr
io
.
miss_req
.
bits
.
client_id
:=
io
.
id
when
(
io
.
miss_req
.
fire
())
{
state
:=
s_miss_resp
}
}
...
...
@@ -134,6 +130,8 @@ class StoreMissEntry extends DCacheModule
when
(
state
===
s_resp
)
{
io
.
lsu
.
resp
.
valid
:=
true
.
B
io
.
lsu
.
resp
.
bits
:=
resp
// response to sbuffer should carry the original request id
io
.
lsu
.
resp
.
bits
.
meta
.
id
:=
req
.
meta
.
id
when
(
io
.
lsu
.
resp
.
fire
())
{
state
:=
s_invalid
...
...
@@ -157,12 +155,10 @@ class StoreMissQueue extends DCacheModule
val
lsu
=
Flipped
(
new
DCacheLineIO
)
val
replay
=
new
DCacheLineIO
val
miss_req
=
DecoupledIO
(
new
MissReq
)
val
miss_resp
=
Flipped
(
ValidIO
(
new
MissResp
))
val
miss_finish
=
DecoupledIO
(
new
MissFinish
)
})
val
miss_req_arb
=
Module
(
new
Arbiter
(
new
MissReq
,
cfg
.
nStoreMissEntries
))
val
miss_finish_arb
=
Module
(
new
Arbiter
(
new
MissFinish
,
cfg
.
nStoreMissEntries
))
val
replay_arb
=
Module
(
new
Arbiter
(
new
DCacheLineReq
,
cfg
.
nStoreMissEntries
))
val
resp_arb
=
Module
(
new
Arbiter
(
new
DCacheLineResp
,
cfg
.
nStoreMissEntries
))
...
...
@@ -222,7 +218,6 @@ class StoreMissQueue extends DCacheModule
io
.
replay
.
resp
.
ready
:=
entry_replay
.
resp
.
ready
}
miss_req_arb
.
io
.
in
(
i
)
<>
entry
.
io
.
miss_req
entry
.
io
.
miss_resp
.
valid
:=
(
i
.
U
===
io
.
miss_resp
.
bits
.
client_id
)
&&
io
.
miss_resp
.
valid
entry
.
io
.
miss_resp
.
bits
:=
io
.
miss_resp
.
bits
...
...
@@ -236,7 +231,6 @@ class StoreMissQueue extends DCacheModule
req
.
ready
:=
pri_rdy
&&
!
idx_match
io
.
lsu
.
resp
<>
resp_arb
.
io
.
out
io
.
replay
.
req
<>
replay_arb
.
io
.
out
io
.
miss_req
<>
miss_req_arb
.
io
.
out
io
.
miss_finish
<>
miss_finish_arb
.
io
.
out
// debug output
...
...
@@ -253,14 +247,10 @@ class StoreMissQueue extends DCacheModule
val
resp
=
io
.
lsu
.
resp
when
(
resp
.
fire
())
{
XSDebug
(
s
"resp: data: %x id: %d replay: %b miss: %b
nack
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
nack
)
XSDebug
(
s
"resp: data: %x id: %d replay: %b miss: %b
replay
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
replay
)
}
val
miss_req
=
io
.
miss_req
XSDebug
(
miss_req
.
fire
(),
"miss_req cmd: %x addr: %x client_id: %d\n"
,
miss_req
.
bits
.
cmd
,
miss_req
.
bits
.
addr
,
miss_req
.
bits
.
client_id
)
val
miss_resp
=
io
.
miss_resp
XSDebug
(
miss_resp
.
fire
(),
"miss_resp client_id: %d entry_id: %d\n"
,
miss_resp
.
bits
.
client_id
,
miss_resp
.
bits
.
entry_id
)
...
...
src/main/scala/xiangshan/cache/stu.scala
浏览文件 @
3726264a
...
...
@@ -16,6 +16,9 @@ class StorePipe extends DCacheModule
val
meta_resp
=
Input
(
Vec
(
nWays
,
new
L1Metadata
))
val
inflight_req_idxes
=
Output
(
Vec
(
3
,
Valid
(
UInt
())))
val
inflight_req_block_addrs
=
Output
(
Vec
(
3
,
Valid
(
UInt
())))
// send miss request to miss queue
val
miss_req
=
DecoupledIO
(
new
MissReq
)
})
...
...
@@ -58,6 +61,17 @@ class StorePipe extends DCacheModule
def
wayMap
[
T
<:
Data
](
f
:
Int
=>
T
)
=
VecInit
((
0
until
nWays
).
map
(
f
))
val
s1_tag_eq_way
=
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
).
tag
===
(
get_tag
(
s1_addr
))).
asUInt
val
s1_tag_match_way
=
wayMap
((
w
:
Int
)
=>
s1_tag_eq_way
(
w
)
&&
meta_resp
(
w
).
coh
.
isValid
()).
asUInt
val
s1_tag_match
=
s1_tag_match_way
.
orR
val
s1_hit_meta
=
Mux1H
(
s1_tag_match_way
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
)))
val
s1_hit_state
=
s1_hit_meta
.
coh
// replacement policy
val
replacer
=
cacheParams
.
replacement
val
s1_repl_way_en
=
UIntToOH
(
replacer
.
way
)
val
s1_repl_meta
=
Mux1H
(
s1_repl_way_en
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
)))
when
(
io
.
miss_req
.
fire
())
{
replacer
.
miss
}
// stage 2
...
...
@@ -67,12 +81,19 @@ class StorePipe extends DCacheModule
dump_pipeline_reqs
(
"StorePipe s2"
,
s2_valid
,
s2_req
)
val
s2_tag_match_way
=
RegNext
(
s1_tag_match_way
)
val
s2_tag_match
=
s2_tag_match_way
.
orR
val
s2_hit_way
=
OHToUInt
(
s2_tag_match_way
,
nWays
)
val
s2_hit_state
=
Mux1H
(
s2_tag_match_way
,
wayMap
((
w
:
Int
)
=>
RegNext
(
meta_resp
(
w
).
coh
)))
val
s2_tag_match
=
RegNext
(
s1_tag_match
)
val
s2_hit_meta
=
RegNext
(
s1_hit_meta
)
val
s2_hit_state
=
RegNext
(
s1_hit_state
)
val
s2_has_permission
=
s2_hit_state
.
onAccess
(
s2_req
.
cmd
).
_1
val
s2_new_hit_state
=
s2_hit_state
.
onAccess
(
s2_req
.
cmd
).
_3
val
s2_repl_meta
=
RegNext
(
s1_repl_meta
)
val
s2_repl_way_en
=
RegNext
(
s1_repl_way_en
)
val
s2_old_meta
=
Mux
(
s2_tag_match
,
s2_hit_meta
,
s2_repl_meta
)
val
s2_way_en
=
Mux
(
s2_tag_match
,
s2_tag_match_way
,
s2_repl_way_en
)
// we not only need permissions
// we also require that state does not change on hit
// thus we require new_hit_state === old_hit_state
...
...
@@ -85,16 +106,24 @@ class StorePipe extends DCacheModule
val
s2_hit
=
s2_tag_match
&&
s2_has_permission
&&
s2_hit_state
===
s2_new_hit_state
val
s2_nack
=
Wire
(
Bool
())
// when req got nacked, upper levels should replay this request
// the same set is busy
val
s2_nack_hit
=
RegNext
(
s1_nack
)
val
s2_nack_set_busy
=
s2_valid
&&
false
.
B
// can no allocate mshr for store miss
val
s2_nack_no_mshr
=
io
.
miss_req
.
valid
&&
!
io
.
miss_req
.
ready
// Bank conflict on data arrays
// For now, we use DuplicatedDataArray, so no bank conflicts
val
s2_nack_data
=
false
.
B
s2_nack
:=
s2_nack_hit
||
s2_nack_set_busy
s2_nack
:=
s2_nack_hit
||
s2_nack_no_mshr
||
s2_nack_data
val
s2_info
=
p
"tag match: $s2_tag_match hasPerm: $s2_has_permission"
+
p
" hit state: $s2_hit_state new state: $s2_new_hit_state s2_nack: $s2_nack\n"
// deal with data
val
data_resp
=
io
.
data_resp
val
s2_data
=
data_resp
(
s2_hit_way
)
val
s2_data
=
Mux1H
(
s2_tag_match_way
,
data_resp
)
val
s2_data_decoded
=
(
0
until
blockRows
)
map
{
r
=>
(
0
until
rowWords
)
map
{
w
=>
val
data
=
s2_data
(
r
)(
encWordBits
*
(
w
+
1
)
-
1
,
encWordBits
*
w
)
...
...
@@ -139,22 +168,33 @@ class StorePipe extends DCacheModule
dump_pipeline_valids
(
"StorePipe s2"
,
"s2_hit"
,
s2_valid
&&
s2_hit
)
dump_pipeline_valids
(
"StorePipe s2"
,
"s2_nack"
,
s2_valid
&&
s2_nack
)
dump_pipeline_valids
(
"StorePipe s2"
,
"s2_nack_hit"
,
s2_valid
&&
s2_nack_hit
)
dump_pipeline_valids
(
"StorePipe s2"
,
"s2_nack_set_busy"
,
s2_valid
&&
s2_nack_set_busy
)
dump_pipeline_valids
(
"StorePipe s2"
,
"s2_nack_no_mshr"
,
s2_valid
&&
s2_nack_no_mshr
)
dump_pipeline_valids
(
"StorePipe s2"
,
"s2_nack_data"
,
s2_valid
&&
s2_nack_data
)
// send load miss to miss queue
io
.
miss_req
.
valid
:=
s2_valid
&&
!
s2_nack_hit
&&
!
s2_nack_data
&&
!
s2_hit
io
.
miss_req
.
bits
.
cmd
:=
s2_req
.
cmd
io
.
miss_req
.
bits
.
addr
:=
get_block_addr
(
s2_req
.
addr
)
io
.
miss_req
.
bits
.
tag_match
:=
s2_tag_match
io
.
miss_req
.
bits
.
way_en
:=
s2_way_en
io
.
miss_req
.
bits
.
old_meta
:=
s2_old_meta
io
.
miss_req
.
bits
.
client_id
:=
s2_req
.
meta
.
id
val
resp
=
Wire
(
Valid
(
new
DCacheLineResp
))
resp
.
valid
:=
s2_valid
resp
.
bits
.
data
:=
DontCare
resp
.
bits
.
meta
:=
s2_req
.
meta
resp
.
bits
.
miss
:=
!
s2_hit
resp
.
bits
.
nack
:=
s2_nack
resp
.
bits
.
miss
:=
!
s2_hit
||
s2_nack
resp
.
bits
.
replay
:=
resp
.
bits
.
miss
&&
(!
io
.
miss_req
.
fire
()
||
s2_nack
)
io
.
lsu
.
resp
.
valid
:=
resp
.
valid
io
.
lsu
.
resp
.
bits
:=
resp
.
bits
assert
(!(
resp
.
valid
&&
!
io
.
lsu
.
resp
.
ready
))
when
(
resp
.
valid
)
{
XSDebug
(
s
"StorePipe resp: data: %x id: %d replay
: %b miss: %b nack
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
nack
)
XSDebug
(
s
"StorePipe resp: data: %x id: %d replay
ed_req: %b miss: %b need_replay
: %b\n"
,
resp
.
bits
.
data
,
resp
.
bits
.
meta
.
id
,
resp
.
bits
.
meta
.
replay
,
resp
.
bits
.
miss
,
resp
.
bits
.
replay
)
}
io
.
inflight_req_idxes
(
0
).
valid
:=
io
.
lsu
.
req
.
valid
...
...
src/main/scala/xiangshan/cache/uncache.scala
浏览文件 @
3726264a
...
...
@@ -110,7 +110,7 @@ class MMIOEntry(edge: TLEdgeOut) extends DCacheModule
// meta data should go with the response
io
.
resp
.
bits
.
meta
:=
req
.
meta
io
.
resp
.
bits
.
miss
:=
false
.
B
io
.
resp
.
bits
.
nack
:=
false
.
B
io
.
resp
.
bits
.
replay
:=
false
.
B
when
(
io
.
resp
.
fire
())
{
state
:=
s_invalid
...
...
src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
浏览文件 @
3726264a
...
...
@@ -47,7 +47,7 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
}
val
refill
=
new
Bundle
()
{
val
wen
=
Input
(
Vec
(
size
,
Bool
()))
val
d
cache
=
Input
(
new
DCacheLineResp
)
val
d
ata
=
Input
(
UInt
((
cfg
.
blockBytes
*
8
).
W
)
)
}
val
needForward
=
Input
(
Vec
(
nchannel
,
Vec
(
2
,
UInt
(
size
.
W
))))
val
forward
=
Vec
(
nchannel
,
Flipped
(
new
LoadForwardQueryIO
))
...
...
@@ -106,9 +106,7 @@ class LSQueueData(size: Int, nchannel: Int) extends XSModule with HasDCacheParam
}
// split dcache result into words
val
words
=
VecInit
((
0
until
blockWords
)
map
{
i
=>
io
.
refill
.
dcache
.
data
(
DataBits
*
(
i
+
1
)
-
1
,
DataBits
*
i
)
})
val
words
=
VecInit
((
0
until
blockWords
)
map
{
i
=>
io
.
refill
.
data
(
DataBits
*
(
i
+
1
)
-
1
,
DataBits
*
i
)})
(
0
until
size
).
map
(
i
=>
{
...
...
@@ -248,7 +246,7 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
val
forward
=
Vec
(
LoadPipelineWidth
,
Flipped
(
new
LoadForwardQueryIO
))
val
commits
=
Flipped
(
new
RoqCommitIO
)
val
rollback
=
Output
(
Valid
(
new
Redirect
))
val
dcache
=
new
DCacheLineIO
val
dcache
=
Flipped
(
ValidIO
(
new
Refill
))
val
uncache
=
new
DCacheWordIO
val
roqDeqPtr
=
Input
(
new
RoqPtr
)
val
exceptionAddr
=
new
ExceptionAddrIO
...
...
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
浏览文件 @
3726264a
...
...
@@ -63,12 +63,12 @@ class LoadQueue extends XSModule
val
enq
=
new
LqEnqIO
val
brqRedirect
=
Input
(
Valid
(
new
Redirect
))
val
loadIn
=
Vec
(
LoadPipelineWidth
,
Flipped
(
Valid
(
new
LsPipelineBundle
)))
val
storeIn
=
Vec
(
StorePipelineWidth
,
Flipped
(
Valid
(
new
LsPipelineBundle
)))
// FIXME: Valid() only
val
storeIn
=
Vec
(
StorePipelineWidth
,
Flipped
(
Valid
(
new
LsPipelineBundle
)))
val
ldout
=
Vec
(
2
,
DecoupledIO
(
new
ExuOutput
))
// writeback int load
val
load_s1
=
Vec
(
LoadPipelineWidth
,
Flipped
(
new
LoadForwardQueryIO
))
val
commits
=
Flipped
(
new
RoqCommitIO
)
val
rollback
=
Output
(
Valid
(
new
Redirect
))
// replay now starts from load instead of store
val
dcache
=
new
DCacheLineIO
val
dcache
=
Flipped
(
ValidIO
(
new
Refill
))
val
uncache
=
new
DCacheWordIO
val
roqDeqPtr
=
Input
(
new
RoqPtr
)
val
exceptionAddr
=
new
ExceptionAddrIO
...
...
@@ -83,7 +83,7 @@ class LoadQueue extends XSModule
val
writebacked
=
RegInit
(
VecInit
(
List
.
fill
(
LoadQueueSize
)(
false
.
B
)))
// inst has been writebacked to CDB
val
commited
=
Reg
(
Vec
(
LoadQueueSize
,
Bool
()))
// inst has been writebacked to CDB
val
miss
=
Reg
(
Vec
(
LoadQueueSize
,
Bool
()))
// load inst missed, waiting for miss queue to accept miss request
val
listening
=
Reg
(
Vec
(
LoadQueueSize
,
Bool
()))
// waiting for refill result
//
val listening = Reg(Vec(LoadQueueSize, Bool())) // waiting for refill result
val
pending
=
Reg
(
Vec
(
LoadQueueSize
,
Bool
()))
// mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
val
debug_mmio
=
Reg
(
Vec
(
LoadQueueSize
,
Bool
()))
// mmio: inst is an mmio inst
...
...
@@ -124,7 +124,7 @@ class LoadQueue extends XSModule
writebacked
(
index
)
:=
false
.
B
commited
(
index
)
:=
false
.
B
miss
(
index
)
:=
false
.
B
listening
(
index
)
:=
false
.
B
//
listening(index) := false.B
pending
(
index
)
:=
false
.
B
}
io
.
enq
.
resp
(
i
)
:=
lqIdx
...
...
@@ -194,7 +194,7 @@ class LoadQueue extends XSModule
val
dcacheMissed
=
io
.
loadIn
(
i
).
bits
.
miss
&&
!
io
.
loadIn
(
i
).
bits
.
mmio
miss
(
loadWbIndex
)
:=
dcacheMissed
&&
!
io
.
loadIn
(
i
).
bits
.
uop
.
cf
.
exceptionVec
.
asUInt
.
orR
listening
(
loadWbIndex
)
:=
dcacheMissed
//
listening(loadWbIndex) := dcacheMissed
pending
(
loadWbIndex
)
:=
io
.
loadIn
(
i
).
bits
.
mmio
&&
!
io
.
loadIn
(
i
).
bits
.
uop
.
cf
.
exceptionVec
.
asUInt
.
orR
}
}
...
...
@@ -207,83 +207,78 @@ class LoadQueue extends XSModule
* (3) dcache response: datavalid
* (4) writeback to ROB: writeback
*/
val
inflightReqs
=
RegInit
(
VecInit
(
Seq
.
fill
(
cfg
.
nLoadMissEntries
)(
0.
U
.
asTypeOf
(
new
InflightBlockInfo
))))
val
inflightReqFull
=
inflightReqs
.
map
(
req
=>
req
.
valid
).
reduce
(
_
&&
_
)
val
reqBlockIndex
=
PriorityEncoder
(~
VecInit
(
inflightReqs
.
map
(
req
=>
req
.
valid
)).
asUInt
)
val
missRefillSelVec
=
VecInit
(
(
0
until
LoadQueueSize
).
map
{
i
=>
val
inflight
=
inflightReqs
.
map
(
req
=>
req
.
valid
&&
req
.
block_addr
===
get_block_addr
(
dataModule
.
io
.
rdata
(
i
).
paddr
)).
reduce
(
_
||
_
)
allocated
(
i
)
&&
miss
(
i
)
&&
!
inflight
})
val
missRefillSel
=
getFirstOne
(
missRefillSelVec
,
deqMask
)
val
missRefillBlockAddr
=
get_block_addr
(
dataModule
.
io
.
rdata
(
missRefillSel
).
paddr
)
io
.
dcache
.
req
.
valid
:=
missRefillSelVec
.
asUInt
.
orR
io
.
dcache
.
req
.
bits
.
cmd
:=
MemoryOpConstants
.
M_XRD
io
.
dcache
.
req
.
bits
.
addr
:=
missRefillBlockAddr
io
.
dcache
.
req
.
bits
.
data
:=
DontCare
io
.
dcache
.
req
.
bits
.
mask
:=
DontCare
io
.
dcache
.
req
.
bits
.
meta
.
id
:=
DontCare
io
.
dcache
.
req
.
bits
.
meta
.
vaddr
:=
DontCare
// dataModule.io.rdata(missRefillSel).vaddr
io
.
dcache
.
req
.
bits
.
meta
.
paddr
:=
missRefillBlockAddr
io
.
dcache
.
req
.
bits
.
meta
.
uop
:=
uop
(
missRefillSel
)
io
.
dcache
.
req
.
bits
.
meta
.
mmio
:=
false
.
B
// mmio(missRefillSel)
io
.
dcache
.
req
.
bits
.
meta
.
tlb_miss
:=
false
.
B
io
.
dcache
.
req
.
bits
.
meta
.
mask
:=
DontCare
io
.
dcache
.
req
.
bits
.
meta
.
replay
:=
false
.
B
io
.
dcache
.
resp
.
ready
:=
true
.
B
assert
(!(
debug_mmio
(
missRefillSel
)
&&
io
.
dcache
.
req
.
valid
))
when
(
io
.
dcache
.
req
.
fire
())
{
miss
(
missRefillSel
)
:=
false
.
B
listening
(
missRefillSel
)
:=
true
.
B
// val inflightReqs = RegInit(VecInit(Seq.fill(cfg.nLoadMissEntries)(0.U.asTypeOf(new InflightBlockInfo))))
// val inflightReqFull = inflightReqs.map(req => req.valid).reduce(_&&_)
// val reqBlockIndex = PriorityEncoder(~VecInit(inflightReqs.map(req => req.valid)).asUInt)
// val missRefillSelVec = VecInit(
// (0 until LoadQueueSize).map{ i =>
// val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(dataModule.io.rdata(i).paddr)).reduce(_||_)
// allocated(i) && miss(i) && !inflight
// })
// val missRefillSel = getFirstOne(missRefillSelVec, deqMask)
// val missRefillBlockAddr = get_block_addr(dataModule.io.rdata(missRefillSel).paddr)
// io.dcache.req.valid := missRefillSelVec.asUInt.orR
// io.dcache.req.bits.cmd := MemoryOpConstants.M_XRD
// io.dcache.req.bits.addr := missRefillBlockAddr
// io.dcache.req.bits.data := DontCare
// io.dcache.req.bits.mask := DontCare
// io.dcache.req.bits.meta.id := DontCare
// io.dcache.req.bits.meta.vaddr := DontCare // dataModule.io.rdata(missRefillSel).vaddr
// io.dcache.req.bits.meta.paddr := missRefillBlockAddr
// io.dcache.req.bits.meta.uop := uop(missRefillSel)
// io.dcache.req.bits.meta.mmio := false.B // dataModule.io.rdata(missRefillSel).mmio
// io.dcache.req.bits.meta.tlb_miss := false.B
// io.dcache.req.bits.meta.mask := DontCare
// io.dcache.req.bits.meta.replay := false.B
// assert(!(dataModule.io.rdata(missRefillSel).mmio && io.dcache.req.valid))
// when(io.dcache.req.fire()) {
// miss(missRefillSel) := false.B
// listening(missRefillSel) := true.B
// mark this block as inflight
inflightReqs
(
reqBlockIndex
).
valid
:=
true
.
B
inflightReqs
(
reqBlockIndex
).
block_addr
:=
missRefillBlockAddr
assert
(!
inflightReqs
(
reqBlockIndex
).
valid
)
}
when
(
io
.
dcache
.
resp
.
fire
())
{
val
inflight
=
inflightReqs
.
map
(
req
=>
req
.
valid
&&
req
.
block_addr
===
get_block_addr
(
io
.
dcache
.
resp
.
bits
.
meta
.
paddr
)).
reduce
(
_
||
_
)
assert
(
inflight
)
for
(
i
<-
0
until
cfg
.
nLoadMissEntries
)
{
when
(
inflightReqs
(
i
).
valid
&&
inflightReqs
(
i
).
block_addr
===
get_block_addr
(
io
.
dcache
.
resp
.
bits
.
meta
.
paddr
))
{
inflightReqs
(
i
).
valid
:=
false
.
B
}
}
}
when
(
io
.
dcache
.
req
.
fire
()){
XSDebug
(
"miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n"
,
io
.
dcache
.
req
.
bits
.
meta
.
uop
.
cf
.
pc
,
io
.
dcache
.
req
.
bits
.
meta
.
uop
.
roqIdx
.
asUInt
,
io
.
dcache
.
req
.
bits
.
meta
.
uop
.
lqIdx
.
asUInt
,
io
.
dcache
.
req
.
bits
.
addr
,
io
.
dcache
.
req
.
bits
.
meta
.
vaddr
)
}
when
(
io
.
dcache
.
resp
.
fire
()){
XSDebug
(
"miss resp: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x data %x\n"
,
io
.
dcache
.
resp
.
bits
.
meta
.
uop
.
cf
.
pc
,
io
.
dcache
.
resp
.
bits
.
meta
.
uop
.
roqIdx
.
asUInt
,
io
.
dcache
.
resp
.
bits
.
meta
.
uop
.
lqIdx
.
asUInt
,
io
.
dcache
.
resp
.
bits
.
meta
.
paddr
,
io
.
dcache
.
resp
.
bits
.
data
)
// inflightReqs(reqBlockIndex).valid := true.B
// inflightReqs(reqBlockIndex).block_addr := missRefillBlockAddr
// assert(!inflightReqs(reqBlockIndex).valid)
// }
// when(io.dcache.resp.fire()) {
// val inflight = inflightReqs.map(req => req.valid && req.block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)).reduce(_||_)
// assert(inflight)
// for (i <- 0 until cfg.nLoadMissEntries) {
// when (inflightReqs(i).valid && inflightReqs(i).block_addr === get_block_addr(io.dcache.resp.bits.meta.paddr)) {
// inflightReqs(i).valid := false.B
// }
// }
// }
// when(io.dcache.req.fire()){
// XSDebug("miss req: pc:0x%x roqIdx:%d lqIdx:%d (p)addr:0x%x vaddr:0x%x\n",
// io.dcache.req.bits.meta.uop.cf.pc, io.dcache.req.bits.meta.uop.roqIdx.asUInt, io.dcache.req.bits.meta.uop.lqIdx.asUInt,
// io.dcache.req.bits.addr, io.dcache.req.bits.meta.vaddr
// )
// }
when
(
io
.
dcache
.
valid
)
{
XSDebug
(
"miss resp: paddr:0x%x data %x\n"
,
io
.
dcache
.
bits
.
addr
,
io
.
dcache
.
bits
.
data
)
}
// Refill 64 bit in a cycle
// Refill data comes back from io.dcache.resp
dataModule
.
io
.
refill
.
d
cache
:=
io
.
dcache
.
resp
.
bits
dataModule
.
io
.
refill
.
d
ata
:=
io
.
dcache
.
bits
.
data
(
0
until
LoadQueueSize
).
map
(
i
=>
{
val
blockMatch
=
get_block_addr
(
dataModule
.
io
.
rdata
(
i
).
paddr
)
===
io
.
dcache
.
resp
.
bits
.
meta
.
paddr
val
blockMatch
=
get_block_addr
(
dataModule
.
io
.
rdata
(
i
).
paddr
)
===
get_block_addr
(
io
.
dcache
.
bits
.
addr
)
dataModule
.
io
.
refill
.
wen
(
i
)
:=
false
.
B
when
(
allocated
(
i
)
&&
listening
(
i
)
&&
blockMatch
&&
io
.
dcache
.
resp
.
fire
()
)
{
when
(
allocated
(
i
)
&&
miss
(
i
)
&&
blockMatch
&&
io
.
dcache
.
valid
)
{
dataModule
.
io
.
refill
.
wen
(
i
)
:=
true
.
B
datavalid
(
i
)
:=
true
.
B
listening
(
i
)
:=
false
.
B
miss
(
i
)
:=
false
.
B
}
})
...
...
@@ -417,7 +412,7 @@ class LoadQueue extends XSModule
val
lqViolationVec
=
RegNext
(
VecInit
((
0
until
LoadQueueSize
).
map
(
j
=>
{
val
addrMatch
=
allocated
(
j
)
&&
io
.
storeIn
(
i
).
bits
.
paddr
(
PAddrBits
-
1
,
3
)
===
dataModule
.
io
.
rdata
(
j
).
paddr
(
PAddrBits
-
1
,
3
)
val
entryNeedCheck
=
toEnqPtrMask
(
j
)
&&
addrMatch
&&
(
datavalid
(
j
)
||
listening
(
j
)
||
miss
(
j
))
val
entryNeedCheck
=
toEnqPtrMask
(
j
)
&&
addrMatch
&&
(
datavalid
(
j
)
||
miss
(
j
))
// TODO: update refilled data
val
violationVec
=
(
0
until
8
).
map
(
k
=>
dataModule
.
io
.
rdata
(
j
).
mask
(
k
)
&&
io
.
storeIn
(
i
).
bits
.
mask
(
k
))
Cat
(
violationVec
).
orR
()
&&
entryNeedCheck
...
...
@@ -562,7 +557,7 @@ class LoadQueue extends XSModule
dataModule
.
io
.
uncacheWrite
(
deqPtr
,
io
.
uncache
.
resp
.
bits
.
data
(
XLEN
-
1
,
0
))
dataModule
.
io
.
uncache
.
wen
:=
true
.
B
XSDebug
(
"uncache resp: data %x\n"
,
io
.
dcache
.
resp
.
bits
.
data
)
XSDebug
(
"uncache resp: data %x\n"
,
io
.
dcache
.
bits
.
data
)
}
// Read vaddr for mem exception
...
...
@@ -628,7 +623,7 @@ class LoadQueue extends XSModule
PrintFlag
(
allocated
(
i
)
&&
writebacked
(
i
),
"w"
)
PrintFlag
(
allocated
(
i
)
&&
commited
(
i
),
"c"
)
PrintFlag
(
allocated
(
i
)
&&
miss
(
i
),
"m"
)
PrintFlag
(
allocated
(
i
)
&&
listening
(
i
),
"l"
)
//
PrintFlag(allocated(i) && listening(i), "l")
PrintFlag
(
allocated
(
i
)
&&
pending
(
i
),
"p"
)
XSDebug
(
false
,
true
.
B
,
" "
)
if
(
i
%
4
==
3
||
i
==
LoadQueueSize
-
1
)
XSDebug
(
false
,
true
.
B
,
"\n"
)
...
...
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
浏览文件 @
3726264a
...
...
@@ -21,7 +21,7 @@ class LoadUnit_S0 extends XSModule {
val
in
=
Flipped
(
Decoupled
(
new
ExuInput
))
val
out
=
Decoupled
(
new
LsPipelineBundle
)
val
dtlbReq
=
DecoupledIO
(
new
TlbReq
)
val
dcacheReq
=
DecoupledIO
(
new
DCache
Loa
dReq
)
val
dcacheReq
=
DecoupledIO
(
new
DCache
Wor
dReq
)
})
val
s0_uop
=
io
.
in
.
bits
.
uop
...
...
@@ -82,27 +82,26 @@ class LoadUnit_S1 extends XSModule {
val
in
=
Flipped
(
Decoupled
(
new
LsPipelineBundle
))
val
out
=
Decoupled
(
new
LsPipelineBundle
)
val
dtlbResp
=
Flipped
(
DecoupledIO
(
new
TlbResp
))
val
tlbFeedback
=
ValidIO
(
new
TlbFeedback
)
val
dcachePAddr
=
Output
(
UInt
(
PAddrBits
.
W
))
val
dcacheKill
=
Output
(
Bool
())
val
sbuffer
=
new
LoadForwardQueryIO
val
lsq
=
new
LoadForwardQueryIO
})
val
s1_uop
=
io
.
in
.
bits
.
uop
val
s1_paddr
=
io
.
dtlbResp
.
bits
.
paddr
val
s1_exception
=
io
.
out
.
bits
.
uop
.
cf
.
exceptionVec
.
asUInt
.
orR
val
s1_tlb_miss
=
io
.
dtlbResp
.
bits
.
miss
val
s1_mmio
=
!
s1_tlb_miss
&&
AddressSpace
.
isMMIO
(
s1_paddr
)
&&
!
io
.
out
.
bits
.
uop
.
cf
.
exceptionVec
.
asUInt
.
orR
val
s1_mmio
=
!
s1_tlb_miss
&&
AddressSpace
.
isMMIO
(
s1_paddr
)
val
s1_mask
=
io
.
in
.
bits
.
mask
io
.
out
.
bits
:=
io
.
in
.
bits
// forwardXX field will be updated in s1
io
.
dtlbResp
.
ready
:=
true
.
B
// feedback tlb result to RS
io
.
tlbFeedback
.
valid
:=
io
.
in
.
valid
io
.
tlbFeedback
.
bits
.
hit
:=
!
s1_tlb_miss
io
.
tlbFeedback
.
bits
.
roqIdx
:=
s1_uop
.
roqIdx
// TOOD: PMA check
io
.
dcachePAddr
:=
s1_paddr
io
.
dcacheKill
:=
s1_tlb_miss
||
s1_exception
||
s1_mmio
// load forward query datapath
io
.
sbuffer
.
valid
:=
io
.
in
.
valid
...
...
@@ -119,9 +118,9 @@ class LoadUnit_S1 extends XSModule {
io
.
lsq
.
mask
:=
s1_mask
io
.
lsq
.
pc
:=
s1_uop
.
cf
.
pc
// FIXME: remove it
io
.
out
.
valid
:=
io
.
in
.
valid
&&
!
s1_tlb_miss
io
.
out
.
valid
:=
io
.
in
.
valid
//
&& !s1_tlb_miss
io
.
out
.
bits
.
paddr
:=
s1_paddr
io
.
out
.
bits
.
mmio
:=
s1_mmio
io
.
out
.
bits
.
mmio
:=
s1_mmio
&&
!
s1_exception
io
.
out
.
bits
.
tlbMiss
:=
s1_tlb_miss
io
.
out
.
bits
.
uop
.
cf
.
exceptionVec
(
loadPageFault
)
:=
io
.
dtlbResp
.
bits
.
excp
.
pf
.
ld
...
...
@@ -136,6 +135,7 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val
io
=
IO
(
new
Bundle
()
{
val
in
=
Flipped
(
Decoupled
(
new
LsPipelineBundle
))
val
out
=
Decoupled
(
new
LsPipelineBundle
)
val
tlbFeedback
=
ValidIO
(
new
TlbFeedback
)
val
dcacheResp
=
Flipped
(
DecoupledIO
(
new
DCacheWordResp
))
val
lsq
=
new
LoadForwardQueryIO
val
sbuffer
=
new
LoadForwardQueryIO
...
...
@@ -144,12 +144,20 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
val
s2_uop
=
io
.
in
.
bits
.
uop
val
s2_mask
=
io
.
in
.
bits
.
mask
val
s2_paddr
=
io
.
in
.
bits
.
paddr
val
s2_tlb_miss
=
io
.
in
.
bits
.
tlbMiss
val
s2_mmio
=
io
.
in
.
bits
.
mmio
val
s2_exception
=
io
.
in
.
bits
.
uop
.
cf
.
exceptionVec
.
asUInt
.
orR
val
s2_cache_miss
=
io
.
dcacheResp
.
bits
.
miss
val
s2_cache_nack
=
io
.
dcacheResp
.
bits
.
nack
val
s2_cache_replay
=
io
.
dcacheResp
.
bits
.
replay
io
.
dcacheResp
.
ready
:=
true
.
B
assert
(!(
io
.
in
.
valid
&&
!
io
.
dcacheResp
.
valid
),
"DCache response got lost"
)
val
dcacheShouldResp
=
!(
s2_tlb_miss
||
s2_exception
||
s2_mmio
)
assert
(!(
io
.
in
.
valid
&&
dcacheShouldResp
&&
!
io
.
dcacheResp
.
valid
),
"DCache response got lost"
)
// feedback tlb result to RS
io
.
tlbFeedback
.
valid
:=
io
.
in
.
valid
io
.
tlbFeedback
.
bits
.
hit
:=
!
s2_tlb_miss
&&
(!
s2_cache_replay
||
s2_mmio
)
io
.
tlbFeedback
.
bits
.
roqIdx
:=
s2_uop
.
roqIdx
val
forwardMask
=
io
.
out
.
bits
.
forwardMask
val
forwardData
=
io
.
out
.
bits
.
forwardData
...
...
@@ -178,13 +186,13 @@ class LoadUnit_S2 extends XSModule with HasLoadHelper {
// TODO: ECC check
io
.
out
.
valid
:=
io
.
in
.
valid
io
.
out
.
valid
:=
io
.
in
.
valid
&&
!
s2_tlb_miss
&&
(!
s2_cache_replay
||
s2_mmio
)
// Inst will be canceled in store queue / lsq,
// so we do not need to care about flush in load / store unit's out.valid
io
.
out
.
bits
:=
io
.
in
.
bits
io
.
out
.
bits
.
data
:=
rdataPartialLoad
io
.
out
.
bits
.
miss
:=
(
s2_cache_miss
||
s2_cache_nack
)
&&
!
fullForward
io
.
out
.
bits
.
mmio
:=
io
.
in
.
bits
.
mmio
io
.
out
.
bits
.
miss
:=
s2_cache_miss
&&
!
fullForward
io
.
out
.
bits
.
mmio
:=
s2_
mmio
io
.
in
.
ready
:=
io
.
out
.
ready
||
!
io
.
in
.
valid
...
...
@@ -234,14 +242,14 @@ class LoadUnit extends XSModule with HasLoadHelper {
PipelineConnect
(
load_s0
.
io
.
out
,
load_s1
.
io
.
in
,
true
.
B
,
load_s0
.
io
.
out
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirect
))
load_s1
.
io
.
dtlbResp
<>
io
.
dtlb
.
resp
load_s1
.
io
.
tlbFeedback
<>
io
.
tlbFeedback
io
.
dcache
.
s1_paddr
<>
load_s1
.
io
.
dcachePAddr
io
.
dcache
.
s1_kill
:=
DontCare
// FIXME
io
.
dcache
.
s1_kill
<>
load_s1
.
io
.
dcacheKill
load_s1
.
io
.
sbuffer
<>
io
.
sbuffer
load_s1
.
io
.
lsq
<>
io
.
lsq
.
forward
PipelineConnect
(
load_s1
.
io
.
out
,
load_s2
.
io
.
in
,
true
.
B
,
load_s1
.
io
.
out
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirect
))
load_s2
.
io
.
tlbFeedback
<>
io
.
tlbFeedback
load_s2
.
io
.
dcacheResp
<>
io
.
dcache
.
resp
load_s2
.
io
.
lsq
.
forwardData
<>
io
.
lsq
.
forward
.
forwardData
load_s2
.
io
.
lsq
.
forwardMask
<>
io
.
lsq
.
forward
.
forwardMask
...
...
@@ -302,4 +310,4 @@ class LoadUnit extends XSModule with HasLoadHelper {
when
(
io
.
fpout
.
fire
()){
XSDebug
(
"fpout %x\n"
,
io
.
fpout
.
bits
.
uop
.
cf
.
pc
)
}
}
\ No newline at end of file
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录