Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenXiangShan
XiangShan
提交
b35479a0
X
XiangShan
项目概览
OpenXiangShan
/
XiangShan
10 个月 前同步成功
通知
1183
Star
3914
Fork
526
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
X
XiangShan
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
b35479a0
编写于
2月 10, 2023
作者:
W
William Wang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/master' into constantin
上级
349f0b17
50c287a7
变更
30
展开全部
隐藏空白更改
内联
并排
Showing
30 changed file
with
1975 addition
and
220 deletion
+1975
-220
src/main/scala/top/Configs.scala
src/main/scala/top/Configs.scala
+3
-2
src/main/scala/utils/OverrideableQueue.scala
src/main/scala/utils/OverrideableQueue.scala
+41
-0
src/main/scala/xiangshan/Bundle.scala
src/main/scala/xiangshan/Bundle.scala
+8
-0
src/main/scala/xiangshan/Parameters.scala
src/main/scala/xiangshan/Parameters.scala
+4
-1
src/main/scala/xiangshan/XSCore.scala
src/main/scala/xiangshan/XSCore.scala
+8
-0
src/main/scala/xiangshan/XSTile.scala
src/main/scala/xiangshan/XSTile.scala
+4
-0
src/main/scala/xiangshan/backend/CtrlBlock.scala
src/main/scala/xiangshan/backend/CtrlBlock.scala
+12
-3
src/main/scala/xiangshan/backend/MemBlock.scala
src/main/scala/xiangshan/backend/MemBlock.scala
+103
-12
src/main/scala/xiangshan/backend/fu/CSR.scala
src/main/scala/xiangshan/backend/fu/CSR.scala
+27
-1
src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
+66
-10
src/main/scala/xiangshan/cache/dcache/loadpipe/LoadPipe.scala
...main/scala/xiangshan/cache/dcache/loadpipe/LoadPipe.scala
+31
-10
src/main/scala/xiangshan/cache/dcache/mainpipe/MainPipe.scala
...main/scala/xiangshan/cache/dcache/mainpipe/MainPipe.scala
+27
-5
src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala
...ain/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala
+66
-28
src/main/scala/xiangshan/cache/dcache/mainpipe/RefillPipe.scala
...in/scala/xiangshan/cache/dcache/mainpipe/RefillPipe.scala
+19
-3
src/main/scala/xiangshan/cache/dcache/meta/AsynchronousMetaArray.scala
...a/xiangshan/cache/dcache/meta/AsynchronousMetaArray.scala
+9
-9
src/main/scala/xiangshan/cache/dcache/meta/LegacyMetaArray.scala
...n/scala/xiangshan/cache/dcache/meta/LegacyMetaArray.scala
+0
-0
src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
+2
-0
src/main/scala/xiangshan/cache/mmu/TLB.scala
src/main/scala/xiangshan/cache/mmu/TLB.scala
+20
-18
src/main/scala/xiangshan/frontend/IFU.scala
src/main/scala/xiangshan/frontend/IFU.scala
+1
-0
src/main/scala/xiangshan/frontend/icache/ICacheMainPipe.scala
...main/scala/xiangshan/frontend/icache/ICacheMainPipe.scala
+1
-0
src/main/scala/xiangshan/frontend/icache/IPrefetch.scala
src/main/scala/xiangshan/frontend/icache/IPrefetch.scala
+1
-0
src/main/scala/xiangshan/mem/MemCommon.scala
src/main/scala/xiangshan/mem/MemCommon.scala
+37
-3
src/main/scala/xiangshan/mem/MemTrace.scala
src/main/scala/xiangshan/mem/MemTrace.scala
+28
-0
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
+2
-1
src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
+8
-1
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
+250
-113
src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
+1
-0
src/main/scala/xiangshan/mem/prefetch/BasePrefecher.scala
src/main/scala/xiangshan/mem/prefetch/BasePrefecher.scala
+22
-0
src/main/scala/xiangshan/mem/prefetch/L1PrefetchInterface.scala
...in/scala/xiangshan/mem/prefetch/L1PrefetchInterface.scala
+78
-0
src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala
src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala
+1096
-0
未找到文件。
src/main/scala/top/Configs.scala
浏览文件 @
b35479a0
...
...
@@ -161,7 +161,8 @@ class MinimalConfig(n: Int = 1) extends Config(
l3nWays
=
8
,
spSize
=
2
,
),
L2CacheParamsOpt
=
None
// remove L2 Cache
L2CacheParamsOpt
=
None
,
// remove L2 Cache
prefetcher
=
None
// if L2 pf_recv_node does not exist, disable SMS prefetcher
)
)
case
SoCParamsKey
=>
...
...
@@ -244,7 +245,7 @@ class WithNKBL2
)),
reqField
=
Seq
(
PreferCacheField
()),
echoField
=
Seq
(
DirtyField
()),
prefetch
=
Some
(
huancun
.
prefetch
.
BOPParameter
s
()),
prefetch
=
Some
(
huancun
.
prefetch
.
PrefetchReceiverParam
s
()),
enablePerf
=
true
,
sramDepthDiv
=
2
,
tagECC
=
Some
(
"secded"
),
...
...
src/main/scala/utils/OverrideableQueue.scala
0 → 100644
浏览文件 @
b35479a0
package
utils
import
chisel3._
import
chisel3.util._
/** Circular buffer of `n` entries whose enqueue side is never back-pressured.
  *
  * `io.in` is a plain `ValidIO`: every cycle `io.in.valid` is high the payload is
  * stored in the slot addressed by `wr_ptr` and `wr_ptr` advances, whether or not
  * that slot still holds unread data (the old content is overridden).
  * `io.out` is a `Decoupled` view of the slot addressed by `rd_ptr`; when it fires,
  * that slot's valid bit is cleared and `rd_ptr` advances.
  *
  * Note that an override does not move `rd_ptr`, so after an overflow the read
  * side may observe entries out of arrival order.
  *
  * Both pointers wrap at `n - 1`, so any `n >= 1` is supported. For power-of-two
  * sizes the wrap falls out of the natural `log2Up(n)`-bit overflow and no extra
  * compare logic is generated.
  *
  * @param gen hardware type of one queue entry
  * @param n   number of entries, must be at least 1
  */
class OverrideableQueue[T <: Data](gen: T, n: Int) extends Module {
  require(n > 0, "OverrideableQueue needs at least one entry")

  val io = IO(new Bundle() {
    val in = Flipped(ValidIO(gen))
    val out = Decoupled(gen)
  })
  val entries = Seq.fill(n){ Reg(gen) }
  val valids = Seq.fill(n){ RegInit(false.B) }
  val rd_ptr = RegInit(0.U(log2Up(n).W))
  val wr_ptr = RegInit(0.U(log2Up(n).W))

  // Successor of a pointer inside [0, n). A bare `ptr + 1.U` is only correct when
  // the pointer width covers exactly n values; otherwise the pointer would walk
  // into indices >= n that match no entry (writes dropped, read side stuck).
  private def nextPtr(ptr: UInt): UInt =
    if (n > 1 && isPow2(n)) ptr + 1.U
    else Mux(ptr === (n - 1).U, 0.U, ptr + 1.U)

  when(io.in.valid){
    wr_ptr := nextPtr(wr_ptr)
  }
  when(io.out.fire){
    rd_ptr := nextPtr(rd_ptr)
  }

  // One-hot decode of the write / read pointers, one bit per entry.
  val w_mask = (0 until n).map(i => i.U === wr_ptr)
  val r_mask = (0 until n).map(i => i.U === rd_ptr)

  // Dequeue: invalidate the slot that was just handed to the consumer.
  for((v, r) <- valids.zip(r_mask)){
    when(r && io.out.fire){
      v := false.B
    }
  }
  // Enqueue: declared after the dequeue loop on purpose, so that when the same
  // slot is read and written in one cycle the write wins (last connect) and the
  // slot stays valid with the new payload.
  for(((v, e), w) <- valids.zip(entries).zip(w_mask)){
    when(io.in.valid && w){
      v := true.B
      e := io.in.bits
    }
  }

  io.out.valid := Mux1H(r_mask, valids)
  io.out.bits := Mux1H(r_mask, entries)
}
src/main/scala/xiangshan/Bundle.scala
浏览文件 @
b35479a0
...
...
@@ -477,6 +477,14 @@ class CustomCSRCtrlIO(implicit p: Parameters) extends XSBundle {
// Prefetcher
val
l1I_pf_enable
=
Output
(
Bool
())
val
l2_pf_enable
=
Output
(
Bool
())
val
l1D_pf_enable
=
Output
(
Bool
())
val
l1D_pf_train_on_hit
=
Output
(
Bool
())
val
l1D_pf_enable_agt
=
Output
(
Bool
())
val
l1D_pf_enable_pht
=
Output
(
Bool
())
val
l1D_pf_active_threshold
=
Output
(
UInt
(
4.
W
))
val
l1D_pf_active_stride
=
Output
(
UInt
(
6.
W
))
val
l1D_pf_enable_stride
=
Output
(
Bool
())
val
l2_pf_store_only
=
Output
(
Bool
())
// ICache
val
icache_parity_enable
=
Output
(
Bool
())
// Labeled XiangShan
...
...
src/main/scala/xiangshan/Parameters.scala
浏览文件 @
b35479a0
...
...
@@ -30,6 +30,8 @@ import freechips.rocketchip.diplomacy.AddressSet
import
system.SoCParamsKey
import
huancun._
import
huancun.debug._
import
xiangshan.mem.prefetch.
{
PrefetcherParams
,
SMSParams
}
import
scala.math.min
case
object
XSTileKey
extends
Field
[
Seq
[
XSCoreParameters
]]
...
...
@@ -152,6 +154,7 @@ case class XSCoreParameters
LduCnt
=
2
,
StuCnt
=
2
),
prefetcher
:
Option
[
PrefetcherParams
]
=
Some
(
SMSParams
()),
LoadPipelineWidth
:
Int
=
2
,
StorePipelineWidth
:
Int
=
2
,
VecMemSrcInWidth
:
Int
=
2
,
...
...
@@ -237,7 +240,7 @@ case class XSCoreParameters
level
=
2
,
ways
=
8
,
sets
=
1024
,
// default 512KB L2
prefetch
=
Some
(
huancun
.
prefetch
.
BOPParameter
s
())
prefetch
=
Some
(
huancun
.
prefetch
.
PrefetchReceiverParam
s
())
)),
L2NBanks
:
Int
=
1
,
usePTWRepeater
:
Boolean
=
false
,
...
...
src/main/scala/xiangshan/XSCore.scala
浏览文件 @
b35479a0
...
...
@@ -31,6 +31,7 @@ import xiangshan.backend._
import
xiangshan.backend.exu.
{
ExuConfig
,
Wb2Ctrl
,
WbArbiterWrapper
}
import
xiangshan.cache.mmu._
import
xiangshan.frontend._
import
xiangshan.mem.L1PrefetchFuzzer
import
scala.collection.mutable.ListBuffer
...
...
@@ -327,6 +328,13 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)
exuBlocks
(
0
).
io
.
scheExtra
.
fpRfReadIn
.
get
<>
exuBlocks
(
1
).
io
.
scheExtra
.
fpRfReadOut
.
get
exuBlocks
(
0
).
io
.
scheExtra
.
fpStateReadIn
.
get
<>
exuBlocks
(
1
).
io
.
scheExtra
.
fpStateReadOut
.
get
for
((
c
,
e
)
<-
ctrlBlock
.
io
.
ld_pc_read
.
zip
(
exuBlocks
(
0
).
io
.
issue
.
get
)){
// read load pc at load s0
c
.
ptr
:=
e
.
bits
.
uop
.
cf
.
ftqPtr
c
.
offset
:=
e
.
bits
.
uop
.
cf
.
ftqOffset
}
// return load pc at load s2
memBlock
.
io
.
loadPc
<>
VecInit
(
ctrlBlock
.
io
.
ld_pc_read
.
map
(
_
.
data
))
memBlock
.
io
.
issue
<>
exuBlocks
(
0
).
io
.
issue
.
get
// By default, instructions do not have exceptions when they enter the function units.
memBlock
.
io
.
issue
.
map
(
_
.
bits
.
uop
.
clearExceptions
())
...
...
src/main/scala/xiangshan/XSTile.scala
浏览文件 @
b35479a0
...
...
@@ -126,6 +126,10 @@ class XSTile()(implicit p: Parameters) extends LazyModule
l2cache
match
{
case
Some
(
l2
)
=>
misc
.
l2_binder
.
get
:*=
l2
.
node
:*=
TLBuffer
()
:*=
TLBuffer
()
:*=
misc
.
l1_xbar
l2
.
pf_recv_node
.
map
(
recv
=>
{
println
(
"Connecting L1 prefetcher to L2!"
)
recv
:=
core
.
memBlock
.
pf_sender_opt
.
get
})
case
None
=>
}
...
...
src/main/scala/xiangshan/backend/CtrlBlock.scala
浏览文件 @
b35479a0
...
...
@@ -28,7 +28,7 @@ import xiangshan.backend.dispatch.{Dispatch, Dispatch2Rs, DispatchQueue}
import
xiangshan.backend.fu.PFEvent
import
xiangshan.backend.rename.
{
Rename
,
RenameTableWrapper
}
import
xiangshan.backend.rob.
{
Rob
,
RobCSRIO
,
RobLsqIO
}
import
xiangshan.frontend.
{
FtqRead
,
Ftq_RF_Components
}
import
xiangshan.frontend.
{
Ftq
Ptr
,
Ftq
Read
,
Ftq_RF_Components
}
import
xiangshan.mem.mdp.
{
LFST
,
SSIT
,
WaitTable
}
import
xiangshan.ExceptionNO._
import
xiangshan.backend.exu.ExuConfig
...
...
@@ -214,6 +214,7 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
val
lqCancelCnt
=
Input
(
UInt
(
log2Up
(
LoadQueueSize
+
1
).
W
))
val
sqCancelCnt
=
Input
(
UInt
(
log2Up
(
StoreQueueSize
+
1
).
W
))
val
sqDeq
=
Input
(
UInt
(
log2Ceil
(
EnsbufferWidth
+
1
).
W
))
val
ld_pc_read
=
Vec
(
exuParameters
.
LduCnt
,
Flipped
(
new
FtqRead
(
UInt
(
VAddrBits
.
W
))))
// from int block
val
exuRedirect
=
Vec
(
exuParameters
.
AluCnt
+
exuParameters
.
JmpCnt
,
Flipped
(
ValidIO
(
new
ExuOutput
)))
val
stIn
=
Vec
(
exuParameters
.
StuCnt
,
Flipped
(
ValidIO
(
new
ExuInput
)))
...
...
@@ -267,8 +268,11 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
val
fpDq
=
Module
(
new
DispatchQueue
(
dpParams
.
FpDqSize
,
RenameWidth
,
dpParams
.
FpDqDeqWidth
))
val
lsDq
=
Module
(
new
DispatchQueue
(
dpParams
.
LsDqSize
,
RenameWidth
,
dpParams
.
LsDqDeqWidth
))
val
redirectGen
=
Module
(
new
RedirectGenerator
)
// jumpPc (2) + redirects (1) + loadPredUpdate (1) + jalr_target (1) + robFlush (1)
val
pcMem
=
Module
(
new
SyncDataModuleTemplate
(
new
Ftq_RF_Components
,
FtqSize
,
6
,
1
,
"BackendPC"
))
// jumpPc (2) + redirects (1) + loadPredUpdate (1) + jalr_target (1) + [ld pc (LduCnt)] + robFlush (1)
val
pcMem
=
Module
(
new
SyncDataModuleTemplate
(
new
Ftq_RF_Components
,
FtqSize
,
6
+
exuParameters
.
LduCnt
,
1
,
"CtrlPcMem"
)
)
val
rob
=
outer
.
rob
.
module
pcMem
.
io
.
wen
.
head
:=
RegNext
(
io
.
frontend
.
fromFtq
.
pc_mem_wen
)
...
...
@@ -538,6 +542,11 @@ class CtrlBlockImp(outer: CtrlBlock)(implicit p: Parameters) extends LazyModuleI
val
jalrTargetRead
=
pcMem
.
io
.
rdata
(
4
).
startAddr
val
read_from_newest_entry
=
RegNext
(
jalrTargetReadPtr
)
===
RegNext
(
io
.
frontend
.
fromFtq
.
newest_entry_ptr
)
io
.
jalr_target
:=
Mux
(
read_from_newest_entry
,
RegNext
(
io
.
frontend
.
fromFtq
.
newest_entry_target
),
jalrTargetRead
)
for
(
i
<-
0
until
exuParameters
.
LduCnt
){
// load s0 -> get rdata (s1) -> reg next (s2) -> output (s2)
pcMem
.
io
.
raddr
(
i
+
5
)
:=
io
.
ld_pc_read
(
i
).
ptr
.
value
io
.
ld_pc_read
(
i
).
data
:=
pcMem
.
io
.
rdata
(
i
+
5
).
getPc
(
RegNext
(
io
.
ld_pc_read
(
i
).
offset
))
}
rob
.
io
.
hartId
:=
io
.
hartId
io
.
cpu_halt
:=
DelayN
(
rob
.
io
.
cpu_halt
,
5
)
...
...
src/main/scala/xiangshan/backend/MemBlock.scala
浏览文件 @
b35479a0
...
...
@@ -19,8 +19,9 @@ package xiangshan.backend
import
chipsalliance.rocketchip.config.Parameters
import
chisel3._
import
chisel3.util._
import
freechips.rocketchip.diplomacy.
{
LazyModule
,
LazyModuleImp
}
import
freechips.rocketchip.diplomacy.
{
BundleBridgeSource
,
LazyModule
,
LazyModuleImp
}
import
freechips.rocketchip.tile.HasFPUParameters
import
huancun.PrefetchRecv
import
utils._
import
utility._
import
xiangshan._
...
...
@@ -30,6 +31,7 @@ import xiangshan.backend.rob.RobLsqIO
import
xiangshan.cache._
import
xiangshan.cache.mmu.
{
VectorTlbPtwIO
,
TLBNonBlock
,
TlbReplace
}
import
xiangshan.mem._
import
xiangshan.mem.prefetch.
{
BasePrefecher
,
SMSParams
,
SMSPrefetcher
}
class
Std
(
implicit
p
:
Parameters
)
extends
FunctionUnit
{
io
.
in
.
ready
:=
true
.
B
...
...
@@ -43,6 +45,9 @@ class MemBlock()(implicit p: Parameters) extends LazyModule
val
dcache
=
LazyModule
(
new
DCacheWrapper
())
val
uncache
=
LazyModule
(
new
Uncache
())
val
pf_sender_opt
=
coreParams
.
prefetcher
.
map
(
_
=>
BundleBridgeSource
(()
=>
new
PrefetchRecv
)
)
lazy
val
module
=
new
MemBlockImp
(
this
)
...
...
@@ -69,6 +74,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
loadFastMatch
=
Vec
(
exuParameters
.
LduCnt
,
Input
(
UInt
(
exuParameters
.
LduCnt
.
W
)))
val
loadFastImm
=
Vec
(
exuParameters
.
LduCnt
,
Input
(
UInt
(
12.
W
)))
val
rsfeedback
=
Vec
(
exuParameters
.
StuCnt
,
new
MemRSFeedbackIO
)
val
loadPc
=
Vec
(
exuParameters
.
LduCnt
,
Input
(
UInt
(
VAddrBits
.
W
)))
// for hw prefetch
val
stIssuePtr
=
Output
(
new
SqPtr
())
val
int2vlsu
=
Flipped
(
new
Int2VLSUIO
)
val
vec2vlsu
=
Flipped
(
new
Vec2VLSUIO
)
...
...
@@ -79,10 +85,12 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
vlsu2vec
=
new
VLSU2VecIO
val
vlsu2int
=
new
VLSU2IntIO
val
vlsu2ctrl
=
new
VLSU2CtrlIO
// prefetch to l1 req
val
prefetch_req
=
Flipped
(
DecoupledIO
(
new
L1PrefetchReq
))
// misc
val
stIn
=
Vec
(
exuParameters
.
StuCnt
,
ValidIO
(
new
ExuInput
))
val
memoryViolation
=
ValidIO
(
new
Redirect
)
val
ptw
=
new
VectorTlbPtwIO
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
)
val
ptw
=
new
VectorTlbPtwIO
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
+
1
)
// load + store + hw prefetch
val
sfence
=
Input
(
new
SfenceBundle
)
val
tlbCsr
=
Input
(
new
TlbCsrBundle
)
val
fenceToSbuffer
=
Flipped
(
new
FenceToSbuffer
)
...
...
@@ -119,6 +127,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
csrCtrl
=
DelayN
(
io
.
csrCtrl
,
2
)
dcache
.
io
.
csr
.
distribute_csr
<>
csrCtrl
.
distribute_csr
dcache
.
io
.
l2_pf_store_only
:=
RegNext
(
io
.
csrCtrl
.
l2_pf_store_only
,
false
.
B
)
io
.
csrUpdate
:=
RegNext
(
dcache
.
io
.
csr
.
update
)
io
.
error
<>
RegNext
(
RegNext
(
dcache
.
io
.
error
))
when
(!
csrCtrl
.
cache_error_enable
){
...
...
@@ -131,6 +140,31 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
stdExeUnits
=
Seq
.
fill
(
exuParameters
.
StuCnt
)(
Module
(
new
StdExeUnit
))
val
stData
=
stdExeUnits
.
map
(
_
.
io
.
out
)
val
exeUnits
=
loadUnits
++
storeUnits
val
l1_pf_req
=
Wire
(
Decoupled
(
new
L1PrefetchReq
()))
val
prefetcherOpt
:
Option
[
BasePrefecher
]
=
coreParams
.
prefetcher
.
map
{
case
_:
SMSParams
=>
val
sms
=
Module
(
new
SMSPrefetcher
())
sms
.
io_agt_en
:=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_enable_agt
,
2
,
Some
(
false
.
B
))
sms
.
io_pht_en
:=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_enable_pht
,
2
,
Some
(
false
.
B
))
sms
.
io_act_threshold
:=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_active_threshold
,
2
,
Some
(
12.
U
))
sms
.
io_act_stride
:=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_active_stride
,
2
,
Some
(
30.
U
))
sms
.
io_stride_en
:=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_enable_stride
,
2
,
Some
(
true
.
B
))
sms
}
prefetcherOpt
.
foreach
(
pf
=>
{
val
pf_to_l2
=
ValidIODelay
(
pf
.
io
.
pf_addr
,
2
)
outer
.
pf_sender_opt
.
get
.
out
.
head
.
_1
.
addr_valid
:=
pf_to_l2
.
valid
outer
.
pf_sender_opt
.
get
.
out
.
head
.
_1
.
addr
:=
pf_to_l2
.
bits
outer
.
pf_sender_opt
.
get
.
out
.
head
.
_1
.
l2_pf_en
:=
RegNextN
(
io
.
csrCtrl
.
l2_pf_enable
,
2
,
Some
(
true
.
B
))
pf
.
io
.
enable
:=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_enable
,
2
,
Some
(
false
.
B
))
})
prefetcherOpt
match
{
case
Some
(
pf
)
=>
l1_pf_req
<>
pf
.
io
.
l1_req
case
None
=>
l1_pf_req
.
valid
:=
false
.
B
l1_pf_req
.
bits
:=
DontCare
}
val
pf_train_on_hit
=
RegNextN
(
io
.
csrCtrl
.
l1D_pf_train_on_hit
,
2
,
Some
(
true
.
B
))
loadUnits
.
zipWithIndex
.
map
(
x
=>
x
.
_1
.
suggestName
(
"LoadUnit_"
+
x
.
_2
))
storeUnits
.
zipWithIndex
.
map
(
x
=>
x
.
_1
.
suggestName
(
"StoreUnit_"
+
x
.
_2
))
...
...
@@ -159,6 +193,35 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io
.
otherFastWakeup
.
take
(
2
).
zip
(
loadUnits
.
map
(
_
.
io
.
fastUop
)).
foreach
{
case
(
a
,
b
)
=>
a
:=
b
}
val
stOut
=
io
.
writeback
.
drop
(
exuParameters
.
LduCnt
).
dropRight
(
exuParameters
.
StuCnt
)
// prefetch to l1 req
loadUnits
.
foreach
(
load_unit
=>
{
load_unit
.
io
.
prefetch_req
.
valid
<>
l1_pf_req
.
valid
load_unit
.
io
.
prefetch_req
.
bits
<>
l1_pf_req
.
bits
})
// when loadUnits(0) stage 0 is busy, hw prefetch will never use that pipeline
loadUnits
(
0
).
io
.
prefetch_req
.
bits
.
confidence
:=
0.
U
l1_pf_req
.
ready
:=
(
l1_pf_req
.
bits
.
confidence
>
0.
U
)
||
loadUnits
.
map
(!
_
.
io
.
ldin
.
valid
).
reduce
(
_
||
_
)
// l1 pf fuzzer interface
val
DebugEnableL1PFFuzzer
=
false
if
(
DebugEnableL1PFFuzzer
)
{
// l1 pf req fuzzer
val
fuzzer
=
Module
(
new
L1PrefetchFuzzer
())
fuzzer
.
io
.
vaddr
:=
DontCare
fuzzer
.
io
.
paddr
:=
DontCare
// override load_unit prefetch_req
loadUnits
.
foreach
(
load_unit
=>
{
load_unit
.
io
.
prefetch_req
.
valid
<>
fuzzer
.
io
.
req
.
valid
load_unit
.
io
.
prefetch_req
.
bits
<>
fuzzer
.
io
.
req
.
bits
})
fuzzer
.
io
.
req
.
ready
:=
l1_pf_req
.
ready
}
// TODO: fast load wakeup
val
lsq
=
Module
(
new
LsqWrappper
)
val
vlsq
=
Module
(
new
DummyVectorLsq
)
val
sbuffer
=
Module
(
new
Sbuffer
)
...
...
@@ -182,7 +245,11 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
tlb_st
=
Module
(
new
TLBNonBlock
(
exuParameters
.
StuCnt
,
1
,
sttlbParams
))
tlb_st
.
io
// let the module have name in waveform
})
val
dtlb
=
dtlb_ld
++
dtlb_st
val
dtlb_prefetch
=
VecInit
(
Seq
.
fill
(
1
){
val
tlb_prefetch
=
Module
(
new
TLBNonBlock
(
1
,
2
,
sttlbParams
))
tlb_prefetch
.
io
// let the module have name in waveform
})
val
dtlb
=
dtlb_ld
++
dtlb_st
++
dtlb_prefetch
val
dtlb_reqs
=
dtlb
.
map
(
_
.
requestor
).
flatten
val
dtlb_pmps
=
dtlb
.
map
(
_
.
pmp
).
flatten
dtlb
.
map
(
_
.
sfence
:=
sfence
)
...
...
@@ -192,7 +259,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
require
(
ldtlbParams
.
outReplace
==
sttlbParams
.
outReplace
)
require
(
ldtlbParams
.
outReplace
)
val
replace
=
Module
(
new
TlbReplace
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
,
ldtlbParams
))
val
replace
=
Module
(
new
TlbReplace
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
+
1
,
ldtlbParams
))
replace
.
io
.
apply_sep
(
dtlb_ld
.
map
(
_
.
replace
)
++
dtlb_st
.
map
(
_
.
replace
),
io
.
ptw
.
resp
.
bits
.
data
.
entry
.
tag
)
}
else
{
if
(
ldtlbParams
.
outReplace
)
{
...
...
@@ -209,10 +276,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
ptw_resp_v
=
RegNext
(
io
.
ptw
.
resp
.
valid
&&
!(
sfence
.
valid
&&
tlbcsr
.
satp
.
changed
),
init
=
false
.
B
)
io
.
ptw
.
resp
.
ready
:=
true
.
B
(
dtlb
.
map
(
a
=>
a
.
ptw
.
req
.
map
(
b
=>
b
)))
.
flatten
dtlb
.
flatMap
(
a
=>
a
.
ptw
.
req
)
.
zipWithIndex
.
map
{
case
(
tlb
,
i
)
=>
.
foreach
{
case
(
tlb
,
i
)
=>
tlb
<>
io
.
ptw
.
req
(
i
)
val
vector_hit
=
if
(
refillBothTlb
)
Cat
(
ptw_resp_next
.
vector
).
orR
else
if
(
i
<
exuParameters
.
LduCnt
)
Cat
(
ptw_resp_next
.
vector
.
take
(
exuParameters
.
LduCnt
)).
orR
...
...
@@ -220,12 +286,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
io
.
ptw
.
req
(
i
).
valid
:=
tlb
.
valid
&&
!(
ptw_resp_v
&&
vector_hit
&&
ptw_resp_next
.
data
.
entry
.
hit
(
tlb
.
bits
.
vpn
,
tlbcsr
.
satp
.
asid
,
allType
=
true
,
ignoreAsid
=
true
))
}
dtlb
.
map
(
_
.
ptw
.
resp
.
bits
:=
ptw_resp_next
.
data
)
dtlb
.
foreach
(
_
.
ptw
.
resp
.
bits
:=
ptw_resp_next
.
data
)
if
(
refillBothTlb
)
{
dtlb
.
map
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
).
orR
)
dtlb
.
foreach
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
).
orR
)
}
else
{
dtlb_ld
.
map
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
.
take
(
exuParameters
.
LduCnt
)).
orR
)
dtlb_st
.
map
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
.
drop
(
exuParameters
.
LduCnt
)).
orR
)
dtlb_ld
.
foreach
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
.
take
(
exuParameters
.
LduCnt
)).
orR
)
dtlb_st
.
foreach
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
.
drop
(
exuParameters
.
LduCnt
).
take
(
exuParameters
.
StuCnt
)).
orR
)
dtlb_prefetch
.
foreach
(
_
.
ptw
.
resp
.
valid
:=
ptw_resp_v
&&
Cat
(
ptw_resp_next
.
vector
.
drop
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
)).
orR
)
}
...
...
@@ -233,7 +300,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val
pmp
=
Module
(
new
PMP
())
pmp
.
io
.
distribute_csr
<>
csrCtrl
.
distribute_csr
val
pmp_check
=
VecInit
(
Seq
.
fill
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
)(
Module
(
new
PMPChecker
(
3
)).
io
))
val
pmp_check
=
VecInit
(
Seq
.
fill
(
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
+
1
)(
Module
(
new
PMPChecker
(
3
)).
io
))
for
((
p
,
d
)
<-
pmp_check
zip
dtlb_pmps
)
{
p
.
apply
(
tlbcsr
.
priv
.
dmode
,
pmp
.
io
.
pmp
,
pmp
.
io
.
pma
,
d
)
require
(
p
.
req
.
bits
.
size
.
getWidth
==
d
.
bits
.
size
.
getWidth
)
...
...
@@ -285,6 +352,18 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
for
(
s
<-
0
until
StorePipelineWidth
)
{
loadUnits
(
i
).
io
.
reExecuteQuery
(
s
)
:=
storeUnits
(
s
).
io
.
reExecuteQuery
}
// prefetch
prefetcherOpt
.
foreach
(
pf
=>
{
pf
.
io
.
ld_in
(
i
).
valid
:=
Mux
(
pf_train_on_hit
,
loadUnits
(
i
).
io
.
prefetch_train
.
valid
,
loadUnits
(
i
).
io
.
prefetch_train
.
valid
&&
loadUnits
(
i
).
io
.
prefetch_train
.
bits
.
isFirstIssue
&&
(
loadUnits
(
i
).
io
.
prefetch_train
.
bits
.
miss
||
loadUnits
(
i
).
io
.
prefetch_train
.
bits
.
meta_prefetch
)
)
pf
.
io
.
ld_in
(
i
).
bits
:=
loadUnits
(
i
).
io
.
prefetch_train
.
bits
pf
.
io
.
ld_in
(
i
).
bits
.
uop
.
cf
.
pc
:=
Mux
(
loadUnits
(
i
).
io
.
s2IsPointerChasing
,
io
.
loadPc
(
i
),
RegNext
(
io
.
loadPc
(
i
)))
})
// load to load fast forward: load(i) prefers data(i)
val
fastPriority
=
(
i
until
exuParameters
.
LduCnt
)
++
(
0
until
i
)
val
fastValidVec
=
fastPriority
.
map
(
j
=>
loadUnits
(
j
).
io
.
fastpathOut
.
valid
)
...
...
@@ -351,6 +430,13 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
p
"has trigger hit vec ${io.writeback(i).bits.uop.cf.trigger.backendHit}\n"
)
}
// Prefetcher
val
PrefetcherDTLBPortIndex
=
exuParameters
.
LduCnt
+
exuParameters
.
StuCnt
dtlb_reqs
(
PrefetcherDTLBPortIndex
)
:=
DontCare
dtlb_reqs
(
PrefetcherDTLBPortIndex
).
req
.
valid
:=
false
.
B
prefetcherOpt
.
foreach
(
pf
=>
{
dtlb_reqs
(
PrefetcherDTLBPortIndex
)
<>
pf
.
io
.
tlb_req
})
// StoreUnit
for
(
i
<-
0
until
exuParameters
.
StuCnt
)
{
...
...
@@ -539,9 +625,14 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
// for atomicsUnit, it uses loadUnit(0)'s TLB port
when
(
state
=/=
s_normal
)
{
// use store wb port instead of load
loadUnits
(
0
).
io
.
ldout
.
ready
:=
false
.
B
// use load_0's TLB
atomicsUnit
.
io
.
dtlb
<>
amoTlb
// hw prefetch should be disabled while executing atomic insts
loadUnits
.
map
(
i
=>
i
.
io
.
prefetch_req
.
valid
:=
false
.
B
)
// make sure there's no in-flight uops in load unit
assert
(!
loadUnits
(
0
).
io
.
ldout
.
valid
)
}
...
...
src/main/scala/xiangshan/backend/fu/CSR.scala
浏览文件 @
b35479a0
...
...
@@ -483,9 +483,35 @@ class CSR(implicit p: Parameters) extends FunctionUnit with HasCSRConst with PMP
// spfctl Bit 0: L1I Cache Prefetcher Enable
// spfctl Bit 1: L2Cache Prefetcher Enable
val
spfctl
=
RegInit
(
UInt
(
XLEN
.
W
),
"b11"
.
U
)
// spfctl Bit 2: L1D Cache Prefetcher Enable
// spfctl Bit 3: L1D train prefetch on hit
// spfctl Bit 4: L1D prefetch enable agt
// spfctl Bit 5: L1D prefetch enable pht
// spfctl Bit [9:6]: L1D prefetch active page threshold
// spfctl Bit [15:10]: L1D prefetch active page stride
// turn off L2 BOP, turn on L1 SMS by default
val
spfctl
=
RegInit
(
UInt
(
XLEN
.
W
),
Seq
(
0
<<
17
,
// L2 pf store only [17] init: false
1
<<
16
,
// L1D pf enable stride [16] init: true
30
<<
10
,
// L1D active page stride [15:10] init: 30
12
<<
6
,
// L1D active page threshold [9:6] init: 12
1
<<
5
,
// L1D enable pht [5] init: true
1
<<
4
,
// L1D enable agt [4] init: true
0
<<
3
,
// L1D train on hit [3] init: false
1
<<
2
,
// L1D pf enable [2] init: true
1
<<
1
,
// L2 pf enable [1] init: true
1
<<
0
,
// L1I pf enable [0] init: true
).
reduce
(
_
|
_
).
U
(
XLEN
.
W
))
csrio
.
customCtrl
.
l1I_pf_enable
:=
spfctl
(
0
)
csrio
.
customCtrl
.
l2_pf_enable
:=
spfctl
(
1
)
csrio
.
customCtrl
.
l1D_pf_enable
:=
spfctl
(
2
)
csrio
.
customCtrl
.
l1D_pf_train_on_hit
:=
spfctl
(
3
)
csrio
.
customCtrl
.
l1D_pf_enable_agt
:=
spfctl
(
4
)
csrio
.
customCtrl
.
l1D_pf_enable_pht
:=
spfctl
(
5
)
csrio
.
customCtrl
.
l1D_pf_active_threshold
:=
spfctl
(
9
,
6
)
csrio
.
customCtrl
.
l1D_pf_active_stride
:=
spfctl
(
15
,
10
)
csrio
.
customCtrl
.
l1D_pf_enable_stride
:=
spfctl
(
16
)
csrio
.
customCtrl
.
l2_pf_store_only
:=
spfctl
(
17
)
// sfetchctl Bit 0: L1I Cache Parity check enable
val
sfetchctl
=
RegInit
(
UInt
(
XLEN
.
W
),
"b0"
.
U
)
...
...
src/main/scala/xiangshan/cache/dcache/DCacheWrapper.scala
浏览文件 @
b35479a0
...
...
@@ -100,12 +100,21 @@ trait HasDCacheParameters extends HasL1CacheParameters {
def
blockProbeAfterGrantCycles
=
8
// give the processor some time to issue a request after a grant
def
nSourceType
=
3
def
nSourceType
=
10
def
sourceTypeWidth
=
log2Up
(
nSourceType
)
// non-prefetch source < 3
def
LOAD_SOURCE
=
0
def
STORE_SOURCE
=
1
def
AMO_SOURCE
=
2
def
SOFT_PREFETCH
=
3
// prefetch source >= 3
def
DCACHE_PREFETCH_SOURCE
=
3
def
SOFT_PREFETCH
=
4
def
HW_PREFETCH_AGT
=
5
def
HW_PREFETCH_PHT_CUR
=
6
def
HW_PREFETCH_PHT_INC
=
7
def
HW_PREFETCH_PHT_DEC
=
8
def
HW_PREFETCH_BOP
=
9
def
HW_PREFETCH_STRIDE
=
10
// each source use a id to distinguish its multiple reqs
def
reqIdWidth
=
log2Up
(
nEntries
)
max
log2Up
(
StoreBufferSize
)
...
...
@@ -142,6 +151,10 @@ trait HasDCacheParameters extends HasL1CacheParameters {
// uncache
val
uncacheIdxBits
=
log2Up
(
StoreQueueSize
)
max
log2Up
(
LoadQueueSize
)
// hardware prefetch parameters
// high confidence hardware prefetch port
val
HighConfHWPFLoadPort
=
LoadPipelineWidth
-
1
// use the last load port by default
val
IgnorePrefetchConfidence
=
false
// parameters about duplicating regs to solve fanout
// In Main Pipe:
...
...
@@ -274,6 +287,15 @@ class ReplacementWayReqIO(implicit p: Parameters) extends DCacheBundle {
val
way
=
Input
(
UInt
(
log2Up
(
nWays
).
W
))
}
/** Per-cache-line flag bits carried next to the coherence meta.
  *
  * Each field is a single Bool. Elsewhere in this change the load pipe receives
  * one `DCacheExtraMeta` per way (`Vec(nWays, new DCacheExtraMeta)`), and the
  * DCache fills `error`, `prefetch` and `access` from three separate flag arrays
  * (errorArray, prefetchArray, accessArray).
  */
class
DCacheExtraMeta
(
implicit
p
:
Parameters
)
extends
DCacheBundle
{
val
error
=
Bool
()
// error: the line was marked as corrupted by L2, or an ECC error was detected on a store
val
prefetch
=
Bool
()
// prefetch: the line was first requested by a prefetch
val
access
=
Bool
()
// access: the line has been accessed by a load / store
// Disabled debug field, kept for reference:
// val debug_access_timestamp = UInt(64.W) // last time a load / store / refill access that cacheline
}
// memory request in word granularity(load, mmio, lr/sc, atomics)
class
DCacheWordReq
(
implicit
p
:
Parameters
)
extends
DCacheBundle
{
...
...
@@ -336,6 +358,8 @@ class BaseDCacheWordResp(implicit p: Parameters) extends DCacheBundle
/** Word-granularity DCache response: everything in `BaseDCacheWordResp` plus
  * two meta flags and a delayed error indication.
  *
  * NOTE(review): `meta_prefetch` / `meta_access` presumably mirror the
  * `prefetch` / `access` bits of `DCacheExtraMeta` for the accessed line; the
  * code that drives them is not visible here, so confirm in LoadPipe.
  */
class
DCacheWordResp
(
implicit
p
:
Parameters
)
extends
BaseDCacheWordResp
{
val
meta_prefetch
=
Bool
()
val
meta_access
=
Bool
()
// error_delayed (below) becomes available 1 cycle after the data response
val
error_delayed
=
Bool
()
// covers all kinds of errors, including tag errors
}
...
...
@@ -461,6 +485,7 @@ class DCacheLoadIO(implicit p: Parameters) extends DCacheWordIO
// kill previous cycle's req
val
s1_kill
=
Output
(
Bool
())
val
s2_kill
=
Output
(
Bool
())
val
s2_pc
=
Output
(
UInt
(
VAddrBits
.
W
))
// cycle 0: virtual address: req.addr
// cycle 1: physical address: s1_paddr
val
s1_paddr_dup_lsu
=
Output
(
UInt
(
PAddrBits
.
W
))
// lsu side paddr
...
...
@@ -618,6 +643,7 @@ class DCacheToLsuIO(implicit p: Parameters) extends DCacheBundle {
class
DCacheIO
(
implicit
p
:
Parameters
)
extends
DCacheBundle
{
val
hartId
=
Input
(
UInt
(
8.
W
))
val
l2_pf_store_only
=
Input
(
Bool
())
val
lsu
=
new
DCacheToLsuIO
val
csr
=
new
L1CacheToCsrIO
val
error
=
new
L1CacheErrorInfo
...
...
@@ -664,8 +690,10 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// core data structures
val
bankedDataArray
=
Module
(
new
BankedDataArray
)
val
metaArray
=
Module
(
new
AsynchronousMetaArray
(
readPorts
=
LoadPipelineWidth
+
1
,
writePorts
=
2
))
val
errorArray
=
Module
(
new
ErrorArray
(
readPorts
=
LoadPipelineWidth
+
1
,
writePorts
=
2
))
// TODO: add it to meta array
val
metaArray
=
Module
(
new
L1CohMetaArray
(
readPorts
=
LoadPipelineWidth
+
1
,
writePorts
=
2
))
val
errorArray
=
Module
(
new
L1FlagMetaArray
(
readPorts
=
LoadPipelineWidth
+
1
,
writePorts
=
2
))
val
prefetchArray
=
Module
(
new
L1FlagMetaArray
(
readPorts
=
LoadPipelineWidth
+
1
,
writePorts
=
2
))
// prefetch flag array
val
accessArray
=
Module
(
new
L1FlagMetaArray
(
readPorts
=
LoadPipelineWidth
+
1
,
writePorts
=
LoadPipelineWidth
+
2
))
val
tagArray
=
Module
(
new
DuplicatedTagArray
(
readPorts
=
LoadPipelineWidth
+
1
))
bankedDataArray
.
dump
()
...
...
@@ -680,6 +708,7 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
val
wb
=
Module
(
new
WritebackQueue
(
edge
))
missQueue
.
io
.
hartId
:=
io
.
hartId
missQueue
.
io
.
l2_pf_store_only
:=
RegNext
(
io
.
l2_pf_store_only
,
false
.
B
)
val
errors
=
ldu
.
map
(
_
.
io
.
error
)
++
// load error
Seq
(
mainPipe
.
io
.
error
)
// store / misc error
...
...
@@ -687,6 +716,8 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
//----------------------------------------
// meta array
// read / write coh meta
val
meta_read_ports
=
ldu
.
map
(
_
.
io
.
meta_read
)
++
Seq
(
mainPipe
.
io
.
meta_read
)
val
meta_resp_ports
=
ldu
.
map
(
_
.
io
.
meta_resp
)
++
...
...
@@ -699,16 +730,41 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
meta_resp_ports
.
zip
(
metaArray
.
io
.
resp
).
foreach
{
case
(
p
,
r
)
=>
p
:=
r
}
meta_write_ports
.
zip
(
metaArray
.
io
.
write
).
foreach
{
case
(
p
,
w
)
=>
w
<>
p
}
val
error_flag_resp_ports
=
ldu
.
map
(
_
.
io
.
error_flag_resp
)
++
Seq
(
mainPipe
.
io
.
error_flag_resp
)
// read extra meta
meta_read_ports
.
zip
(
errorArray
.
io
.
read
).
foreach
{
case
(
p
,
r
)
=>
r
<>
p
}
meta_read_ports
.
zip
(
prefetchArray
.
io
.
read
).
foreach
{
case
(
p
,
r
)
=>
r
<>
p
}
meta_read_ports
.
zip
(
accessArray
.
io
.
read
).
foreach
{
case
(
p
,
r
)
=>
r
<>
p
}
val
extra_meta_resp_ports
=
ldu
.
map
(
_
.
io
.
extra_meta_resp
)
++
Seq
(
mainPipe
.
io
.
extra_meta_resp
)
extra_meta_resp_ports
.
zip
(
errorArray
.
io
.
resp
).
foreach
{
case
(
p
,
r
)
=>
{
(
0
until
nWays
).
map
(
i
=>
{
p
(
i
).
error
:=
r
(
i
)
})
}}
extra_meta_resp_ports
.
zip
(
prefetchArray
.
io
.
resp
).
foreach
{
case
(
p
,
r
)
=>
{
(
0
until
nWays
).
map
(
i
=>
{
p
(
i
).
prefetch
:=
r
(
i
)
})
}}
extra_meta_resp_ports
.
zip
(
accessArray
.
io
.
resp
).
foreach
{
case
(
p
,
r
)
=>
{
(
0
until
nWays
).
map
(
i
=>
{
p
(
i
).
access
:=
r
(
i
)
})
}}
// write extra meta
val
error_flag_write_ports
=
Seq
(
mainPipe
.
io
.
error_flag_write
,
refillPipe
.
io
.
error_flag_write
mainPipe
.
io
.
error_flag_write
,
// error flag generated by corrupted store
refillPipe
.
io
.
error_flag_write
// corrupted signal from l2
)
meta_read_ports
.
zip
(
errorArray
.
io
.
read
).
foreach
{
case
(
p
,
r
)
=>
r
<>
p
}
error_flag_resp_ports
.
zip
(
errorArray
.
io
.
resp
).
foreach
{
case
(
p
,
r
)
=>
p
:=
r
}
error_flag_write_ports
.
zip
(
errorArray
.
io
.
write
).
foreach
{
case
(
p
,
w
)
=>
w
<>
p
}
val
prefetch_flag_write_ports
=
Seq
(
mainPipe
.
io
.
prefetch_flag_write
,
// set prefetch_flag to false if coh is set to Nothing
refillPipe
.
io
.
prefetch_flag_write
// refill required by prefetch will set prefetch_flag
)
prefetch_flag_write_ports
.
zip
(
prefetchArray
.
io
.
write
).
foreach
{
case
(
p
,
w
)
=>
w
<>
p
}
val
access_flag_write_ports
=
ldu
.
map
(
_
.
io
.
access_flag_write
)
++
Seq
(
mainPipe
.
io
.
access_flag_write
,
refillPipe
.
io
.
access_flag_write
)
access_flag_write_ports
.
zip
(
accessArray
.
io
.
write
).
foreach
{
case
(
p
,
w
)
=>
w
<>
p
}
//----------------------------------------
// tag array
require
(
tagArray
.
io
.
read
.
size
==
(
ldu
.
size
+
1
))
...
...
src/main/scala/xiangshan/cache/dcache/loadpipe/LoadPipe.scala
浏览文件 @
b35479a0
...
...
@@ -34,7 +34,7 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
// meta and data array read port
val
meta_read
=
DecoupledIO
(
new
MetaReadReq
)
val
meta_resp
=
Input
(
Vec
(
nWays
,
new
Meta
))
val
e
rror_flag_resp
=
Input
(
Vec
(
nWays
,
Bool
()
))
val
e
xtra_meta_resp
=
Input
(
Vec
(
nWays
,
new
DCacheExtraMeta
))
val
tag_read
=
DecoupledIO
(
new
TagReadReq
)
val
tag_resp
=
Input
(
Vec
(
nWays
,
UInt
(
encTagBits
.
W
)))
...
...
@@ -43,6 +43,9 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
val
banked_data_resp
=
Input
(
new
L1BankedDataReadResult
())
val
read_error_delayed
=
Input
(
Bool
())
// access bit update
val
access_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
// banked data read conflict
val
bank_conflict_slow
=
Input
(
Bool
())
val
bank_conflict_fast
=
Input
(
Bool
())
...
...
@@ -183,13 +186,16 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
// this simplifies our logic in s2 stage
val
s1_hit_meta
=
Mux
(
s1_tag_match_dup_dc
,
Mux1H
(
s1_tag_match_way_dup_dc
,
wayMap
((
w
:
Int
)
=>
meta_resp
(
w
))),
s1_fake_meta
)
val
s1_hit_coh
=
s1_hit_meta
.
coh
val
s1_hit_error
=
Mux
(
s1_tag_match_dup_dc
,
Mux1H
(
s1_tag_match_way_dup_dc
,
wayMap
((
w
:
Int
)
=>
io
.
error_flag_resp
(
w
))),
false
.
B
)
val
s1_hit_error
=
Mux
(
s1_tag_match_dup_dc
,
Mux1H
(
s1_tag_match_way_dup_dc
,
wayMap
((
w
:
Int
)
=>
io
.
extra_meta_resp
(
w
).
error
)),
false
.
B
)
val
s1_hit_prefetch
=
Mux
(
s1_tag_match_dup_dc
,
Mux1H
(
s1_tag_match_way_dup_dc
,
wayMap
((
w
:
Int
)
=>
io
.
extra_meta_resp
(
w
).
prefetch
)),
false
.
B
)
val
s1_hit_access
=
Mux
(
s1_tag_match_dup_dc
,
Mux1H
(
s1_tag_match_way_dup_dc
,
wayMap
((
w
:
Int
)
=>
io
.
extra_meta_resp
(
w
).
access
)),
false
.
B
)
io
.
replace_way
.
set
.
valid
:=
RegNext
(
s0_fire
)
io
.
replace_way
.
set
.
bits
:=
get_idx
(
s1_vaddr
)
val
s1_repl_way_en
=
UIntToOH
(
io
.
replace_way
.
way
)
val
s1_repl_tag
=
Mux1H
(
s1_repl_way_en
,
wayMap
(
w
=>
tag_resp
(
w
)))
val
s1_repl_coh
=
Mux1H
(
s1_repl_way_en
,
wayMap
(
w
=>
meta_resp
(
w
).
coh
))
val
s1_repl_extra_meta
=
Mux1H
(
s1_repl_way_en
,
wayMap
(
w
=>
io
.
extra_meta_resp
(
w
)))
val
s1_need_replacement
=
!
s1_tag_match_dup_dc
val
s1_way_en
=
Mux
(
s1_need_replacement
,
s1_repl_way_en
,
s1_tag_match_way_dup_dc
)
...
...
@@ -232,6 +238,7 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
dump_pipeline_reqs
(
"LoadPipe s2"
,
s2_valid
,
s2_req
)
// hit, miss, nack, permission checking
// dcache side tag match
val
s2_tag_match_way
=
RegEnable
(
s1_tag_match_way_dup_dc
,
s1_fire
)
...
...
@@ -244,12 +251,13 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
val
s2_hit_meta
=
RegEnable
(
s1_hit_meta
,
s1_fire
)
val
s2_hit_coh
=
RegEnable
(
s1_hit_coh
,
s1_fire
)
val
s2_has_permission
=
s2_hit_coh
.
onAccess
(
s2_req
.
cmd
).
_1
//
redundant
val
s2_new_hit_coh
=
s2_hit_coh
.
onAccess
(
s2_req
.
cmd
).
_3
//
redundant
val
s2_has_permission
=
s2_hit_coh
.
onAccess
(
s2_req
.
cmd
).
_1
//
for write prefetch
val
s2_new_hit_coh
=
s2_hit_coh
.
onAccess
(
s2_req
.
cmd
).
_3
//
for write prefetch
val
s2_way_en
=
RegEnable
(
s1_way_en
,
s1_fire
)
val
s2_repl_coh
=
RegEnable
(
s1_repl_coh
,
s1_fire
)
val
s2_repl_tag
=
RegEnable
(
s1_repl_tag
,
s1_fire
)
val
s2_repl_extra_meta
=
RegEnable
(
s1_repl_extra_meta
,
s1_fire
)
// not used for now
val
s2_encTag
=
RegEnable
(
s1_encTag
,
s1_fire
)
// when req got nacked, upper levels should replay this request
...
...
@@ -269,9 +277,10 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
val
s2_tag_error
=
dcacheParameters
.
tagCode
.
decode
(
s2_encTag
).
error
// error reported by tag ecc check
val
s2_flag_error
=
RegEnable
(
s1_flag_error
,
s1_fire
)
val
s2_hit_prefetch
=
RegEnable
(
s1_hit_prefetch
,
s1_fire
)
val
s2_hit_access
=
RegEnable
(
s1_hit_access
,
s1_fire
)
val
s2_hit
=
s2_tag_match
&&
s2_has_permission
&&
s2_hit_coh
===
s2_new_hit_coh
&&
!
s2_wpu_pred_fail
// assert(!RegNext(s2_valid && (s2_tag_match && !s2_hit)))
// assert(!RegNext(s2_valid && (s2_hit_dup_lsu =/= s2_hit)))
// only dump these signals when they are actually valid
dump_pipeline_valids
(
"LoadPipe s2"
,
"s2_hit"
,
s2_valid
&&
s2_hit
)
...
...
@@ -293,6 +302,7 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
io
.
miss_req
.
bits
.
replace_coh
:=
s2_repl_coh
io
.
miss_req
.
bits
.
replace_tag
:=
s2_repl_tag
io
.
miss_req
.
bits
.
cancel
:=
io
.
lsu
.
s2_kill
||
s2_tag_error
io
.
miss_req
.
bits
.
pc
:=
io
.
lsu
.
s2_pc
// send back response
val
resp
=
Wire
(
ValidIO
(
new
DCacheWordResp
))
...
...
@@ -312,11 +322,15 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
resp
.
bits
.
replay
:=
(
resp
.
bits
.
miss
&&
(!
io
.
miss_req
.
fire
()
||
s2_nack
))
||
io
.
bank_conflict_slow
||
s2_wpu_pred_fail
resp
.
bits
.
replayCarry
.
valid
:=
resp
.
bits
.
miss
resp
.
bits
.
replayCarry
.
real_way_en
:=
s2_real_way_en
resp
.
bits
.
meta_prefetch
:=
s2_hit_prefetch
resp
.
bits
.
meta_access
:=
s2_hit_access
resp
.
bits
.
tag_error
:=
s2_tag_error
// report tag_error in load s2
resp
.
bits
.
mshr_id
:=
io
.
miss_resp
.
id
XSPerfAccumulate
(
"dcache_read_bank_conflict"
,
io
.
bank_conflict_slow
&&
s2_valid
)
XSPerfAccumulate
(
"wpu_pred_fail"
,
s2_wpu_pred_fail
&&
s2_valid
)
XSPerfAccumulate
(
"dcache_read_bank_conflict"
,
io
.
bank_conflict_slow
&&
s2_valid
)
XSPerfAccumulate
(
"dcache_read_from_prefetched_line"
,
s2_valid
&&
s2_hit_prefetch
&&
!
resp
.
bits
.
miss
)
XSPerfAccumulate
(
"dcache_first_read_from_prefetched_line"
,
s2_valid
&&
s2_hit_prefetch
&&
!
resp
.
bits
.
miss
&&
!
s2_hit_access
)
io
.
lsu
.
resp
.
valid
:=
resp
.
valid
io
.
lsu
.
resp
.
bits
:=
resp
.
bits
...
...
@@ -337,11 +351,13 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
// report ecc error and get selected dcache data
val
s3_valid
=
RegNext
(
s2_valid
)
val
s3_vaddr
=
RegEnable
(
s2_vaddr
,
s2_fire
)
val
s3_paddr
=
RegEnable
(
s2_paddr
,
s2_fire
)
val
s3_hit
=
RegEnable
(
s2_hit
,
s2_fire
)
val
s3_tag_match_way
=
RegEnable
(
s2_tag_match_way
,
s2_fire
)
val
s3_banked_data_resp_word
=
io
.
banked_data_resp
.
raw_data
val
s3_data_error
=
io
.
read_error_delayed
// banked_data_resp_word.error && !bank_conflict
val
s3_data_error
=
io
.
read_error_delayed
&&
s3_hit
// banked_data_resp_word.error && !bank_conflict
val
s3_tag_error
=
RegEnable
(
s2_tag_error
,
s2_fire
)
val
s3_flag_error
=
RegEnable
(
s2_flag_error
,
s2_fire
)
val
s3_error
=
s3_tag_error
||
s3_flag_error
||
s3_data_error
...
...
@@ -361,12 +377,17 @@ class LoadPipe(id: Int)(implicit p: Parameters) extends DCacheModule with HasPer
// report tag error / l2 corrupted to CACHE_ERROR csr
io
.
error
.
valid
:=
s3_error
&&
s3_valid
// update plru, report error in s3
// update plru in s3
io
.
replace_access
.
valid
:=
RegNext
(
RegNext
(
RegNext
(
io
.
meta_read
.
fire
())
&&
s1_valid
&&
!
io
.
lsu
.
s1_kill
)
&&
!
s2_nack_no_mshr
)
io
.
replace_access
.
bits
.
set
:=
RegNext
(
RegNext
(
get_idx
(
s1_req
.
addr
)))
io
.
replace_access
.
bits
.
way
:=
RegNext
(
RegNext
(
Mux
(
s1_tag_match_dup_dc
,
OHToUInt
(
s1_tag_match_way_dup_dc
),
io
.
replace_way
.
way
)))
// update access bit
io
.
access_flag_write
.
valid
:=
s3_valid
&&
s3_hit
io
.
access_flag_write
.
bits
.
idx
:=
get_idx
(
s3_vaddr
)
io
.
access_flag_write
.
bits
.
way_en
:=
s3_tag_match_way
io
.
access_flag_write
.
bits
.
flag
:=
true
.
B
// --------------------------------------------------------------------------------
// Debug logging functions
def
dump_pipeline_reqs
(
pipeline_stage_name
:
String
,
valid
:
Bool
,
...
...
src/main/scala/xiangshan/cache/dcache/mainpipe/MainPipe.scala
浏览文件 @
b35479a0
...
...
@@ -131,9 +131,11 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
// meta array
val
meta_read
=
DecoupledIO
(
new
MetaReadReq
)
val
meta_resp
=
Input
(
Vec
(
nWays
,
new
Meta
))
val
meta_write
=
DecoupledIO
(
new
MetaWriteReq
)
val
error_flag_resp
=
Input
(
Vec
(
nWays
,
Bool
()))
val
error_flag_write
=
DecoupledIO
(
new
ErrorWriteReq
)
val
meta_write
=
DecoupledIO
(
new
CohMetaWriteReq
)
val
extra_meta_resp
=
Input
(
Vec
(
nWays
,
new
DCacheExtraMeta
))
val
error_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
val
prefetch_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
val
access_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
// tag sram
val
tag_read
=
DecoupledIO
(
new
TagReadReq
)
...
...
@@ -282,9 +284,13 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
val
s1_hit_tag
=
Mux
(
s1_tag_match
,
Mux1H
(
s1_tag_match_way
,
wayMap
(
w
=>
tag_resp
(
w
))),
get_tag
(
s1_req
.
addr
))
val
s1_hit_coh
=
ClientMetadata
(
Mux
(
s1_tag_match
,
Mux1H
(
s1_tag_match_way
,
wayMap
(
w
=>
meta_resp
(
w
))),
0.
U
))
val
s1_encTag
=
Mux1H
(
s1_tag_match_way
,
wayMap
((
w
:
Int
)
=>
enc_tag_resp
(
w
)))
val
s1_flag_error
=
Mux
(
s1_tag_match
,
Mux1H
(
s1_tag_match_way
,
wayMap
(
w
=>
io
.
error_flag_resp
(
w
))),
false
.
B
)
val
s1_flag_error
=
Mux
(
s1_tag_match
,
Mux1H
(
s1_tag_match_way
,
wayMap
(
w
=>
io
.
extra_meta_resp
(
w
).
error
)),
false
.
B
)
val
s1_extra_meta
=
Mux1H
(
s1_tag_match_way
,
wayMap
(
w
=>
io
.
extra_meta_resp
(
w
)))
val
s1_l2_error
=
s1_req
.
error
XSPerfAccumulate
(
"probe_unused_prefetch"
,
s1_req
.
probe
&&
s1_extra_meta
.
prefetch
&&
!
s1_extra_meta
.
access
)
// may not be accurate
XSPerfAccumulate
(
"replace_unused_prefetch"
,
s1_req
.
replace
&&
s1_extra_meta
.
prefetch
&&
!
s1_extra_meta
.
access
)
// may not be accurate
// replacement policy
val
s1_repl_way_en
=
WireInit
(
0.
U
(
nWays
.
W
))
s1_repl_way_en
:=
Mux
(
RegNext
(
s0_fire
),
UIntToOH
(
io
.
replace_way
.
way
),
RegNext
(
s1_repl_way_en
))
...
...
@@ -1412,6 +1418,7 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
miss_req
.
replace_tag
:=
s2_repl_tag
miss_req
.
id
:=
s2_req
.
id
miss_req
.
cancel
:=
false
.
B
miss_req
.
pc
:=
DontCare
io
.
store_replay_resp
.
valid
:=
s2_valid_dup
(
5
)
&&
s2_can_go_to_mq_dup
(
1
)
&&
replay
&&
s2_req
.
isStore
io
.
store_replay_resp
.
bits
.
data
:=
DontCare
...
...
@@ -1470,7 +1477,22 @@ class MainPipe(implicit p: Parameters) extends DCacheModule with HasPerfEvents {
io
.
error_flag_write
.
valid
:=
s3_fire_dup_for_err_w_valid
&&
update_meta_dup_for_err_w_valid
&&
s3_l2_error
io
.
error_flag_write
.
bits
.
idx
:=
s3_idx_dup
(
3
)
io
.
error_flag_write
.
bits
.
way_en
:=
s3_way_en_dup
(
1
)
io
.
error_flag_write
.
bits
.
error
:=
s3_l2_error
io
.
error_flag_write
.
bits
.
flag
:=
s3_l2_error
// if we use (prefetch_flag && meta =/= ClientStates.Nothing) for prefetch check
// prefetch_flag_write can be omited
// io.prefetch_flag_write.valid := io.meta_write.valid && new_coh === ClientStates.Nothing
// io.prefetch_flag_write.bits.idx := s3_idx_dup(3)
// io.prefetch_flag_write.bits.way_en := s3_way_en_dup(1)
// io.prefetch_flag_write.bits.flag := false.B
io
.
prefetch_flag_write
.
valid
:=
false
.
B
io
.
prefetch_flag_write
.
bits
:=
DontCare
// probe / replace will not update access bit
io
.
access_flag_write
.
valid
:=
s3_fire_dup_for_meta_w_valid
&&
!
s3_req
.
probe
&&
!
s3_req
.
replace
io
.
access_flag_write
.
bits
.
idx
:=
s3_idx_dup
(
3
)
io
.
access_flag_write
.
bits
.
way_en
:=
s3_way_en_dup
(
1
)
io
.
access_flag_write
.
bits
.
flag
:=
true
.
B
io
.
tag_write
.
valid
:=
s3_fire_dup_for_tag_w_valid
&&
s3_req_miss_dup_for_tag_w_valid
io
.
tag_write
.
bits
.
idx
:=
s3_idx_dup
(
4
)
...
...
src/main/scala/xiangshan/cache/dcache/mainpipe/MissQueue.scala
浏览文件 @
b35479a0
...
...
@@ -30,6 +30,7 @@ import difftest._
import
huancun.
{
AliasKey
,
DirtyKey
,
PreferCacheKey
,
PrefetchKey
}
import
utility.FastArbiter
import
mem.
{
AddPipelineReg
}
import
mem.trace._
class
MissReqWoStoreData
(
implicit
p
:
Parameters
)
extends
DCacheBundle
{
val
source
=
UInt
(
sourceTypeWidth
.
W
)
...
...
@@ -37,6 +38,7 @@ class MissReqWoStoreData(implicit p: Parameters) extends DCacheBundle {
val
addr
=
UInt
(
PAddrBits
.
W
)
val
vaddr
=
UInt
(
VAddrBits
.
W
)
val
way_en
=
UInt
(
DCacheWays
.
W
)
val
pc
=
UInt
(
VAddrBits
.
W
)
// store
val
full_overwrite
=
Bool
()
...
...
@@ -61,9 +63,13 @@ class MissReqWoStoreData(implicit p: Parameters) extends DCacheBundle {
// 2. pmp check failed
val
cancel
=
Bool
()
// cancel is slow to generate, it will cancel missreq.valid
def
isLoad
=
source
===
LOAD_SOURCE
.
U
def
isStore
=
source
===
STORE_SOURCE
.
U
def
isAMO
=
source
===
AMO_SOURCE
.
U
// Req source decode
// Note that req source is NOT cmd type
// For instance, a req which isFromPrefetch may have R or W cmd
def
isFromLoad
=
source
===
LOAD_SOURCE
.
U
def
isFromStore
=
source
===
STORE_SOURCE
.
U
def
isFromAMO
=
source
===
AMO_SOURCE
.
U
def
isFromPrefetch
=
source
>=
DCACHE_PREFETCH_SOURCE
.
U
def
hit
=
req_coh
.
isValid
()
}
...
...
@@ -103,6 +109,7 @@ class MissReq(implicit p: Parameters) extends MissReqWoStoreData {
out
.
replace_tag
:=
replace_tag
out
.
id
:=
id
out
.
cancel
:=
cancel
out
.
pc
:=
pc
out
}
}
...
...
@@ -160,6 +167,7 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
val
req_handled_by_this_entry
=
Output
(
Bool
())
val
forwardInfo
=
Output
(
new
MissEntryForwardIO
)
val
l2_pf_store_only
=
Input
(
Bool
())
})
assert
(!
RegNext
(
io
.
primary_valid
&&
!
io
.
primary_ready
))
...
...
@@ -169,6 +177,8 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
val
req_valid
=
RegInit
(
false
.
B
)
val
set
=
addr_to_dcache_set
(
req
.
vaddr
)
val
input_req_is_prefetch
=
isPrefetch
(
io
.
req
.
bits
.
cmd
)
val
s_acquire
=
RegInit
(
true
.
B
)
val
s_grantack
=
RegInit
(
true
.
B
)
val
s_replace_req
=
RegInit
(
true
.
B
)
...
...
@@ -188,11 +198,13 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
val
data_not_refilled
=
!
w_grantfirst
val
error
=
RegInit
(
false
.
B
)
val
prefetch
=
RegInit
(
false
.
B
)
val
access
=
RegInit
(
false
.
B
)
val
should_refill_data_reg
=
Reg
(
Bool
())
val
should_refill_data
=
WireInit
(
should_refill_data_reg
)
// val full_overwrite = req.isStore && req_store_mask.andR
// val full_overwrite = req.is
From
Store && req_store_mask.andR
val
full_overwrite
=
Reg
(
Bool
())
val
(
_
,
_
,
refill_done
,
refill_count
)
=
edge
.
count
(
io
.
mem_grant
)
...
...
@@ -235,46 +247,51 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
w_grantfirst
:=
false
.
B
w_grantlast
:=
false
.
B
s_write_storedata
:=
!
io
.
req
.
bits
.
isStore
// only store need to wait for data
full_overwrite
:=
io
.
req
.
bits
.
isStore
&&
io
.
req
.
bits
.
full_overwrite
s_write_storedata
:=
!
io
.
req
.
bits
.
is
From
Store
// only store need to wait for data
full_overwrite
:=
io
.
req
.
bits
.
is
From
Store
&&
io
.
req
.
bits
.
full_overwrite
when
(!
io
.
req
.
bits
.
isAMO
)
{
when
(!
io
.
req
.
bits
.
is
From
AMO
)
{
s_refill
:=
false
.
B
w_refill_resp
:=
false
.
B
}
when
(!
io
.
req
.
bits
.
hit
&&
io
.
req
.
bits
.
replace_coh
.
isValid
()
&&
!
io
.
req
.
bits
.
isAMO
)
{
when
(!
io
.
req
.
bits
.
hit
&&
io
.
req
.
bits
.
replace_coh
.
isValid
()
&&
!
io
.
req
.
bits
.
is
From
AMO
)
{
s_replace_req
:=
false
.
B
w_replace_resp
:=
false
.
B
}
when
(
io
.
req
.
bits
.
isAMO
)
{
when
(
io
.
req
.
bits
.
is
From
AMO
)
{
s_mainpipe_req
:=
false
.
B
w_mainpipe_resp
:=
false
.
B
}
should_refill_data_reg
:=
io
.
req
.
bits
.
isLoad
should_refill_data_reg
:=
io
.
req
.
bits
.
is
From
Load
error
:=
false
.
B
prefetch
:=
input_req_is_prefetch
access
:=
false
.
B
}
when
(
secondary_fire
)
{
assert
(
io
.
req
.
bits
.
req_coh
.
state
<=
req
.
req_coh
.
state
)
assert
(!(
io
.
req
.
bits
.
is
AMO
||
req
.
is
AMO
))
assert
(
io
.
req
.
bits
.
req_coh
.
state
<=
req
.
req_coh
.
state
||
(
prefetch
&&
!
access
)
)
assert
(!(
io
.
req
.
bits
.
is
FromAMO
||
req
.
isFrom
AMO
))
// use the most uptodate meta
req
.
req_coh
:=
io
.
req
.
bits
.
req_coh
when
(
io
.
req
.
bits
.
isStore
)
{
when
(
io
.
req
.
bits
.
is
From
Store
)
{
req
:=
io
.
req
.
bits
req
.
addr
:=
get_block_addr
(
io
.
req
.
bits
.
addr
)
req
.
way_en
:=
req
.
way_en
req
.
replace_coh
:=
req
.
replace_coh
req
.
replace_tag
:=
req
.
replace_tag
s_write_storedata
:=
false
.
B
// only store need to wait for data
full_overwrite
:=
io
.
req
.
bits
.
isStore
&&
io
.
req
.
bits
.
full_overwrite
full_overwrite
:=
io
.
req
.
bits
.
is
From
Store
&&
io
.
req
.
bits
.
full_overwrite
}
should_refill_data
:=
should_refill_data_reg
||
io
.
req
.
bits
.
isLoad
should_refill_data
:=
should_refill_data_reg
||
io
.
req
.
bits
.
is
From
Load
should_refill_data_reg
:=
should_refill_data
when
(!
input_req_is_prefetch
)
{
access
:=
true
.
B
// when merge non-prefetch req, set access bit
}
}
when
(
io
.
mem_acquire
.
fire
())
{
...
...
@@ -301,7 +318,7 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
// new_data(i) := req.store_data(rowBits * (i + 1) - 1, rowBits * i)
new_data
(
i
)
:=
refill_and_store_data
(
i
)
// we only need to merge data for Store
new_mask
(
i
)
:=
Mux
(
req
.
isStore
,
req_store_mask
(
rowBytes
*
(
i
+
1
)
-
1
,
rowBytes
*
i
),
0.
U
)
new_mask
(
i
)
:=
Mux
(
req
.
is
From
Store
,
req_store_mask
(
rowBytes
*
(
i
+
1
)
-
1
,
rowBytes
*
i
),
0.
U
)
}
val
hasData
=
RegInit
(
true
.
B
)
...
...
@@ -362,19 +379,21 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
w_mainpipe_resp
:=
true
.
B
}
def
before_re
ad
_sent_can_merge
(
new_req
:
MissReqWoStoreData
)
:
Bool
=
{
acquire_not_sent
&&
req
.
isLoad
&&
(
new_req
.
isLoad
||
new_req
.
is
Store
)
def
before_re
q
_sent_can_merge
(
new_req
:
MissReqWoStoreData
)
:
Bool
=
{
acquire_not_sent
&&
(
req
.
isFromLoad
||
req
.
isFromPrefetch
)
&&
(
new_req
.
isFromLoad
||
new_req
.
isFrom
Store
)
}
def
before_data_refill_can_merge
(
new_req
:
MissReqWoStoreData
)
:
Bool
=
{
data_not_refilled
&&
(
req
.
is
Load
||
req
.
isStore
)
&&
new_req
.
is
Load
data_not_refilled
&&
(
req
.
is
FromLoad
||
req
.
isFromStore
||
req
.
isFromPrefetch
)
&&
new_req
.
isFrom
Load
}
// Note that late prefetch will be ignored
def
should_merge
(
new_req
:
MissReqWoStoreData
)
:
Bool
=
{
val
block_match
=
get_block
(
req
.
addr
)
===
get_block
(
new_req
.
addr
)
block_match
&&
(
before_re
ad
_sent_can_merge
(
new_req
)
||
before_re
q
_sent_can_merge
(
new_req
)
||
before_data_refill_can_merge
(
new_req
)
)
}
...
...
@@ -392,7 +411,7 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
req_valid
&&
Mux
(
block_match
,
!
before_re
ad
_sent_can_merge
(
new_req
)
&&
!
before_re
q
_sent_can_merge
(
new_req
)
&&
!
before_data_refill_can_merge
(
new_req
),
set_match
&&
new_req
.
way_en
===
req
.
way_en
)
...
...
@@ -437,7 +456,7 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
// resolve cache alias by L2
io
.
mem_acquire
.
bits
.
user
.
lift
(
AliasKey
).
foreach
(
_
:=
req
.
vaddr
(
13
,
12
))
// trigger prefetch
io
.
mem_acquire
.
bits
.
user
.
lift
(
PrefetchKey
).
foreach
(
_
:=
true
.
B
)
io
.
mem_acquire
.
bits
.
user
.
lift
(
PrefetchKey
).
foreach
(
_
:=
Mux
(
io
.
l2_pf_store_only
,
req
.
isFromStore
,
true
.
B
)
)
// prefer not to cache data in L2 by default
io
.
mem_acquire
.
bits
.
user
.
lift
(
PreferCacheKey
).
foreach
(
_
:=
false
.
B
)
require
(
nSets
<=
256
)
...
...
@@ -471,7 +490,7 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
refill
.
addr
:=
req
.
addr
refill
.
way_en
:=
req
.
way_en
refill
.
wmask
:=
Mux
(
hasData
||
req
.
isLoad
,
hasData
||
req
.
is
From
Load
,
~
0.
U
(
DCacheBanks
.
W
),
VecInit
((
0
until
DCacheBanks
).
map
(
i
=>
get_mask_of_bank
(
i
,
req_store_mask
).
orR
)).
asUInt
)
...
...
@@ -493,6 +512,8 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
}
refill
.
meta
.
coh
:=
ClientMetadata
(
missCohGen
(
req
.
cmd
,
grant_param
,
isDirty
))
refill
.
error
:=
error
refill
.
prefetch
:=
prefetch
refill
.
access
:=
access
refill
.
alias
:=
req
.
vaddr
(
13
,
12
)
// TODO
io
.
main_pipe_req
.
valid
:=
!
s_mainpipe_req
&&
w_grantlast
...
...
@@ -535,13 +556,14 @@ class MissEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule {
XSPerfAccumulate
(
"penalty_waiting_for_channel_D"
,
s_acquire
&&
!
w_grantlast
&&
!
io
.
mem_grant
.
valid
)
XSPerfAccumulate
(
"penalty_waiting_for_channel_E"
,
io
.
mem_finish
.
valid
&&
!
io
.
mem_finish
.
ready
)
XSPerfAccumulate
(
"penalty_from_grant_to_refill"
,
!
w_refill_resp
&&
w_grantlast
)
XSPerfAccumulate
(
"soft_prefetch_number"
,
primary_fire
&&
io
.
req
.
bits
.
source
===
SOFT_PREFETCH
.
U
)
XSPerfAccumulate
(
"prefetch_req_primary"
,
primary_fire
&&
io
.
req
.
bits
.
source
===
DCACHE_PREFETCH_SOURCE
.
U
)
XSPerfAccumulate
(
"prefetch_req_merged"
,
secondary_fire
&&
io
.
req
.
bits
.
source
===
DCACHE_PREFETCH_SOURCE
.
U
)
val
(
mshr_penalty_sample
,
mshr_penalty
)
=
TransactionLatencyCounter
(
RegNext
(
primary_fire
),
release_entry
)
XSPerfHistogram
(
"miss_penalty"
,
mshr_penalty
,
mshr_penalty_sample
,
0
,
20
,
1
,
true
,
true
)
XSPerfHistogram
(
"miss_penalty"
,
mshr_penalty
,
mshr_penalty_sample
,
20
,
100
,
10
,
true
,
false
)
val
load_miss_begin
=
primary_fire
&&
io
.
req
.
bits
.
isLoad
val
load_miss_begin
=
primary_fire
&&
io
.
req
.
bits
.
is
From
Load
val
refill_finished
=
RegNext
(!
w_grantlast
&&
refill_done
)
&&
should_refill_data
val
(
load_miss_penalty_sample
,
load_miss_penalty
)
=
TransactionLatencyCounter
(
load_miss_begin
,
refill_finished
)
// not real refill finish time
XSPerfHistogram
(
"load_miss_penalty_to_use"
,
load_miss_penalty
,
load_miss_penalty_sample
,
0
,
20
,
1
,
true
,
true
)
...
...
@@ -590,6 +612,7 @@ class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule wi
// forward missqueue
val
forward
=
Vec
(
LoadPipelineWidth
,
new
LduToMissqueueForwardIO
)
val
l2_pf_store_only
=
Input
(
Bool
())
})
// 128KBL1: FIXME: provide vaddr for l2
...
...
@@ -656,6 +679,7 @@ class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule wi
e
.
io
.
hartId
:=
io
.
hartId
e
.
io
.
id
:=
i
.
U
e
.
io
.
l2_pf_store_only
:=
io
.
l2_pf_store_only
e
.
io
.
req
.
valid
:=
io
.
req
.
valid
e
.
io
.
primary_valid
:=
io
.
req
.
valid
&&
!
merge
&&
...
...
@@ -707,6 +731,17 @@ class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule wi
io
.
full
:=
~
Cat
(
entries
.
map
(
_
.
io
.
primary_ready
)).
andR
// L1MissTrace Chisel DB
val
debug_miss_trace
=
Wire
(
new
L1MissTrace
)
debug_miss_trace
.
vaddr
:=
io
.
req
.
bits
.
vaddr
debug_miss_trace
.
paddr
:=
io
.
req
.
bits
.
addr
debug_miss_trace
.
source
:=
io
.
req
.
bits
.
source
debug_miss_trace
.
pc
:=
io
.
req
.
bits
.
pc
val
table
=
ChiselDB
.
createTable
(
"L1MissQMissTrace_hart"
+
p
(
XSCoreParamsKey
).
HartId
.
toString
,
new
L1MissTrace
)
table
.
log
(
debug_miss_trace
,
io
.
req
.
valid
&&
!
io
.
req
.
bits
.
cancel
&&
alloc
,
"MissQueue"
,
clock
,
reset
)
// Difftest
if
(
env
.
EnableDifftest
)
{
val
difftest
=
Module
(
new
DifftestRefillEvent
)
difftest
.
io
.
clock
:=
clock
...
...
@@ -717,11 +752,14 @@ class MissQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule wi
difftest
.
io
.
data
:=
io
.
refill_to_ldq
.
bits
.
data_raw
.
asTypeOf
(
difftest
.
io
.
data
)
}
// Perf count
XSPerfAccumulate
(
"miss_req"
,
io
.
req
.
fire
())
XSPerfAccumulate
(
"miss_req_allocate"
,
io
.
req
.
fire
()
&&
alloc
)
XSPerfAccumulate
(
"miss_req_merge_load"
,
io
.
req
.
fire
()
&&
merge
&&
io
.
req
.
bits
.
isLoad
)
XSPerfAccumulate
(
"miss_req_reject_load"
,
io
.
req
.
valid
&&
reject
&&
io
.
req
.
bits
.
isLoad
)
XSPerfAccumulate
(
"miss_req_merge_load"
,
io
.
req
.
fire
()
&&
merge
&&
io
.
req
.
bits
.
is
From
Load
)
XSPerfAccumulate
(
"miss_req_reject_load"
,
io
.
req
.
valid
&&
reject
&&
io
.
req
.
bits
.
is
From
Load
)
XSPerfAccumulate
(
"probe_blocked_by_miss"
,
io
.
probe_block
)
XSPerfAccumulate
(
"prefetch_primary_fire"
,
io
.
req
.
fire
()
&&
alloc
&&
io
.
req
.
bits
.
isFromPrefetch
)
XSPerfAccumulate
(
"prefetch_secondary_fire"
,
io
.
req
.
fire
()
&&
merge
&&
io
.
req
.
bits
.
isFromPrefetch
)
val
max_inflight
=
RegInit
(
0.
U
((
log2Up
(
cfg
.
nMissEntries
)
+
1
).
W
))
val
num_valids
=
PopCount
(~
Cat
(
primary_ready_vec
).
asUInt
)
when
(
num_valids
>
max_inflight
)
{
...
...
src/main/scala/xiangshan/cache/dcache/mainpipe/RefillPipe.scala
浏览文件 @
b35479a0
...
...
@@ -30,6 +30,8 @@ class RefillPipeReqCtrl(implicit p: Parameters) extends DCacheBundle {
val
id
=
UInt
(
reqIdWidth
.
W
)
val
error
=
Bool
()
val
prefetch
=
Bool
()
val
access
=
Bool
()
def
paddrWithVirtualAlias
:
UInt
=
{
Cat
(
alias
,
addr
(
DCacheSameVPAddrLength
-
1
,
0
))
...
...
@@ -51,6 +53,8 @@ class RefillPipeReq(implicit p: Parameters) extends RefillPipeReqCtrl {
ctrl
.
miss_id
:=
miss_id
ctrl
.
id
:=
id
ctrl
.
error
:=
error
ctrl
.
prefetch
:=
prefetch
ctrl
.
access
:=
access
ctrl
}
}
...
...
@@ -67,8 +71,10 @@ class RefillPipe(implicit p: Parameters) extends DCacheModule {
val
data_write
=
DecoupledIO
(
new
L1BankedDataWriteReq
)
val
data_write_dup
=
Vec
(
DCacheBanks
,
Valid
(
new
L1BankedDataWriteReqCtrl
))
val
meta_write
=
DecoupledIO
(
new
MetaWriteReq
)
val
error_flag_write
=
DecoupledIO
(
new
ErrorWriteReq
)
val
meta_write
=
DecoupledIO
(
new
CohMetaWriteReq
)
val
error_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
val
prefetch_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
val
access_flag_write
=
DecoupledIO
(
new
FlagMetaWriteReq
)
val
tag_write
=
DecoupledIO
(
new
TagWriteReq
)
val
store_resp
=
ValidIO
(
new
DCacheLineResp
)
val
release_wakeup
=
ValidIO
(
UInt
(
log2Up
(
cfg
.
nMissEntries
).
W
))
...
...
@@ -113,7 +119,17 @@ class RefillPipe(implicit p: Parameters) extends DCacheModule {
io
.
error_flag_write
.
valid
:=
io
.
req_dup_for_err_w
.
valid
io
.
error_flag_write
.
bits
.
idx
:=
req_dup_for_err_w
.
idx
io
.
error_flag_write
.
bits
.
way_en
:=
req_dup_for_err_w
.
way_en
io
.
error_flag_write
.
bits
.
error
:=
refill_w_req
.
error
io
.
error_flag_write
.
bits
.
flag
:=
refill_w_req
.
error
io
.
prefetch_flag_write
.
valid
:=
io
.
req_dup_for_err_w
.
valid
io
.
prefetch_flag_write
.
bits
.
idx
:=
req_dup_for_err_w
.
idx
io
.
prefetch_flag_write
.
bits
.
way_en
:=
req_dup_for_err_w
.
way_en
io
.
prefetch_flag_write
.
bits
.
flag
:=
refill_w_req
.
prefetch
io
.
access_flag_write
.
valid
:=
io
.
req_dup_for_err_w
.
valid
io
.
access_flag_write
.
bits
.
idx
:=
req_dup_for_err_w
.
idx
io
.
access_flag_write
.
bits
.
way_en
:=
req_dup_for_err_w
.
way_en
io
.
access_flag_write
.
bits
.
flag
:=
refill_w_req
.
access
io
.
tag_write
.
valid
:=
io
.
req_dup_for_tag_w
.
valid
io
.
tag_write
.
bits
.
idx
:=
req_dup_for_tag_w
.
idx
...
...
src/main/scala/xiangshan/cache/dcache/meta/AsynchronousMetaArray.scala
浏览文件 @
b35479a0
...
...
@@ -40,19 +40,19 @@ class MetaReadReq(implicit p: Parameters) extends DCacheBundle {
val
way_en
=
UInt
(
nWays
.
W
)
}
class
MetaWriteReq
(
implicit
p
:
Parameters
)
extends
MetaReadReq
{
class
Coh
MetaWriteReq
(
implicit
p
:
Parameters
)
extends
MetaReadReq
{
val
meta
=
new
Meta
}
class
Error
WriteReq
(
implicit
p
:
Parameters
)
extends
MetaReadReq
{
val
error
=
Bool
()
class
FlagMeta
WriteReq
(
implicit
p
:
Parameters
)
extends
MetaReadReq
{
val
flag
=
Bool
()
}
class
Asynchronous
MetaArray
(
readPorts
:
Int
,
writePorts
:
Int
)(
implicit
p
:
Parameters
)
extends
DCacheModule
{
class
L1Coh
MetaArray
(
readPorts
:
Int
,
writePorts
:
Int
)(
implicit
p
:
Parameters
)
extends
DCacheModule
{
val
io
=
IO
(
new
Bundle
()
{
val
read
=
Vec
(
readPorts
,
Flipped
(
DecoupledIO
(
new
MetaReadReq
)))
val
resp
=
Output
(
Vec
(
readPorts
,
Vec
(
nWays
,
new
Meta
)))
val
write
=
Vec
(
writePorts
,
Flipped
(
DecoupledIO
(
new
MetaWriteReq
)))
val
write
=
Vec
(
writePorts
,
Flipped
(
DecoupledIO
(
new
Coh
MetaWriteReq
)))
})
val
meta_array
=
RegInit
(
...
...
@@ -103,12 +103,12 @@ class AsynchronousMetaArray(readPorts: Int, writePorts: Int)(implicit p: Paramet
}
}
class
Error
Array
(
readPorts
:
Int
,
writePorts
:
Int
)(
implicit
p
:
Parameters
)
extends
DCacheModule
{
class
L1FlagMeta
Array
(
readPorts
:
Int
,
writePorts
:
Int
)(
implicit
p
:
Parameters
)
extends
DCacheModule
{
val
io
=
IO
(
new
Bundle
()
{
val
read
=
Vec
(
readPorts
,
Flipped
(
DecoupledIO
(
new
MetaReadReq
)))
val
resp
=
Output
(
Vec
(
readPorts
,
Vec
(
nWays
,
Bool
())))
val
write
=
Vec
(
writePorts
,
Flipped
(
DecoupledIO
(
new
Error
WriteReq
)))
// customized cache op port
val
write
=
Vec
(
writePorts
,
Flipped
(
DecoupledIO
(
new
FlagMeta
WriteReq
)))
// customized cache op port
// val cacheOp = Flipped(new L1CacheInnerOpIO)
})
...
...
@@ -152,7 +152,7 @@ class ErrorArray(readPorts: Int, writePorts: Int)(implicit p: Parameters) extend
s0_way_wen
(
way
)(
wport
)
:=
write
.
valid
&&
wen
s1_way_wen
(
way
)(
wport
)
:=
RegNext
(
s0_way_wen
(
way
)(
wport
))
s1_way_waddr
(
way
)(
wport
)
:=
RegEnable
(
write
.
bits
.
idx
,
s0_way_wen
(
way
)(
wport
))
s1_way_wdata
(
way
)(
wport
)
:=
RegEnable
(
write
.
bits
.
error
,
s0_way_wen
(
way
)(
wport
))
s1_way_wdata
(
way
)(
wport
)
:=
RegEnable
(
write
.
bits
.
flag
,
s0_way_wen
(
way
)(
wport
))
when
(
s1_way_wen
(
way
)(
wport
))
{
meta_array
(
s1_way_waddr
(
way
)(
wport
))(
way
)
:=
s1_way_wdata
(
way
)(
wport
)
}
...
...
src/main/scala/xiangshan/cache/dcache/meta/MetaArray.scala
→
src/main/scala/xiangshan/cache/dcache/meta/
Legacy
MetaArray.scala
浏览文件 @
b35479a0
文件已移动
src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
浏览文件 @
b35479a0
...
...
@@ -359,6 +359,8 @@ class TlbReq(implicit p: Parameters) extends TlbBundle {
val
cmd
=
Output
(
TlbCmd
())
val
size
=
Output
(
UInt
(
log2Ceil
(
log2Ceil
(
XLEN
/
8
)+
1
).
W
))
val
kill
=
Output
(
Bool
())
// Use for blocked tlb that need sync with other module like icache
// do not translate, but still do pmp/pma check
val
no_translate
=
Output
(
Bool
())
val
debug
=
new
Bundle
{
val
pc
=
Output
(
UInt
(
XLEN
.
W
))
val
robIdx
=
Output
(
new
RobPtr
)
...
...
src/main/scala/xiangshan/cache/mmu/TLB.scala
浏览文件 @
b35479a0
...
...
@@ -72,6 +72,7 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
// val vmEnable = satp.mode === 8.U // && (mode < ModeM) // FIXME: fix me when boot xv6/linux...
val
vmEnable
=
if
(
EnbaleTlbDebug
)
(
satp
.
mode
===
8.
U
)
else
(
satp
.
mode
===
8.
U
&&
(
mode
<
ModeM
))
val
portTranslateEnable
=
(
0
until
Width
).
map
(
i
=>
vmEnable
&&
!
req
(
i
).
bits
.
no_translate
)
val
req_in
=
req
val
req_out
=
req
.
map
(
a
=>
RegEnable
(
a
.
bits
,
a
.
fire
()))
...
...
@@ -118,10 +119,11 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
def
TLBRead
(
i
:
Int
)
=
{
val
(
e_hit
,
e_ppn
,
e_perm
,
e_super_hit
,
e_super_ppn
,
static_pm
)
=
entries
.
io
.
r_resp_apply
(
i
)
val
(
p_hit
,
p_ppn
,
p_perm
)
=
ptw_resp_bypass
(
get_pn
(
req_in
(
i
).
bits
.
vaddr
))
val
enable
=
portTranslateEnable
(
i
)
val
hit
=
e_hit
||
p_hit
val
miss
=
!
hit
&&
vmE
nable
val
fast_miss
=
!(
e_super_hit
||
p_hit
)
&&
vmE
nable
val
miss
=
!
hit
&&
e
nable
val
fast_miss
=
!(
e_super_hit
||
p_hit
)
&&
e
nable
hit
.
suggestName
(
s
"hit_read_${i}"
)
miss
.
suggestName
(
s
"miss_read_${i}"
)
...
...
@@ -138,15 +140,15 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
perm
(
d
)
:=
Mux
(
p_hit
,
p_perm
,
e_perm
(
d
))
val
paddr
=
Cat
(
ppn
(
d
),
get_off
(
req_out
(
i
).
vaddr
))
resp
(
i
).
bits
.
paddr
(
d
)
:=
Mux
(
vmE
nable
,
paddr
,
vaddr
)
resp
(
i
).
bits
.
paddr
(
d
)
:=
Mux
(
e
nable
,
paddr
,
vaddr
)
}
XSDebug
(
req_out_v
(
i
),
p
"(${i.U}) hit:${hit} miss:${miss} ppn:${Hexadecimal(ppn(0))} perm:${perm(0)}\n"
)
val
pmp_paddr
=
Mux
(
vmE
nable
,
Cat
(
Mux
(
p_hit
,
p_ppn
,
e_super_ppn
),
get_off
(
req_out
(
i
).
vaddr
)),
vaddr
)
val
pmp_paddr
=
Mux
(
e
nable
,
Cat
(
Mux
(
p_hit
,
p_ppn
,
e_super_ppn
),
get_off
(
req_out
(
i
).
vaddr
)),
vaddr
)
// pmp_paddr seems same to paddr functionally. It abandons normal_ppn for timing optimization.
// val pmp_paddr = Mux(
vmE
nable, paddr, vaddr)
val
static_pm_valid
=
!(
e_super_hit
||
p_hit
)
&&
vmE
nable
&&
q
.
partialStaticPMP
.
B
// val pmp_paddr = Mux(
e
nable, paddr, vaddr)
val
static_pm_valid
=
!(
e_super_hit
||
p_hit
)
&&
e
nable
&&
q
.
partialStaticPMP
.
B
(
hit
,
miss
,
pmp_paddr
,
static_pm
,
static_pm_valid
,
perm
)
}
...
...
@@ -174,7 +176,7 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
val
ldPf
=
(
ldPermFail
||
pf
)
&&
(
TlbCmd
.
isRead
(
cmd
)
&&
!
TlbCmd
.
isAmo
(
cmd
))
val
stPf
=
(
stPermFail
||
pf
)
&&
(
TlbCmd
.
isWrite
(
cmd
)
||
TlbCmd
.
isAmo
(
cmd
))
val
instrPf
=
(
instrPermFail
||
pf
)
&&
TlbCmd
.
isExec
(
cmd
)
val
fault_valid
=
vmEnable
val
fault_valid
=
portTranslateEnable
(
idx
)
resp
(
idx
).
bits
.
excp
(
nDups
).
pf
.
ld
:=
(
ldPf
||
ldUpdate
)
&&
fault_valid
&&
!
af
resp
(
idx
).
bits
.
excp
(
nDups
).
pf
.
st
:=
(
stPf
||
stUpdate
)
&&
fault_valid
&&
!
af
resp
(
idx
).
bits
.
excp
(
nDups
).
pf
.
instr
:=
(
instrPf
||
instrUpdate
)
&&
fault_valid
&&
!
af
...
...
@@ -218,8 +220,8 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
io
.
ptw
.
req
(
idx
).
fire
()
||
resp
(
idx
).
fire
(),
flush_pipe
(
idx
))
// when ptw resp, check if hit, reset miss_v, resp to lsu/ifu
resp
(
idx
).
valid
:=
req_out_v
(
idx
)
&&
!(
miss_v
&&
vmEnable
)
when
(
io
.
ptw
.
resp
.
fire
()
&&
hit
&&
req_out_v
(
idx
)
&&
vmEnable
)
{
resp
(
idx
).
valid
:=
req_out_v
(
idx
)
&&
!(
miss_v
&&
portTranslateEnable
(
idx
)
)
when
(
io
.
ptw
.
resp
.
fire
()
&&
hit
&&
req_out_v
(
idx
)
&&
portTranslateEnable
(
idx
)
)
{
val
pte
=
io
.
ptw
.
resp
.
bits
resp
(
idx
).
valid
:=
true
.
B
resp
(
idx
).
bits
.
miss
:=
false
.
B
// for blocked tlb, this is useless
...
...
@@ -242,7 +244,7 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
// however, some outside modules like icache, dont care flushPipe, and still waiting for tlb resp
// just resp valid and raise page fault to go through. The pipe(ifu) will abandon it.
if
(!
q
.
outsideRecvFlush
)
{
when
(
req_out_v
(
idx
)
&&
flush_pipe
(
idx
)
&&
vmEnable
)
{
when
(
req_out_v
(
idx
)
&&
flush_pipe
(
idx
)
&&
portTranslateEnable
(
idx
)
)
{
resp
(
idx
).
valid
:=
true
.
B
for
(
d
<-
0
until
nRespDups
)
{
resp
(
idx
).
bits
.
excp
(
d
).
pf
.
ld
:=
true
.
B
// sfence happened, pf for not to use this addr
...
...
@@ -271,21 +273,21 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
val
result_ok
=
req_in
.
map
(
a
=>
RegNext
(
a
.
fire
()))
val
perfEvents
=
Seq
(
(
"access"
,
PopCount
((
0
until
Width
).
map
{
i
=>
if
(
Block
(
i
))
io
.
requestor
(
i
).
req
.
fire
()
else
vmEnable
&&
result_ok
(
i
)
})),
(
"miss "
,
PopCount
((
0
until
Width
).
map
{
i
=>
if
(
Block
(
i
))
vmEnable
&&
result_ok
(
i
)
&&
missVec
(
i
)
else
ptw
.
req
(
i
).
fire
()
})),
(
"access"
,
PopCount
((
0
until
Width
).
map
{
i
=>
if
(
Block
(
i
))
io
.
requestor
(
i
).
req
.
fire
()
else
portTranslateEnable
(
i
)
&&
result_ok
(
i
)
})),
(
"miss "
,
PopCount
((
0
until
Width
).
map
{
i
=>
if
(
Block
(
i
))
portTranslateEnable
(
i
)
&&
result_ok
(
i
)
&&
missVec
(
i
)
else
ptw
.
req
(
i
).
fire
()
})),
)
generatePerfEvent
()
// perf log
for
(
i
<-
0
until
Width
)
{
if
(
Block
(
i
))
{
XSPerfAccumulate
(
s
"access${i}"
,
result_ok
(
i
)
&&
vmEnable
)
XSPerfAccumulate
(
s
"access${i}"
,
result_ok
(
i
)
&&
portTranslateEnable
(
i
)
)
XSPerfAccumulate
(
s
"miss${i}"
,
result_ok
(
i
)
&&
missVec
(
i
))
}
else
{
XSPerfAccumulate
(
"first_access"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
vmEnable
&&
RegNext
(
req
(
i
).
bits
.
debug
.
isFirstIssue
))
XSPerfAccumulate
(
"access"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
vmEnable
)
XSPerfAccumulate
(
"first_miss"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
vmEnable
&&
missVec
(
i
)
&&
RegNext
(
req
(
i
).
bits
.
debug
.
isFirstIssue
))
XSPerfAccumulate
(
"miss"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
vmEnable
&&
missVec
(
i
))
XSPerfAccumulate
(
"first_access"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
portTranslateEnable
(
i
)
&&
RegNext
(
req
(
i
).
bits
.
debug
.
isFirstIssue
))
XSPerfAccumulate
(
"access"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
portTranslateEnable
(
i
)
)
XSPerfAccumulate
(
"first_miss"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
portTranslateEnable
(
i
)
&&
missVec
(
i
)
&&
RegNext
(
req
(
i
).
bits
.
debug
.
isFirstIssue
))
XSPerfAccumulate
(
"miss"
+
Integer
.
toString
(
i
,
10
),
result_ok
(
i
)
&&
portTranslateEnable
(
i
)
&&
missVec
(
i
))
}
}
XSPerfAccumulate
(
"ptw_resp_count"
,
ptw
.
resp
.
fire
())
...
...
@@ -322,7 +324,7 @@ class TLB(Width: Int, nRespDups: Int = 1, Block: Seq[Boolean], q: TLBParameters)
val
difftest
=
Module
(
new
DifftestL1TLBEvent
)
difftest
.
io
.
clock
:=
clock
difftest
.
io
.
coreid
:=
p
(
XSCoreParamsKey
).
HartId
.
asUInt
difftest
.
io
.
valid
:=
RegNext
(
io
.
requestor
(
i
).
req
.
fire
)
&&
!
RegNext
(
io
.
requestor
(
i
).
req_kill
)
&&
io
.
requestor
(
i
).
resp
.
fire
&&
!
io
.
requestor
(
i
).
resp
.
bits
.
miss
&&
!
pf
&&
!
af
&&
vmEnable
difftest
.
io
.
valid
:=
RegNext
(
io
.
requestor
(
i
).
req
.
fire
)
&&
!
RegNext
(
io
.
requestor
(
i
).
req_kill
)
&&
io
.
requestor
(
i
).
resp
.
fire
&&
!
io
.
requestor
(
i
).
resp
.
bits
.
miss
&&
!
pf
&&
!
af
&&
portTranslateEnable
(
i
)
difftest
.
io
.
index
:=
i
.
U
difftest
.
io
.
l1tlbid
:=
l1tlbid
difftest
.
io
.
satp
:=
io
.
csr
.
satp
.
ppn
...
...
src/main/scala/xiangshan/frontend/IFU.scala
浏览文件 @
b35479a0
...
...
@@ -551,6 +551,7 @@ class NewIFU(implicit p: Parameters) extends XSModule
io
.
iTLBInter
.
req
.
bits
.
kill
:=
false
.
B
// IFU use itlb for mmio, doesn't need sync, set it to false
io
.
iTLBInter
.
req
.
bits
.
cmd
:=
TlbCmd
.
exec
io
.
iTLBInter
.
req
.
bits
.
debug
.
robIdx
:=
DontCare
io
.
iTLBInter
.
req
.
bits
.
no_translate
:=
false
.
B
io
.
iTLBInter
.
req
.
bits
.
debug
.
isFirstIssue
:=
DontCare
io
.
pmp
.
req
.
valid
:=
(
mmio_state
===
m_sendPMP
)
&&
f3_req_is_mmio
...
...
src/main/scala/xiangshan/frontend/icache/ICacheMainPipe.scala
浏览文件 @
b35479a0
...
...
@@ -196,6 +196,7 @@ class ICacheMainPipe(implicit p: Parameters) extends ICacheModule
toITLB
.
map
{
port
=>
port
.
bits
.
cmd
:=
TlbCmd
.
exec
port
.
bits
.
debug
.
robIdx
:=
DontCare
port
.
bits
.
no_translate
:=
false
.
B
port
.
bits
.
debug
.
isFirstIssue
:=
DontCare
}
...
...
src/main/scala/xiangshan/frontend/icache/IPrefetch.scala
浏览文件 @
b35479a0
...
...
@@ -107,6 +107,7 @@ class IPrefetchPipe(implicit p: Parameters) extends IPrefetchModule
toITLB
.
bits
.
kill
:=
DontCare
toITLB
.
bits
.
cmd
:=
TlbCmd
.
exec
toITLB
.
bits
.
debug
.
robIdx
:=
DontCare
toITLB
.
bits
.
no_translate
:=
false
.
B
toITLB
.
bits
.
debug
.
isFirstIssue
:=
DontCare
...
...
src/main/scala/xiangshan/mem/MemCommon.scala
浏览文件 @
b35479a0
...
...
@@ -68,8 +68,10 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundleWithMicroOp with
val
forwardMask
=
Vec
(
8
,
Bool
())
val
forwardData
=
Vec
(
8
,
UInt
(
8.
W
))
//softprefetch
val
isSoftPrefetch
=
Bool
()
// prefetch
val
isPrefetch
=
Bool
()
val
isHWPrefetch
=
Bool
()
def
isSWPrefetch
=
isPrefetch
&&
!
isHWPrefetch
// For debug usage
val
isFirstIssue
=
Bool
()
...
...
@@ -84,6 +86,37 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundleWithMicroOp with
val
forward_tlDchannel
=
Bool
()
}
/** Load/store pipeline bundle extended with two extra flag bits.
  *
  * NOTE(review): judging by the names, `meta_prefetch` / `meta_access` mirror
  * per-line dcache meta flags used for prefetcher training -- confirm with the
  * producer of this bundle.
  */
class LdPrefetchTrainBundle(implicit p: Parameters) extends LsPipelineBundle {
  val meta_prefetch = Bool()
  val meta_access = Bool()

  /** Drives this bundle from a plain [[LsPipelineBundle]].
    *
    * Fields listed in the first group are copied from `input`; every remaining
    * field that this method touches is explicitly marked `DontCare`.
    *
    * @param input the pipeline bundle to copy from
    */
  def fromLsPipelineBundle(input: LsPipelineBundle) = {
    // Address, data and micro-op information.
    vaddr := input.vaddr
    paddr := input.paddr
    mask := input.mask
    data := input.data
    uop := input.uop
    wlineflag := input.wlineflag

    // Pipeline status flags.
    miss := input.miss
    tlbMiss := input.tlbMiss
    ptwBack := input.ptwBack
    mmio := input.mmio
    rsIdx := input.rsIdx

    // Store-to-load forwarding results.
    forwardMask := input.forwardMask
    forwardData := input.forwardData

    // Prefetch classification and debug information.
    isPrefetch := input.isPrefetch
    isHWPrefetch := input.isHWPrefetch
    isFirstIssue := input.isFirstIssue

    // These fields are not derived from `input`; leave them unconstrained.
    Seq[Data](
      meta_prefetch,
      meta_access,
      forward_tlDchannel,
      mshrid,
      replayCarry,
      atomic,
      isLoadReplay
    ).foreach(_ := DontCare)
  }
}
class
LqWriteBundle
(
implicit
p
:
Parameters
)
extends
LsPipelineBundle
{
// queue entry data, except flag bits, will be updated if writeQueue is true,
// valid bit in LqWriteBundle will be ignored
...
...
@@ -104,7 +137,8 @@ class LqWriteBundle(implicit p: Parameters) extends LsPipelineBundle {
rsIdx
:=
input
.
rsIdx
forwardMask
:=
input
.
forwardMask
forwardData
:=
input
.
forwardData
isSoftPrefetch
:=
input
.
isSoftPrefetch
isPrefetch
:=
input
.
isPrefetch
isHWPrefetch
:=
input
.
isHWPrefetch
isFirstIssue
:=
input
.
isFirstIssue
isLoadReplay
:=
input
.
isLoadReplay
mshrid
:=
input
.
mshrid
...
...
src/main/scala/xiangshan/mem/MemTrace.scala
0 → 100644
浏览文件 @
b35479a0
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package
xiangshan.mem.trace
import
chipsalliance.rocketchip.config.Parameters
import
chisel3._
import
chisel3.util._
/** Record describing one L1 miss, using fixed field widths.
  *
  * NOTE(review): the 39/36-bit widths presumably match the core's virtual and
  * physical address widths -- confirm against the core parameters.
  */
class L1MissTrace extends Bundle {
  /** Virtual address (39 bits). */
  val vaddr = UInt(39.W)

  /** Physical address (36 bits). */
  val paddr = UInt(36.W)

  /** 4-bit source tag; the encoding is not defined in this file. */
  val source = UInt(4.W)

  /** Program counter (39 bits). */
  val pc = UInt(39.W)
}
\ No newline at end of file
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
浏览文件 @
b35479a0
...
...
@@ -347,7 +347,8 @@ class LoadQueue(implicit p: Parameters) extends XSModule
})
(
0
until
LoadPipelineWidth
).
map
(
i
=>
{
vaddrModule
.
io
.
raddr
(
LoadPipelineWidth
+
i
)
:=
loadReplaySelGen
(
i
)
// vaddrModule rport 0 and 1 is used by exception and mmio
vaddrModule
.
io
.
raddr
(
2
+
i
)
:=
loadReplaySelGen
(
i
)
})
(
0
until
LoadPipelineWidth
).
map
(
i
=>
{
...
...
src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala
浏览文件 @
b35479a0
...
...
@@ -55,6 +55,7 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant
val
in
=
Reg
(
new
ExuInput
())
val
exceptionVec
=
RegInit
(
0.
U
.
asTypeOf
(
ExceptionVec
()))
val
atom_override_xtval
=
RegInit
(
false
.
B
)
val
have_sent_first_tlb_req
=
RegInit
(
false
.
B
)
val
isLr
=
in
.
uop
.
ctrl
.
fuOpType
===
LSUOpType
.
lr_w
||
in
.
uop
.
ctrl
.
fuOpType
===
LSUOpType
.
lr_d
// paddr after translation
val
paddr
=
Reg
(
UInt
())
...
...
@@ -100,6 +101,7 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant
in
:=
io
.
in
.
bits
in
.
src
(
1
)
:=
in
.
src
(
1
)
// leave src2 unchanged
state
:=
s_tlb_and_flush_sbuffer_req
have_sent_first_tlb_req
:=
false
.
B
}
}
...
...
@@ -136,7 +138,12 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule with MemoryOpConstant
// send req to sbuffer to flush it if it is not empty
io
.
flush_sbuffer
.
valid
:=
Mux
(
sbuffer_empty
,
false
.
B
,
true
.
B
)
when
(
io
.
dtlb
.
resp
.
fire
){
// do not accept tlb resp in the first cycle
// this limitation is for hw prefetcher
// when !have_sent_first_tlb_req, tlb resp may come from hw prefetch
have_sent_first_tlb_req
:=
true
.
B
when
(
io
.
dtlb
.
resp
.
fire
&&
have_sent_first_tlb_req
){
paddr
:=
io
.
dtlb
.
resp
.
bits
.
paddr
(
0
)
// exception handling
val
addrAligned
=
LookupTree
(
in
.
uop
.
ctrl
.
fuOpType
(
1
,
0
),
List
(
...
...
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
浏览文件 @
b35479a0
此差异已折叠。
点击以展开。
src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
浏览文件 @
b35479a0
...
...
@@ -53,6 +53,7 @@ class StoreUnit_S0(implicit p: Parameters) extends XSModule {
io
.
dtlbReq
.
bits
.
size
:=
LSUOpType
.
size
(
io
.
in
.
bits
.
uop
.
ctrl
.
fuOpType
)
io
.
dtlbReq
.
bits
.
kill
:=
DontCare
io
.
dtlbReq
.
bits
.
debug
.
robIdx
:=
io
.
in
.
bits
.
uop
.
robIdx
io
.
dtlbReq
.
bits
.
no_translate
:=
false
.
B
io
.
dtlbReq
.
bits
.
debug
.
pc
:=
io
.
in
.
bits
.
uop
.
cf
.
pc
io
.
dtlbReq
.
bits
.
debug
.
isFirstIssue
:=
io
.
isFirstIssue
...
...
src/main/scala/xiangshan/mem/prefetch/BasePrefecher.scala
0 → 100644
浏览文件 @
b35479a0
package
xiangshan.mem.prefetch
import
chisel3._
import
chisel3.util._
import
chipsalliance.rocketchip.config.Parameters
import
xiangshan._
import
xiangshan.cache.mmu.TlbRequestIO
import
xiangshan.mem.
{
LdPrefetchTrainBundle
,
L1PrefetchReq
}
/** IO bundle exposed by every prefetcher built on [[BasePrefecher]]. */
class PrefetcherIO()(implicit p: Parameters) extends XSBundle {
  /** Training inputs: one valid-qualified bundle per load unit (`exuParameters.LduCnt`). */
  val ld_in = Flipped(
    Vec(exuParameters.LduCnt, ValidIO(new LdPrefetchTrainBundle()))
  )

  /** TLB request port, created with two duplicated responses. */
  val tlb_req = new TlbRequestIO(nRespDups = 2)

  /** Valid-qualified physical address (`PAddrBits` wide). */
  val pf_addr = ValidIO(UInt(PAddrBits.W))

  /** Ready/valid L1 prefetch request. */
  val l1_req = DecoupledIO(new L1PrefetchReq())

  /** Enable input for the prefetcher. */
  val enable = Input(Bool())
}
/** Marker trait with no members, for prefetcher parameter classes to extend. */
trait PrefetcherParams
/** Abstract base module for prefetchers; it only declares the common
  * [[PrefetcherIO]] port, leaving all logic to subclasses.
  */
abstract class BasePrefecher()(implicit p: Parameters) extends XSModule {
  val io = IO(new PrefetcherIO())
}
\ No newline at end of file
src/main/scala/xiangshan/mem/prefetch/L1PrefetchInterface.scala
0 → 100644
浏览文件 @
b35479a0
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package
xiangshan.mem
import
chipsalliance.rocketchip.config.Parameters
import
chisel3._
import
chisel3.util._
import
utils._
import
utility._
import
xiangshan.ExceptionNO._
import
xiangshan._
import
xiangshan.backend.fu.PMPRespBundle
import
xiangshan.cache._
import
xiangshan.cache.mmu.
{
TlbCmd
,
TlbReq
,
TlbRequestIO
,
TlbResp
}
/** Prefetch request sent to the L1 data cache. */
class L1PrefetchReq(implicit p: Parameters) extends XSBundle with HasDCacheParameters {
  /** Physical address to prefetch (`PAddrBits` wide). */
  val paddr = UInt(PAddrBits.W)

  /** Two alias bits, placed above the shared low address bits by [[getVaddr]]. */
  val alias = UInt(2.W)

  /** One-bit confidence level; see the policy note at the end of this class. */
  val confidence = UInt(1.W)

  /** Set when the request is a store prefetch. */
  val is_store = Bool()

  /** Builds an index-only virtual address: `alias` concatenated above the low
    * `DCacheSameVPAddrLength` bits of `paddr`.
    *
    * Only the index bits of the result are meaningful; do not use it as a tag.
    */
  def getVaddr(): UInt = {
    val sharedLowBits = paddr(DCacheSameVPAddrLength - 1, 0)
    Cat(alias, sharedLowBits)
  }

  // Intended handling when an L1 prefetch request arrives at the load unit:
  //   if (confidence == 1)
  //     override load unit 2 load req
  //   else if (load unit 1/2 is available)
  //     send prefetch req
  //   else
  //     report prefetch !ready
}
/** Two busy flags bundled as an L1 prefetch hint.
  *
  * NOTE(review): by name, `loadbusy` refers to the load pipeline and
  * `missqbusy` to the miss queue -- confirm with the driver of this bundle.
  */
class L1PrefetchHint(implicit p: Parameters) extends XSBundle with HasDCacheParameters {
  val loadbusy = Bool()
  val missqbusy = Bool()
}
/** Pseudo-random L1 prefetch request generator.
  *
  * Request fields are derived from several independently seeded `LFSR64`
  * instances. The generated physical address is `0x80000000` plus a
  * 64-byte-granular offset below 4 KiB.
  */
class L1PrefetchFuzzer(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle() {
    // prefetch req interface
    val req = Decoupled(new L1PrefetchReq())
    // for fuzzer address gen
    val vaddr = Input(UInt(VAddrBits.W))
    val paddr = Input(UInt(PAddrBits.W))
  })

  /** True when bits `hi..0` of a fresh `LFSR64` seeded with `seed` are all zero. */
  private def lfsrLowBitsZero(seed: Long, hi: Int) =
    LFSR64(seed = Some(seed))(hi, 0) === 0.U

  // No prefetch request queue is provided here; a prefetcher must maintain
  // its own request queue.

  // 6 random bits shifted left by 6: a multiple of 64 in the range [0, 4032].
  val rand_offset = LFSR64(seed = Some(123L))(5, 0) << 6
  // Not referenced by the request logic below.
  val rand_addr_select = lfsrLowBitsZero(567L, 3)

  // use valid vaddr and paddr
  // Both inputs are passed through DelayN(_, 2); neither result is referenced
  // by the request logic below.
  val rand_vaddr = DelayN(io.vaddr, 2)
  val rand_paddr = DelayN(io.paddr, 2)

  private val reqBits = io.req.bits
  reqBits.paddr := 0x80000000L.U + rand_offset
  // The alias bits are taken from bits 13:12 of the generated physical address.
  reqBits.alias := reqBits.paddr(13, 12)
  reqBits.confidence := lfsrLowBitsZero(789L, 4)
  reqBits.is_store := lfsrLowBitsZero(890L, 4)

  // A request is offered whenever the low 4 bits of this LFSR are zero.
  io.req.valid := lfsrLowBitsZero(901L, 3)
}
\ No newline at end of file
src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala
0 → 100644
浏览文件 @
b35479a0
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录