Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenXiangShan
XiangShan
提交
49681eda
X
XiangShan
项目概览
OpenXiangShan
/
XiangShan
10 个月 前同步成功
通知
1183
Star
3914
Fork
526
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
X
XiangShan
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
49681eda
编写于
2月 24, 2021
作者:
Y
Yinan Xu
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'origin/master' into dev-prefetch-switch
上级
35a47a38
1ef04a55
变更
59
展开全部
隐藏空白更改
内联
并排
Showing
59 changed file
with
1332 addition
and
1229 deletion
+1332
-1229
src/main/scala/system/SoC.scala
src/main/scala/system/SoC.scala
+1
-0
src/main/scala/utils/LogUtils.scala
src/main/scala/utils/LogUtils.scala
+17
-9
src/main/scala/utils/RegMap.scala
src/main/scala/utils/RegMap.scala
+1
-1
src/main/scala/xiangshan/XSCore.scala
src/main/scala/xiangshan/XSCore.scala
+58
-80
src/main/scala/xiangshan/backend/CtrlBlock.scala
src/main/scala/xiangshan/backend/CtrlBlock.scala
+1
-0
src/main/scala/xiangshan/backend/FloatBlock.scala
src/main/scala/xiangshan/backend/FloatBlock.scala
+57
-44
src/main/scala/xiangshan/backend/IntegerBlock.scala
src/main/scala/xiangshan/backend/IntegerBlock.scala
+74
-62
src/main/scala/xiangshan/backend/MemBlock.scala
src/main/scala/xiangshan/backend/MemBlock.scala
+19
-39
src/main/scala/xiangshan/backend/decode/FPDecoder.scala
src/main/scala/xiangshan/backend/decode/FPDecoder.scala
+19
-18
src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
+5
-3
src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala
+9
-3
src/main/scala/xiangshan/backend/dispatch/Dispatch2Fp.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch2Fp.scala
+1
-1
src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala
+1
-1
src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala
src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala
+2
-2
src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala
...main/scala/xiangshan/backend/dispatch/DispatchQueue.scala
+2
-2
src/main/scala/xiangshan/backend/exu/AluExeUnit.scala
src/main/scala/xiangshan/backend/exu/AluExeUnit.scala
+5
-5
src/main/scala/xiangshan/backend/exu/Exu.scala
src/main/scala/xiangshan/backend/exu/Exu.scala
+3
-20
src/main/scala/xiangshan/backend/exu/FmacExeUnit.scala
src/main/scala/xiangshan/backend/exu/FmacExeUnit.scala
+3
-3
src/main/scala/xiangshan/backend/exu/FmiscExeUnit.scala
src/main/scala/xiangshan/backend/exu/FmiscExeUnit.scala
+5
-15
src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala
src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala
+2
-2
src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala
src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala
+3
-3
src/main/scala/xiangshan/backend/exu/Wb.scala
src/main/scala/xiangshan/backend/exu/Wb.scala
+48
-23
src/main/scala/xiangshan/backend/ftq/Ftq.scala
src/main/scala/xiangshan/backend/ftq/Ftq.scala
+23
-23
src/main/scala/xiangshan/backend/fu/Alu.scala
src/main/scala/xiangshan/backend/fu/Alu.scala
+45
-39
src/main/scala/xiangshan/backend/fu/CSR.scala
src/main/scala/xiangshan/backend/fu/CSR.scala
+2
-114
src/main/scala/xiangshan/backend/fu/Jump.scala
src/main/scala/xiangshan/backend/fu/Jump.scala
+40
-21
src/main/scala/xiangshan/backend/fu/Multiplier.scala
src/main/scala/xiangshan/backend/fu/Multiplier.scala
+20
-8
src/main/scala/xiangshan/backend/fu/SRT4Divider.scala
src/main/scala/xiangshan/backend/fu/SRT4Divider.scala
+101
-61
src/main/scala/xiangshan/backend/fu/fpu/FDivSqrt.scala
src/main/scala/xiangshan/backend/fu/fpu/FDivSqrt.scala
+39
-21
src/main/scala/xiangshan/backend/fu/fpu/FMA.scala
src/main/scala/xiangshan/backend/fu/fpu/FMA.scala
+25
-13
src/main/scala/xiangshan/backend/fu/fpu/FPToFP.scala
src/main/scala/xiangshan/backend/fu/fpu/FPToFP.scala
+18
-9
src/main/scala/xiangshan/backend/fu/fpu/FPToInt.scala
src/main/scala/xiangshan/backend/fu/fpu/FPToInt.scala
+21
-13
src/main/scala/xiangshan/backend/fu/fpu/FPUSubModule.scala
src/main/scala/xiangshan/backend/fu/fpu/FPUSubModule.scala
+26
-0
src/main/scala/xiangshan/backend/fu/fpu/IntToFP.scala
src/main/scala/xiangshan/backend/fu/fpu/IntToFP.scala
+39
-13
src/main/scala/xiangshan/backend/issue/ReservationStation.scala
...in/scala/xiangshan/backend/issue/ReservationStation.scala
+61
-22
src/main/scala/xiangshan/backend/rename/Rename.scala
src/main/scala/xiangshan/backend/rename/Rename.scala
+8
-0
src/main/scala/xiangshan/backend/roq/Roq.scala
src/main/scala/xiangshan/backend/roq/Roq.scala
+13
-13
src/main/scala/xiangshan/cache/DCacheWrapper.scala
src/main/scala/xiangshan/cache/DCacheWrapper.scala
+11
-1
src/main/scala/xiangshan/cache/ICache.scala
src/main/scala/xiangshan/cache/ICache.scala
+3
-5
src/main/scala/xiangshan/cache/ICacheMissQueue.scala
src/main/scala/xiangshan/cache/ICacheMissQueue.scala
+7
-10
src/main/scala/xiangshan/cache/MainPipe.scala
src/main/scala/xiangshan/cache/MainPipe.scala
+189
-124
src/main/scala/xiangshan/cache/MissQueue.scala
src/main/scala/xiangshan/cache/MissQueue.scala
+9
-14
src/main/scala/xiangshan/cache/StoreReplayUnit.scala
src/main/scala/xiangshan/cache/StoreReplayUnit.scala
+5
-4
src/main/scala/xiangshan/cache/TLB.scala
src/main/scala/xiangshan/cache/TLB.scala
+94
-82
src/main/scala/xiangshan/cache/prefetch/L1plusPrefetcher.scala
...ain/scala/xiangshan/cache/prefetch/L1plusPrefetcher.scala
+10
-13
src/main/scala/xiangshan/cache/prefetch/L2Prefetcher.scala
src/main/scala/xiangshan/cache/prefetch/L2Prefetcher.scala
+9
-12
src/main/scala/xiangshan/frontend/Bim.scala
src/main/scala/xiangshan/frontend/Bim.scala
+4
-3
src/main/scala/xiangshan/frontend/Btb.scala
src/main/scala/xiangshan/frontend/Btb.scala
+4
-2
src/main/scala/xiangshan/frontend/Ibuffer.scala
src/main/scala/xiangshan/frontend/Ibuffer.scala
+1
-1
src/main/scala/xiangshan/frontend/LoopPredictor.scala
src/main/scala/xiangshan/frontend/LoopPredictor.scala
+6
-7
src/main/scala/xiangshan/frontend/RAS.scala
src/main/scala/xiangshan/frontend/RAS.scala
+6
-5
src/main/scala/xiangshan/frontend/uBTB.scala
src/main/scala/xiangshan/frontend/uBTB.scala
+114
-177
src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
+6
-8
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
+3
-2
src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
+3
-1
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
+13
-40
src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
+1
-11
src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala
src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala
+1
-1
src/test/csrc/emu.cpp
src/test/csrc/emu.cpp
+16
-0
未找到文件。
src/main/scala/system/SoC.scala
浏览文件 @
49681eda
...
...
@@ -173,6 +173,7 @@ class XSSoc()(implicit p: Parameters) extends LazyModule with HasSoCParameter {
plic
.
module
.
io
.
extra
.
get
.
intrVec
<>
RegNext
(
RegNext
(
io
.
extIntrs
))
for
(
i
<-
0
until
NumCores
)
{
xs_core
(
i
).
module
.
io
.
hartId
:=
i
.
U
xs_core
(
i
).
module
.
io
.
externalInterrupt
.
mtip
:=
clint
.
module
.
io
.
mtip
(
i
)
xs_core
(
i
).
module
.
io
.
externalInterrupt
.
msip
:=
clint
.
module
.
io
.
msip
(
i
)
// xs_core(i).module.io.externalInterrupt.meip := RegNext(RegNext(io.meip(i)))
...
...
src/main/scala/utils/LogUtils.scala
浏览文件 @
49681eda
...
...
@@ -4,6 +4,7 @@ import chisel3._
import
top.Parameters
import
xiangshan.HasXSParameter
import
utils.XSLogLevel.XSLogLevel
import
chisel3.ExcitingUtils.ConnectionType
object
XSLogLevel
extends
Enumeration
{
type
XSLogLevel
=
Value
...
...
@@ -103,26 +104,33 @@ object XSWarn extends LogHelper(XSLogLevel.WARN)
object
XSError
extends
LogHelper
(
XSLogLevel
.
ERROR
)
object
XSPerf
{
def
apply
(
perfName
:
String
,
perfCnt
:
UInt
,
acc
:
Boolean
=
false
,
intervalBits
:
Int
=
15
)(
implicit
name
:
String
)
=
{
def
apply
(
perfName
:
String
,
perfCnt
:
UInt
,
acc
:
Boolean
=
false
,
realtime
:
Boolean
=
false
,
intervalBits
:
Int
=
15
)(
implicit
name
:
String
)
=
{
val
counter
=
RegInit
(
0.
U
(
64.
W
))
val
next_counter
=
WireInit
(
0.
U
(
64.
W
))
val
logTimestamp
=
WireInit
(
0.
U
(
64.
W
))
val
en
ableDebug
=
Parameters
.
get
.
envParameters
.
EnablePerfDebug
val
en
v
=
Parameters
.
get
.
envParameters
next_counter
:=
counter
+
perfCnt
counter
:=
next_counter
if
(
en
able
Debug
)
{
if
(
en
v
.
EnablePerf
Debug
)
{
ExcitingUtils
.
addSink
(
logTimestamp
,
"logTimestamp"
)
val
printCond
=
if
(
intervalBits
==
0
)
true
.
B
else
(
logTimestamp
(
intervalBits
-
1
,
0
)
===
0.
U
)
when
(
printCond
)
{
// TODO: Need print when program exit?
if
(
acc
)
{
val
printCond
=
if
(
intervalBits
==
0
)
true
.
B
else
(
logTimestamp
(
intervalBits
-
1
,
0
)
===
0.
U
)
val
printEnable
=
if
(
realtime
)
printCond
else
false
.
B
val
xstrap
=
WireInit
(
false
.
B
)
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSink
(
xstrap
,
"XSTRAP"
,
ConnectionType
.
Debug
)
}
when
(
printEnable
)
{
// interval print
if
(
acc
)
{
XSLog
(
XSLogLevel
.
PERF
)(
true
,
true
.
B
,
p
"$perfName, $next_counter\n"
)
}
else
{
}
else
{
XSLog
(
XSLogLevel
.
PERF
)(
true
,
true
.
B
,
p
"$perfName, $perfCnt\n"
)
}
}
when
(
xstrap
)
{
// summary print
// dump acc counter by default
printf
(
"%d <- "
+
perfName
+
"\n"
,
next_counter
)
}
}
}
}
src/main/scala/utils/RegMap.scala
浏览文件 @
49681eda
...
...
@@ -37,7 +37,7 @@ object MaskedRegMap { // TODO: add read mask
}
def
isIllegalAddr
(
mapping
:
Map
[
Int
,
(
UInt
,
UInt
,
UInt
=>
UInt
,
UInt
,
UInt
=>
UInt
)],
addr
:
UInt
)
:
Bool
=
{
val
illegalAddr
=
Wire
(
Bool
())
illegalAddr
:=
LookupTreeDefault
(
addr
,
true
.
B
,
mapping
.
map
{
case
(
a
,
_
)
=>
(
a
.
U
,
false
.
B
)
})
illegalAddr
:=
LookupTreeDefault
(
addr
,
true
.
B
,
mapping
.
toSeq
.
sortBy
(
_
.
_1
).
map
{
case
(
a
,
_
)
=>
(
a
.
U
,
false
.
B
)
})
illegalAddr
}
def
generate
(
mapping
:
Map
[
Int
,
(
UInt
,
UInt
,
UInt
=>
UInt
,
UInt
,
UInt
=>
UInt
)],
addr
:
UInt
,
rdata
:
UInt
,
...
...
src/main/scala/xiangshan/XSCore.scala
浏览文件 @
49681eda
...
...
@@ -10,7 +10,7 @@ import xiangshan.backend.exu.Exu._
import
xiangshan.frontend._
import
xiangshan.mem._
import
xiangshan.backend.fu.HasExceptionNO
import
xiangshan.cache.
{
DCache
,
InstrUncache
,
DCacheParameters
,
ICache
,
ICacheParameters
,
L1plusCache
,
L1plusCacheParameters
,
PTW
,
PTWRepeater
,
Uncache
,
MemoryOpConstants
,
MissReq
}
import
xiangshan.cache.
{
DCache
,
InstrUncache
,
DCacheParameters
,
ICache
,
ICacheParameters
,
L1plusCache
,
L1plusCacheParameters
,
PTW
,
PTWRepeater
,
Uncache
,
MemoryOpConstants
,
MissReq
}
import
xiangshan.cache.prefetch._
import
chipsalliance.rocketchip.config
import
freechips.rocketchip.diplomacy.
{
AddressSet
,
LazyModule
,
LazyModuleImp
}
...
...
@@ -24,9 +24,10 @@ import utils._
object
hartIdCore
extends
(()
=>
Int
)
{
var
x
=
0
def
apply
()
:
Int
=
{
x
=
x
+
1
x
-
1
x
-
1
}
}
...
...
@@ -43,7 +44,7 @@ case class XSCoreParameters
VAddrBits
:
Int
=
39
,
PAddrBits
:
Int
=
40
,
HasFPU
:
Boolean
=
true
,
Fe
c
tchWidth
:
Int
=
8
,
FetchWidth
:
Int
=
8
,
EnableBPU
:
Boolean
=
true
,
EnableBPD
:
Boolean
=
true
,
EnableRAS
:
Boolean
=
true
,
...
...
@@ -106,7 +107,7 @@ case class XSCoreParameters
PtwL3EntrySize
:
Int
=
4096
,
//(256 * 16) or 512
PtwSPEntrySize
:
Int
=
16
,
PtwL1EntrySize
:
Int
=
16
,
PtwL2EntrySize
:
Int
=
2048
,
//(256 * 8)
PtwL2EntrySize
:
Int
=
2048
,
//(256 * 8)
NumPerfCounters
:
Int
=
16
,
NrExtIntr
:
Int
=
150
)
...
...
@@ -119,7 +120,9 @@ trait HasXSParameter {
val
XLEN
=
64
val
minFLen
=
32
val
fLen
=
64
def
xLen
=
64
val
HasMExtension
=
core
.
HasMExtension
val
HasCExtension
=
core
.
HasCExtension
val
HasDiv
=
core
.
HasDiv
...
...
@@ -133,7 +136,7 @@ trait HasXSParameter {
val
DataBits
=
XLEN
val
DataBytes
=
DataBits
/
8
val
HasFPU
=
core
.
HasFPU
val
FetchWidth
=
core
.
Fe
c
tchWidth
val
FetchWidth
=
core
.
FetchWidth
val
PredictWidth
=
FetchWidth
*
(
if
(
HasCExtension
)
2
else
1
)
val
EnableBPU
=
core
.
EnableBPU
val
EnableBPD
=
core
.
EnableBPD
// enable backing predictor(like Tage) in BPUStage3
...
...
@@ -173,7 +176,7 @@ trait HasXSParameter {
val
exuParameters
=
core
.
exuParameters
val
NRIntReadPorts
=
core
.
NRIntReadPorts
val
NRIntWritePorts
=
core
.
NRIntWritePorts
val
NRMemReadPorts
=
exuParameters
.
LduCnt
+
2
*
exuParameters
.
StuCnt
val
NRMemReadPorts
=
exuParameters
.
LduCnt
+
2
*
exuParameters
.
StuCnt
val
NRFpReadPorts
=
core
.
NRFpReadPorts
val
NRFpWritePorts
=
core
.
NRFpWritePorts
val
LoadPipelineWidth
=
core
.
LoadPipelineWidth
...
...
@@ -256,7 +259,7 @@ trait HasXSParameter {
// dcache prefetcher
val
l2PrefetcherParameters
=
L2PrefetcherParameters
(
enable
=
true
,
_type
=
"bop"
,
// "stream" or "bop"
_type
=
"bop"
,
// "stream" or "bop"
streamParams
=
StreamPrefetchParameters
(
streamCnt
=
4
,
streamSize
=
4
,
...
...
@@ -277,7 +280,8 @@ trait HasXSParameter {
)
}
trait
HasXSLog
{
this:
RawModule
=>
trait
HasXSLog
{
this:
RawModule
=>
implicit
val
moduleName
:
String
=
this
.
name
}
...
...
@@ -285,13 +289,13 @@ abstract class XSModule extends MultiIOModule
with
HasXSParameter
with
HasExceptionNO
with
HasXSLog
with
HasFPUParameters
{
with
HasFPUParameters
{
def
io
:
Record
}
//remove this trait after impl module logic
trait
NeedImpl
{
this:
RawModule
=>
trait
NeedImpl
{
this:
RawModule
=>
override
protected
def
IO
[
T
<:
Data
](
iodef
:
T
)
:
T
=
{
println
(
s
"[Warn]: (${this.name}) please reomve 'NeedImpl' after implement this module"
)
val
io
=
chisel3
.
experimental
.
IO
(
iodef
)
...
...
@@ -327,35 +331,19 @@ case class EnviromentParameters
// }
class
XSCore
()(
implicit
p
:
config.Parameters
)
extends
LazyModule
with
HasXSParameter
with
HasExeBlockHelper
{
// to fast wake up fp, mem rs
val
intBlockFastWakeUpFp
=
intExuConfigs
.
filter
(
fpFastFilter
)
val
intBlockSlowWakeUpFp
=
intExuConfigs
.
filter
(
fpSlowFilter
)
val
intBlockFastWakeUpInt
=
intExuConfigs
.
filter
(
intFastFilter
)
val
intBlockSlowWakeUpInt
=
intExuConfigs
.
filter
(
intSlowFilter
)
val
fpBlockFastWakeUpFp
=
fpExuConfigs
.
filter
(
fpFastFilter
)
val
fpBlockSlowWakeUpFp
=
fpExuConfigs
.
filter
(
fpSlowFilter
)
val
fpBlockFastWakeUpInt
=
fpExuConfigs
.
filter
(
intFastFilter
)
val
fpBlockSlowWakeUpInt
=
fpExuConfigs
.
filter
(
intSlowFilter
)
with
HasExeBlockHelper
{
// outer facing nodes
val
frontend
=
LazyModule
(
new
Frontend
())
val
l1pluscache
=
LazyModule
(
new
L1plusCache
())
val
ptw
=
LazyModule
(
new
PTW
())
val
l2Prefetcher
=
LazyModule
(
new
L2Prefetcher
())
val
memBlock
=
LazyModule
(
new
MemBlock
(
fastWakeUpIn
=
intBlockFastWakeUpInt
++
intBlockFastWakeUpFp
++
fpBlockFastWakeUpInt
++
fpBlockFastWakeUpFp
,
slowWakeUpIn
=
intBlockSlowWakeUpInt
++
intBlockSlowWakeUpFp
++
fpBlockSlowWakeUpInt
++
fpBlockSlowWakeUpFp
,
fastFpOut
=
Seq
(),
slowFpOut
=
loadExuConfigs
,
fastIntOut
=
Seq
(),
slowIntOut
=
loadExuConfigs
fastWakeUpIn
=
intExuConfigs
.
filter
(
_
.
hasCertainLatency
),
slowWakeUpIn
=
intExuConfigs
.
filter
(
_
.
hasUncertainlatency
)
++
fpExuConfigs
,
fastWakeUpOut
=
Seq
(),
slowWakeUpOut
=
loadExuConfigs
))
lazy
val
module
=
new
XSCoreImp
(
this
)
...
...
@@ -363,9 +351,9 @@ class XSCore()(implicit p: config.Parameters) extends LazyModule
class
XSCoreImp
(
outer
:
XSCore
)
extends
LazyModuleImp
(
outer
)
with
HasXSParameter
with
HasExeBlockHelper
{
with
HasExeBlockHelper
{
val
io
=
IO
(
new
Bundle
{
val
hartId
=
Input
(
UInt
(
64.
W
))
val
externalInterrupt
=
new
ExternalInterruptIO
val
l2ToPrefetcher
=
Flipped
(
new
PrefetcherIO
(
PAddrBits
))
})
...
...
@@ -381,32 +369,21 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
AddressSpace
.
printMemmap
()
// to fast wake up fp, mem rs
val
intBlockFastWakeUpFp
=
intExuConfigs
.
filter
(
fpFastFilter
)
val
intBlockSlowWakeUpFp
=
intExuConfigs
.
filter
(
fpSlowFilter
)
val
intBlockFastWakeUpInt
=
intExuConfigs
.
filter
(
intFastFilter
)
val
intBlockSlowWakeUpInt
=
intExuConfigs
.
filter
(
intSlowFilter
)
val
fpBlockFastWakeUpFp
=
fpExuConfigs
.
filter
(
fpFastFilter
)
val
fpBlockSlowWakeUpFp
=
fpExuConfigs
.
filter
(
fpSlowFilter
)
val
fpBlockFastWakeUpInt
=
fpExuConfigs
.
filter
(
intFastFilter
)
val
fpBlockSlowWakeUpInt
=
fpExuConfigs
.
filter
(
intSlowFilter
)
val
intBlockFastWakeUp
=
intExuConfigs
.
filter
(
_
.
hasCertainLatency
)
val
intBlockSlowWakeUp
=
intExuConfigs
.
filter
(
_
.
hasUncertainlatency
)
val
ctrlBlock
=
Module
(
new
CtrlBlock
)
val
integerBlock
=
Module
(
new
IntegerBlock
(
fastWakeUpIn
=
fpBlockFastWakeUpInt
,
slowWakeUpIn
=
fpBlockSlowWakeUpInt
++
loadExuConfigs
,
fastFpOut
=
intBlockFastWakeUpFp
,
slowFpOut
=
intBlockSlowWakeUpFp
,
fastIntOut
=
intBlockFastWakeUpInt
,
slowIntOut
=
intBlockSlowWakeUpInt
fastWakeUpIn
=
Seq
(),
slowWakeUpIn
=
fpExuConfigs
.
filter
(
_
.
writeIntRf
)
++
loadExuConfigs
,
fastWakeUpOut
=
intBlockFastWakeUp
,
slowWakeUpOut
=
intBlockSlowWakeUp
))
val
floatBlock
=
Module
(
new
FloatBlock
(
fastWakeUpIn
=
intBlockFastWakeUpFp
,
slowWakeUpIn
=
intBlockSlowWakeUpFp
++
loadExuConfigs
,
fastFpOut
=
fpBlockFastWakeUpFp
,
slowFpOut
=
fpBlockSlowWakeUpFp
,
fastIntOut
=
fpBlockFastWakeUpInt
,
slowIntOut
=
fpBlockSlowWakeUpInt
fastWakeUpIn
=
Seq
(),
slowWakeUpIn
=
intExuConfigs
.
filter
(
_
.
writeFpRf
)
++
loadExuConfigs
,
fastWakeUpOut
=
Seq
(),
slowWakeUpOut
=
fpExuConfigs
))
val
frontend
=
outer
.
frontend
.
module
...
...
@@ -432,38 +409,39 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ctrlBlock
.
io
.
toFpBlock
<>
floatBlock
.
io
.
fromCtrlBlock
ctrlBlock
.
io
.
toLsBlock
<>
memBlock
.
io
.
fromCtrlBlock
integerBlock
.
io
.
wakeUpIn
.
fastUops
<>
floatBlock
.
io
.
wakeUpIntOut
.
fastUops
integerBlock
.
io
.
wakeUpIn
.
fast
<>
floatBlock
.
io
.
wakeUpIntOut
.
fast
integerBlock
.
io
.
wakeUpIn
.
slow
<>
floatBlock
.
io
.
wakeUpIntOut
.
slow
++
memBlock
.
io
.
wakeUpIntOut
.
slow
integerBlock
.
io
.
toMemBlock
<>
memBlock
.
io
.
fromIntBlock
val
memBlockWakeUpInt
=
memBlock
.
io
.
wakeUpOut
.
slow
.
map
(
x
=>
intOutValid
(
x
))
val
memBlockWakeUpFp
=
memBlock
.
io
.
wakeUpOut
.
slow
.
map
(
x
=>
fpOutValid
(
x
))
memBlock
.
io
.
wakeUpOut
.
slow
.
foreach
(
_
.
ready
:=
true
.
B
)
floatBlock
.
io
.
wakeUpIn
.
fastUops
<>
integerBlock
.
io
.
wakeUpFpOut
.
fastUops
floatBlock
.
io
.
wakeUpIn
.
fast
<>
integerBlock
.
io
.
wakeUpFpOut
.
fast
floatBlock
.
io
.
wakeUpIn
.
slow
<>
integerBlock
.
io
.
wakeUpFpOut
.
slow
++
memBlock
.
io
.
wakeUpFpOut
.
slow
floatBlock
.
io
.
toMemBlock
<>
memBlock
.
io
.
fromFpBlock
fpExuConfigs
.
zip
(
floatBlock
.
io
.
wakeUpOut
.
slow
).
filterNot
(
_
.
_1
.
writeIntRf
).
map
(
_
.
_2
.
ready
:=
true
.
B
)
val
fpBlockWakeUpInt
=
fpExuConfigs
.
zip
(
floatBlock
.
io
.
wakeUpOut
.
slow
)
.
filter
(
_
.
_1
.
writeIntRf
)
.
map
(
_
.
_2
).
map
(
x
=>
intOutValid
(
x
,
connectReady
=
true
))
intExuConfigs
.
zip
(
integerBlock
.
io
.
wakeUpOut
.
slow
).
filterNot
(
_
.
_1
.
writeFpRf
).
map
(
_
.
_2
.
ready
:=
true
.
B
)
val
intBlockWakeUpFp
=
intExuConfigs
.
filter
(
_
.
hasUncertainlatency
)
.
zip
(
integerBlock
.
io
.
wakeUpOut
.
slow
)
.
filter
(
_
.
_1
.
writeFpRf
)
.
map
(
_
.
_2
).
map
(
x
=>
fpOutValid
(
x
,
connectReady
=
true
))
integerBlock
.
io
.
wakeUpIntOut
.
fast
.
map
(
_
.
ready
:=
true
.
B
)
integerBlock
.
io
.
wakeUpIntOut
.
slow
.
map
(
_
.
ready
:=
true
.
B
)
floatBlock
.
io
.
wakeUpFpOut
.
fast
.
map
(
_
.
ready
:=
true
.
B
)
floatBlock
.
io
.
wakeUpFpOut
.
slow
.
map
(
_
.
ready
:=
true
.
B
)
integerBlock
.
io
.
wakeUpIn
.
slow
<>
fpBlockWakeUpInt
++
memBlockWakeUpInt
integerBlock
.
io
.
toMemBlock
<>
memBlock
.
io
.
fromIntBlock
floatBlock
.
io
.
wakeUpIn
.
slow
<>
intBlockWakeUpFp
++
memBlockWakeUpFp
floatBlock
.
io
.
toMemBlock
<>
memBlock
.
io
.
fromFpBlock
val
wakeUpMem
=
Seq
(
integerBlock
.
io
.
wakeUpIntOut
,
integerBlock
.
io
.
wakeUpFpOut
,
floatBlock
.
io
.
wakeUpIntOut
,
floatBlock
.
io
.
wakeUpFpOut
integerBlock
.
io
.
wakeUpOut
,
floatBlock
.
io
.
wakeUpOut
,
)
memBlock
.
io
.
wakeUpIn
.
fastUops
<>
wakeUpMem
.
flatMap
(
_
.
fastUops
)
memBlock
.
io
.
wakeUpIn
.
fast
<>
wakeUpMem
.
flatMap
(
w
=>
w
.
fast
.
map
(
f
=>
{
val
raw
=
WireInit
(
f
)
raw
}))
memBlock
.
io
.
wakeUpIn
.
slow
<>
wakeUpMem
.
flatMap
(
w
=>
w
.
slow
.
map
(
s
=>
{
val
raw
=
WireInit
(
s
)
raw
}))
memBlock
.
io
.
wakeUpIn
.
fast
<>
wakeUpMem
.
flatMap
(
_
.
fast
)
// Note: 'WireInit' is used to block 'ready's from memBlock,
// we don't need 'ready's from memBlock
memBlock
.
io
.
wakeUpIn
.
slow
<>
wakeUpMem
.
flatMap
(
_
.
slow
.
map
(
x
=>
WireInit
(
x
)))
integerBlock
.
io
.
csrio
.
hartId
<>
io
.
hartId
integerBlock
.
io
.
csrio
.
perf
<>
DontCare
integerBlock
.
io
.
csrio
.
perf
.
retiredInstr
<>
ctrlBlock
.
io
.
roqio
.
toCSR
.
perfinfo
.
retiredInstr
integerBlock
.
io
.
csrio
.
fpu
.
fflags
<>
ctrlBlock
.
io
.
roqio
.
toCSR
.
fflags
...
...
@@ -495,7 +473,7 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ptw
.
io
.
tlb
(
0
)
<>
dtlbRepester
.
io
.
ptw
ptw
.
io
.
tlb
(
1
)
<>
itlbRepester
.
io
.
ptw
ptw
.
io
.
sfence
<>
integerBlock
.
io
.
fenceio
.
sfence
ptw
.
io
.
csr
<>
integerBlock
.
io
.
csrio
.
tlb
ptw
.
io
.
csr
<>
integerBlock
.
io
.
csrio
.
tlb
val
l2PrefetcherIn
=
Wire
(
Decoupled
(
new
MissReq
))
if
(
l2PrefetcherParameters
.
enable
&&
l2PrefetcherParameters
.
_type
==
"bop"
)
{
...
...
src/main/scala/xiangshan/backend/CtrlBlock.scala
浏览文件 @
49681eda
...
...
@@ -292,6 +292,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
rename
.
io
.
roqCommits
<>
roq
.
io
.
commits
rename
.
io
.
out
<>
dispatch
.
io
.
fromRename
rename
.
io
.
renameBypass
<>
dispatch
.
io
.
renameBypass
rename
.
io
.
dispatchInfo
<>
dispatch
.
io
.
preDpInfo
dispatch
.
io
.
redirect
<>
backendRedirect
dispatch
.
io
.
flush
:=
flushReg
...
...
src/main/scala/xiangshan/backend/FloatBlock.scala
浏览文件 @
49681eda
...
...
@@ -6,7 +6,8 @@ import xiangshan._
import
utils._
import
xiangshan.backend.regfile.Regfile
import
xiangshan.backend.exu._
import
xiangshan.backend.issue.
{
ReservationStation
}
import
xiangshan.backend.issue.ReservationStation
import
xiangshan.mem.HasLoadHelper
class
FpBlockToCtrlIO
extends
XSBundle
{
...
...
@@ -18,19 +19,16 @@ class FloatBlock
(
fastWakeUpIn
:
Seq
[
ExuConfig
],
slowWakeUpIn
:
Seq
[
ExuConfig
],
fastFpOut
:
Seq
[
ExuConfig
],
slowFpOut
:
Seq
[
ExuConfig
],
fastIntOut
:
Seq
[
ExuConfig
],
slowIntOut
:
Seq
[
ExuConfig
]
)
extends
XSModule
with
HasExeBlockHelper
{
fastWakeUpOut
:
Seq
[
ExuConfig
],
slowWakeUpOut
:
Seq
[
ExuConfig
],
)
extends
XSModule
with
HasExeBlockHelper
with
HasLoadHelper
{
val
io
=
IO
(
new
Bundle
{
val
fromCtrlBlock
=
Flipped
(
new
CtrlToFpBlockIO
)
val
toCtrlBlock
=
new
FpBlockToCtrlIO
val
toMemBlock
=
new
FpBlockToMemBlockIO
val
wakeUpIn
=
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)
val
wakeUpFpOut
=
Flipped
(
new
WakeUpBundle
(
fastFpOut
.
size
,
slowFpOut
.
size
))
val
wakeUpIntOut
=
Flipped
(
new
WakeUpBundle
(
fastIntOut
.
size
,
slowIntOut
.
size
))
val
wakeUpOut
=
Flipped
(
new
WakeUpBundle
(
fastWakeUpOut
.
size
,
slowWakeUpOut
.
size
))
// from csr
val
frm
=
Input
(
UInt
(
3.
W
))
...
...
@@ -39,6 +37,25 @@ class FloatBlock
val
redirect
=
io
.
fromCtrlBlock
.
redirect
val
flush
=
io
.
fromCtrlBlock
.
flush
require
(
fastWakeUpIn
.
isEmpty
)
val
wakeUpInReg
=
Wire
(
Flipped
(
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)))
wakeUpInReg
.
slow
.
zip
(
io
.
wakeUpIn
.
slow
).
foreach
{
case
(
inReg
,
in
)
=>
PipelineConnect
(
in
,
inReg
,
inReg
.
fire
(),
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
redirect
,
flush
))
}
val
wakeUpInRecode
=
WireInit
(
wakeUpInReg
)
for
(((
rec
,
reg
),
cfg
)
<-
wakeUpInRecode
.
slow
.
zip
(
wakeUpInReg
.
slow
).
zip
(
slowWakeUpIn
)){
rec
.
bits
.
data
:=
{
if
(
cfg
==
Exu
.
ldExeUnitCfg
)
fpRdataHelper
(
reg
.
bits
.
uop
,
reg
.
bits
.
data
)
else
Mux
(
reg
.
bits
.
uop
.
ctrl
.
fpu
.
typeTagOut
===
S
,
recode
(
reg
.
bits
.
data
(
31
,
0
),
S
),
recode
(
reg
.
bits
.
data
(
63
,
0
),
D
)
)
}
rec
.
bits
.
redirectValid
:=
false
.
B
reg
.
ready
:=
rec
.
ready
}
val
fpRf
=
Module
(
new
Regfile
(
numReadPorts
=
NRFpReadPorts
,
numWirtePorts
=
NRFpWritePorts
,
...
...
@@ -70,12 +87,11 @@ class FloatBlock
val
readFpRf
=
cfg
.
readFpRf
val
inBlockWbData
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasCertainLatency
&&
readFpRf
).
map
(
_
.
io
.
toFp
.
bits
.
data
)
val
writeBackData
=
inBlockWbData
++
io
.
wakeUpIn
.
fast
.
map
(
_
.
bits
.
data
)
val
fastPortsCnt
=
writeBackData
.
length
val
inBlockWbData
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasCertainLatency
).
map
(
_
.
io
.
out
.
bits
.
data
)
val
fastPortsCnt
=
inBlockWbData
.
length
val
inBlockListenPorts
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasUncertainlatency
&&
readFpRf
).
map
(
_
.
io
.
toFp
)
val
slowPorts
=
inBlockListenPorts
++
io
.
wakeUpIn
.
slow
val
inBlockListenPorts
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasUncertainlatency
).
map
(
_
.
io
.
out
)
val
slowPorts
=
(
inBlockListenPorts
++
wakeUpInRecode
.
slow
).
map
(
decoupledIOToValidIO
)
val
slowPortsCnt
=
slowPorts
.
length
println
(
s
"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} "
+
...
...
@@ -99,11 +115,8 @@ class FloatBlock
rs
.
io
.
srcRegValue
(
1
)
:=
src2Value
(
readPortIndex
(
i
))
if
(
cfg
.
fpSrcCnt
>
2
)
rs
.
io
.
srcRegValue
(
2
)
:=
src3Value
(
readPortIndex
(
i
))
rs
.
io
.
fastDatas
<>
writeBackData
for
((
x
,
y
)
<-
rs
.
io
.
slowPorts
.
zip
(
slowPorts
))
{
x
.
valid
:=
y
.
fire
()
x
.
bits
:=
y
.
bits
}
rs
.
io
.
fastDatas
<>
inBlockWbData
rs
.
io
.
slowPorts
<>
slowPorts
exeUnits
(
i
).
io
.
redirect
<>
redirect
exeUnits
(
i
).
io
.
flush
<>
flush
...
...
@@ -123,44 +136,44 @@ class FloatBlock
raw
.
valid
:=
x
.
io
.
fastUopOut
.
valid
&&
raw
.
bits
.
ctrl
.
fpWen
raw
})
rs
.
io
.
fastUopsIn
<>
inBlockUops
++
io
.
wakeUpIn
.
fastUops
rs
.
io
.
fastUopsIn
<>
inBlockUops
}
io
.
wakeUpFpOut
.
fastUops
<>
reservedStations
.
filter
(
rs
=>
fpFastFilter
(
rs
.
exuCfg
)
).
map
(
_
.
io
.
fastUopOut
).
map
(
fpValid
)
io
.
wakeUpFpOut
.
fast
<>
exeUnits
.
filter
(
x
=>
fpFastFilter
(
x
.
config
)
).
map
(
_
.
io
.
toFp
)
io
.
wakeUpFpOut
.
slow
<>
exeUnits
.
filter
(
x
=>
fpSlowFilter
(
x
.
config
)
).
map
(
_
.
io
.
toFp
)
io
.
wakeUpIntOut
.
fastUops
<>
reservedStations
.
filter
(
rs
=>
intFastFilter
(
rs
.
exuCfg
)
).
map
(
_
.
io
.
fastUopOut
).
map
(
intValid
)
io
.
wakeUpIntOut
.
fast
<>
exeUnits
.
filter
(
x
=>
intFastFilter
(
x
.
config
)
).
map
(
_
.
io
.
toInt
)
io
.
wakeUpIntOut
.
slow
<>
exeUnits
.
filter
(
x
=>
intSlowFilter
(
x
.
config
)
).
map
(
_
.
io
.
toInt
)
val
(
recodeOut
,
ieeeOutReg
)
=
exeUnits
.
map
(
e
=>
{
val
rec
=
WireInit
(
e
.
io
.
out
)
val
recReg
=
Wire
(
DecoupledIO
(
new
ExuOutput
))
PipelineConnect
(
rec
,
recReg
,
recReg
.
fire
(),
rec
.
bits
.
uop
.
roqIdx
.
needFlush
(
redirect
,
flush
)
)
val
ieeeReg
=
WireInit
(
recReg
)
recReg
.
ready
:=
ieeeReg
.
ready
ieeeReg
.
bits
.
data
:=
Mux
(
recReg
.
bits
.
uop
.
ctrl
.
fpWen
,
ieee
(
recReg
.
bits
.
data
),
recReg
.
bits
.
data
)
ieeeReg
.
bits
.
redirectValid
:=
false
.
B
(
rec
,
ieeeReg
)
}).
unzip
io
.
wakeUpOut
.
slow
<>
ieeeOutReg
// read fp rf from ctrl block
fpRf
.
io
.
readPorts
.
zipWithIndex
.
map
{
case
(
r
,
i
)
=>
r
.
addr
:=
io
.
fromCtrlBlock
.
readRf
(
i
)
}
(
0
until
exuParameters
.
StuCnt
).
foreach
(
i
=>
io
.
toMemBlock
.
readFpRf
(
i
).
data
:=
fpRf
.
io
.
readPorts
(
i
+
12
).
data
)
(
0
until
exuParameters
.
StuCnt
).
foreach
(
i
=>
io
.
toMemBlock
.
readFpRf
(
i
).
data
:=
RegNext
(
ieee
(
fpRf
.
io
.
readPorts
(
i
+
12
).
data
))
)
// write fp rf arbiter
val
fpWbArbiter
=
Module
(
new
Wb
(
(
exeUnits
.
map
(
_
.
config
)
++
fastWakeUpIn
++
slowWakeUpIn
),
NRFpWritePorts
,
isFp
=
true
))
fpWbArbiter
.
io
.
in
<>
exeUnits
.
map
(
_
.
io
.
toFp
)
++
io
.
wakeUpIn
.
fast
++
io
.
wakeUpIn
.
slow
fpWbArbiter
.
io
.
in
<>
exeUnits
.
map
(
e
=>
if
(
e
.
config
.
writeIntRf
)
WireInit
(
e
.
io
.
out
)
else
e
.
io
.
out
)
++
wakeUpInRecode
.
slow
exeUnits
.
zip
(
recodeOut
).
zip
(
fpWbArbiter
.
io
.
in
).
filter
(
_
.
_1
.
_1
.
config
.
writeIntRf
).
foreach
{
case
((
exu
,
wInt
),
wFp
)
=>
exu
.
io
.
out
.
ready
:=
wInt
.
fire
()
||
wFp
.
fire
()
}
// set busytable and update roq
io
.
toCtrlBlock
.
wbRegs
<>
fpWbArbiter
.
io
.
out
...
...
src/main/scala/xiangshan/backend/IntegerBlock.scala
浏览文件 @
49681eda
...
...
@@ -11,7 +11,7 @@ import xiangshan.backend.regfile.Regfile
class
WakeUpBundle
(
numFast
:
Int
,
numSlow
:
Int
)
extends
XSBundle
{
val
fastUops
=
Vec
(
numFast
,
Flipped
(
ValidIO
(
new
MicroOp
)))
val
fast
=
Vec
(
numFast
,
Flipped
(
Decouple
dIO
(
new
ExuOutput
)))
//one cycle later than fastUops
val
fast
=
Vec
(
numFast
,
Flipped
(
Vali
dIO
(
new
ExuOutput
)))
//one cycle later than fastUops
val
slow
=
Vec
(
numSlow
,
Flipped
(
DecoupledIO
(
new
ExuOutput
)))
override
def
cloneType
=
(
new
WakeUpBundle
(
numFast
,
numSlow
)).
asInstanceOf
[
this.
type
]
...
...
@@ -23,32 +23,56 @@ class IntBlockToCtrlIO extends XSBundle {
// used to update busytable and roq state
val
wbRegs
=
Vec
(
NRIntWritePorts
,
ValidIO
(
new
ExuOutput
))
// write back to brq
val
exuRedirect
=
Vec
(
exuParameters
.
AluCnt
+
exuParameters
.
JmpCnt
,
ValidIO
(
new
ExuOutput
))
val
exuRedirect
=
Vec
(
exuParameters
.
AluCnt
+
exuParameters
.
JmpCnt
,
ValidIO
(
new
ExuOutput
))
val
numExist
=
Vec
(
exuParameters
.
IntExuCnt
,
Output
(
UInt
(
log2Ceil
(
IssQueSize
).
W
)))
}
trait
HasExeBlockHelper
{
def
fpFastFilter
(
cfg
:
ExuConfig
)
:
Boolean
=
{
cfg
.
hasCertainLatency
&&
cfg
.
writeFpRf
def
fpUopValid
(
x
:
ValidIO
[
MicroOp
])
:
ValidIO
[
MicroOp
]
=
{
val
uop
=
WireInit
(
x
)
uop
.
valid
:=
x
.
valid
&&
x
.
bits
.
ctrl
.
fpWen
uop
}
def
fpSlowFilter
(
cfg
:
ExuConfig
)
:
Boolean
=
{
cfg
.
hasUncertainlatency
&&
cfg
.
writeFpRf
def
fpOutValid
(
x
:
ValidIO
[
ExuOutput
])
:
ValidIO
[
ExuOutput
]
=
{
val
out
=
WireInit
(
x
)
out
.
valid
:=
x
.
valid
&&
x
.
bits
.
uop
.
ctrl
.
fpWen
out
}
def
intFastFilter
(
cfg
:
ExuConfig
)
:
Boolean
=
{
cfg
.
hasCertainLatency
&&
cfg
.
writeIntRf
def
fpOutValid
(
x
:
DecoupledIO
[
ExuOutput
],
connectReady
:
Boolean
=
false
)
:
DecoupledIO
[
ExuOutput
]
=
{
val
out
=
WireInit
(
x
)
if
(
connectReady
)
x
.
ready
:=
out
.
ready
out
.
valid
:=
x
.
valid
&&
x
.
bits
.
uop
.
ctrl
.
fpWen
out
}
def
intSlowFilter
(
cfg
:
ExuConfig
)
:
Boolean
=
{
cfg
.
hasUncertainlatency
&&
cfg
.
writeIntRf
def
intUopValid
(
x
:
ValidIO
[
MicroOp
])
:
ValidIO
[
MicroOp
]
=
{
val
uop
=
WireInit
(
x
)
uop
.
valid
:=
x
.
valid
&&
x
.
bits
.
ctrl
.
rfWen
uop
}
def
fpValid
(
x
:
ValidIO
[
MicroOp
])
:
ValidIO
[
MicroOp
]
=
{
val
uop
=
WireInit
(
x
)
uop
.
valid
:=
x
.
valid
&&
x
.
bits
.
ctrl
.
fp
Wen
uop
def
intOutValid
(
x
:
ValidIO
[
ExuOutput
])
:
ValidIO
[
ExuOutput
]
=
{
val
out
=
WireInit
(
x
)
out
.
valid
:=
x
.
valid
&&
x
.
bits
.
uop
.
ctrl
.
rf
Wen
out
}
def
intValid
(
x
:
ValidIO
[
MicroOp
])
:
ValidIO
[
MicroOp
]
=
{
val
uop
=
WireInit
(
x
)
uop
.
valid
:=
x
.
valid
&&
x
.
bits
.
ctrl
.
rfWen
uop
def
intOutValid
(
x
:
DecoupledIO
[
ExuOutput
],
connectReady
:
Boolean
=
false
)
:
DecoupledIO
[
ExuOutput
]
=
{
val
out
=
WireInit
(
x
)
if
(
connectReady
)
x
.
ready
:=
out
.
ready
out
.
valid
:=
x
.
valid
&&
x
.
bits
.
uop
.
ctrl
.
rfWen
out
}
def
decoupledIOToValidIO
[
T
<:
Data
](
d
:
DecoupledIO
[
T
])
:
Valid
[
T
]
=
{
val
v
=
Wire
(
Valid
(
d
.
bits
.
cloneType
))
v
.
valid
:=
d
.
valid
v
.
bits
:=
d
.
bits
v
}
def
validIOToDecoupledIO
[
T
<:
Data
](
v
:
Valid
[
T
])
:
DecoupledIO
[
T
]
=
{
val
d
=
Wire
(
DecoupledIO
(
v
.
bits
.
cloneType
))
d
.
valid
:=
v
.
valid
d
.
ready
:=
true
.
B
d
.
bits
:=
v
.
bits
d
}
}
...
...
@@ -56,26 +80,22 @@ class IntegerBlock
(
fastWakeUpIn
:
Seq
[
ExuConfig
],
slowWakeUpIn
:
Seq
[
ExuConfig
],
fastFpOut
:
Seq
[
ExuConfig
],
slowFpOut
:
Seq
[
ExuConfig
],
fastIntOut
:
Seq
[
ExuConfig
],
slowIntOut
:
Seq
[
ExuConfig
]
)
extends
XSModule
with
HasExeBlockHelper
{
fastWakeUpOut
:
Seq
[
ExuConfig
],
slowWakeUpOut
:
Seq
[
ExuConfig
]
)
extends
XSModule
with
HasExeBlockHelper
{
val
io
=
IO
(
new
Bundle
{
val
fromCtrlBlock
=
Flipped
(
new
CtrlToIntBlockIO
)
val
toCtrlBlock
=
new
IntBlockToCtrlIO
val
toMemBlock
=
new
IntBlockToMemBlockIO
val
wakeUpIn
=
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)
val
wakeUpFpOut
=
Flipped
(
new
WakeUpBundle
(
fastFpOut
.
size
,
slowFpOut
.
size
))
val
wakeUpIntOut
=
Flipped
(
new
WakeUpBundle
(
fastIntOut
.
size
,
slowIntOut
.
size
))
val
wakeUpOut
=
Flipped
(
new
WakeUpBundle
(
fastWakeUpOut
.
size
,
slowWakeUpOut
.
size
))
val
csrio
=
new
CSRFileIO
val
fenceio
=
new
Bundle
{
val
sfence
=
Output
(
new
SfenceBundle
)
// to front,mem
val
fencei
=
Output
(
Bool
())
// to icache
val
sbuffer
=
new
FenceToSbuffer
// to mem
val
fencei
=
Output
(
Bool
())
// to icache
val
sbuffer
=
new
FenceToSbuffer
// to mem
}
})
val
difftestIO
=
IO
(
new
Bundle
()
{
...
...
@@ -136,12 +156,12 @@ class IntegerBlock
val
readIntRf
=
cfg
.
readIntRf
val
inBlockWbData
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasCertainLatency
&&
readIntRf
).
map
(
_
.
io
.
toIn
t
.
bits
.
data
)
val
inBlockWbData
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasCertainLatency
).
map
(
_
.
io
.
ou
t
.
bits
.
data
)
val
fastDatas
=
inBlockWbData
++
io
.
wakeUpIn
.
fast
.
map
(
_
.
bits
.
data
)
val
wakeupCnt
=
fastDatas
.
length
val
inBlockListenPorts
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasUncertainlatency
&&
readIntRf
).
map
(
_
.
io
.
toIn
t
)
val
slowPorts
=
inBlockListenPorts
++
io
.
wakeUpIn
.
slow
val
inBlockListenPorts
=
exeUnits
.
filter
(
e
=>
e
.
config
.
hasUncertainlatency
).
map
(
_
.
io
.
ou
t
)
val
slowPorts
=
(
inBlockListenPorts
++
io
.
wakeUpIn
.
slow
).
map
(
decoupledIOToValidIO
)
val
extraListenPortsCnt
=
slowPorts
.
length
val
feedback
=
(
cfg
==
ldExeUnitCfg
)
||
(
cfg
==
stExeUnitCfg
)
...
...
@@ -166,10 +186,7 @@ class IntegerBlock
}
rs
.
io
.
fastDatas
<>
fastDatas
for
((
x
,
y
)
<-
rs
.
io
.
slowPorts
.
zip
(
slowPorts
))
{
x
.
valid
:=
y
.
fire
()
x
.
bits
:=
y
.
bits
}
rs
.
io
.
slowPorts
<>
slowPorts
exeUnits
(
i
).
io
.
redirect
<>
redirect
exeUnits
(
i
).
io
.
fromInt
<>
rs
.
io
.
deq
...
...
@@ -181,7 +198,7 @@ class IntegerBlock
rs
})
for
(
rs
<-
reservationStations
)
{
for
(
rs
<-
reservationStations
)
{
val
inBlockUops
=
reservationStations
.
filter
(
x
=>
x
.
exuCfg
.
hasCertainLatency
&&
x
.
exuCfg
.
writeIntRf
).
map
(
x
=>
{
...
...
@@ -192,34 +209,22 @@ class IntegerBlock
rs
.
io
.
fastUopsIn
<>
inBlockUops
++
io
.
wakeUpIn
.
fastUops
}
io
.
wakeUp
Fp
Out
.
fastUops
<>
reservationStations
.
filter
(
rs
=>
fpFastFilter
(
rs
.
exuCfg
)
).
map
(
_
.
io
.
fastUopOut
).
map
(
f
pValid
)
io
.
wakeUpOut
.
fastUops
<>
reservationStations
.
filter
(
rs
=>
rs
.
exuCfg
.
hasCertainLatency
).
map
(
_
.
io
.
fastUopOut
).
map
(
intUo
pValid
)
io
.
wakeUp
Fp
Out
.
fast
<>
exeUnits
.
filter
(
x
=>
fpFastFilter
(
x
.
config
)
).
map
(
_
.
io
.
toFp
)
io
.
wakeUpOut
.
fast
<>
exeUnits
.
filter
(
x
=>
x
.
config
.
hasCertainLatency
).
map
(
_
.
io
.
out
).
map
(
decoupledIOToValidIO
)
io
.
wakeUpFpOut
.
slow
<>
exeUnits
.
filter
(
x
=>
fpSlowFilter
(
x
.
config
)
).
map
(
_
.
io
.
toFp
)
io
.
wakeUpIntOut
.
fastUops
<>
reservationStations
.
filter
(
rs
=>
intFastFilter
(
rs
.
exuCfg
)
).
map
(
_
.
io
.
fastUopOut
).
map
(
intValid
)
io
.
wakeUpIntOut
.
fast
<>
exeUnits
.
filter
(
x
=>
intFastFilter
(
x
.
config
)
).
map
(
_
.
io
.
toInt
)
io
.
wakeUpIntOut
.
slow
<>
exeUnits
.
filter
(
x
=>
intSlowFilter
(
x
.
config
)
).
map
(
_
.
io
.
toInt
)
io
.
wakeUpOut
.
slow
<>
exeUnits
.
filter
(
x
=>
x
.
config
.
hasUncertainlatency
).
map
(
x
=>
WireInit
(
x
.
io
.
out
))
// send misprediction to brq
io
.
toCtrlBlock
.
exuRedirect
.
zip
(
exeUnits
.
filter
(
_
.
config
.
hasRedirect
).
map
(
_
.
io
.
toIn
t
)
).
foreach
{
exeUnits
.
filter
(
_
.
config
.
hasRedirect
).
map
(
_
.
io
.
ou
t
)
).
foreach
{
case
(
x
,
y
)
=>
x
.
valid
:=
y
.
fire
()
&&
y
.
bits
.
redirectValid
x
.
bits
:=
y
.
bits
...
...
@@ -232,7 +237,7 @@ class IntegerBlock
}
// read int rf from ctrl block
intRf
.
io
.
readPorts
.
zipWithIndex
.
map
{
case
(
r
,
i
)
=>
r
.
addr
:=
io
.
fromCtrlBlock
.
readRf
(
i
)
}
intRf
.
io
.
readPorts
.
zipWithIndex
.
map
{
case
(
r
,
i
)
=>
r
.
addr
:=
io
.
fromCtrlBlock
.
readRf
(
i
)
}
(
0
until
NRMemReadPorts
).
foreach
(
i
=>
io
.
toMemBlock
.
readIntRf
(
i
).
data
:=
intRf
.
io
.
readPorts
(
i
+
8
).
data
)
// write int rf arbiter
val
intWbArbiter
=
Module
(
new
Wb
(
...
...
@@ -240,12 +245,19 @@ class IntegerBlock
NRIntWritePorts
,
isFp
=
false
))
intWbArbiter
.
io
.
in
<>
exeUnits
.
map
(
_
.
io
.
toInt
)
++
io
.
wakeUpIn
.
fast
++
io
.
wakeUpIn
.
slow
intWbArbiter
.
io
.
in
<>
exeUnits
.
map
(
e
=>
{
if
(
e
.
config
.
writeFpRf
)
WireInit
(
e
.
io
.
out
)
else
e
.
io
.
out
})
++
io
.
wakeUpIn
.
slow
exeUnits
.
zip
(
intWbArbiter
.
io
.
in
).
filter
(
_
.
_1
.
config
.
writeFpRf
).
zip
(
io
.
wakeUpIn
.
slow
).
foreach
{
case
((
exu
,
wInt
),
wFp
)
=>
exu
.
io
.
out
.
ready
:=
wFp
.
fire
()
||
wInt
.
fire
()
}
// set busytable and update roq
io
.
toCtrlBlock
.
wbRegs
<>
intWbArbiter
.
io
.
out
intRf
.
io
.
writePorts
.
zip
(
intWbArbiter
.
io
.
out
).
foreach
{
intRf
.
io
.
writePorts
.
zip
(
intWbArbiter
.
io
.
out
).
foreach
{
case
(
rf
,
wb
)
=>
rf
.
wen
:=
wb
.
valid
&&
wb
.
bits
.
uop
.
ctrl
.
rfWen
rf
.
addr
:=
wb
.
bits
.
uop
.
pdest
...
...
src/main/scala/xiangshan/backend/MemBlock.scala
浏览文件 @
49681eda
...
...
@@ -30,29 +30,19 @@ class FpBlockToMemBlockIO extends XSBundle {
}
class
MemBlock
(
fastWakeUpIn
:
Seq
[
ExuConfig
],
slowWakeUpIn
:
Seq
[
ExuConfig
],
fastFpOut
:
Seq
[
ExuConfig
],
slowFpOut
:
Seq
[
ExuConfig
],
fastIntOut
:
Seq
[
ExuConfig
],
slowIntOut
:
Seq
[
ExuConfig
]
val
fastWakeUpIn
:
Seq
[
ExuConfig
],
val
slowWakeUpIn
:
Seq
[
ExuConfig
],
val
fastWakeUpOut
:
Seq
[
ExuConfig
],
val
slowWakeUpOut
:
Seq
[
ExuConfig
]
)(
implicit
p
:
Parameters
)
extends
LazyModule
{
val
dcache
=
LazyModule
(
new
DCache
())
val
uncache
=
LazyModule
(
new
Uncache
())
lazy
val
module
=
new
MemBlockImp
(
fastWakeUpIn
,
slowWakeUpIn
,
fastFpOut
,
slowFpOut
,
fastIntOut
,
slowIntOut
)(
this
)
lazy
val
module
=
new
MemBlockImp
(
this
)
}
class
MemBlockImp
(
fastWakeUpIn
:
Seq
[
ExuConfig
],
slowWakeUpIn
:
Seq
[
ExuConfig
],
fastFpOut
:
Seq
[
ExuConfig
],
slowFpOut
:
Seq
[
ExuConfig
],
fastIntOut
:
Seq
[
ExuConfig
],
slowIntOut
:
Seq
[
ExuConfig
]
)
(
outer
:
MemBlock
)
extends
LazyModuleImp
(
outer
)
class
MemBlockImp
(
outer
:
MemBlock
)
extends
LazyModuleImp
(
outer
)
with
HasXSParameter
with
HasExceptionNO
with
HasXSLog
...
...
@@ -60,6 +50,11 @@ class MemBlockImp
with
HasExeBlockHelper
{
val
fastWakeUpIn
=
outer
.
fastWakeUpIn
val
slowWakeUpIn
=
outer
.
slowWakeUpIn
val
fastWakeUpOut
=
outer
.
fastWakeUpOut
val
slowWakeUpOut
=
outer
.
slowWakeUpOut
val
io
=
IO
(
new
Bundle
{
val
fromCtrlBlock
=
Flipped
(
new
CtrlToLsBlockIO
)
val
fromIntBlock
=
Flipped
(
new
IntBlockToMemBlockIO
)
...
...
@@ -67,8 +62,7 @@ class MemBlockImp
val
toCtrlBlock
=
new
LsBlockToCtrlIO
val
wakeUpIn
=
new
WakeUpBundle
(
fastWakeUpIn
.
size
,
slowWakeUpIn
.
size
)
val
wakeUpFpOut
=
Flipped
(
new
WakeUpBundle
(
fastFpOut
.
size
,
slowFpOut
.
size
))
val
wakeUpIntOut
=
Flipped
(
new
WakeUpBundle
(
fastIntOut
.
size
,
slowIntOut
.
size
))
val
wakeUpOut
=
Flipped
(
new
WakeUpBundle
(
fastWakeUpOut
.
size
,
slowWakeUpOut
.
size
))
val
ptw
=
new
TlbPtwIO
val
sfence
=
Input
(
new
SfenceBundle
)
...
...
@@ -124,8 +118,7 @@ class MemBlockImp
atomicsUnit
.
io
.
out
.
ready
:=
ldOut0
.
ready
loadUnits
.
head
.
io
.
ldout
.
ready
:=
ldOut0
.
ready
val
intExeWbReqs
=
ldOut0
+:
loadUnits
.
tail
.
map
(
_
.
io
.
ldout
)
val
fpExeWbReqs
=
loadUnits
.
map
(
_
.
io
.
fpout
)
val
exeWbReqs
=
ldOut0
+:
loadUnits
.
tail
.
map
(
_
.
io
.
ldout
)
val
readPortIndex
=
Seq
(
0
,
1
,
2
,
4
)
io
.
fromIntBlock
.
readIntRf
.
foreach
(
_
.
addr
:=
DontCare
)
...
...
@@ -145,11 +138,10 @@ class MemBlockImp
.
map
(
_
.
_2
.
bits
.
data
)
val
wakeupCnt
=
fastDatas
.
length
val
inBlockListenPorts
=
intExeWbReqs
++
fpExeWbReqs
val
slowPorts
=
inBlockListenPorts
++
val
slowPorts
=
(
exeWbReqs
++
slowWakeUpIn
.
zip
(
io
.
wakeUpIn
.
slow
)
.
filter
(
x
=>
(
x
.
_1
.
writeIntRf
&&
readIntRf
)
||
(
x
.
_1
.
writeFpRf
&&
readFpRf
))
.
map
(
_
.
_2
)
.
map
(
_
.
_2
)
).
map
(
decoupledIOToValidIO
)
val
slowPortsCnt
=
slowPorts
.
length
...
...
@@ -165,18 +157,14 @@ class MemBlockImp
rs
.
io
.
numExist
<>
io
.
toCtrlBlock
.
numExist
(
i
)
rs
.
io
.
fromDispatch
<>
io
.
fromCtrlBlock
.
enqIqCtrl
(
i
)
val
src2IsFp
=
RegNext
(
io
.
fromCtrlBlock
.
enqIqCtrl
(
i
).
bits
.
ctrl
.
src2Type
===
SrcType
.
fp
)
rs
.
io
.
srcRegValue
:=
DontCare
rs
.
io
.
srcRegValue
(
0
)
:=
io
.
fromIntBlock
.
readIntRf
(
readPortIndex
(
i
)).
data
if
(
i
>=
exuParameters
.
LduCnt
)
{
rs
.
io
.
srcRegValue
(
1
)
:=
Mux
(
src2IsFp
,
io
.
fromFpBlock
.
readFpRf
(
i
-
exuParameters
.
LduCnt
).
data
,
io
.
fromIntBlock
.
readIntRf
(
readPortIndex
(
i
)
+
1
).
data
)
rs
.
io
.
srcRegValue
(
1
)
:=
io
.
fromIntBlock
.
readIntRf
(
readPortIndex
(
i
)
+
1
).
data
rs
.
io
.
fpRegValue
:=
io
.
fromFpBlock
.
readFpRf
(
i
-
exuParameters
.
LduCnt
).
data
}
rs
.
io
.
fastDatas
<>
fastDatas
for
((
x
,
y
)
<-
rs
.
io
.
slowPorts
.
zip
(
slowPorts
))
{
x
.
valid
:=
y
.
fire
()
x
.
bits
:=
y
.
bits
}
rs
.
io
.
slowPorts
<>
slowPorts
// exeUnits(i).io.redirect <> redirect
// exeUnits(i).io.fromInt <> rs.io.deq
...
...
@@ -193,17 +181,9 @@ class MemBlockImp
.
map
(
_
.
_2
)
}
// TODO: make this better
io
.
wakeUpIn
.
fast
.
foreach
(
_
.
ready
:=
true
.
B
)
io
.
wakeUpOut
.
slow
<>
exeWbReqs
io
.
wakeUpIn
.
slow
.
foreach
(
_
.
ready
:=
true
.
B
)
io
.
wakeUpFpOut
.
slow
<>
fpExeWbReqs
io
.
wakeUpIntOut
.
slow
<>
intExeWbReqs
// load always ready
fpExeWbReqs
.
foreach
(
_
.
ready
:=
true
.
B
)
intExeWbReqs
.
foreach
(
_
.
ready
:=
true
.
B
)
val
dtlb
=
Module
(
new
TLB
(
Width
=
DTLBWidth
,
isDtlb
=
true
))
val
lsq
=
Module
(
new
LsqWrappper
)
val
sbuffer
=
Module
(
new
NewSbuffer
)
...
...
src/main/scala/xiangshan/backend/decode/FPDecoder.scala
浏览文件 @
49681eda
...
...
@@ -17,6 +17,7 @@ class FPDecoder extends XSModule{
def
Y
=
BitPat
(
"b1"
)
val
s
=
BitPat
(
S
)
val
d
=
BitPat
(
D
)
val
i
=
BitPat
(
I
)
val
default
=
List
(
X
,
X
,
X
,
N
,
N
,
N
,
X
,
X
,
X
)
...
...
@@ -27,15 +28,15 @@ class FPDecoder extends XSModule{
FCVT_S_WU
->
List
(
N
,
s
,
s
,
Y
,
Y
,
Y
,
N
,
N
,
Y
),
FCVT_S_L
->
List
(
N
,
s
,
s
,
Y
,
Y
,
Y
,
N
,
N
,
Y
),
FCVT_S_LU
->
List
(
N
,
s
,
s
,
Y
,
Y
,
Y
,
N
,
N
,
Y
),
FMV_X_W
->
List
(
N
,
d
,
X
,
N
,
N
,
N
,
N
,
N
,
N
),
FCLASS_S
->
List
(
N
,
s
,
X
,
N
,
N
,
N
,
N
,
N
,
N
),
FCVT_W_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_WU_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_L_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_LU_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FEQ_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLT_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLE_S
->
List
(
N
,
s
,
X
,
N
,
Y
,
N
,
N
,
N
,
N
),
FMV_X_W
->
List
(
N
,
d
,
i
,
N
,
N
,
N
,
N
,
N
,
N
),
FCLASS_S
->
List
(
N
,
s
,
i
,
N
,
N
,
N
,
N
,
N
,
N
),
FCVT_W_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_WU_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_L_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_LU_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FEQ_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLT_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLE_S
->
List
(
N
,
s
,
i
,
N
,
Y
,
N
,
N
,
N
,
N
),
FSGNJ_S
->
List
(
N
,
s
,
s
,
N
,
N
,
Y
,
N
,
N
,
N
),
FSGNJN_S
->
List
(
N
,
s
,
s
,
N
,
N
,
Y
,
N
,
N
,
N
),
FSGNJX_S
->
List
(
N
,
s
,
s
,
N
,
N
,
Y
,
N
,
N
,
N
),
...
...
@@ -60,17 +61,17 @@ class FPDecoder extends XSModule{
FCVT_D_WU
->
List
(
N
,
d
,
d
,
Y
,
Y
,
Y
,
N
,
N
,
Y
),
FCVT_D_L
->
List
(
N
,
d
,
d
,
Y
,
Y
,
Y
,
N
,
N
,
Y
),
FCVT_D_LU
->
List
(
N
,
d
,
d
,
Y
,
Y
,
Y
,
N
,
N
,
Y
),
FMV_X_D
->
List
(
N
,
d
,
X
,
N
,
N
,
N
,
N
,
N
,
N
),
FCLASS_D
->
List
(
N
,
d
,
X
,
N
,
N
,
N
,
N
,
N
,
N
),
FCVT_W_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_WU_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_L_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_LU_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FMV_X_D
->
List
(
N
,
d
,
i
,
N
,
N
,
N
,
N
,
N
,
N
),
FCLASS_D
->
List
(
N
,
d
,
i
,
N
,
N
,
N
,
N
,
N
,
N
),
FCVT_W_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_WU_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_L_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_LU_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
Y
),
FCVT_S_D
->
List
(
N
,
d
,
s
,
N
,
Y
,
Y
,
N
,
N
,
Y
),
FCVT_D_S
->
List
(
N
,
s
,
d
,
N
,
Y
,
Y
,
N
,
N
,
Y
),
FEQ_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLT_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLE_D
->
List
(
N
,
d
,
X
,
N
,
Y
,
N
,
N
,
N
,
N
),
FEQ_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLT_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
N
),
FLE_D
->
List
(
N
,
d
,
i
,
N
,
Y
,
N
,
N
,
N
,
N
),
FSGNJ_D
->
List
(
N
,
d
,
d
,
N
,
N
,
Y
,
N
,
N
,
N
),
FSGNJN_D
->
List
(
N
,
d
,
d
,
N
,
N
,
Y
,
N
,
N
,
N
),
FSGNJX_D
->
List
(
N
,
d
,
d
,
N
,
N
,
Y
,
N
,
N
,
N
),
...
...
src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
浏览文件 @
49681eda
...
...
@@ -28,6 +28,7 @@ class Dispatch extends XSModule {
// from rename
val
fromRename
=
Vec
(
RenameWidth
,
Flipped
(
DecoupledIO
(
new
MicroOp
)))
val
renameBypass
=
Input
(
new
RenameBypassInfo
)
val
preDpInfo
=
Input
(
new
PreDispatchInfo
)
// to busytable: set pdest to busy (not ready) when they are dispatched
val
allocPregs
=
Vec
(
RenameWidth
,
Output
(
new
ReplayPregReq
))
// enq Roq
...
...
@@ -52,9 +53,9 @@ class Dispatch extends XSModule {
})
val
dispatch1
=
Module
(
new
Dispatch1
)
val
intDq
=
Module
(
new
DispatchQueue
(
dpParams
.
IntDqSize
,
RenameWidth
,
dpParams
.
IntDqDeqWidth
))
val
fpDq
=
Module
(
new
DispatchQueue
(
dpParams
.
FpDqSize
,
RenameWidth
,
dpParams
.
FpDqDeqWidth
))
val
lsDq
=
Module
(
new
DispatchQueue
(
dpParams
.
LsDqSize
,
RenameWidth
,
dpParams
.
LsDqDeqWidth
))
val
intDq
=
Module
(
new
DispatchQueue
(
dpParams
.
IntDqSize
,
RenameWidth
,
dpParams
.
IntDqDeqWidth
,
"int"
))
val
fpDq
=
Module
(
new
DispatchQueue
(
dpParams
.
FpDqSize
,
RenameWidth
,
dpParams
.
FpDqDeqWidth
,
"fp"
))
val
lsDq
=
Module
(
new
DispatchQueue
(
dpParams
.
LsDqSize
,
RenameWidth
,
dpParams
.
LsDqDeqWidth
,
"ls"
))
// pipeline between rename and dispatch
// accepts all at once
...
...
@@ -66,6 +67,7 @@ class Dispatch extends XSModule {
// dispatch 1: accept uops from rename and dispatch them to the three dispatch queues
// dispatch1.io.redirect <> io.redirect
dispatch1
.
io
.
renameBypass
:=
RegEnable
(
io
.
renameBypass
,
io
.
fromRename
(
0
).
valid
&&
dispatch1
.
io
.
fromRename
(
0
).
ready
)
dispatch1
.
io
.
preDpInfo
:=
RegEnable
(
io
.
preDpInfo
,
io
.
fromRename
(
0
).
valid
&&
dispatch1
.
io
.
fromRename
(
0
).
ready
)
dispatch1
.
io
.
enqRoq
<>
io
.
enqRoq
dispatch1
.
io
.
enqLsq
<>
io
.
enqLsq
dispatch1
.
io
.
toIntDq
<>
intDq
.
io
.
enq
...
...
src/main/scala/xiangshan/backend/dispatch/Dispatch1.scala
浏览文件 @
49681eda
...
...
@@ -10,12 +10,18 @@ import xiangshan.backend.rename.RenameBypassInfo
import
xiangshan.mem.LsqEnqIO
import
xiangshan.backend.fu.HasExceptionNO
class
PreDispatchInfo
extends
XSBundle
{
val
lsqNeedAlloc
=
Vec
(
RenameWidth
,
UInt
(
2.
W
))
}
// read rob and enqueue
class
Dispatch1
extends
XSModule
with
HasExceptionNO
{
val
io
=
IO
(
new
Bundle
()
{
// from rename
val
fromRename
=
Vec
(
RenameWidth
,
Flipped
(
DecoupledIO
(
new
MicroOp
)))
val
renameBypass
=
Input
(
new
RenameBypassInfo
)
val
preDpInfo
=
Input
(
new
PreDispatchInfo
)
val
recv
=
Output
(
Vec
(
RenameWidth
,
Bool
()))
// enq Roq
val
enqRoq
=
Flipped
(
new
RoqEnqIO
)
...
...
@@ -147,7 +153,7 @@ class Dispatch1 extends XSModule with HasExceptionNO {
io
.
enqRoq
.
req
(
i
).
bits
:=
updatedUop
(
i
)
XSDebug
(
io
.
enqRoq
.
req
(
i
).
valid
,
p
"pc 0x${Hexadecimal(io.fromRename(i).bits.cf.pc)} receives nroq ${io.enqRoq.resp(i)}\n"
)
io
.
enqLsq
.
needAlloc
(
i
)
:=
io
.
fromRename
(
i
).
valid
&&
isLs
(
i
)
io
.
enqLsq
.
needAlloc
(
i
)
:=
Mux
(
io
.
fromRename
(
i
).
valid
,
io
.
preDpInfo
.
lsqNeedAlloc
(
i
),
0.
U
)
io
.
enqLsq
.
req
(
i
).
valid
:=
io
.
fromRename
(
i
).
valid
&&
isLs
(
i
)
&&
thisCanActualOut
(
i
)
&&
io
.
enqRoq
.
canAccept
&&
io
.
toIntDq
.
canAccept
&&
io
.
toFpDq
.
canAccept
&&
io
.
toLsDq
.
canAccept
io
.
enqLsq
.
req
(
i
).
bits
:=
updatedUop
(
i
)
io
.
enqLsq
.
req
(
i
).
bits
.
roqIdx
:=
io
.
enqRoq
.
resp
(
i
)
...
...
@@ -200,6 +206,6 @@ class Dispatch1 extends XSModule with HasExceptionNO {
PopCount
(
io
.
toLsDq
.
req
.
map
(
_
.
valid
&&
io
.
toLsDq
.
canAccept
))
XSError
(
enqFireCnt
>
renameFireCnt
,
"enqFireCnt should not be greater than renameFireCnt\n"
)
XSPerf
(
"
utilizatio
n"
,
PopCount
(
io
.
fromRename
.
map
(
_
.
valid
)))
XSPerf
(
"waitInstr"
,
PopCount
((
0
until
RenameWidth
).
map
(
i
=>
io
.
fromRename
(
i
).
valid
&&
!
io
.
recv
(
i
))))
XSPerf
(
"
dp1_i
n"
,
PopCount
(
io
.
fromRename
.
map
(
_
.
valid
)))
XSPerf
(
"
dp1_
waitInstr"
,
PopCount
((
0
until
RenameWidth
).
map
(
i
=>
io
.
fromRename
(
i
).
valid
&&
!
io
.
recv
(
i
))))
}
src/main/scala/xiangshan/backend/dispatch/Dispatch2Fp.scala
浏览文件 @
49681eda
...
...
@@ -148,6 +148,6 @@ class Dispatch2Fp extends XSModule {
// p"(${readPortIndexReg(i)+2.U}, ${uopReg(i).psrc3}, ${Hexadecimal(io.enqIQData(i).src3)})\n")
// }
XSPerf
(
"
utilizatio
n"
,
PopCount
(
io
.
fromDq
.
map
(
_
.
valid
)))
XSPerf
(
"
dp2fp_i
n"
,
PopCount
(
io
.
fromDq
.
map
(
_
.
valid
)))
}
src/main/scala/xiangshan/backend/dispatch/Dispatch2Int.scala
浏览文件 @
49681eda
...
...
@@ -153,6 +153,6 @@ class Dispatch2Int extends XSModule {
// p"(${readPortIndexReg(i)+1.U}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
XSPerf
(
"
utilizatio
n"
,
PopCount
(
io
.
fromDq
.
map
(
_
.
valid
)))
XSPerf
(
"
dp2int_i
n"
,
PopCount
(
io
.
fromDq
.
map
(
_
.
valid
)))
}
src/main/scala/xiangshan/backend/dispatch/Dispatch2Ls.scala
浏览文件 @
49681eda
...
...
@@ -146,7 +146,7 @@ class Dispatch2Ls extends XSModule {
// p"(${readPort(i)+1}, ${uopReg(i).psrc2}, ${Hexadecimal(io.enqIQData(i).src2)})\n")
// }
XSPerf
(
"
utilizatio
n"
,
PopCount
(
io
.
fromDq
.
map
(
_
.
valid
)))
XSPerf
(
"waitInstr"
,
PopCount
(
io
.
fromDq
.
map
(
r
=>
r
.
valid
&&
!
r
.
ready
)))
XSPerf
(
"
dp2ls_i
n"
,
PopCount
(
io
.
fromDq
.
map
(
_
.
valid
)))
XSPerf
(
"
dp2ls_
waitInstr"
,
PopCount
(
io
.
fromDq
.
map
(
r
=>
r
.
valid
&&
!
r
.
ready
)))
}
src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala
浏览文件 @
49681eda
...
...
@@ -23,7 +23,7 @@ class DispatchQueueIO(enqnum: Int, deqnum: Int) extends XSBundle {
}
// dispatch queue: accepts at most enqnum uops from dispatch1 and dispatches deqnum uops at every clock cycle
class
DispatchQueue
(
size
:
Int
,
enqnum
:
Int
,
deqnum
:
Int
)
extends
XSModule
with
HasCircularQueuePtrHelper
{
class
DispatchQueue
(
size
:
Int
,
enqnum
:
Int
,
deqnum
:
Int
,
name
:
String
)
extends
XSModule
with
HasCircularQueuePtrHelper
{
val
io
=
IO
(
new
DispatchQueueIO
(
enqnum
,
deqnum
))
val
s_invalid
::
s_valid
::
Nil
=
Enum
(
2
)
...
...
@@ -203,5 +203,5 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int) extends XSModule with H
XSDebug
(
false
,
true
.
B
,
"\n"
)
// XSError(isAfter(headPtr(0), tailPtr(0)), p"assert greaterOrEqualThan(tailPtr: ${tailPtr(0)}, headPtr: ${headPtr(0)}) failed\n")
XSPerf
(
"utilization"
,
PopCount
(
stateEntries
.
map
(
_
=/=
s_invalid
)))
XSPerf
(
"
dq_"
+
name
+
"_
utilization"
,
PopCount
(
stateEntries
.
map
(
_
=/=
s_invalid
)))
}
src/main/scala/xiangshan/backend/exu/AluExeUnit.scala
浏览文件 @
49681eda
...
...
@@ -13,11 +13,11 @@ class AluExeUnit extends Exu(aluExeUnitCfg)
case
a
:
Alu
=>
a
}.
get
io
.
toIn
t
.
bits
.
redirectValid
:=
alu
.
redirectOutValid
io
.
toIn
t
.
bits
.
redirect
:=
alu
.
redirectOut
io
.
ou
t
.
bits
.
redirectValid
:=
alu
.
redirectOutValid
io
.
ou
t
.
bits
.
redirect
:=
alu
.
redirectOut
XSDebug
(
io
.
fromInt
.
valid
||
io
.
redirect
.
valid
,
p
"fromInt(${io.fromInt.valid} ${io.fromInt.ready}) toInt(${io.
toInt.valid} ${io.toIn
t.ready})"
+
p
"fromInt(${io.fromInt.valid} ${io.fromInt.ready}) toInt(${io.
out.valid} ${io.ou
t.ready})"
+
p
"Redirect:(${io.redirect.valid}) roqIdx:${io.redirect.bits.roqIdx}\n"
,
)
XSDebug
(
io
.
fromInt
.
valid
,
...
...
@@ -25,7 +25,7 @@ class AluExeUnit extends Exu(aluExeUnitCfg)
p
"src3:${Hexadecimal(io.fromInt.bits.src3)} func:${Binary(io.fromInt.bits.uop.ctrl.fuOpType)} "
+
p
"pc:${Hexadecimal(io.fromInt.bits.uop.cf.pc)} roqIdx:${io.fromInt.bits.uop.roqIdx}\n"
)
XSDebug
(
io
.
toIn
t
.
valid
,
p
"res:${Hexadecimal(io.
toIn
t.bits.data)}\n"
XSDebug
(
io
.
ou
t
.
valid
,
p
"res:${Hexadecimal(io.
ou
t.bits.data)}\n"
)
}
\ No newline at end of file
src/main/scala/xiangshan/backend/exu/Exu.scala
浏览文件 @
49681eda
...
...
@@ -83,8 +83,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
val
fromFp
=
if
(
config
.
readFpRf
)
Flipped
(
DecoupledIO
(
new
ExuInput
))
else
null
val
redirect
=
Flipped
(
ValidIO
(
new
Redirect
))
val
flush
=
Input
(
Bool
())
val
toInt
=
if
(
config
.
writeIntRf
)
DecoupledIO
(
new
ExuOutput
)
else
null
val
toFp
=
if
(
config
.
writeFpRf
)
DecoupledIO
(
new
ExuOutput
)
else
null
val
out
=
DecoupledIO
(
new
ExuOutput
)
})
for
((
fuCfg
,
(
fu
,
sel
))
<-
config
.
fuConfigs
.
zip
(
supportedFunctionUnits
.
zip
(
fuSel
)))
{
...
...
@@ -147,15 +146,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
}
}
val
intArb
=
if
(
config
.
writeIntRf
)
writebackArb
(
supportedFunctionUnits
.
zip
(
config
.
fuConfigs
).
filter
(
x
=>
!
x
.
_2
.
writeFpRf
).
map
(
_
.
_1
.
io
.
out
),
io
.
toInt
)
else
null
val
fpArb
=
if
(
config
.
writeFpRf
)
writebackArb
(
supportedFunctionUnits
.
zip
(
config
.
fuConfigs
).
filter
(
x
=>
x
.
_2
.
writeFpRf
).
map
(
_
.
_1
.
io
.
out
),
io
.
toFp
)
else
null
val
arb
=
writebackArb
(
supportedFunctionUnits
.
map
(
_
.
io
.
out
),
io
.
out
)
val
readIntFu
=
config
.
fuConfigs
.
zip
(
supportedFunctionUnits
.
zip
(
fuSel
))
...
...
@@ -179,7 +170,6 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
}
}
if
(
config
.
readIntRf
)
{
io
.
fromInt
.
ready
:=
inReady
(
readIntFu
)
}
...
...
@@ -198,12 +188,7 @@ abstract class Exu(val config: ExuConfig) extends XSModule {
out
.
redirectValid
:=
false
.
B
}
if
(
config
.
writeFpRf
)
{
assignDontCares
(
io
.
toFp
.
bits
)
}
if
(
config
.
writeIntRf
)
{
assignDontCares
(
io
.
toInt
.
bits
)
}
assignDontCares
(
io
.
out
.
bits
)
}
object
Exu
{
...
...
@@ -233,6 +218,4 @@ object Exu {
Seq
.
fill
(
exuParameters
.
FmiscCnt
)(
fmiscExeUnitCfg
)
val
exuConfigs
:
Seq
[
ExuConfig
]
=
intExuConfigs
++
fpExuConfigs
}
\ No newline at end of file
src/main/scala/xiangshan/backend/exu/FmacExeUnit.scala
浏览文件 @
49681eda
...
...
@@ -20,8 +20,8 @@ class FmacExeUnit extends Exu(fmacExeUnitCfg)
fma
.
io
.
redirectIn
:=
io
.
redirect
fma
.
io
.
flushIn
:=
io
.
flush
fma
.
io
.
out
.
ready
:=
io
.
toFp
.
ready
fma
.
io
.
out
.
ready
:=
io
.
out
.
ready
io
.
toFp
.
bits
.
data
:=
box
(
fma
.
io
.
out
.
bits
.
data
,
fma
.
io
.
out
.
bits
.
uop
.
ctrl
.
fpu
.
typeTagOut
)
io
.
toFp
.
bits
.
fflags
:=
fma
.
fflags
io
.
out
.
bits
.
data
:=
box
(
fma
.
io
.
out
.
bits
.
data
,
fma
.
io
.
out
.
bits
.
uop
.
ctrl
.
fpu
.
typeTagOut
)
io
.
out
.
bits
.
fflags
:=
fma
.
fflags
}
src/main/scala/xiangshan/backend/exu/FmiscExeUnit.scala
浏览文件 @
49681eda
...
...
@@ -10,12 +10,7 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
val
frm
=
IO
(
Input
(
UInt
(
3.
W
)))
val
f2i
::
f2f
::
fdivSqrt
::
Nil
=
supportedFunctionUnits
.
map
(
fu
=>
fu
.
asInstanceOf
[
FPUSubModule
])
val
toFpUnits
=
Seq
(
f2f
,
fdivSqrt
)
val
toIntUnits
=
Seq
(
f2i
)
assert
(
toFpUnits
.
size
==
1
||
fpArb
.
io
.
in
.
length
==
toFpUnits
.
size
)
assert
(
toIntUnits
.
size
==
1
||
intArb
.
io
.
in
.
length
==
toIntUnits
.
size
)
val
fus
=
supportedFunctionUnits
.
map
(
fu
=>
fu
.
asInstanceOf
[
FPUSubModule
])
val
input
=
io
.
fromFp
val
isRVF
=
input
.
bits
.
uop
.
ctrl
.
isRVF
...
...
@@ -28,15 +23,10 @@ class FmiscExeUnit extends Exu(fmiscExeUnitCfg) {
module
.
asInstanceOf
[
FPUSubModule
].
rm
:=
Mux
(
instr_rm
=/=
7.
U
,
instr_rm
,
frm
)
}
io
.
toFp
.
bits
.
fflags
:=
MuxCase
(
0.
U
,
toFpUnits
.
map
(
x
=>
x
.
io
.
out
.
fire
()
->
x
.
fflags
)
)
val
fpOutCtrl
=
io
.
toFp
.
bits
.
uop
.
ctrl
.
fpu
io
.
toFp
.
bits
.
data
:=
box
(
fpArb
.
io
.
out
.
bits
.
data
,
fpOutCtrl
.
typeTagOut
)
io
.
toInt
.
bits
.
fflags
:=
MuxCase
(
io
.
out
.
bits
.
fflags
:=
MuxCase
(
0.
U
,
toIntUnit
s
.
map
(
x
=>
x
.
io
.
out
.
fire
()
->
x
.
fflags
)
fu
s
.
map
(
x
=>
x
.
io
.
out
.
fire
()
->
x
.
fflags
)
)
val
fpOutCtrl
=
io
.
out
.
bits
.
uop
.
ctrl
.
fpu
io
.
out
.
bits
.
data
:=
box
(
arb
.
io
.
out
.
bits
.
data
,
fpOutCtrl
.
typeTagOut
)
}
src/main/scala/xiangshan/backend/exu/JumpExeUnit.scala
浏览文件 @
49681eda
...
...
@@ -73,6 +73,6 @@ class JumpExeUnit extends Exu(jumpExeUnitCfg)
val
isDouble
=
!
uop
.
ctrl
.
isRVF
io
.
toIn
t
.
bits
.
redirectValid
:=
jmp
.
redirectOutValid
io
.
toIn
t
.
bits
.
redirect
:=
jmp
.
redirectOut
io
.
ou
t
.
bits
.
redirectValid
:=
jmp
.
redirectOutValid
io
.
ou
t
.
bits
.
redirect
:=
jmp
.
redirectOut
}
src/main/scala/xiangshan/backend/exu/MulDivExeUnit.scala
浏览文件 @
49681eda
...
...
@@ -67,13 +67,13 @@ class MulDivExeUnit extends Exu(mulDivExeUnitCfg) {
XSDebug
(
io
.
fromInt
.
valid
,
"In(%d %d) Out(%d %d) Redirect:(%d %d)\n"
,
io
.
fromInt
.
valid
,
io
.
fromInt
.
ready
,
io
.
toInt
.
valid
,
io
.
toIn
t
.
ready
,
io
.
out
.
valid
,
io
.
ou
t
.
ready
,
io
.
redirect
.
valid
,
io
.
redirect
.
bits
.
level
)
XSDebug
(
io
.
fromInt
.
valid
,
"src1:%x src2:%x pc:%x\n"
,
src1
,
src2
,
io
.
fromInt
.
bits
.
uop
.
cf
.
pc
)
XSDebug
(
io
.
toIn
t
.
valid
,
"Out(%d %d) res:%x pc:%x\n"
,
io
.
toInt
.
valid
,
io
.
toInt
.
ready
,
io
.
toInt
.
bits
.
data
,
io
.
toIn
t
.
bits
.
uop
.
cf
.
pc
XSDebug
(
io
.
ou
t
.
valid
,
"Out(%d %d) res:%x pc:%x\n"
,
io
.
out
.
valid
,
io
.
out
.
ready
,
io
.
out
.
bits
.
data
,
io
.
ou
t
.
bits
.
uop
.
cf
.
pc
)
}
src/main/scala/xiangshan/backend/exu/Wb.scala
浏览文件 @
49681eda
...
...
@@ -3,8 +3,45 @@ package xiangshan.backend.exu
import
chisel3._
import
chisel3.util._
import
xiangshan._
import
utils._
class
ExuWbArbiter
(
n
:
Int
)
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
val
in
=
Vec
(
n
,
Flipped
(
DecoupledIO
(
new
ExuOutput
)))
val
out
=
DecoupledIO
(
new
ExuOutput
)
})
class
ExuCtrl
extends
Bundle
{
val
uop
=
new
MicroOp
val
fflags
=
UInt
(
5.
W
)
val
redirectValid
=
Bool
()
val
redirect
=
new
Redirect
val
debug
=
new
DebugBundle
}
val
ctrl_arb
=
Module
(
new
Arbiter
(
new
ExuCtrl
,
n
))
val
data_arb
=
Module
(
new
Arbiter
(
UInt
((
XLEN
+
1
).
W
),
n
))
ctrl_arb
.
io
.
out
.
ready
:=
io
.
out
.
ready
data_arb
.
io
.
out
.
ready
:=
io
.
out
.
ready
for
(((
in
,
ctrl
),
data
)
<-
io
.
in
.
zip
(
ctrl_arb
.
io
.
in
).
zip
(
data_arb
.
io
.
in
)){
ctrl
.
valid
:=
in
.
valid
for
((
name
,
d
)
<-
ctrl
.
bits
.
elements
)
{
d
:=
in
.
bits
.
elements
(
name
)
}
data
.
valid
:=
in
.
valid
data
.
bits
:=
in
.
bits
.
data
in
.
ready
:=
ctrl
.
ready
assert
(
ctrl
.
ready
===
data
.
ready
)
}
assert
(
ctrl_arb
.
io
.
chosen
===
data_arb
.
io
.
chosen
)
io
.
out
.
bits
.
data
:=
data_arb
.
io
.
out
.
bits
for
((
name
,
d
)
<-
ctrl_arb
.
io
.
out
.
bits
.
elements
){
io
.
out
.
bits
.
elements
(
name
)
:=
d
}
io
.
out
.
valid
:=
ctrl_arb
.
io
.
out
.
valid
assert
(
ctrl_arb
.
io
.
out
.
valid
===
data_arb
.
io
.
out
.
valid
)
}
class
Wb
(
cfgs
:
Seq
[
ExuConfig
],
numOut
:
Int
,
isFp
:
Boolean
)
extends
XSModule
{
...
...
@@ -15,14 +52,6 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
val
out
=
Vec
(
numOut
,
ValidIO
(
new
ExuOutput
))
})
// def exuOutToRfReq(exuOut: DecoupledIO[ExuOutput]): DecoupledIO[ExuOutput] = {
// val req = WireInit(exuOut)
// req.valid := exuOut.valid && wen(exuOut.bits)
// exuOut.ready := Mux(req.valid, req.ready, true.B)
// req
// }
val
directConnect
=
io
.
in
.
zip
(
priorities
).
filter
(
x
=>
x
.
_2
==
0
).
map
(
_
.
_1
)
val
mulReq
=
io
.
in
.
zip
(
priorities
).
filter
(
x
=>
x
.
_2
==
1
).
map
(
_
.
_1
)
val
otherReq
=
io
.
in
.
zip
(
priorities
).
filter
(
x
=>
x
.
_2
>
1
).
map
(
_
.
_1
)
...
...
@@ -32,9 +61,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
io
.
out
.
take
(
directConnect
.
size
).
zip
(
directConnect
).
foreach
{
case
(
o
,
i
)
=>
o
.
bits
:=
i
.
bits
o
.
valid
:=
i
.
valid
i
.
ready
:=
true
.
B
val
arb
=
Module
(
new
ExuWbArbiter
(
1
))
arb
.
io
.
in
.
head
<>
i
o
.
bits
:=
arb
.
io
.
out
.
bits
o
.
valid
:=
arb
.
io
.
out
.
valid
arb
.
io
.
out
.
ready
:=
true
.
B
}
def
splitN
[
T
](
in
:
Seq
[
T
],
n
:
Int
)
:
Seq
[
Option
[
Seq
[
T
]]]
=
{
...
...
@@ -59,17 +90,11 @@ class Wb(cfgs: Seq[ExuConfig], numOut: Int, isFp: Boolean) extends XSModule {
for
(
i
<-
mulReq
.
indices
)
{
val
out
=
io
.
out
(
directConnect
.
size
+
i
)
val
other
=
arbReq
(
i
).
getOrElse
(
Seq
())
if
(
other
.
isEmpty
){
out
.
valid
:=
mulReq
(
i
).
valid
out
.
bits
:=
mulReq
(
i
).
bits
mulReq
(
i
).
ready
:=
true
.
B
}
else
{
val
arb
=
Module
(
new
Arbiter
(
new
ExuOutput
,
1
+
other
.
size
))
arb
.
io
.
in
<>
mulReq
(
i
)
+:
other
out
.
valid
:=
arb
.
io
.
out
.
valid
out
.
bits
:=
arb
.
io
.
out
.
bits
arb
.
io
.
out
.
ready
:=
true
.
B
}
val
arb
=
Module
(
new
ExuWbArbiter
(
1
+
other
.
size
))
arb
.
io
.
in
<>
mulReq
(
i
)
+:
other
out
.
valid
:=
arb
.
io
.
out
.
valid
out
.
bits
:=
arb
.
io
.
out
.
bits
arb
.
io
.
out
.
ready
:=
true
.
B
}
if
(
portUsed
<
numOut
){
...
...
src/main/scala/xiangshan/backend/ftq/Ftq.scala
浏览文件 @
49681eda
...
...
@@ -221,28 +221,28 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
}
})
// from 4r sram
commitEntry
.
ftqPC
:=
ftq_4r_sram
.
io
.
rdata
(
0
).
ftqPC
commitEntry
.
lastPacketPC
:=
ftq_4r_sram
.
io
.
rdata
(
0
).
lastPacketPC
commitEntry
.
ftqPC
:=
RegNext
(
ftq_4r_sram
.
io
.
rdata
(
0
).
ftqPC
)
commitEntry
.
lastPacketPC
:=
RegNext
(
ftq_4r_sram
.
io
.
rdata
(
0
).
lastPacketPC
)
// from 2r sram
commitEntry
.
rasSp
:=
ftq_2r_sram
.
io
.
rdata
(
0
).
rasSp
commitEntry
.
rasTop
:=
ftq_2r_sram
.
io
.
rdata
(
0
).
rasEntry
commitEntry
.
hist
:=
ftq_2r_sram
.
io
.
rdata
(
0
).
hist
commitEntry
.
predHist
:=
ftq_2r_sram
.
io
.
rdata
(
0
).
predHist
commitEntry
.
specCnt
:=
ftq_2r_sram
.
io
.
rdata
(
0
).
specCnt
commitEntry
.
br_mask
:=
ftq_2r_sram
.
io
.
rdata
(
0
).
br_mask
commitEntry
.
rasSp
:=
RegNext
(
ftq_2r_sram
.
io
.
rdata
(
0
).
rasSp
)
commitEntry
.
rasTop
:=
RegNext
(
ftq_2r_sram
.
io
.
rdata
(
0
).
rasEntry
)
commitEntry
.
hist
:=
RegNext
(
ftq_2r_sram
.
io
.
rdata
(
0
).
hist
)
commitEntry
.
predHist
:=
RegNext
(
ftq_2r_sram
.
io
.
rdata
(
0
).
predHist
)
commitEntry
.
specCnt
:=
RegNext
(
ftq_2r_sram
.
io
.
rdata
(
0
).
specCnt
)
commitEntry
.
br_mask
:=
RegNext
(
ftq_2r_sram
.
io
.
rdata
(
0
).
br_mask
)
// from 1r sram
commitEntry
.
metas
:=
ftq_1r_sram
.
io
.
rdata
(
0
).
metas
commitEntry
.
rvc_mask
:=
ftq_1r_sram
.
io
.
rdata
(
0
).
rvc_mask
commitEntry
.
metas
:=
RegNext
(
ftq_1r_sram
.
io
.
rdata
(
0
).
metas
)
commitEntry
.
rvc_mask
:=
RegNext
(
ftq_1r_sram
.
io
.
rdata
(
0
).
rvc_mask
)
// from regs
commitEntry
.
valids
:=
RegNext
(
commit_valids
)
commitEntry
.
mispred
:=
RegNext
(
mispredict_vec
(
headPtr
.
value
))
commitEntry
.
cfiIndex
:=
RegNext
(
cfiIndex_vec
(
headPtr
.
value
))
commitEntry
.
cfiIsCall
:=
RegNext
(
cfiIsCall
(
headPtr
.
value
))
commitEntry
.
cfiIsRet
:=
RegNext
(
cfiIsRet
(
headPtr
.
value
))
commitEntry
.
cfiIsRVC
:=
RegNext
(
cfiIsRVC
(
headPtr
.
value
))
commitEntry
.
target
:=
RegNext
(
update_target
(
headPtr
.
value
))
io
.
commit_ftqEntry
.
valid
:=
RegNext
(
Cat
(
commit_valids
).
orR
(
))
//TODO: do we need this?
commitEntry
.
valids
:=
RegNext
(
RegNext
(
commit_valids
)
)
commitEntry
.
mispred
:=
RegNext
(
RegNext
(
mispredict_vec
(
headPtr
.
value
)
))
commitEntry
.
cfiIndex
:=
RegNext
(
RegNext
(
cfiIndex_vec
(
headPtr
.
value
)
))
commitEntry
.
cfiIsCall
:=
RegNext
(
RegNext
(
cfiIsCall
(
headPtr
.
value
)
))
commitEntry
.
cfiIsRet
:=
RegNext
(
RegNext
(
cfiIsRet
(
headPtr
.
value
)
))
commitEntry
.
cfiIsRVC
:=
RegNext
(
RegNext
(
cfiIsRVC
(
headPtr
.
value
)
))
commitEntry
.
target
:=
RegNext
(
RegNext
(
update_target
(
headPtr
.
value
)
))
io
.
commit_ftqEntry
.
valid
:=
RegNext
(
RegNext
(
Cat
(
commit_valids
).
orR
()
))
//TODO: do we need this?
io
.
commit_ftqEntry
.
bits
:=
commitEntry
// read logic
...
...
@@ -286,10 +286,10 @@ class Ftq extends XSModule with HasCircularQueuePtrHelper {
}
}
XSPerf
(
"ftq
E
ntries"
,
validEntries
)
XSPerf
(
"ftq
StallAcc
"
,
io
.
enq
.
valid
&&
!
io
.
enq
.
ready
,
acc
=
true
)
XSPerf
(
"
mispredictRedirectAcc
"
,
io
.
redirect
.
valid
&&
RedirectLevel
.
flushAfter
===
io
.
redirect
.
bits
.
level
,
acc
=
true
)
XSPerf
(
"
replayRedirectAcc
"
,
io
.
redirect
.
valid
&&
RedirectLevel
.
flushItself
(
io
.
redirect
.
bits
.
level
),
acc
=
true
)
XSPerf
(
"ftq
_e
ntries"
,
validEntries
)
XSPerf
(
"ftq
_stall
"
,
io
.
enq
.
valid
&&
!
io
.
enq
.
ready
,
acc
=
true
)
XSPerf
(
"
ftq_mispredictRedirect
"
,
io
.
redirect
.
valid
&&
RedirectLevel
.
flushAfter
===
io
.
redirect
.
bits
.
level
,
acc
=
true
)
XSPerf
(
"
ftq_replayRedirect
"
,
io
.
redirect
.
valid
&&
RedirectLevel
.
flushItself
(
io
.
redirect
.
bits
.
level
),
acc
=
true
)
XSDebug
(
io
.
commit_ftqEntry
.
valid
,
p
"ftq commit: ${io.commit_ftqEntry.bits}"
)
XSDebug
(
io
.
enq
.
fire
(),
p
"ftq enq: ${io.enq.bits}"
)
...
...
src/main/scala/xiangshan/backend/fu/Alu.scala
浏览文件 @
49681eda
...
...
@@ -35,21 +35,12 @@ class RightShiftModule extends XSModule {
val
io
=
IO
(
new
Bundle
()
{
val
shamt
=
Input
(
UInt
(
6.
W
))
val
srlSrc
,
sraSrc
=
Input
(
UInt
(
XLEN
.
W
))
val
srl
,
sra
=
Output
(
UInt
(
XLEN
.
W
))
val
srl
_l
,
srl_w
,
sra_l
,
sra_w
=
Output
(
UInt
(
XLEN
.
W
))
})
io
.
srl
:=
io
.
srlSrc
>>
io
.
shamt
io
.
sra
:=
(
io
.
sraSrc
.
asSInt
()
>>
io
.
shamt
).
asUInt
()
}
class
ShiftModule
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
val
shamt
=
Input
(
UInt
(
6.
W
))
val
shsrc1
=
Input
(
UInt
(
XLEN
.
W
))
val
sll
,
srl
,
sra
=
Output
(
UInt
(
XLEN
.
W
))
})
io
.
sll
:=
(
io
.
shsrc1
<<
io
.
shamt
)(
XLEN
-
1
,
0
)
io
.
srl
:=
io
.
shsrc1
>>
io
.
shamt
io
.
sra
:=
(
io
.
shsrc1
.
asSInt
>>
io
.
shamt
).
asUInt
io
.
srl_l
:=
io
.
srlSrc
>>
io
.
shamt
io
.
srl_w
:=
io
.
srlSrc
(
31
,
0
)
>>
io
.
shamt
io
.
sra_l
:=
(
io
.
sraSrc
.
asSInt
()
>>
io
.
shamt
).
asUInt
()
io
.
sra_w
:=
(
Cat
(
Fill
(
32
,
io
.
sraSrc
(
31
)),
io
.
sraSrc
(
31
,
0
)).
asSInt
()
>>
io
.
shamt
).
asUInt
()
}
class
MiscResultSelect
extends
XSModule
{
...
...
@@ -87,17 +78,15 @@ class AluResSel extends XSModule {
io
.
aluRes
:=
Cat
(
h32
,
res
(
31
,
0
))
}
class
Alu
extends
FunctionUnit
with
HasRedirectOut
{
val
(
src1
,
src2
,
func
,
pc
,
uop
)
=
(
io
.
in
.
bits
.
src
(
0
),
io
.
in
.
bits
.
src
(
1
),
io
.
in
.
bits
.
uop
.
ctrl
.
fuOpType
,
SignExt
(
io
.
in
.
bits
.
uop
.
cf
.
pc
,
AddrBits
),
io
.
in
.
bits
.
uop
)
val
valid
=
io
.
in
.
valid
class
AluDataModule
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
val
src1
,
src2
=
Input
(
UInt
(
XLEN
.
W
))
val
func
=
Input
(
FuOpType
())
val
pred_taken
,
isBranch
=
Input
(
Bool
())
val
result
=
Output
(
UInt
(
XLEN
.
W
))
val
taken
,
mispredict
=
Output
(
Bool
())
})
val
(
src1
,
src2
,
func
)
=
(
io
.
src1
,
io
.
src2
,
io
.
func
)
val
isAdderSub
=
(
func
=/=
ALUOpType
.
add
)
&&
(
func
=/=
ALUOpType
.
addw
)
val
addModule
=
Module
(
new
AddModule
)
...
...
@@ -121,18 +110,12 @@ class Alu extends FunctionUnit with HasRedirectOut {
val
rightShiftModule
=
Module
(
new
RightShiftModule
)
rightShiftModule
.
io
.
shamt
:=
shamt
rightShiftModule
.
io
.
srlSrc
:=
Cat
(
Mux
(
isW
,
0.
U
(
32.
W
),
src1
(
63
,
32
)),
src1
(
31
,
0
)
)
rightShiftModule
.
io
.
sraSrc
:=
Cat
(
Mux
(
isW
,
Fill
(
32
,
src1
(
31
)),
src1
(
63
,
32
)),
src1
(
31
,
0
)
)
rightShiftModule
.
io
.
srlSrc
:=
src1
rightShiftModule
.
io
.
sraSrc
:=
src1
val
sll
=
leftShiftModule
.
io
.
sll
val
srl
=
rightShiftModule
.
io
.
srl
val
sra
=
rightShiftModule
.
io
.
sra
val
srl
=
Mux
(
isW
,
rightShiftModule
.
io
.
srl_w
,
rightShiftModule
.
io
.
srl_l
)
val
sra
=
Mux
(
isW
,
rightShiftModule
.
io
.
sra_w
,
rightShiftModule
.
io
.
sra_l
)
val
miscResSel
=
Module
(
new
MiscResultSelect
)
miscResSel
.
io
.
func
:=
func
(
3
,
0
)
...
...
@@ -160,9 +143,32 @@ class Alu extends FunctionUnit with HasRedirectOut {
ALUOpType
.
getBranchType
(
ALUOpType
.
blt
)
->
slt
,
ALUOpType
.
getBranchType
(
ALUOpType
.
bltu
)
->
sltu
)
val
taken
=
LookupTree
(
ALUOpType
.
getBranchType
(
func
),
branchOpTable
)
^
ALUOpType
.
isBranchInvert
(
func
)
io
.
result
:=
aluRes
io
.
taken
:=
taken
io
.
mispredict
:=
(
io
.
pred_taken
^
taken
)
&&
io
.
isBranch
}
class
Alu
extends
FunctionUnit
with
HasRedirectOut
{
val
(
src1
,
src2
,
func
,
pc
,
uop
)
=
(
io
.
in
.
bits
.
src
(
0
),
io
.
in
.
bits
.
src
(
1
),
io
.
in
.
bits
.
uop
.
ctrl
.
fuOpType
,
SignExt
(
io
.
in
.
bits
.
uop
.
cf
.
pc
,
AddrBits
),
io
.
in
.
bits
.
uop
)
val
valid
=
io
.
in
.
valid
val
isBranch
=
ALUOpType
.
isBranch
(
func
)
val
taken
=
LookupTree
(
ALUOpType
.
getBranchType
(
func
),
branchOpTable
)
^
ALUOpType
.
isBranchInvert
(
func
)
val
dataModule
=
Module
(
new
AluDataModule
)
dataModule
.
io
.
src1
:=
src1
dataModule
.
io
.
src2
:=
src2
dataModule
.
io
.
func
:=
func
dataModule
.
io
.
pred_taken
:=
uop
.
cf
.
pred_taken
dataModule
.
io
.
isBranch
:=
isBranch
redirectOutValid
:=
io
.
out
.
valid
&&
isBranch
redirectOut
:=
DontCare
...
...
@@ -170,12 +176,12 @@ class Alu extends FunctionUnit with HasRedirectOut {
redirectOut
.
roqIdx
:=
uop
.
roqIdx
redirectOut
.
ftqIdx
:=
uop
.
cf
.
ftqPtr
redirectOut
.
ftqOffset
:=
uop
.
cf
.
ftqOffset
redirectOut
.
cfiUpdate
.
isMisPred
:=
(
uop
.
cf
.
pred_taken
^
taken
)
&&
isBranch
redirectOut
.
cfiUpdate
.
taken
:=
taken
redirectOut
.
cfiUpdate
.
isMisPred
:=
dataModule
.
io
.
mispredict
redirectOut
.
cfiUpdate
.
taken
:=
dataModule
.
io
.
taken
redirectOut
.
cfiUpdate
.
predTaken
:=
uop
.
cf
.
pred_taken
io
.
in
.
ready
:=
io
.
out
.
ready
io
.
out
.
valid
:=
valid
io
.
out
.
bits
.
uop
<>
io
.
in
.
bits
.
uop
io
.
out
.
bits
.
data
:=
aluRes
io
.
out
.
bits
.
data
:=
dataModule
.
io
.
result
}
src/main/scala/xiangshan/backend/fu/CSR.scala
浏览文件 @
49681eda
...
...
@@ -8,14 +8,6 @@ import xiangshan._
import
xiangshan.backend._
import
xiangshan.backend.fu.util._
object
hartId
extends
(()
=>
Int
)
{
var
x
=
0
def
apply
()
:
Int
=
{
x
=
x
+
1
x
-
1
}
}
trait
HasExceptionNO
{
def
instrAddrMisaligned
=
0
def
instrAccessFault
=
1
...
...
@@ -129,6 +121,7 @@ class CustomCSRCtrlIO extends XSBundle {
}
class
CSRFileIO
extends
XSBundle
{
val
hartId
=
Input
(
UInt
(
64.
W
))
// output (for func === CSROpType.jmp)
val
perf
=
new
PerfCounterIO
val
isPerfCnt
=
Output
(
Bool
())
...
...
@@ -265,8 +258,7 @@ class CSR extends FunctionUnit with HasCSRConst
val
mvendorid
=
RegInit
(
UInt
(
XLEN
.
W
),
0.
U
)
// this is a non-commercial implementation
val
marchid
=
RegInit
(
UInt
(
XLEN
.
W
),
0.
U
)
// return 0 to indicate the field is not implemented
val
mimpid
=
RegInit
(
UInt
(
XLEN
.
W
),
0.
U
)
// provides a unique encoding of the version of the processor implementation
val
mhartNo
=
hartId
()
val
mhartid
=
RegInit
(
UInt
(
XLEN
.
W
),
mhartNo
.
asUInt
)
// the hardware thread running the code
val
mhartid
=
RegInit
(
UInt
(
XLEN
.
W
),
csrio
.
hartId
)
// the hardware thread running the code
val
mstatus
=
RegInit
(
UInt
(
XLEN
.
W
),
0.
U
)
// mstatus Value Table
...
...
@@ -813,113 +805,9 @@ class CSR extends FunctionUnit with HasCSRConst
XSDebug
(
raiseExceptionIntr
&&
delegS
,
"sepc is writen!!! pc:%x\n"
,
cfIn
.
pc
)
/**
* Emu Performance counters
*/
val
emuPerfCntList
=
Map
(
// "Mcycle" -> (0x1000, "perfCntCondMcycle" ),
// "Minstret" -> (0x1002, "perfCntCondMinstret" ),
"BpInstr"
->
(
0x1003
,
"perfCntCondBpInstr"
),
"BpRight"
->
(
0x1004
,
"perfCntCondBpRight"
),
"BpWrong"
->
(
0x1005
,
"perfCntCondBpWrong"
),
"BpBRight"
->
(
0x1006
,
"perfCntCondBpBRight"
),
"BpBWrong"
->
(
0x1007
,
"perfCntCondBpBWrong"
),
"BpJRight"
->
(
0x1008
,
"perfCntCondBpJRight"
),
"BpJWrong"
->
(
0x1009
,
"perfCntCondBpJWrong"
),
"BpIRight"
->
(
0x100a
,
"perfCntCondBpIRight"
),
"BpIWrong"
->
(
0x100b
,
"perfCntCondBpIWrong"
),
"BpRRight"
->
(
0x100c
,
"perfCntCondBpRRight"
),
"BpRWrong"
->
(
0x100d
,
"perfCntCondBpRWrong"
),
"RoqWalk"
->
(
0x100f
,
"perfCntCondRoqWalk"
),
"DTlbReqCnt0"
->
(
0x1015
,
"perfCntDtlbReqCnt0"
),
"DTlbReqCnt1"
->
(
0x1016
,
"perfCntDtlbReqCnt1"
),
"DTlbReqCnt2"
->
(
0x1017
,
"perfCntDtlbReqCnt2"
),
"DTlbReqCnt3"
->
(
0x1018
,
"perfCntDtlbReqCnt3"
),
"DTlbMissCnt0"
->
(
0x1019
,
"perfCntDtlbMissCnt0"
),
"DTlbMissCnt1"
->
(
0x1020
,
"perfCntDtlbMissCnt1"
),
"DTlbMissCnt2"
->
(
0x1021
,
"perfCntDtlbMissCnt2"
),
"DTlbMissCnt3"
->
(
0x1022
,
"perfCntDtlbMissCnt3"
),
"ITlbReqCnt0"
->
(
0x1023
,
"perfCntItlbReqCnt0"
),
"ITlbMissCnt0"
->
(
0x1024
,
"perfCntItlbMissCnt0"
),
"PtwReqCnt"
->
(
0x1025
,
"perfCntPtwReqCnt"
),
"PtwCycleCnt"
->
(
0x1026
,
"perfCntPtwCycleCnt"
),
"PtwL2TlbHit"
->
(
0x1027
,
"perfCntPtwL2TlbHit"
),
"ICacheReq"
->
(
0x1028
,
"perfCntIcacheReqCnt"
),
"ICacheMiss"
->
(
0x1029
,
"perfCntIcacheMissCnt"
),
"ICacheMMIO"
->
(
0x102a
,
"perfCntIcacheMMIOCnt"
),
// "FetchFromLoopBuffer" -> (0x102b, "CntFetchFromLoopBuffer"),
// "ExitLoop1" -> (0x102c, "CntExitLoop1"),
// "ExitLoop2" -> (0x102d, "CntExitLoop2"),
// "ExitLoop3" -> (0x102e, "CntExitLoop3")
"ubtbRight"
->
(
0x1030
,
"perfCntubtbRight"
),
"ubtbWrong"
->
(
0x1031
,
"perfCntubtbWrong"
),
"btbRight"
->
(
0x1032
,
"perfCntbtbRight"
),
"btbWrong"
->
(
0x1033
,
"perfCntbtbWrong"
),
"tageRight"
->
(
0x1034
,
"perfCnttageRight"
),
"tageWrong"
->
(
0x1035
,
"perfCnttageWrong"
),
"rasRight"
->
(
0x1036
,
"perfCntrasRight"
),
"rasWrong"
->
(
0x1037
,
"perfCntrasWrong"
),
"loopRight"
->
(
0x1038
,
"perfCntloopRight"
),
"loopWrong"
->
(
0x1039
,
"perfCntloopWrong"
),
"s1Right"
->
(
0x103a
,
"perfCntS1Right"
),
"s1Wrong"
->
(
0x103b
,
"perfCntS1Wrong"
),
"s2Right"
->
(
0x103c
,
"perfCntS2Right"
),
"s2Wrong"
->
(
0x103d
,
"perfCntS2Wrong"
),
"s3Right"
->
(
0x103e
,
"perfCntS3Right"
),
"s3Wrong"
->
(
0x103f
,
"perfCntS3Wrong"
),
"loopExit"
->
(
0x1040
,
"perfCntLoopExit"
),
"takenButWrong"
->
(
0x1041
,
"perfCntTakenButWrong"
),
// "L2cacheHit" -> (0x1023, "perfCntCondL2cacheHit")
)
++
(
(
0
until
dcacheParameters
.
nMissEntries
).
map
(
i
=>
(
"DCacheMissQueuePenalty"
+
Integer
.
toString
(
i
,
10
),
(
0x1042
+
i
,
"perfCntDCacheMissQueuePenaltyEntry"
+
Integer
.
toString
(
i
,
10
)))
).
toMap
)
++
(
(
0
until
icacheParameters
.
nMissEntries
).
map
(
i
=>
(
"ICacheMissQueuePenalty"
+
Integer
.
toString
(
i
,
10
),
(
0x1042
+
dcacheParameters
.
nMissEntries
+
i
,
"perfCntICacheMissQueuePenaltyEntry"
+
Integer
.
toString
(
i
,
10
)))
).
toMap
)
++
(
(
0
until
l1plusPrefetcherParameters
.
nEntries
).
map
(
i
=>
(
"L1+PrefetchPenalty"
+
Integer
.
toString
(
i
,
10
),
(
0x1042
+
dcacheParameters
.
nMissEntries
+
icacheParameters
.
nMissEntries
+
i
,
"perfCntL1plusPrefetchPenaltyEntry"
+
Integer
.
toString
(
i
,
10
)))
).
toMap
)
++
(
(
0
until
l2PrefetcherParameters
.
nEntries
).
map
(
i
=>
(
"L2PrefetchPenalty"
+
Integer
.
toString
(
i
,
10
),
(
0x1042
+
dcacheParameters
.
nMissEntries
+
icacheParameters
.
nMissEntries
+
l1plusPrefetcherParameters
.
nEntries
+
i
,
"perfCntL2PrefetchPenaltyEntry"
+
Integer
.
toString
(
i
,
10
)))
).
toMap
)
emuPerfCntList
.
foreach
{
case
(
_
,
(
address
,
boringId
))
=>
if
(
hasEmuPerfCnt
)
{
ExcitingUtils
.
addSink
(
emuPerfCntCond
(
address
&
0x7f
),
boringId
,
ConnectionType
.
Perf
)
}
// if (!hasEmuPerfCnt) {
// // do not enable perfcnts except for Mcycle and Minstret
// if (address != emuPerfCntList("Mcycle")._1 && address != emuPerfCntList("Minstret")._1) {
// perfCntCond(address & 0x7f) := false.B
// }
// }
}
val
xstrap
=
WireInit
(
false
.
B
)
if
(!
env
.
FPGAPlatform
&&
EnableBPU
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSink
(
xstrap
,
"XSTRAP"
,
ConnectionType
.
Debug
)
}
def
readWithScala
(
addr
:
Int
)
:
UInt
=
mapping
(
addr
).
_1
val
difftestIntrNO
=
Mux
(
raiseIntr
,
causeNO
,
0.
U
)
if
(!
env
.
FPGAPlatform
)
{
// display all perfcnt when nooptrap is executed
when
(
xstrap
)
{
printf
(
"======== PerfCnt =========\n"
)
emuPerfCntList
.
toSeq
.
sortBy
(
_
.
_2
.
_1
).
foreach
{
case
(
str
,
(
address
,
_
))
=>
printf
(
"%d <- "
+
str
+
"\n"
,
readWithScala
(
address
))
}
}
}
if
(!
env
.
FPGAPlatform
)
{
difftestIO
.
intrNO
:=
RegNext
(
difftestIntrNO
)
...
...
src/main/scala/xiangshan/backend/fu/Jump.scala
浏览文件 @
49681eda
...
...
@@ -14,6 +14,34 @@ trait HasRedirectOut { this: RawModule =>
val
redirectOut
=
IO
(
Output
(
new
Redirect
))
}
class
JumpDataModule
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
val
src1
=
Input
(
UInt
(
XLEN
.
W
))
val
pc
=
Input
(
UInt
(
XLEN
.
W
))
// sign-ext to XLEN
val
immMin
=
Input
(
UInt
(
ImmUnion
.
maxLen
.
W
))
val
func
=
Input
(
FuOpType
())
val
isRVC
=
Input
(
Bool
())
val
result
,
target
=
Output
(
UInt
(
XLEN
.
W
))
val
isAuipc
=
Output
(
Bool
())
})
val
(
src1
,
pc
,
immMin
,
func
,
isRVC
)
=
(
io
.
src1
,
io
.
pc
,
io
.
immMin
,
io
.
func
,
io
.
isRVC
)
val
isJalr
=
JumpOpType
.
jumpOpisJalr
(
func
)
val
isAuipc
=
JumpOpType
.
jumpOpisAuipc
(
func
)
val
offset
=
SignExt
(
ParallelMux
(
Seq
(
isJalr
->
ImmUnion
.
I
.
toImm32
(
immMin
),
isAuipc
->
ImmUnion
.
U
.
toImm32
(
immMin
),
!(
isJalr
||
isAuipc
)
->
ImmUnion
.
J
.
toImm32
(
immMin
)
)),
XLEN
)
val
snpc
=
Mux
(
isRVC
,
pc
+
2.
U
,
pc
+
4.
U
)
val
target
=
src1
+
offset
// NOTE: src1 is (pc/rf(rs1)), src2 is (offset)
io
.
target
:=
target
io
.
result
:=
Mux
(
JumpOpType
.
jumpOpisAuipc
(
func
),
target
,
snpc
)
io
.
isAuipc
:=
isAuipc
}
class
Jump
extends
FunctionUnit
with
HasRedirectOut
{
val
(
src1
,
jalr_target
,
pc
,
immMin
,
func
,
uop
)
=
(
...
...
@@ -25,41 +53,33 @@ class Jump extends FunctionUnit with HasRedirectOut {
io
.
in
.
bits
.
uop
)
val
isJalr
=
JumpOpType
.
jumpOpisJalr
(
func
)
val
isAuipc
=
JumpOpType
.
jumpOpisAuipc
(
func
)
val
offset
=
SignExt
(
ParallelMux
(
Seq
(
isJalr
->
ImmUnion
.
I
.
toImm32
(
immMin
),
isAuipc
->
ImmUnion
.
U
.
toImm32
(
immMin
),
!(
isJalr
||
isAuipc
)
->
ImmUnion
.
J
.
toImm32
(
immMin
)
)),
XLEN
)
val
redirectHit
=
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
val
valid
=
io
.
in
.
valid
val
isRVC
=
uop
.
cf
.
pd
.
isRVC
val
snpc
=
Mux
(
isRVC
,
pc
+
2.
U
,
pc
+
4.
U
)
val
target
=
src1
+
offset
// NOTE: src1 is (pc/rf(rs1)), src2 is (offset)
redirectOutValid
:=
valid
&&
!
isAuipc
val
jumpDataModule
=
Module
(
new
JumpDataModule
)
jumpDataModule
.
io
.
src1
:=
src1
jumpDataModule
.
io
.
pc
:=
pc
jumpDataModule
.
io
.
immMin
:=
immMin
jumpDataModule
.
io
.
func
:=
func
jumpDataModule
.
io
.
isRVC
:=
isRVC
redirectOutValid
:=
valid
&&
!
jumpDataModule
.
io
.
isAuipc
redirectOut
:=
DontCare
redirectOut
.
cfiUpdate
.
target
:=
target
redirectOut
.
cfiUpdate
.
target
:=
jumpDataModule
.
io
.
target
redirectOut
.
level
:=
RedirectLevel
.
flushAfter
redirectOut
.
roqIdx
:=
uop
.
roqIdx
redirectOut
.
ftqIdx
:=
uop
.
cf
.
ftqPtr
redirectOut
.
ftqOffset
:=
uop
.
cf
.
ftqOffset
redirectOut
.
cfiUpdate
.
predTaken
:=
true
.
B
redirectOut
.
cfiUpdate
.
taken
:=
true
.
B
redirectOut
.
cfiUpdate
.
target
:=
target
redirectOut
.
cfiUpdate
.
isMisPred
:=
target
=/=
jalr_target
||
!
uop
.
cf
.
pred_taken
// Output
val
res
=
Mux
(
JumpOpType
.
jumpOpisAuipc
(
func
),
target
,
snpc
)
redirectOut
.
cfiUpdate
.
target
:=
jumpDataModule
.
io
.
target
redirectOut
.
cfiUpdate
.
isMisPred
:=
jumpDataModule
.
io
.
target
=/=
jalr_target
||
!
uop
.
cf
.
pred_taken
io
.
in
.
ready
:=
io
.
out
.
ready
io
.
out
.
valid
:=
valid
io
.
out
.
bits
.
uop
<>
io
.
in
.
bits
.
uop
io
.
out
.
bits
.
data
:=
res
io
.
out
.
bits
.
data
:=
jumpDataModule
.
io
.
result
// NOTE: the debug info is for one-cycle exec, if FMV needs multi-cycle, may needs change it
XSDebug
(
io
.
in
.
valid
,
"In(%d %d) Out(%d %d) Redirect:(%d %d %d)\n"
,
...
...
@@ -71,5 +91,4 @@ class Jump extends FunctionUnit with HasRedirectOut {
io
.
redirectIn
.
bits
.
level
,
redirectHit
)
XSDebug
(
io
.
in
.
valid
,
"src1:%x offset:%x func:%b type:JUMP pc:%x res:%x\n"
,
src1
,
offset
,
func
,
pc
,
res
)
}
src/main/scala/xiangshan/backend/fu/Multiplier.scala
浏览文件 @
49681eda
...
...
@@ -42,14 +42,15 @@ class NaiveMultiplier(len: Int, val latency: Int)
XSDebug
(
p
"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n"
)
}
class
ArrayMultiplier
(
len
:
Int
,
doReg
:
Seq
[
Int
])
extends
AbstractMultiplier
(
len
)
with
HasPipelineReg
{
override
def
latency
=
doReg
.
size
class
ArrayMulDataModule
(
len
:
Int
,
doReg
:
Seq
[
Int
])
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
val
a
,
b
=
Input
(
UInt
(
len
.
W
))
val
regEnables
=
Input
(
Vec
(
doReg
.
size
,
Bool
()))
val
result
=
Output
(
UInt
((
2
*
len
).
W
))
})
val
(
a
,
b
)
=
(
io
.
a
,
io
.
b
)
val
doRegSorted
=
doReg
.
sortWith
(
_
<
_
)
val
(
a
,
b
)
=
(
io
.
in
.
bits
.
src
(
0
),
io
.
in
.
bits
.
src
(
1
))
val
b_sext
,
bx2
,
neg_b
,
neg_bx2
=
Wire
(
UInt
((
len
+
1
).
W
))
b_sext
:=
SignExt
(
b
,
len
+
1
)
bx2
:=
b_sext
<<
1
...
...
@@ -149,7 +150,7 @@ class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len)
val
needReg
=
doRegSorted
.
contains
(
depth
)
val
toNextLayer
=
if
(
needReg
)
columns_next
.
map
(
_
.
map
(
PipelineReg
(
doRegSorted
.
indexOf
(
depth
)
+
1
)(
_
)))
columns_next
.
map
(
_
.
map
(
x
=>
RegEnable
(
x
,
io
.
regEnables
(
doRegSorted
.
indexOf
(
depth
))
)))
else
columns_next
...
...
@@ -158,7 +159,18 @@ class ArrayMultiplier(len: Int, doReg: Seq[Int]) extends AbstractMultiplier(len)
}
val
(
sum
,
carry
)
=
addAll
(
cols
=
columns
,
depth
=
0
)
val
result
=
sum
+
carry
io
.
result
:=
sum
+
carry
}
class
ArrayMultiplier
(
len
:
Int
,
doReg
:
Seq
[
Int
])
extends
AbstractMultiplier
(
len
)
with
HasPipelineReg
{
override
def
latency
=
doReg
.
size
val
mulDataModule
=
Module
(
new
ArrayMulDataModule
(
len
,
doReg
))
mulDataModule
.
io
.
a
:=
io
.
in
.
bits
.
src
(
0
)
mulDataModule
.
io
.
b
:=
io
.
in
.
bits
.
src
(
1
)
mulDataModule
.
io
.
regEnables
:=
VecInit
((
1
to
doReg
.
size
)
map
(
i
=>
regEnable
(
i
)))
val
result
=
mulDataModule
.
io
.
result
var
ctrlVec
=
Seq
(
ctrl
)
for
(
i
<-
1
to
latency
){
...
...
src/main/scala/xiangshan/backend/fu/SRT4Divider.scala
浏览文件 @
49681eda
...
...
@@ -4,120 +4,134 @@ import chisel3._
import
chisel3.stage.
{
ChiselGeneratorAnnotation
,
ChiselStage
}
import
chisel3.util._
import
utils.SignExt
import
xiangshan.XSModule
import
xiangshan.backend.fu.util.CSA3_2
/** A Radix-4 SRT Integer Divider
*
* 2 ~ (5 + (len+3)/2) cycles are needed for each division.
*/
class
SRT4Divider
(
len
:
Int
)
extends
AbstractDivider
(
len
)
{
class
SRT4DividerDataModule
(
len
:
Int
)
extends
Module
{
val
io
=
IO
(
new
Bundle
()
{
val
src1
,
src2
=
Input
(
UInt
(
len
.
W
))
val
valid
,
sign
,
kill_w
,
kill_r
,
isHi
,
isW
=
Input
(
Bool
())
val
in_ready
=
Output
(
Bool
())
val
out_valid
=
Output
(
Bool
())
val
out_data
=
Output
(
UInt
(
len
.
W
))
val
out_ready
=
Input
(
Bool
())
})
val
(
a
,
b
,
sign
,
valid
,
kill_w
,
kill_r
,
isHi
,
isW
)
=
(
io
.
src1
,
io
.
src2
,
io
.
sign
,
io
.
valid
,
io
.
kill_w
,
io
.
kill_r
,
io
.
isHi
,
io
.
isW
)
val
in_fire
=
valid
&&
io
.
in_ready
val
out_fire
=
io
.
out_ready
&&
io
.
out_valid
val
s_idle
::
s_lzd
::
s_normlize
::
s_recurrence
::
s_recovery_1
::
s_recovery_2
::
s_finish
::
Nil
=
Enum
(
7
)
val
state
=
RegInit
(
s_idle
)
val
newReq
=
(
state
===
s_idle
)
&&
io
.
in
.
fire
()
val
cnt_next
=
Wire
(
UInt
(
log2Up
((
len
+
3
)/
2
).
W
))
val
cnt
=
RegEnable
(
cnt_next
,
state
===
s_normlize
||
state
===
s_recurrence
)
val
cnt_next
=
Wire
(
UInt
(
log2Up
((
len
+
3
)
/
2
).
W
))
val
cnt
=
RegEnable
(
cnt_next
,
state
===
s_normlize
||
state
===
s_recurrence
)
val
rec_enough
=
cnt_next
===
0.
U
val
newReq
=
in_fire
def
abs
(
a
:
UInt
,
sign
:
Bool
)
:
(
Bool
,
UInt
)
=
{
val
s
=
a
(
len
-
1
)
&&
sign
(
s
,
Mux
(
s
,
-
a
,
a
))
}
val
(
a
,
b
)
=
(
io
.
in
.
bits
.
src
(
0
),
io
.
in
.
bits
.
src
(
1
))
val
uop
=
io
.
in
.
bits
.
uop
val
(
aSign
,
aVal
)
=
abs
(
a
,
sign
)
val
(
bSign
,
bVal
)
=
abs
(
b
,
sign
)
val
aSignReg
=
RegEnable
(
aSign
,
newReq
)
val
qSignReg
=
RegEnable
(
aSign
^
bSign
,
newReq
)
val
uopReg
=
RegEnable
(
uop
,
newReq
)
val
ctrlReg
=
RegEnable
(
ctrl
,
newReq
)
val
divZero
=
b
===
0.
U
val
divZeroReg
=
RegEnable
(
divZero
,
newReq
)
val
kill
=
state
=/=
s_idle
&&
uopReg
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
switch
(
state
){
is
(
s_idle
){
when
(
io
.
in
.
fire
()
&&
!
io
.
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
))
{
switch
(
state
)
{
is
(
s_idle
)
{
when
(
in_fire
&&
!
kill_w
)
{
state
:=
Mux
(
divZero
,
s_finish
,
s_lzd
)
}
}
is
(
s_lzd
){
// leading zero detection
is
(
s_lzd
)
{
// leading zero detection
state
:=
s_normlize
}
is
(
s_normlize
){
// shift a/b
is
(
s_normlize
)
{
// shift a/b
state
:=
s_recurrence
}
is
(
s_recurrence
){
// (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
when
(
rec_enough
){
state
:=
s_recovery_1
}
is
(
s_recurrence
)
{
// (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
when
(
rec_enough
)
{
state
:=
s_recovery_1
}
}
is
(
s_recovery_1
){
// if rem < 0, rem = rem + d
is
(
s_recovery_1
)
{
// if rem < 0, rem = rem + d
state
:=
s_recovery_2
}
is
(
s_recovery_2
){
// recovery shift
is
(
s_recovery_2
)
{
// recovery shift
state
:=
s_finish
}
is
(
s_finish
){
when
(
io
.
out
.
fire
()){
state
:=
s_idle
}
is
(
s_finish
)
{
when
(
out_fire
)
{
state
:=
s_idle
}
}
}
when
(
kill
)
{
when
(
kill
_r
)
{
state
:=
s_idle
}
/** Calculate abs(a)/abs(b) by recurrence
*
* ws, wc: partial remainder in carry-save form,
*
in recurrence steps, ws/wc = 4ws[j]/4wc[j];
*
in recovery step, ws/wc = ws[j]/wc[j];
*
in final step, ws = abs(a)/abs(b).
* in recurrence steps, ws/wc = 4ws[j]/4wc[j];
* in recovery step, ws/wc = ws[j]/wc[j];
* in final step, ws = abs(a)/abs(b).
*
* d: normlized divisor(1/2<=d<1)
*
* wLen = 3 integer bits + (len+1) frac bits
*/
def
wLen
=
3
+
len
+
1
val
ws
,
wc
=
Reg
(
UInt
(
wLen
.
W
))
val
ws_next
,
wc_next
=
Wire
(
UInt
(
wLen
.
W
))
val
d
=
Reg
(
UInt
(
wLen
.
W
))
val
aLeadingZeros
=
RegEnable
(
next
=
PriorityEncoder
(
ws
(
len
-
1
,
0
).
asBools
().
reverse
),
enable
=
state
===
s_lzd
next
=
PriorityEncoder
(
ws
(
len
-
1
,
0
).
asBools
().
reverse
),
enable
=
state
===
s_lzd
)
val
bLeadingZeros
=
RegEnable
(
next
=
PriorityEncoder
(
d
(
len
-
1
,
0
).
asBools
().
reverse
),
enable
=
state
===
s_lzd
next
=
PriorityEncoder
(
d
(
len
-
1
,
0
).
asBools
().
reverse
),
enable
=
state
===
s_lzd
)
val
diff
=
Cat
(
0.
U
(
1.
W
),
bLeadingZeros
).
asSInt
()
-
Cat
(
0.
U
(
1.
W
),
aLeadingZeros
).
asSInt
()
val
isNegDiff
=
diff
(
diff
.
getWidth
-
1
)
val
quotientBits
=
Mux
(
isNegDiff
,
0.
U
,
diff
.
asUInt
())
val
qBitsIsOdd
=
quotientBits
(
0
)
val
recoveryShift
=
RegEnable
(
len
.
U
-
bLeadingZeros
,
state
===
s_normlize
)
val
recoveryShift
=
RegEnable
(
len
.
U
-
bLeadingZeros
,
state
===
s_normlize
)
val
a_shifted
,
b_shifted
=
Wire
(
UInt
(
len
.
W
))
a_shifted
:=
Mux
(
isNegDiff
,
ws
(
len
-
1
,
0
)
<<
bLeadingZeros
,
ws
(
len
-
1
,
0
)
<<
aLeadingZeros
ws
(
len
-
1
,
0
)
<<
bLeadingZeros
,
ws
(
len
-
1
,
0
)
<<
aLeadingZeros
)
b_shifted
:=
d
(
len
-
1
,
0
)
<<
bLeadingZeros
b_shifted
:=
d
(
len
-
1
,
0
)
<<
bLeadingZeros
val
rem_temp
=
ws
+
wc
val
rem_fixed
=
Mux
(
rem_temp
(
wLen
-
1
),
rem_temp
+
d
,
rem_temp
)
val
rem_abs
=
(
ws
<<
recoveryShift
)
(
2
*
len
,
len
+
1
)
val
rem_fixed
=
Mux
(
rem_temp
(
wLen
-
1
),
rem_temp
+
d
,
rem_temp
)
val
rem_abs
=
(
ws
<<
recoveryShift
)
(
2
*
len
,
len
+
1
)
when
(
newReq
){
when
(
newReq
)
{
ws
:=
Cat
(
0.
U
(
4.
W
),
Mux
(
divZero
,
a
,
aVal
))
wc
:=
0.
U
d
:=
Cat
(
0.
U
(
4.
W
),
bVal
)
}.
elsewhen
(
state
===
s_normlize
){
}.
elsewhen
(
state
===
s_normlize
)
{
d
:=
Cat
(
0.
U
(
3.
W
),
b_shifted
,
0.
U
(
1.
W
))
ws
:=
Mux
(
qBitsIsOdd
,
a_shifted
,
a_shifted
<<
1
)
}.
elsewhen
(
state
===
s_recurrence
){
}.
elsewhen
(
state
===
s_recurrence
)
{
ws
:=
Mux
(
rec_enough
,
ws_next
,
ws_next
<<
2
)
wc
:=
Mux
(
rec_enough
,
wc_next
,
wc_next
<<
2
)
}.
elsewhen
(
state
===
s_recovery_1
){
}.
elsewhen
(
state
===
s_recovery_1
)
{
ws
:=
rem_fixed
}.
elsewhen
(
state
===
s_recovery_2
){
}.
elsewhen
(
state
===
s_recovery_2
)
{
ws
:=
rem_abs
}
...
...
@@ -140,8 +154,8 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_dx2
->
2.
U
(
2.
W
)
))
val
w_truncated
=
(
ws
(
wLen
-
1
,
wLen
-
1
-
6
)
+
wc
(
wLen
-
1
,
wLen
-
1
-
6
)).
asSInt
()
val
d_truncated
=
d
(
len
-
1
,
len
-
3
)
val
w_truncated
=
(
ws
(
wLen
-
1
,
wLen
-
1
-
6
)
+
wc
(
wLen
-
1
,
wLen
-
1
-
6
)).
asSInt
()
val
d_truncated
=
d
(
len
-
1
,
len
-
3
)
val
qSelTable
=
Array
(
Array
(
12
,
4
,
-
4
,
-
13
),
...
...
@@ -156,9 +170,9 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
// ge(x): w_truncated >= x
var
ge
=
Map
[
Int
,
Bool
]()
for
(
row
<-
qSelTable
)
{
for
(
k
<-
row
)
{
if
(!
ge
.
contains
(
k
))
ge
=
ge
+
(
k
->
(
w_truncated
>=
k
.
S
(
7.
W
)))
for
(
row
<-
qSelTable
)
{
for
(
k
<-
row
)
{
if
(!
ge
.
contains
(
k
))
ge
=
ge
+
(
k
->
(
w_truncated
>=
k
.
S
(
7.
W
)))
}
}
q_sel
:=
MuxLookup
(
d_truncated
,
sel_0
,
...
...
@@ -169,7 +183,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
ge
(
x
(
2
))
->
sel_0
,
ge
(
x
(
3
))
->
sel_neg_d
))
).
zipWithIndex
.
map
({
case
(
v
,
i
)
=>
i
.
U
->
v
})
).
zipWithIndex
.
map
({
case
(
v
,
i
)
=>
i
.
U
->
v
})
)
/** Calculate (ws[j+1],wc[j+1]) by a [3-2]carry-save adder
...
...
@@ -178,7 +192,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
*/
val
csa
=
Module
(
new
CSA3_2
(
wLen
))
csa
.
io
.
in
(
0
)
:=
ws
csa
.
io
.
in
(
1
)
:=
Cat
(
wc
(
wLen
-
1
,
2
),
wc_adj
)
csa
.
io
.
in
(
1
)
:=
Cat
(
wc
(
wLen
-
1
,
2
),
wc_adj
)
csa
.
io
.
in
(
2
)
:=
MuxLookup
(
q_sel
,
0.
U
,
Seq
(
sel_d
->
neg_d
,
sel_dx2
->
neg_dx2
,
...
...
@@ -190,10 +204,10 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
// On the fly quotient conversion
val
q
,
qm
=
Reg
(
UInt
(
len
.
W
))
when
(
newReq
){
when
(
newReq
)
{
q
:=
0.
U
qm
:=
0.
U
}.
elsewhen
(
state
===
s_recurrence
){
}.
elsewhen
(
state
===
s_recurrence
)
{
val
qMap
=
Seq
(
sel_0
->
(
q
,
0
),
sel_d
->
(
q
,
1
),
...
...
@@ -202,7 +216,7 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_neg_dx2
->
(
qm
,
2
)
)
q
:=
MuxLookup
(
q_sel
,
0.
U
,
qMap
.
map
(
m
=>
m
.
_1
->
Cat
(
m
.
_2
.
_1
(
len
-
3
,
0
),
m
.
_2
.
_2
.
U
(
2.
W
)))
qMap
.
map
(
m
=>
m
.
_1
->
Cat
(
m
.
_2
.
_1
(
len
-
3
,
0
),
m
.
_2
.
_2
.
U
(
2.
W
)))
)
val
qmMap
=
Seq
(
sel_0
->
(
qm
,
3
),
...
...
@@ -212,27 +226,53 @@ class SRT4Divider(len: Int) extends AbstractDivider(len) {
sel_neg_dx2
->
(
qm
,
1
)
)
qm
:=
MuxLookup
(
q_sel
,
0.
U
,
qmMap
.
map
(
m
=>
m
.
_1
->
Cat
(
m
.
_2
.
_1
(
len
-
3
,
0
),
m
.
_2
.
_2
.
U
(
2.
W
)))
qmMap
.
map
(
m
=>
m
.
_1
->
Cat
(
m
.
_2
.
_1
(
len
-
3
,
0
),
m
.
_2
.
_2
.
U
(
2.
W
)))
)
}.
elsewhen
(
state
===
s_recovery_1
){
q
:=
Mux
(
rem_temp
(
wLen
-
1
),
qm
,
q
)
}.
elsewhen
(
state
===
s_recovery_1
)
{
q
:=
Mux
(
rem_temp
(
wLen
-
1
),
qm
,
q
)
}
val
remainder
=
Mux
(
aSignReg
,
-
ws
(
len
-
1
,
0
),
ws
(
len
-
1
,
0
))
val
remainder
=
Mux
(
aSignReg
,
-
ws
(
len
-
1
,
0
),
ws
(
len
-
1
,
0
))
val
quotient
=
Mux
(
qSignReg
,
-
q
,
q
)
val
res
=
Mux
(
ctrlReg
.
isHi
,
Mux
(
divZeroReg
,
ws
(
len
-
1
,
0
),
remainder
),
val
res
=
Mux
(
isHi
,
Mux
(
divZeroReg
,
ws
(
len
-
1
,
0
),
remainder
),
Mux
(
divZeroReg
,
Fill
(
len
,
1.
U
(
1.
W
)),
quotient
)
)
io
.
in
.
ready
:=
state
===
s_idle
io
.
out
.
valid
:=
state
===
s_finish
io
.
out
.
bits
.
data
:=
Mux
(
ctrlReg
.
isW
,
io
.
out_data
:=
Mux
(
isW
,
SignExt
(
res
(
31
,
0
),
len
),
res
)
io
.
out
.
bits
.
uop
:=
uopReg
io
.
in_ready
:=
state
===
s_idle
io
.
out_valid
:=
state
===
s_finish
}
class
SRT4Divider
(
len
:
Int
)
extends
AbstractDivider
(
len
)
{
val
newReq
=
io
.
in
.
fire
()
val
uop
=
io
.
in
.
bits
.
uop
val
uopReg
=
RegEnable
(
uop
,
newReq
)
val
ctrlReg
=
RegEnable
(
ctrl
,
newReq
)
val
divDataModule
=
Module
(
new
SRT4DividerDataModule
(
len
))
val
kill_w
=
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
val
kill_r
=
!
divDataModule
.
io
.
in_ready
&&
uopReg
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
divDataModule
.
io
.
src1
:=
io
.
in
.
bits
.
src
(
0
)
divDataModule
.
io
.
src2
:=
io
.
in
.
bits
.
src
(
1
)
divDataModule
.
io
.
valid
:=
io
.
in
.
valid
divDataModule
.
io
.
sign
:=
sign
divDataModule
.
io
.
kill_w
:=
kill_w
divDataModule
.
io
.
kill_r
:=
kill_r
divDataModule
.
io
.
isHi
:=
ctrlReg
.
isHi
divDataModule
.
io
.
isW
:=
ctrlReg
.
isW
divDataModule
.
io
.
out_ready
:=
io
.
out
.
ready
io
.
in
.
ready
:=
divDataModule
.
io
.
in_ready
io
.
out
.
valid
:=
divDataModule
.
io
.
out_valid
io
.
out
.
bits
.
data
:=
divDataModule
.
io
.
out_data
io
.
out
.
bits
.
uop
:=
uopReg
}
src/main/scala/xiangshan/backend/fu/fpu/FDivSqrt.scala
浏览文件 @
49681eda
...
...
@@ -5,7 +5,15 @@ import chisel3.util._
import
freechips.rocketchip.tile.FType
import
hardfloat.
{
DivSqrtRecFNToRaw_small
,
RoundAnyRawFNToRecFN
}
class
FDivSqrt
extends
FPUSubModule
{
class
FDivSqrtDataModule
extends
FPUDataModule
{
val
in_valid
,
out_ready
=
IO
(
Input
(
Bool
()))
val
in_ready
,
out_valid
=
IO
(
Output
(
Bool
()))
val
kill_w
=
IO
(
Input
(
Bool
()))
val
kill_r
=
IO
(
Input
(
Bool
()))
val
in_fire
=
in_valid
&&
in_ready
val
out_fire
=
out_valid
&&
out_ready
val
killReg
=
RegInit
(
false
.
B
)
val
s_idle
::
s_div
::
s_finish
::
Nil
=
Enum
(
3
)
val
state
=
RegInit
(
s_idle
)
...
...
@@ -13,48 +21,42 @@ class FDivSqrt extends FPUSubModule {
val
divSqrt
=
Module
(
new
DivSqrtRecFNToRaw_small
(
FType
.
D
.
exp
,
FType
.
D
.
sig
,
0
))
val
divSqrtRawValid
=
divSqrt
.
io
.
rawOutValid_sqrt
||
divSqrt
.
io
.
rawOutValid_div
val
fpCtrl
=
io
.
in
.
bits
.
uop
.
ctrl
.
fpu
val
fpCtrl
=
io
.
in
.
fpCtrl
val
tag
=
fpCtrl
.
typeTagIn
val
uopReg
=
RegEnable
(
io
.
in
.
bits
.
uop
,
io
.
in
.
fire
())
val
single
=
RegEnable
(
tag
===
S
,
io
.
in
.
fire
())
val
rmReg
=
RegEnable
(
rm
,
io
.
in
.
fire
())
val
kill
=
uopReg
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
val
killReg
=
RegInit
(
false
.
B
)
val
single
=
RegEnable
(
tag
===
S
,
in_fire
)
val
rmReg
=
RegEnable
(
rm
,
in_fire
)
switch
(
state
){
is
(
s_idle
){
when
(
i
o
.
in
.
fire
()
&&
!
io
.
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
){
state
:=
s_div
}
when
(
i
n_fire
&&
!
kill_w
){
state
:=
s_div
}
}
is
(
s_div
){
when
(
divSqrtRawValid
){
when
(
kill
||
killReg
){
when
(
kill
_r
||
killReg
){
state
:=
s_idle
killReg
:=
false
.
B
}.
otherwise
({
state
:=
s_finish
})
}.
elsewhen
(
kill
){
}.
elsewhen
(
kill
_r
){
killReg
:=
true
.
B
}
}
is
(
s_finish
){
when
(
io
.
out
.
fire
()
||
kill
){
when
(
out_fire
||
kill_r
){
state
:=
s_idle
}
}
}
val
src1
=
unbox
(
io
.
in
.
bits
.
src
(
0
),
tag
,
None
)
val
src2
=
unbox
(
io
.
in
.
bits
.
src
(
1
),
tag
,
None
)
divSqrt
.
io
.
inValid
:=
io
.
in
.
fire
()
&&
!
io
.
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
val
src1
=
unbox
(
io
.
in
.
src
(
0
),
tag
,
None
)
val
src2
=
unbox
(
io
.
in
.
src
(
1
),
tag
,
None
)
divSqrt
.
io
.
inValid
:=
in_fire
&&
!
kill_w
divSqrt
.
io
.
sqrtOp
:=
fpCtrl
.
sqrt
divSqrt
.
io
.
a
:=
src1
divSqrt
.
io
.
b
:=
src2
divSqrt
.
io
.
roundingMode
:=
rm
val
round32
=
Module
(
new
RoundAnyRawFNToRecFN
(
FType
.
D
.
exp
,
FType
.
D
.
sig
+
2
,
FType
.
S
.
exp
,
FType
.
S
.
sig
,
0
))
...
...
@@ -73,9 +75,25 @@ class FDivSqrt extends FPUSubModule {
val
data
=
Mux
(
single
,
round32
.
io
.
out
,
round64
.
io
.
out
)
val
flags
=
Mux
(
single
,
round32
.
io
.
exceptionFlags
,
round64
.
io
.
exceptionFlags
)
io
.
in
.
ready
:=
state
===
s_idle
io
.
out
.
valid
:=
state
===
s_finish
&&
!
killReg
io
.
out
.
bits
.
uop
:=
uopReg
io
.
out
.
bits
.
data
:=
RegNext
(
data
,
divSqrtRawValid
)
in_ready
:=
state
===
s_idle
out_valid
:=
state
===
s_finish
&&
!
killReg
io
.
out
.
data
:=
RegNext
(
data
,
divSqrtRawValid
)
fflags
:=
RegNext
(
flags
,
divSqrtRawValid
)
}
class
FDivSqrt
extends
FPUSubModule
{
val
uopReg
=
RegEnable
(
io
.
in
.
bits
.
uop
,
io
.
in
.
fire
())
val
kill_r
=
uopReg
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
override
val
dataModule
=
Module
(
new
FDivSqrtDataModule
)
connectDataModule
dataModule
.
in_valid
:=
io
.
in
.
valid
dataModule
.
out_ready
:=
io
.
out
.
ready
dataModule
.
kill_w
:=
io
.
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
dataModule
.
kill_r
:=
kill_r
io
.
in
.
ready
:=
dataModule
.
in_ready
io
.
out
.
valid
:=
dataModule
.
out_valid
io
.
out
.
bits
.
uop
:=
uopReg
}
src/main/scala/xiangshan/backend/fu/fpu/FMA.scala
浏览文件 @
49681eda
package
xiangshan.backend.fu.fpu
import
chisel3._
import
chisel3.util.RegEnable
import
freechips.rocketchip.tile.FType
import
hardfloat.
{
MulAddRecFN_pipeline_stage1
,
MulAddRecFN_pipeline_stage2
,
MulAddRecFN_pipeline_stage3
,
MulAddRecFN_pipeline_stage4
,
RoundAnyRawFNToRecFN
}
import
xiangshan.backend.fu.FunctionUnit
class
FMA
extends
FPUPipelineModule
{
override
def
latency
:
Int
=
FunctionUnit
.
fmacCfg
.
latency
.
latencyVal
.
get
class
FMADataModule
(
latency
:
Int
)
extends
FPUDataModule
{
val
fpCtrl
=
io
.
in
.
bits
.
uop
.
ctrl
.
fpu
val
regEnables
=
IO
(
Input
(
Vec
(
latency
,
Bool
())))
val
typeTagOut
=
IO
(
Input
(
UInt
(
2.
W
)))
val
fpCtrl
=
io
.
in
.
fpCtrl
val
typeTagIn
=
fpCtrl
.
typeTagIn
val
src1
=
unbox
(
io
.
in
.
bits
.
src
(
0
),
typeTagIn
,
None
)
val
src2
=
unbox
(
io
.
in
.
bits
.
src
(
1
),
typeTagIn
,
None
)
val
src3
=
unbox
(
io
.
in
.
bits
.
src
(
2
),
typeTagIn
,
None
)
val
src1
=
unbox
(
io
.
in
.
src
(
0
),
typeTagIn
,
None
)
val
src2
=
unbox
(
io
.
in
.
src
(
1
),
typeTagIn
,
None
)
val
src3
=
unbox
(
io
.
in
.
src
(
2
),
typeTagIn
,
None
)
val
(
in1
,
in2
,
in3
)
=
(
WireInit
(
src1
),
WireInit
(
src2
),
WireInit
(
Mux
(
fpCtrl
.
isAddSub
,
src2
,
src3
))
)
...
...
@@ -34,7 +37,7 @@ class FMA extends FPUPipelineModule {
))
mul
.
io
.
a
:=
stage1
.
io
.
mulAddA
mul
.
io
.
b
:=
stage1
.
io
.
mulAddB
mul
.
io
.
reg_en
:=
regEnable
(
1
)
mul
.
io
.
reg_en
:=
regEnable
s
(
0
)
stage2
.
io
.
mulSum
:=
mul
.
io
.
sum
stage2
.
io
.
mulCarry
:=
mul
.
io
.
carry
...
...
@@ -54,10 +57,10 @@ class FMA extends FPUPipelineModule {
stage1
.
io
.
in
.
bits
.
roundingMode
:=
rm
stage1
.
io
.
in
.
bits
.
detectTininess
:=
hardfloat
.
consts
.
tininess_afterRounding
stage2
.
io
.
fromStage1
.
bits
<>
S1Reg
(
stage1
.
io
.
toStage2
.
bits
)
stage3
.
io
.
fromStage2
.
bits
<>
S2Reg
(
stage2
.
io
.
toStage3
.
bits
)
stage4
.
io
.
fromStage3
.
bits
<>
S3Reg
(
stage3
.
io
.
toStage4
.
bits
)
val
stage4toStage5
=
S4Reg
(
stage4
.
io
.
toStage5
.
bits
)
stage2
.
io
.
fromStage1
.
bits
<>
RegEnable
(
stage1
.
io
.
toStage2
.
bits
,
regEnables
(
0
)
)
stage3
.
io
.
fromStage2
.
bits
<>
RegEnable
(
stage2
.
io
.
toStage3
.
bits
,
regEnables
(
1
)
)
stage4
.
io
.
fromStage3
.
bits
<>
RegEnable
(
stage3
.
io
.
toStage4
.
bits
,
regEnables
(
2
)
)
val
stage4toStage5
=
RegEnable
(
stage4
.
io
.
toStage5
.
bits
,
regEnables
(
3
)
)
val
rounders
=
Seq
(
FType
.
S
,
FType
.
D
).
map
(
t
=>
{
val
rounder
=
Module
(
new
RoundAnyRawFNToRecFN
(
FType
.
D
.
exp
,
FType
.
D
.
sig
+
2
,
t
.
exp
,
t
.
sig
,
0
))
...
...
@@ -69,8 +72,8 @@ class FMA extends FPUPipelineModule {
rounder
})
val
singleOut
=
io
.
out
.
bits
.
uop
.
ctrl
.
fpu
.
typeTagOut
===
S
io
.
out
.
bits
.
data
:=
Mux
(
singleOut
,
val
singleOut
=
typeTagOut
===
S
io
.
out
.
data
:=
Mux
(
singleOut
,
sanitizeNaN
(
rounders
(
0
).
io
.
out
,
FType
.
S
),
sanitizeNaN
(
rounders
(
1
).
io
.
out
,
FType
.
D
)
)
...
...
@@ -79,3 +82,12 @@ class FMA extends FPUPipelineModule {
rounders
(
1
).
io
.
exceptionFlags
)
}
class
FMA
extends
FPUPipelineModule
{
override
def
latency
:
Int
=
FunctionUnit
.
fmacCfg
.
latency
.
latencyVal
.
get
override
val
dataModule
=
Module
(
new
FMADataModule
(
latency
))
connectDataModule
dataModule
.
regEnables
<>
VecInit
((
1
to
latency
)
map
(
i
=>
regEnable
(
i
)))
dataModule
.
typeTagOut
:=
io
.
out
.
bits
.
uop
.
ctrl
.
fpu
.
typeTagOut
}
src/main/scala/xiangshan/backend/fu/fpu/FPToFP.scala
浏览文件 @
49681eda
...
...
@@ -8,18 +8,18 @@ import chisel3.util._
import
hardfloat.CompareRecFN
import
xiangshan.backend.fu.FunctionUnit
class
FPToFP
extends
FPUPipelineModule
{
class
FPToFP
DataModule
(
latency
:
Int
)
extends
FPUDataModule
{
override
def
latency
:
Int
=
FunctionUnit
.
f2iCfg
.
latency
.
latencyVal
.
get
val
regEnables
=
IO
(
Input
(
Vec
(
latency
,
Bool
())))
val
ctrlIn
=
io
.
in
.
bits
.
uop
.
ctrl
.
fpu
val
ctrl
=
S1Reg
(
ctrlIn
)
val
ctrlIn
=
io
.
in
.
fpCtrl
val
ctrl
=
RegEnable
(
ctrlIn
,
regEnables
(
0
)
)
val
inTag
=
ctrl
.
typeTagIn
val
outTag
=
ctrl
.
typeTagOut
val
wflags
=
ctrl
.
wflags
val
src1
=
S1Reg
(
unbox
(
io
.
in
.
bits
.
src
(
0
),
ctrlIn
.
typeTagIn
,
None
))
val
src2
=
S1Reg
(
unbox
(
io
.
in
.
bits
.
src
(
1
),
ctrlIn
.
typeTagIn
,
None
))
val
rmReg
=
S1Reg
(
rm
)
val
src1
=
RegEnable
(
unbox
(
io
.
in
.
src
(
0
),
ctrlIn
.
typeTagIn
,
None
),
regEnables
(
0
))
val
src2
=
RegEnable
(
unbox
(
io
.
in
.
src
(
1
),
ctrlIn
.
typeTagIn
,
None
),
regEnables
(
0
))
val
rmReg
=
RegEnable
(
rm
,
regEnables
(
0
)
)
val
signNum
=
Mux
(
rmReg
(
1
),
src1
^
src2
,
Mux
(
rmReg
(
0
),
~
src2
,
src2
))
val
fsgnj
=
Cat
(
signNum
(
fLen
),
src1
(
fLen
-
1
,
0
))
...
...
@@ -79,6 +79,15 @@ class FPToFP extends FPUPipelineModule{
}
}
io
.
out
.
bits
.
data
:=
S2Reg
(
mux
.
data
)
fflags
:=
S2Reg
(
mux
.
exc
)
io
.
out
.
data
:=
RegEnable
(
mux
.
data
,
regEnables
(
1
))
fflags
:=
RegEnable
(
mux
.
exc
,
regEnables
(
1
))
}
class
FPToFP
extends
FPUPipelineModule
{
override
def
latency
:
Int
=
FunctionUnit
.
f2iCfg
.
latency
.
latencyVal
.
get
override
val
dataModule
=
Module
(
new
FPToFPDataModule
(
latency
))
connectDataModule
dataModule
.
regEnables
<>
VecInit
((
1
to
latency
)
map
(
i
=>
regEnable
(
i
)))
}
src/main/scala/xiangshan/backend/fu/fpu/FPToInt.scala
浏览文件 @
49681eda
...
...
@@ -10,19 +10,18 @@ import hardfloat.RecFNToIN
import
utils.SignExt
import
xiangshan.backend.fu.FunctionUnit
class
FPToInt
extends
FPUPipelineModule
{
override
def
latency
=
FunctionUnit
.
f2iCfg
.
latency
.
latencyVal
.
get
val
(
src1
,
src2
)
=
(
io
.
in
.
bits
.
src
(
0
),
io
.
in
.
bits
.
src
(
1
))
class
FPToIntDataModule
(
latency
:
Int
)
extends
FPUDataModule
{
val
regEnables
=
IO
(
Input
(
Vec
(
latency
,
Bool
())))
val
(
src1
,
src2
)
=
(
io
.
in
.
src
(
0
),
io
.
in
.
src
(
1
))
val
ctrl
=
io
.
in
.
bits
.
uop
.
ctrl
.
fpu
val
ctrl
=
io
.
in
.
fpCtrl
// stage 1: unbox inputs
val
src1_d
=
S1Reg
(
unbox
(
src1
,
ctrl
.
typeTagIn
,
None
))
val
src2_d
=
S1Reg
(
unbox
(
src2
,
ctrl
.
typeTagIn
,
None
))
val
ctrl_reg
=
S1Reg
(
ctrl
)
val
rm_reg
=
S1Reg
(
rm
)
val
src1_d
=
RegEnable
(
unbox
(
src1
,
ctrl
.
typeTagIn
,
None
),
regEnables
(
0
))
val
src2_d
=
RegEnable
(
unbox
(
src2
,
ctrl
.
typeTagIn
,
None
),
regEnables
(
0
))
val
ctrl_reg
=
RegEnable
(
ctrl
,
regEnables
(
0
)
)
val
rm_reg
=
RegEnable
(
rm
,
regEnables
(
0
)
)
// stage2
...
...
@@ -79,13 +78,22 @@ class FPToInt extends FPUPipelineModule {
Mux
(
rm_reg
(
0
),
classify_out
,
move_out
)
)
val
doubleOut
=
Mux
(
ctrl_reg
.
fcvt
,
ctrl_reg
.
typ
(
1
),
ctrl_reg
.
fmt
(
0
))
val
intValue
=
S2Reg
(
Mux
(
doubleOut
,
val
intValue
=
RegEnable
(
Mux
(
doubleOut
,
SignExt
(
intData
,
XLEN
),
SignExt
(
intData
(
31
,
0
),
XLEN
)
))
)
,
regEnables
(
1
)
)
val
exc
=
S2Reg
(
Mux
(
ctrl_reg
.
fcvt
,
conv_exc
,
dcmp_exc
))
val
exc
=
RegEnable
(
Mux
(
ctrl_reg
.
fcvt
,
conv_exc
,
dcmp_exc
),
regEnables
(
1
))
io
.
out
.
bits
.
data
:=
intValue
io
.
out
.
data
:=
intValue
fflags
:=
exc
}
class
FPToInt
extends
FPUPipelineModule
{
override
def
latency
=
FunctionUnit
.
f2iCfg
.
latency
.
latencyVal
.
get
override
val
dataModule
=
Module
(
new
FPToIntDataModule
(
latency
))
connectDataModule
dataModule
.
regEnables
<>
VecInit
((
1
to
latency
)
map
(
i
=>
regEnable
(
i
)))
}
src/main/scala/xiangshan/backend/fu/fpu/FPUSubModule.scala
浏览文件 @
49681eda
...
...
@@ -2,6 +2,7 @@ package xiangshan.backend.fu.fpu
import
chisel3._
import
chisel3.util._
import
xiangshan.
{
FPUCtrlSignals
,
XSModule
}
import
xiangshan.backend.fu.
{
FuConfig
,
FunctionUnit
,
HasPipelineReg
}
trait
HasUIntToSIntHelper
{
...
...
@@ -10,11 +11,36 @@ trait HasUIntToSIntHelper {
}
}
abstract
class
FPUDataModule
extends
XSModule
{
val
io
=
IO
(
new
Bundle
()
{
val
in
=
Input
(
new
Bundle
()
{
val
src
=
Vec
(
3
,
UInt
(
65.
W
))
val
fpCtrl
=
new
FPUCtrlSignals
val
rm
=
UInt
(
3.
W
)
})
val
out
=
Output
(
new
Bundle
()
{
val
data
=
UInt
(
65.
W
)
val
fflags
=
UInt
(
5.
W
)
})
})
val
rm
=
io
.
in
.
rm
val
fflags
=
io
.
out
.
fflags
}
abstract
class
FPUSubModule
extends
FunctionUnit
(
len
=
65
)
with
HasUIntToSIntHelper
{
val
rm
=
IO
(
Input
(
UInt
(
3.
W
)))
val
fflags
=
IO
(
Output
(
UInt
(
5.
W
)))
val
dataModule
:
FPUDataModule
def
connectDataModule
=
{
dataModule
.
io
.
in
.
src
<>
io
.
in
.
bits
.
src
dataModule
.
io
.
in
.
fpCtrl
<>
io
.
in
.
bits
.
uop
.
ctrl
.
fpu
dataModule
.
io
.
in
.
rm
<>
rm
io
.
out
.
bits
.
data
:=
dataModule
.
io
.
out
.
data
fflags
:=
dataModule
.
io
.
out
.
fflags
}
}
abstract
class
FPUPipelineModule
...
...
src/main/scala/xiangshan/backend/fu/fpu/IntToFP.scala
浏览文件 @
49681eda
...
...
@@ -8,41 +8,50 @@ import chisel3.util._
import
hardfloat.INToRecFN
import
utils.
{
SignExt
,
ZeroExt
}
class
IntToFP
extends
FPUSubModule
{
class
IntToFPDataModule
extends
FPUDataModule
{
val
in_valid
,
out_ready
=
IO
(
Input
(
Bool
()))
val
in_ready
,
out_valid
=
IO
(
Output
(
Bool
()))
val
kill_w
,
kill_r
=
IO
(
Input
(
Bool
()))
val
s_idle
::
s_cvt
::
s_
finish
::
Nil
=
Enum
(
3
)
val
s_idle
::
s_cvt
::
s_
ieee
::
s_finish
::
Nil
=
Enum
(
4
)
val
state
=
RegInit
(
s_idle
)
io
.
in
.
ready
:=
state
===
s_idle
io
.
out
.
valid
:=
state
===
s_finish
val
src1
=
RegEnable
(
io
.
in
.
bits
.
src
(
0
)(
XLEN
-
1
,
0
),
io
.
in
.
fire
())
val
uopReg
=
RegEnable
(
io
.
in
.
bits
.
uop
,
io
.
in
.
fire
())
val
rmReg
=
RegEnable
(
rm
,
io
.
in
.
fire
())
val
in_fire
=
in_valid
&&
in_ready
val
out_fire
=
out_valid
&&
out_ready
in_ready
:=
state
===
s_idle
out_valid
:=
state
===
s_finish
val
src1
=
RegEnable
(
io
.
in
.
src
(
0
)(
XLEN
-
1
,
0
),
in_fire
)
val
rmReg
=
RegEnable
(
rm
,
in_fire
)
val
ctrl
=
RegEnable
(
io
.
in
.
fpCtrl
,
in_fire
)
switch
(
state
){
is
(
s_idle
){
when
(
i
o
.
in
.
fire
()
&&
!
io
.
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
){
when
(
i
n_fire
&&
!
kill_w
){
state
:=
s_cvt
}
}
is
(
s_cvt
){
state
:=
s_ieee
}
is
(
s_ieee
){
state
:=
s_finish
}
is
(
s_finish
){
when
(
io
.
out
.
fire
()
){
when
(
out_fire
){
state
:=
s_idle
}
}
}
when
(
state
=/=
s_idle
&&
uopReg
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
){
when
(
state
=/=
s_idle
&&
kill_r
){
state
:=
s_idle
}
/*
s_cvt
*/
val
ctrl
=
uopReg
.
ctrl
.
fpu
val
tag
=
ctrl
.
typeTagIn
val
typ
=
ctrl
.
typ
val
wflags
=
ctrl
.
wflags
...
...
@@ -73,9 +82,26 @@ class IntToFP extends FPUSubModule {
mux
.
exc
:=
VecInit
(
exc
)(
tag
)
}
val
muxReg
=
RegEnable
(
mux
,
enable
=
state
===
s_cvt
)
val
muxReg
=
Reg
(
mux
.
cloneType
)
when
(
state
===
s_cvt
){
muxReg
:=
mux
}.
elsewhen
(
state
===
s_ieee
){
muxReg
.
data
:=
ieee
(
box
(
muxReg
.
data
,
ctrl
.
typeTagOut
))
}
fflags
:=
muxReg
.
exc
io
.
out
.
data
:=
muxReg
.
data
}
class
IntToFP
extends
FPUSubModule
{
override
val
dataModule
=
Module
(
new
IntToFPDataModule
)
dataModule
.
in_valid
:=
io
.
in
.
valid
dataModule
.
out_ready
:=
io
.
out
.
ready
connectDataModule
val
uopReg
=
RegEnable
(
io
.
in
.
bits
.
uop
,
io
.
in
.
fire
())
dataModule
.
kill_w
:=
io
.
in
.
bits
.
uop
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
dataModule
.
kill_r
:=
uopReg
.
roqIdx
.
needFlush
(
io
.
redirectIn
,
io
.
flushIn
)
io
.
in
.
ready
:=
dataModule
.
in_ready
io
.
out
.
valid
:=
dataModule
.
out_valid
io
.
out
.
bits
.
uop
:=
uopReg
io
.
out
.
bits
.
data
:=
box
(
muxReg
.
data
,
ctrl
.
typeTagOut
)
}
src/main/scala/xiangshan/backend/issue/ReservationStation.scala
浏览文件 @
49681eda
...
...
@@ -100,6 +100,7 @@ class ReservationStation
val
fromDispatch
=
Flipped
(
DecoupledIO
(
new
MicroOp
))
val
deq
=
DecoupledIO
(
new
ExuInput
)
val
srcRegValue
=
Input
(
Vec
(
srcNum
,
UInt
(
srcLen
.
W
)))
val
fpRegValue
=
if
(
exuCfg
==
Exu
.
stExeUnitCfg
)
Input
(
UInt
(
srcLen
.
W
))
else
null
val
jumpPc
=
if
(
exuCfg
==
Exu
.
jumpExeUnitCfg
)
Input
(
UInt
(
VAddrBits
.
W
))
else
null
val
jalr_target
=
if
(
exuCfg
==
Exu
.
jumpExeUnitCfg
)
Input
(
UInt
(
VAddrBits
.
W
))
else
null
...
...
@@ -130,7 +131,7 @@ class ReservationStation
select
.
io
.
memfeedback
:=
io
.
memfeedback
}
ctrl
.
io
.
in
.
valid
:=
select
.
io
.
enq
.
fire
()
&&
!(
io
.
redirect
.
valid
||
io
.
flush
)
// NOTE: same as select
ctrl
.
io
.
in
.
valid
:=
select
.
io
.
enq
.
fire
()
//
&& !(io.redirect.valid || io.flush) // NOTE: same as select
ctrl
.
io
.
flush
:=
io
.
flush
ctrl
.
io
.
in
.
bits
.
addr
:=
select
.
io
.
enq
.
bits
ctrl
.
io
.
in
.
bits
.
uop
:=
io
.
fromDispatch
.
bits
...
...
@@ -155,6 +156,9 @@ class ReservationStation
data
.
io
.
jumpPc
:=
io
.
jumpPc
data
.
io
.
jalr_target
:=
io
.
jalr_target
}
if
(
exuCfg
==
Exu
.
stExeUnitCfg
)
{
data
.
io
.
fpRegValue
:=
io
.
fpRegValue
}
data
.
io
.
sel
:=
select
.
io
.
deq
.
bits
data
.
io
.
listen
.
wen
:=
ctrl
.
io
.
listen
for
(
i
<-
0
until
fastPortsCnt
)
{
...
...
@@ -345,7 +349,8 @@ class ReservationStationSelect
val
enqueue
=
io
.
enq
.
fire
()
&&
!(
io
.
redirect
.
valid
||
io
.
flush
)
val
tailInc
=
tailPtr
+
1.
U
val
tailDec
=
tailPtr
-
1.
U
tailPtr
:=
Mux
(
dequeue
===
enqueue
,
tailPtr
,
Mux
(
dequeue
,
tailDec
,
tailInc
))
val
nextTailPtr
=
Mux
(
dequeue
===
enqueue
,
tailPtr
,
Mux
(
dequeue
,
tailDec
,
tailInc
))
tailPtr
:=
nextTailPtr
val
enqPtr
=
Mux
(
tailPtr
.
flag
,
deqPtr
,
tailPtr
.
value
)
val
enqIdx
=
indexQueue
(
enqPtr
)
...
...
@@ -362,7 +367,7 @@ class ReservationStationSelect
io
.
deq
.
valid
:=
selectValid
io
.
deq
.
bits
:=
selectIndex
io
.
numExist
:=
Mux
(
tailPtr
.
flag
,
(
iqSize
-
1
).
U
,
tailPtr
.
value
)
io
.
numExist
:=
RegNext
(
Mux
(
nextTailPtr
.
flag
,
(
iqSize
-
1
).
U
,
nextTailPtr
.
value
)
)
assert
(
RegNext
(
Mux
(
tailPtr
.
flag
,
tailPtr
.
value
===
0.
U
,
true
.
B
)))
}
...
...
@@ -450,6 +455,15 @@ class ReservationStationCtrl
when
(
enqEn
)
{
srcQueue
(
enqPtr
).
zip
(
enqSrcReady
).
map
{
case
(
s
,
e
)
=>
s
:=
e
}
}
// NOTE: delay one cycle for fp src will come one cycle later than usual
if
(
exuCfg
==
Exu
.
stExeUnitCfg
)
{
when
(
enqEn
)
{
when
(
enqUop
.
ctrl
.
src2Type
===
SrcType
.
fp
)
{
srcQueue
(
enqPtr
)(
1
)
:=
false
.
B
}
}
when
(
enqEnReg
&&
RegNext
(
enqUop
.
ctrl
.
src2Type
===
SrcType
.
fp
&&
enqSrcReady
(
1
)))
{
srcQueue
(
enqPtrReg
)(
1
)
:=
true
.
B
}
}
for
(
i
<-
0
until
iqSize
)
{
for
(
j
<-
0
until
srcNum
)
{
when
(
srcUpdate
(
i
)(
j
))
{
srcQueue
(
i
)(
j
)
:=
true
.
B
}
...
...
@@ -591,18 +605,18 @@ class ReservationStationCtrl
}
}
class
RSDataSingleSrc
(
srcLen
:
Int
,
numEntries
:
Int
,
numListen
:
Int
)
extends
XSModule
{
class
RSDataSingleSrc
(
srcLen
:
Int
,
numEntries
:
Int
,
numListen
:
Int
,
writePort
:
Int
=
1
)
extends
XSModule
{
val
io
=
IO
(
new
Bundle
{
val
r
=
new
Bundle
{
// val valid = Bool() // NOTE: if read valid is necessary, but now it is not completed
val
addr
=
Input
(
UInt
(
log2Up
(
numEntries
).
W
))
val
rdata
=
Output
(
UInt
(
srcLen
.
W
))
}
val
w
=
Input
(
new
Bundle
{
val
w
=
Input
(
Vec
(
writePort
,
new
Bundle
{
val
wen
=
Bool
()
val
addr
=
UInt
(
log2Up
(
numEntries
).
W
)
val
wdata
=
Input
(
UInt
(
srcLen
.
W
)
)
})
val
wdata
=
UInt
(
srcLen
.
W
)
})
)
val
listen
=
Input
(
new
Bundle
{
val
wdata
=
Vec
(
numListen
,
UInt
(
srcLen
.
W
))
val
wen
=
Vec
(
numEntries
,
Vec
(
numListen
,
Bool
()))
...
...
@@ -611,9 +625,14 @@ class RSDataSingleSrc(srcLen: Int, numEntries: Int, numListen: Int) extends XSMo
val
value
=
Reg
(
Vec
(
numEntries
,
UInt
(
srcLen
.
W
)))
val
wMask
=
Mux
(
io
.
w
.
wen
,
UIntToOH
(
io
.
w
.
addr
)(
numEntries
-
1
,
0
),
0.
U
(
numEntries
.
W
))
val
data
=
io
.
listen
.
wdata
:+
io
.
w
.
wdata
val
wen
=
io
.
listen
.
wen
.
zip
(
wMask
.
asBools
).
map
{
case
(
w
,
m
)
=>
w
:+
m
}
val
wMaskT
=
io
.
w
.
map
(
w
=>
Mux
(
w
.
wen
,
UIntToOH
(
w
.
addr
)(
numEntries
-
1
,
0
),
0.
U
(
numEntries
.
W
)))
val
wMask
=
(
0
until
numEntries
).
map
(
i
=>
(
0
until
writePort
).
map
(
j
=>
wMaskT
(
j
)(
i
)
))
val
wData
=
io
.
w
.
map
(
w
=>
w
.
wdata
)
val
data
=
io
.
listen
.
wdata
++
io
.
w
.
map
(
_
.
wdata
)
val
wen
=
io
.
listen
.
wen
.
zip
(
wMask
).
map
{
case
(
w
,
m
)
=>
w
++
m
}
for
(
i
<-
0
until
numEntries
)
{
when
(
Cat
(
wen
(
i
)).
orR
)
{
value
(
i
)
:=
ParallelMux
(
wen
(
i
)
zip
data
)
...
...
@@ -640,8 +659,10 @@ class ReservationStationData
val
srcNum
=
if
(
exuCfg
==
Exu
.
jumpExeUnitCfg
)
2
else
max
(
exuCfg
.
intSrcCnt
,
exuCfg
.
fpSrcCnt
)
require
(
nonBlocked
==
fastWakeup
)
val
io
=
IO
(
new
XSBundle
{
val
srcRegValue
=
Vec
(
srcNum
,
Input
(
UInt
(
srcLen
.
W
)))
val
fpRegValue
=
if
(
exuCfg
==
Exu
.
stExeUnitCfg
)
Input
(
UInt
(
srcLen
.
W
))
else
null
val
jumpPc
=
if
(
exuCfg
==
Exu
.
jumpExeUnitCfg
)
Input
(
UInt
(
VAddrBits
.
W
))
else
null
val
jalr_target
=
if
(
exuCfg
==
Exu
.
jumpExeUnitCfg
)
Input
(
UInt
(
VAddrBits
.
W
))
else
null
val
in
=
Input
(
new
Bundle
{
...
...
@@ -665,25 +686,35 @@ class ReservationStationData
// Data : single read, multi write
// ------------------------
val
data
=
(
0
until
srcNum
).
map
{
i
=>
val
d
=
Module
(
new
RSDataSingleSrc
(
srcLen
,
iqSize
,
fastPortsCnt
+
slowPortsCnt
))
d
.
suggestName
(
s
"${this.name}_data${i}"
)
d
.
io
val
data
=
if
(
exuCfg
==
Exu
.
stExeUnitCfg
)
{
val
srcBase
=
Module
(
new
RSDataSingleSrc
(
srcLen
,
iqSize
,
fastPortsCnt
+
slowPortsCnt
,
1
))
val
srcData
=
Module
(
new
RSDataSingleSrc
(
srcLen
,
iqSize
,
fastPortsCnt
+
slowPortsCnt
,
2
))
srcBase
.
suggestName
(
s
"${this.name}_data0"
)
srcData
.
suggestName
(
s
"${this.name}_data1"
)
Seq
(
srcBase
.
io
,
srcData
.
io
)
}
else
{
(
0
until
srcNum
).
map
{
i
=>
val
d
=
Module
(
new
RSDataSingleSrc
(
srcLen
,
iqSize
,
fastPortsCnt
+
slowPortsCnt
,
1
))
d
.
suggestName
(
s
"${this.name}_data${i}"
)
d
.
io
}
}
(
0
until
srcNum
).
foreach
{
i
=>
data
(
i
).
listen
.
wen
:=
io
.
listen
.
wen
(
i
)
data
(
i
).
listen
.
wdata
:=
io
.
listen
.
wdata
}
data
.
map
(
_
.
w
.
addr
:=
RegEnable
(
io
.
in
.
addr
,
io
.
in
.
valid
))
data
.
zip
(
io
.
in
.
enqSrcReady
).
map
{
case
(
src
,
ready
)
=>
src
.
w
.
wen
:=
RegNext
(
ready
&&
io
.
in
.
valid
)
}
val
addrReg
=
RegEnable
(
io
.
in
.
addr
,
io
.
in
.
valid
)
val
enqSrcReadyReg
=
io
.
in
.
enqSrcReady
.
map
(
r
=>
RegNext
(
r
&&
io
.
in
.
valid
))
data
.
map
(
_
.
w
(
0
).
addr
:=
addrReg
)
data
.
zip
(
enqSrcReadyReg
).
map
{
case
(
src
,
ready
)
=>
src
.
w
(
0
).
wen
:=
ready
}
val
pcMem
=
if
(
exuCfg
==
Exu
.
jumpExeUnitCfg
)
Some
(
Module
(
new
SyncDataModuleTemplate
(
UInt
(
VAddrBits
.
W
),
iqSize
,
numRead
=
1
,
numWrite
=
1
)))
else
None
if
(
pcMem
.
nonEmpty
){
pcMem
.
get
.
io
.
wen
(
0
)
:=
RegNext
(
io
.
in
.
valid
)
pcMem
.
get
.
io
.
waddr
(
0
)
:=
RegNext
(
io
.
in
.
addr
)
pcMem
.
get
.
io
.
waddr
(
0
)
:=
addrReg
pcMem
.
get
.
io
.
wdata
(
0
)
:=
io
.
jumpPc
}
...
...
@@ -694,15 +725,15 @@ class ReservationStationData
io
.
srcRegValue
(
0
)
)
// data.io.w.bits.data(0) := src1Mux
data
(
0
).
w
.
wdata
:=
src1Mux
data
(
1
).
w
.
wdata
:=
io
.
jalr_target
data
(
0
).
w
(
0
)
.
wdata
:=
src1Mux
data
(
1
).
w
(
0
)
.
wdata
:=
io
.
jalr_target
case
Exu
.
aluExeUnitCfg
=>
val
src1Mux
=
Mux
(
enqUopReg
.
ctrl
.
src1Type
===
SrcType
.
pc
,
SignExt
(
enqUopReg
.
cf
.
pc
,
XLEN
),
io
.
srcRegValue
(
0
)
)
data
(
0
).
w
.
wdata
:=
src1Mux
data
(
0
).
w
(
0
)
.
wdata
:=
src1Mux
// alu only need U type and I type imm
val
imm32
=
Mux
(
enqUopReg
.
ctrl
.
selImm
===
SelImm
.
IMM_U
,
ImmUnion
.
U
.
toImm32
(
enqUopReg
.
ctrl
.
imm
),
...
...
@@ -712,9 +743,17 @@ class ReservationStationData
val
src2Mux
=
Mux
(
enqUopReg
.
ctrl
.
src2Type
===
SrcType
.
imm
,
imm64
,
io
.
srcRegValue
(
1
)
)
data
(
1
).
w
.
wdata
:=
src2Mux
data
(
1
).
w
(
0
).
wdata
:=
src2Mux
case
Exu
.
stExeUnitCfg
=>
(
0
until
srcNum
).
foreach
(
i
=>
data
(
i
).
w
(
0
).
wdata
:=
io
.
srcRegValue
(
i
)
)
data
(
1
).
w
(
1
).
wdata
:=
io
.
fpRegValue
data
(
1
).
w
(
1
).
addr
:=
RegNext
(
addrReg
)
data
(
1
).
w
(
1
).
wen
:=
RegNext
(
enqSrcReadyReg
(
1
)
&&
enqUopReg
.
ctrl
.
src2Type
===
SrcType
.
fp
)
data
(
1
).
w
(
0
).
wen
:=
enqSrcReadyReg
(
1
)
&&
enqUopReg
.
ctrl
.
src2Type
=/=
SrcType
.
fp
case
_
=>
(
0
until
srcNum
).
foreach
(
i
=>
data
(
i
).
w
.
wdata
:=
io
.
srcRegValue
(
i
)
)
(
0
until
srcNum
).
foreach
(
i
=>
data
(
i
).
w
(
0
)
.
wdata
:=
io
.
srcRegValue
(
i
)
)
}
// deq
data
.
map
(
_
.
r
.
addr
:=
io
.
sel
)
...
...
src/main/scala/xiangshan/backend/rename/Rename.scala
浏览文件 @
49681eda
...
...
@@ -5,6 +5,7 @@ import chisel3.util._
import
xiangshan._
import
utils._
import
xiangshan.backend.roq.RoqPtr
import
xiangshan.backend.dispatch.PreDispatchInfo
class
RenameBypassInfo
extends
XSBundle
{
val
lsrc1_bypass
=
MixedVec
(
List
.
tabulate
(
RenameWidth
-
1
)(
i
=>
UInt
((
i
+
1
).
W
)))
...
...
@@ -23,6 +24,7 @@ class Rename extends XSModule with HasCircularQueuePtrHelper {
// to dispatch1
val
out
=
Vec
(
RenameWidth
,
DecoupledIO
(
new
MicroOp
))
val
renameBypass
=
Output
(
new
RenameBypassInfo
)
val
dispatchInfo
=
Output
(
new
PreDispatchInfo
)
})
def
printRenameInfo
(
in
:
DecoupledIO
[
CfCtrl
],
out
:
DecoupledIO
[
MicroOp
])
=
{
...
...
@@ -202,6 +204,12 @@ class Rename extends XSModule with HasCircularQueuePtrHelper {
}).
reverse
)
}
val
isLs
=
VecInit
(
uops
.
map
(
uop
=>
FuType
.
isLoadStore
(
uop
.
ctrl
.
fuType
)))
val
isStore
=
VecInit
(
uops
.
map
(
uop
=>
FuType
.
isStoreExu
(
uop
.
ctrl
.
fuType
)))
val
isAMO
=
VecInit
(
uops
.
map
(
uop
=>
FuType
.
isAMO
(
uop
.
ctrl
.
fuType
)))
io
.
dispatchInfo
.
lsqNeedAlloc
:=
VecInit
((
0
until
RenameWidth
).
map
(
i
=>
Mux
(
isLs
(
i
),
Mux
(
isStore
(
i
)
&&
!
isAMO
(
i
),
2.
U
,
1.
U
),
0.
U
)))
/**
* Instructions commit: update freelist and rename table
*/
...
...
src/main/scala/xiangshan/backend/roq/Roq.scala
浏览文件 @
49681eda
...
...
@@ -792,22 +792,22 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
if
(
i
%
4
==
3
)
XSDebug
(
false
,
true
.
B
,
"\n"
)
}
XSPerf
(
"utilization"
,
PopCount
((
0
until
RoqSize
).
map
(
valid
(
_
))))
XSPerf
(
"commitInstr"
,
Mux
(
io
.
commits
.
isWalk
,
0.
U
,
PopCount
(
io
.
commits
.
valid
)))
XSPerf
(
"commitInstrLoad"
,
Mux
(
io
.
commits
.
isWalk
,
0.
U
,
PopCount
(
io
.
commits
.
valid
.
zip
(
io
.
commits
.
info
.
map
(
_
.
commitType
)).
map
{
case
(
v
,
t
)
=>
v
&&
t
===
CommitType
.
LOAD
})))
XSPerf
(
"commitInstrStore"
,
Mux
(
io
.
commits
.
isWalk
,
0.
U
,
PopCount
(
io
.
commits
.
valid
.
zip
(
io
.
commits
.
info
.
map
(
_
.
commitType
)).
map
{
case
(
v
,
t
)
=>
v
&&
t
===
CommitType
.
STORE
})))
XSPerf
(
"writeback"
,
PopCount
((
0
until
RoqSize
).
map
(
i
=>
valid
(
i
)
&&
writebacked
(
i
))))
XSPerf
(
"
roq_
utilization"
,
PopCount
((
0
until
RoqSize
).
map
(
valid
(
_
))))
XSPerf
(
"
roq_
commitInstr"
,
Mux
(
io
.
commits
.
isWalk
,
0.
U
,
PopCount
(
io
.
commits
.
valid
)))
XSPerf
(
"
roq_
commitInstrLoad"
,
Mux
(
io
.
commits
.
isWalk
,
0.
U
,
PopCount
(
io
.
commits
.
valid
.
zip
(
io
.
commits
.
info
.
map
(
_
.
commitType
)).
map
{
case
(
v
,
t
)
=>
v
&&
t
===
CommitType
.
LOAD
})))
XSPerf
(
"
roq_
commitInstrStore"
,
Mux
(
io
.
commits
.
isWalk
,
0.
U
,
PopCount
(
io
.
commits
.
valid
.
zip
(
io
.
commits
.
info
.
map
(
_
.
commitType
)).
map
{
case
(
v
,
t
)
=>
v
&&
t
===
CommitType
.
STORE
})))
XSPerf
(
"
roq_
writeback"
,
PopCount
((
0
until
RoqSize
).
map
(
i
=>
valid
(
i
)
&&
writebacked
(
i
))))
// XSPerf("enqInstr", PopCount(io.dp1Req.map(_.fire())))
// XSPerf("d2rVnR", PopCount(io.dp1Req.map(p => p.valid && !p.ready)))
XSPerf
(
"walkInstrAcc"
,
Mux
(
io
.
commits
.
isWalk
,
PopCount
(
io
.
commits
.
valid
),
0.
U
),
acc
=
true
)
XSPerf
(
"walkCycleAcc"
,
state
===
s_walk
||
state
===
s_extrawalk
,
acc
=
true
)
XSPerf
(
"
roq_
walkInstrAcc"
,
Mux
(
io
.
commits
.
isWalk
,
PopCount
(
io
.
commits
.
valid
),
0.
U
),
acc
=
true
)
XSPerf
(
"
roq_
walkCycleAcc"
,
state
===
s_walk
||
state
===
s_extrawalk
,
acc
=
true
)
val
deqNotWritebacked
=
valid
(
deqPtr
.
value
)
&&
!
writebacked
(
deqPtr
.
value
)
val
deqUopCommitType
=
io
.
commits
.
info
(
0
).
commitType
XSPerf
(
"waitNormalCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
NORMAL
,
acc
=
true
)
XSPerf
(
"waitBranchCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
BRANCH
,
acc
=
true
)
XSPerf
(
"waitLoadCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
LOAD
,
acc
=
true
)
XSPerf
(
"waitStoreCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
STORE
,
acc
=
true
)
XSPerf
(
"roqHeadPC"
,
io
.
commits
.
info
(
0
).
pc
)
XSPerf
(
"
roq_
waitNormalCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
NORMAL
,
acc
=
true
)
XSPerf
(
"
roq_
waitBranchCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
BRANCH
,
acc
=
true
)
XSPerf
(
"
roq_
waitLoadCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
LOAD
,
acc
=
true
)
XSPerf
(
"
roq_
waitStoreCycleAcc"
,
deqNotWritebacked
&&
deqUopCommitType
===
CommitType
.
STORE
,
acc
=
true
)
XSPerf
(
"roq
_roq
HeadPC"
,
io
.
commits
.
info
(
0
).
pc
)
val
instrCnt
=
RegInit
(
0.
U
(
64.
W
))
val
retireCounter
=
Mux
(
state
===
s_idle
,
commitCnt
,
0.
U
)
...
...
@@ -866,7 +866,7 @@ class Roq(numWbPorts: Int) extends XSModule with HasCircularQueuePtrHelper {
val
trapCode
=
PriorityMux
(
wdata
.
zip
(
trapVec
).
map
(
x
=>
x
.
_2
->
x
.
_1
))
val
trapPC
=
SignExt
(
PriorityMux
(
wpc
.
zip
(
trapVec
).
map
(
x
=>
x
.
_2
->
x
.
_1
)),
XLEN
)
if
(!
env
.
FPGAPlatform
&&
EnableBPU
&&
!
env
.
DualCore
)
{
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSource
(
hitTrap
,
"XSTRAP"
,
ConnectionType
.
Debug
)
}
...
...
src/main/scala/xiangshan/cache/DCacheWrapper.scala
浏览文件 @
49681eda
...
...
@@ -272,7 +272,17 @@ class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParame
mainPipeReqArb
.
io
.
in
(
AtomicsMainPipeReqPort
)
<>
atomicsReplayUnit
.
io
.
pipe_req
mainPipeReqArb
.
io
.
in
(
ProbeMainPipeReqPort
)
<>
probeQueue
.
io
.
pipe_req
mainPipe
.
io
.
req
<>
mainPipeReqArb
.
io
.
out
// add a stage to break the Arbiter bits.addr to ready path
val
mainPipeReq_valid
=
RegInit
(
false
.
B
)
val
mainPipeReq_fire
=
mainPipeReq_valid
&&
mainPipe
.
io
.
req
.
ready
val
mainPipeReq_req
=
RegEnable
(
mainPipeReqArb
.
io
.
out
.
bits
,
mainPipeReqArb
.
io
.
out
.
fire
())
mainPipeReqArb
.
io
.
out
.
ready
:=
mainPipe
.
io
.
req
.
ready
mainPipe
.
io
.
req
.
valid
:=
mainPipeReq_valid
mainPipe
.
io
.
req
.
bits
:=
mainPipeReq_req
when
(
mainPipeReqArb
.
io
.
out
.
fire
())
{
mainPipeReq_valid
:=
true
.
B
}
when
(!
mainPipeReqArb
.
io
.
out
.
fire
()
&&
mainPipeReq_fire
)
{
mainPipeReq_valid
:=
false
.
B
}
missQueue
.
io
.
pipe_resp
<>
mainPipe
.
io
.
miss_resp
storeReplayUnit
.
io
.
pipe_resp
<>
mainPipe
.
io
.
store_resp
...
...
src/main/scala/xiangshan/cache/ICache.scala
浏览文件 @
49681eda
...
...
@@ -629,9 +629,7 @@ class ICache extends ICacheModule
dump_pipe_info
()
// Performance Counter
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSource
(
s3_valid
&&
!
blocking
,
"perfCntIcacheReqCnt"
,
Perf
)
ExcitingUtils
.
addSource
(
s3_miss
&&
blocking
&&
io
.
resp
.
fire
(),
"perfCntIcacheMissCnt"
,
Perf
)
ExcitingUtils
.
addSource
(
s3_mmio
&&
blocking
&&
io
.
resp
.
fire
(),
"perfCntIcacheMMIOCnt"
,
Perf
)
}
XSPerf
(
"icache_req"
,
s3_valid
&&
!
blocking
)
XSPerf
(
"icache_miss"
,
s3_miss
&&
blocking
&&
io
.
resp
.
fire
())
XSPerf
(
"icache_mmio"
,
s3_mmio
&&
blocking
&&
io
.
resp
.
fire
())
}
\ No newline at end of file
src/main/scala/xiangshan/cache/ICacheMissQueue.scala
浏览文件 @
49681eda
...
...
@@ -229,16 +229,13 @@ class IcacheMissQueue extends ICacheMissQueueModule
entry
.
io
.
mem_grant
<>
io
.
mem_grant
}
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSource
(
BoolStopWatch
(
start
=
entry
.
io
.
req
.
fire
(),
stop
=
entry
.
io
.
resp
.
fire
()
||
entry
.
io
.
flush
,
startHighPriority
=
true
),
"perfCntICacheMissQueuePenaltyEntry"
+
Integer
.
toString
(
i
,
10
),
Perf
)
}
XSPerf
(
"ICacheMissQueue_PenaltyEntry"
+
Integer
.
toString
(
i
,
10
),
BoolStopWatch
(
start
=
entry
.
io
.
req
.
fire
(),
stop
=
entry
.
io
.
resp
.
fire
()
||
entry
.
io
.
flush
,
startHighPriority
=
true
)
)
entry
}
...
...
src/main/scala/xiangshan/cache/MainPipe.scala
浏览文件 @
49681eda
此差异已折叠。
点击以展开。
src/main/scala/xiangshan/cache/MissQueue.scala
浏览文件 @
49681eda
...
...
@@ -5,7 +5,7 @@ import chisel3.util._
import
chisel3.ExcitingUtils._
import
freechips.rocketchip.tilelink.
{
TLEdgeOut
,
TLBundleA
,
TLBundleD
,
TLBundleE
,
TLPermissions
,
TLArbiter
,
ClientMetadata
}
import
utils.
{
HasTLDump
,
XSDebug
,
BoolStopWatch
,
OneHot
}
import
utils.
{
HasTLDump
,
XSDebug
,
BoolStopWatch
,
OneHot
,
XSPerf
}
class
MissReq
extends
DCacheBundle
{
...
...
@@ -413,16 +413,13 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
}
/*
if (!env.FPGAPlatform && !env.DualCore) {
ExcitingUtils.addSource(
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true),
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
Perf
)
}
XSPerf(
"perfCntDCacheMissQueuePenaltyEntry" + Integer.toString(i, 10),
BoolStopWatch(
start = entry.io.req.fire(),
stop = entry.io.resp.fire(),
startHighPriority = true)
)
*/
entry
...
...
@@ -492,7 +489,5 @@ class MissQueue(edge: TLEdgeOut) extends DCacheModule with HasTLDump
io
.
mem_finish
.
bits
.
dump
}
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSource
(
io
.
req
.
fire
(),
"perfCntDCacheMiss"
,
Perf
)
}
XSPerf
(
"dcache_miss"
,
io
.
req
.
fire
())
}
src/main/scala/xiangshan/cache/StoreReplayUnit.scala
浏览文件 @
49681eda
...
...
@@ -137,8 +137,9 @@ class StoreReplayQueue extends DCacheModule
val
alloc_idx
=
PriorityEncoder
(
primary_ready
)
val
req
=
io
.
lsu
.
req
val
block_conflict
=
Wire
(
Bool
())
req
.
ready
:=
allocate
&&
!
block_conflict
// do not use block conflict in req allocate path
// compare with all entries incus much latency
req
.
ready
:=
allocate
val
entries
=
(
0
until
cfg
.
nStoreReplayEntries
)
map
{
i
=>
val
entry
=
Module
(
new
StoreReplayEntry
)
...
...
@@ -146,7 +147,7 @@ class StoreReplayQueue extends DCacheModule
entry
.
io
.
id
:=
i
.
U
// entry req
entry
.
io
.
lsu
.
req
.
valid
:=
(
i
.
U
===
alloc_idx
)
&&
allocate
&&
req
.
valid
&&
!
block_conflict
entry
.
io
.
lsu
.
req
.
valid
:=
(
i
.
U
===
alloc_idx
)
&&
allocate
&&
req
.
valid
primary_ready
(
i
)
:=
entry
.
io
.
lsu
.
req
.
ready
entry
.
io
.
lsu
.
req
.
bits
:=
req
.
bits
...
...
@@ -165,7 +166,7 @@ class StoreReplayQueue extends DCacheModule
io
.
lsu
.
resp
<>
resp_arb
.
io
.
out
io
.
pipe_req
<>
pipe_req_arb
.
io
.
out
block_conflict
:
=
VecInit
(
entries
.
map
(
e
=>
e
.
io
.
block_addr
.
valid
&&
e
.
io
.
block_addr
.
bits
===
io
.
lsu
.
req
.
bits
.
addr
)).
asUInt
.
orR
val
block_conflict
=
VecInit
(
entries
.
map
(
e
=>
e
.
io
.
block_addr
.
valid
&&
e
.
io
.
block_addr
.
bits
===
io
.
lsu
.
req
.
bits
.
addr
)).
asUInt
.
orR
// sanity check
when
(
io
.
lsu
.
req
.
valid
)
{
...
...
src/main/scala/xiangshan/cache/TLB.scala
浏览文件 @
49681eda
...
...
@@ -112,19 +112,22 @@ class CAMTemplate[T <: Data](val gen: T, val set: Int, val readWidth: Int) exten
val
io
=
IO
(
new
Bundle
{
val
r
=
new
Bundle
{
val
req
=
Input
(
Vec
(
readWidth
,
gen
))
val
resp
=
Output
(
Vec
(
readWidth
,
UInt
(
set
.
W
)))
val
resp
=
Output
(
Vec
(
readWidth
,
Vec
(
set
,
Bool
()
)))
}
val
w
=
Flipped
(
ValidIO
(
new
Bundle
{
val
index
=
UInt
(
log2Up
(
set
).
W
)
val
data
=
gen
}))
val
w
=
Input
(
new
Bundle
{
val
valid
=
Bool
()
val
bits
=
new
Bundle
{
val
index
=
UInt
(
log2Up
(
set
).
W
)
val
data
=
gen
}
})
})
val
wordType
=
UInt
(
gen
.
getWidth
.
W
)
val
array
=
Reg
(
Vec
(
set
,
wordType
))
io
.
r
.
resp
.
zipWithIndex
.
map
{
case
(
a
,
i
)
=>
a
:=
VecInit
(
array
.
map
(
io
.
r
.
req
(
i
).
asUInt
===
_
)).
asUInt
a
:=
array
.
map
(
io
.
r
.
req
(
i
).
asUInt
===
_
)
}
when
(
io
.
w
.
valid
)
{
...
...
@@ -132,78 +135,73 @@ class CAMTemplate[T <: Data](val gen: T, val set: Int, val readWidth: Int) exten
}
}
class
TlbEntryData
extends
TlbBundle
{
val
ppn
=
UInt
(
ppnLen
.
W
)
val
perm
=
new
TlbPermBundle
// TODO: change perm to every kinds of pf check
class
TlbSPMeta
extends
TlbBundle
{
val
tag
=
UInt
(
vpnLen
.
W
)
// tag is vpn
val
level
=
UInt
(
1.
W
)
// 1 for 2MB, 0 for 1GB
override
def
toPrintable
:
Printable
=
{
p
"ppn:0x${Hexadecimal(ppn)} perm:${perm}"
def
hit
(
vpn
:
UInt
)
:
Bool
=
{
val
a
=
tag
(
vpnnLen
*
3
-
1
,
vpnnLen
*
2
)
===
vpn
(
vpnnLen
*
3
-
1
,
vpnnLen
*
2
)
val
b
=
tag
(
vpnnLen
*
2
-
1
,
vpnnLen
*
1
)
===
vpn
(
vpnnLen
*
2
-
1
,
vpnnLen
*
1
)
XSDebug
(
Mux
(
level
.
asBool
,
a
&
b
,
a
),
p
"Hit superpage: hit:${Mux(level.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${level} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n"
)
Mux
(
level
.
asBool
,
a
&
b
,
a
)
}
}
class
TlbEntry
(
superpage
:
Boolean
=
false
)
extends
TlbBundle
{
val
tag
=
UInt
(
vpnLen
.
W
)
// tag is vpn
val
level
=
if
(
superpage
)
Some
(
UInt
(
1.
W
))
else
None
// /*2 for 4KB,*/ 1 for 2MB, 0 for 1GB
val
data
=
new
TlbEntryData
def
apply
(
vpn
:
UInt
,
level
:
UInt
)
=
{
this
.
tag
:=
vpn
this
.
level
:=
level
(
0
)
def
hit
(
vpn
:
UInt
)
:
Bool
=
{
if
(
superpage
)
{
val
insideLevel
=
level
.
getOrElse
(
0.
U
)
val
a
=
tag
(
vpnnLen
*
3
-
1
,
vpnnLen
*
2
)
===
vpn
(
vpnnLen
*
3
-
1
,
vpnnLen
*
2
)
val
b
=
tag
(
vpnnLen
*
2
-
1
,
vpnnLen
*
1
)
===
vpn
(
vpnnLen
*
2
-
1
,
vpnnLen
*
1
)
XSDebug
(
Mux
(
insideLevel
.
asBool
,
a
&
b
,
a
),
p
"Hit superpage: hit:${Mux(insideLevel.asBool, a&b, a)} tag:${Hexadecimal(tag)} level:${insideLevel} data:${data} a:${a} b:${b} vpn:${Hexadecimal(vpn)}\n"
)
Mux
(
insideLevel
.
asBool
,
a
&
b
,
a
)
}
else
{
XSDebug
(
tag
===
vpn
,
p
"Hit normalpage: hit:${tag === vpn} tag:${Hexadecimal(tag)} data:${data} vpn:${Hexadecimal(vpn)}\n"
)
tag
===
vpn
}
this
}
def
ppn
(
vpn
:
UInt
)
:
UInt
=
{
}
class
TlbData
(
superpage
:
Boolean
=
false
)
extends
TlbBundle
{
val
level
=
if
(
superpage
)
Some
(
UInt
(
1.
W
))
else
None
// /*2 for 4KB,*/ 1 for 2MB, 0 for 1GB
val
ppn
=
UInt
(
ppnLen
.
W
)
val
perm
=
new
TlbPermBundle
def
genPPN
(
vpn
:
UInt
)
:
UInt
=
{
if
(
superpage
)
{
val
insideLevel
=
level
.
getOrElse
(
0.
U
)
Mux
(
insideLevel
.
asBool
,
Cat
(
data
.
ppn
(
data
.
ppn
.
getWidth
-
1
,
vpnnLen
*
1
),
vpn
(
vpnnLen
*
1
-
1
,
0
)),
Cat
(
data
.
ppn
(
data
.
ppn
.
getWidth
-
1
,
vpnnLen
*
2
),
vpn
(
vpnnLen
*
2
-
1
,
0
)))
Mux
(
insideLevel
.
asBool
,
Cat
(
ppn
(
ppn
.
getWidth
-
1
,
vpnnLen
*
1
),
vpn
(
vpnnLen
*
1
-
1
,
0
)),
Cat
(
ppn
(
ppn
.
getWidth
-
1
,
vpnnLen
*
2
),
vpn
(
vpnnLen
*
2
-
1
,
0
)))
}
else
{
data
.
ppn
ppn
}
}
def
apply
(
vpn
:
UInt
,
ppn
:
UInt
,
level
:
UInt
,
perm
:
UInt
,
pf
:
Bool
)
=
{
this
.
tag
:=
vpn
def
apply
(
ppn
:
UInt
,
level
:
UInt
,
perm
:
UInt
,
pf
:
Bool
)
=
{
this
.
level
.
map
(
_
:=
level
(
0
))
this
.
data
.
ppn
:=
ppn
this
.
ppn
:=
ppn
// refill pagetable perm
val
ptePerm
=
perm
.
asTypeOf
(
new
PtePermBundle
)
this
.
data
.
perm
.
pf
:=
pf
this
.
data
.
perm
.
d
:=
ptePerm
.
d
this
.
data
.
perm
.
a
:=
ptePerm
.
a
this
.
data
.
perm
.
g
:=
ptePerm
.
g
this
.
data
.
perm
.
u
:=
ptePerm
.
u
this
.
data
.
perm
.
x
:=
ptePerm
.
x
this
.
data
.
perm
.
w
:=
ptePerm
.
w
this
.
data
.
perm
.
r
:=
ptePerm
.
r
this
.
perm
.
pf
:=
pf
this
.
perm
.
d
:=
ptePerm
.
d
this
.
perm
.
a
:=
ptePerm
.
a
this
.
perm
.
g
:=
ptePerm
.
g
this
.
perm
.
u
:=
ptePerm
.
u
this
.
perm
.
x
:=
ptePerm
.
x
this
.
perm
.
w
:=
ptePerm
.
w
this
.
perm
.
r
:=
ptePerm
.
r
// get pma perm
val
(
pmaMode
,
accessWidth
)
=
AddressSpace
.
memmapAddrMatch
(
Cat
(
ppn
,
0.
U
(
12.
W
)))
this
.
data
.
perm
.
pr
:=
PMAMode
.
read
(
pmaMode
)
this
.
data
.
perm
.
pw
:=
PMAMode
.
write
(
pmaMode
)
this
.
data
.
perm
.
pe
:=
PMAMode
.
execute
(
pmaMode
)
this
.
data
.
perm
.
pa
:=
PMAMode
.
atomic
(
pmaMode
)
this
.
data
.
perm
.
pi
:=
PMAMode
.
icache
(
pmaMode
)
this
.
data
.
perm
.
pd
:=
PMAMode
.
dcache
(
pmaMode
)
this
.
perm
.
pr
:=
PMAMode
.
read
(
pmaMode
)
this
.
perm
.
pw
:=
PMAMode
.
write
(
pmaMode
)
this
.
perm
.
pe
:=
PMAMode
.
execute
(
pmaMode
)
this
.
perm
.
pa
:=
PMAMode
.
atomic
(
pmaMode
)
this
.
perm
.
pi
:=
PMAMode
.
icache
(
pmaMode
)
this
.
perm
.
pd
:=
PMAMode
.
dcache
(
pmaMode
)
this
}
override
def
toPrintable
:
Printable
=
{
val
insideLevel
=
level
.
getOrElse
(
0.
U
)
p
"
vpn:0x${Hexadecimal(tag)} level:${insideLevel} data:${data
}"
p
"
level:${insideLevel} ppn:${Hexadecimal(ppn)} perm:${perm
}"
}
override
def
cloneType
:
this.
type
=
(
new
Tlb
Entry
(
superpage
)).
asInstanceOf
[
this.
type
]
override
def
cloneType
:
this.
type
=
(
new
Tlb
Data
(
superpage
)).
asInstanceOf
[
this.
type
]
}
object
TlbCmd
{
...
...
@@ -311,13 +309,15 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// Normal page && Super page
val
nv
=
RegInit
(
VecInit
(
Seq
.
fill
(
TlbEntrySize
)(
false
.
B
)))
val
nentry
=
Reg
(
Vec
(
TlbEntrySize
,
new
TlbEntry
(
false
)))
val
nMeta
=
Module
(
new
CAMTemplate
(
UInt
(
vpnLen
.
W
),
TlbEntrySize
,
Width
+
1
)).
io
val
nData
=
Reg
(
Vec
(
TlbEntrySize
,
new
TlbData
(
false
)))
val
sv
=
RegInit
(
VecInit
(
Seq
.
fill
(
TlbSPEntrySize
)(
false
.
B
)))
val
sentry
=
Reg
(
Vec
(
TlbSPEntrySize
,
new
TlbEntry
(
true
)))
val
sMeta
=
Reg
(
Vec
(
TlbSPEntrySize
,
new
TlbSPMeta
))
val
sData
=
Reg
(
Vec
(
TlbSPEntrySize
,
new
TlbData
(
true
)))
val
v
=
nv
++
sv
val
entry
=
nentry
++
sentry
val
g
=
VecInit
(
entry
.
map
(
_
.
data
.
perm
.
g
))
val
pf
=
VecInit
(
entry
.
zip
(
v
).
map
{
case
(
e
,
vi
)
=>
e
.
data
.
perm
.
pf
&
vi
})
val
data
=
nData
++
sData
val
g
=
VecInit
(
data
.
map
(
_
.
perm
.
g
))
val
pf
=
VecInit
(
data
.
zip
(
v
).
map
{
case
(
e
,
vi
)
=>
e
.
perm
.
pf
&
vi
})
/**
* PTW refill
...
...
@@ -331,14 +331,19 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val
nRefillIdx
=
replaceWrapper
(
nv
,
nReplace
.
way
)
val
sRefillIdx
=
replaceWrapper
(
sv
,
sReplace
.
way
)
nMeta
.
w
:=
DontCare
nMeta
.
w
.
valid
:=
false
.
B
when
(
refill
)
{
val
resp
=
ptw
.
resp
.
bits
when
(
resp
.
entry
.
level
.
getOrElse
(
0.
U
)
===
2.
U
)
{
val
refillIdx
=
nRefillIdx
refillIdx
.
suggestName
(
s
"NormalRefillIdx"
)
nv
(
refillIdx
)
:=
true
.
B
nentry
(
refillIdx
).
apply
(
vpn
=
resp
.
entry
.
tag
,
nMeta
.
w
.
bits
.
index
:=
nRefillIdx
nMeta
.
w
.
bits
.
data
:=
resp
.
entry
.
tag
nMeta
.
w
.
valid
:=
true
.
B
nData
(
refillIdx
).
apply
(
ppn
=
resp
.
entry
.
ppn
,
level
=
resp
.
entry
.
level
.
getOrElse
(
0.
U
),
perm
=
VecInit
(
resp
.
entry
.
perm
.
getOrElse
(
0.
U
)).
asUInt
,
...
...
@@ -348,9 +353,13 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
}.
otherwise
{
val
refillIdx
=
sRefillIdx
refillIdx
.
suggestName
(
s
"SuperRefillIdx"
)
sv
(
refillIdx
)
:=
true
.
B
sentry
(
refillIdx
).
apply
(
vpn
=
resp
.
entry
.
tag
,
sMeta
(
refillIdx
).
apply
(
vpn
=
resp
.
entry
.
tag
,
level
=
resp
.
entry
.
level
.
getOrElse
(
0.
U
)
)
sData
(
refillIdx
).
apply
(
ppn
=
resp
.
entry
.
ppn
,
level
=
resp
.
entry
.
level
.
getOrElse
(
0.
U
),
perm
=
VecInit
(
resp
.
entry
.
perm
.
getOrElse
(
0.
U
)).
asUInt
,
...
...
@@ -363,14 +372,21 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
/**
* L1 TLB read
*/
val
sfenceVpn
=
sfence
.
bits
.
addr
.
asTypeOf
(
vaBundle
).
vpn
for
(
i
<-
0
until
Width
)
{
nMeta
.
r
.
req
(
i
)
:=
io
.
requestor
(
i
).
req
.
bits
.
vaddr
.
asTypeOf
(
vaBundle
).
vpn
}
nMeta
.
r
.
req
(
Width
)
:=
sfenceVpn
val
nRefillMask
=
Mux
(
refill
,
UIntToOH
(
nRefillIdx
)(
TlbEntrySize
-
1
,
0
),
0.
U
).
asBools
val
sRefillMask
=
Mux
(
refill
,
UIntToOH
(
sRefillIdx
)(
TlbSPEntrySize
-
1
,
0
),
0.
U
).
asBools
def
TLBNormalRead
(
i
:
Int
)
=
{
val
entryHitVec
=
(
if
(
isDtlb
)
VecInit
(
entry
.
zip
(
nRefillMask
++
sRefillMask
).
map
{
case
(
e
,
m
)
=>
~
m
&&
e
.
hit
(
reqAddr
(
i
).
vpn
)})
VecInit
(
nMeta
.
r
.
resp
(
i
).
zip
(
nRefillMask
).
map
{
case
(
e
,
m
)
=>
~
m
&&
e
}
++
sMeta
.
zip
(
sRefillMask
).
map
{
case
(
e
,
m
)
=>
~
m
&&
e
.
hit
(
reqAddr
(
i
).
vpn
)
})
else
VecInit
(
entry
.
map
(
_
.
hit
(
reqAddr
(
i
).
vpn
/*, satp.asid*/
)))
VecInit
(
nMeta
.
r
.
resp
(
i
)
++
sMeta
.
map
(
_
.
hit
(
reqAddr
(
i
).
vpn
/*, satp.asid*/
)))
)
val
reqAddrReg
=
if
(
isDtlb
)
RegNext
(
reqAddr
(
i
))
else
reqAddr
(
i
)
...
...
@@ -384,8 +400,8 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
val
pfArray
=
ParallelOR
(
pfHitVec
).
asBool
&&
validReg
&&
vmEnable
val
hit
=
ParallelOR
(
hitVec
).
asBool
&&
validReg
&&
vmEnable
&&
~
pfArray
val
miss
=
!
hit
&&
validReg
&&
vmEnable
&&
~
pfArray
val
hitppn
=
ParallelMux
(
hitVec
zip
entry
.
map
(
_
.
ppn
(
reqAddrReg
.
vpn
)))
val
hitPerm
=
ParallelMux
(
hitVec
zip
entry
.
map
(
_
.
data
.
perm
))
val
hitppn
=
ParallelMux
(
hitVec
zip
data
.
map
(
_
.
genPPN
(
reqAddrReg
.
vpn
)))
val
hitPerm
=
ParallelMux
(
hitVec
zip
data
.
map
(
_
.
perm
))
hitVec
.
suggestName
(
s
"hitVec_${i}"
)
pfHitVec
.
suggestName
(
s
"pfHitVec_${i}"
)
...
...
@@ -488,6 +504,7 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
// }
// sfence (flush)
val
sfenceHit
=
nMeta
.
r
.
resp
(
Width
)
++
sMeta
.
map
(
_
.
hit
(
sfenceVpn
))
when
(
sfence
.
valid
)
{
when
(
sfence
.
bits
.
rs1
)
{
// virtual address *.rs1 <- (rs1===0.U)
when
(
sfence
.
bits
.
rs2
)
{
// asid, but i do not want to support asid, *.rs2 <- (rs2===0.U)
...
...
@@ -498,31 +515,26 @@ class TLB(Width: Int, isDtlb: Boolean) extends TlbModule with HasCSRConst{
v
.
zipWithIndex
.
map
{
case
(
a
,
i
)
=>
a
:=
a
&
g
(
i
)
}
}
}.
otherwise
{
val
sfenceVpn
=
sfence
.
bits
.
addr
.
asTypeOf
(
vaBundle
).
vpn
when
(
sfence
.
bits
.
rs2
)
{
// specific addr but all asid
v
.
zipWithIndex
.
map
{
case
(
a
,
i
)
=>
a
:=
a
&
!
entry
(
i
).
hit
(
sfenceVpn
)
}
v
.
zipWithIndex
.
map
{
case
(
a
,
i
)
=>
a
:=
a
&
!
sfenceHit
(
i
)
}
}.
otherwise
{
// specific addr and specific asid
v
.
zipWithIndex
.
map
{
case
(
a
,
i
)
=>
a
:=
a
&
!
(
entry
(
i
).
hit
(
sfenceVpn
)
&&
!
g
(
i
))
}
v
.
zipWithIndex
.
map
{
case
(
a
,
i
)
=>
a
:=
a
&
!
sfenceHit
(
i
)
&&
!
g
(
i
)
}
}
}
}
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
&&
isDtlb
)
{
ExcitingUtils
.
addSource
(
valid
(
0
)
&&
vmEnable
,
"perfCntDtlbReqCnt0"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
1
)
&&
vmEnable
,
"perfCntDtlbReqCnt1"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
2
)
&&
vmEnable
,
"perfCntDtlbReqCnt2"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
3
)
&&
vmEnable
,
"perfCntDtlbReqCnt3"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
0
)
&&
vmEnable
&&
missVec
(
0
),
"perfCntDtlbMissCnt0"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
1
)
&&
vmEnable
&&
missVec
(
1
),
"perfCntDtlbMissCnt1"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
2
)
&&
vmEnable
&&
missVec
(
2
),
"perfCntDtlbMissCnt2"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
3
)
&&
vmEnable
&&
missVec
(
3
),
"perfCntDtlbMissCnt3"
,
Perf
)
}
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
&&
!
isDtlb
)
{
ExcitingUtils
.
addSource
(
valid
(
0
)
&&
vmEnable
,
"perfCntItlbReqCnt0"
,
Perf
)
ExcitingUtils
.
addSource
(
valid
(
0
)
&&
vmEnable
&&
missVec
(
0
),
"perfCntItlbMissCnt0"
,
Perf
)
if
(
isDtlb
)
{
for
(
i
<-
0
until
Width
)
{
XSPerf
(
"dtlb_access"
+
Integer
.
toString
(
i
,
10
),
valid
(
i
)
&&
vmEnable
)
}
for
(
i
<-
0
until
Width
)
{
XSPerf
(
"dtlb_miss"
+
Integer
.
toString
(
i
,
10
),
valid
(
i
)
&&
vmEnable
&&
missVec
(
i
))
}
}
else
{
XSPerf
(
"itlb_access"
,
valid
(
0
)
&&
vmEnable
)
XSPerf
(
"itlb_miss"
,
valid
(
0
)
&&
vmEnable
&&
missVec
(
0
))
}
// Log
...
...
src/main/scala/xiangshan/cache/prefetch/L1plusPrefetcher.scala
浏览文件 @
49681eda
...
...
@@ -52,21 +52,18 @@ class L1plusPrefetcher extends PrefetchModule {
XSDebug
(
p
"io.mem_acquire: v=${io.mem_acquire.valid} r=${io.mem_acquire.ready} ${io.mem_acquire.bits}\n"
)
XSDebug
(
p
"io.mem_grant: v=${io.mem_grant.valid} r=${io.mem_grant.ready} ${io.mem_grant.bits}\n"
)
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSource
(
io
.
mem_acquire
.
fire
(),
"perfCntL1plusPrefetchReqCnt"
,
Perf
)
def
idWidth
:
Int
=
log2Up
(
l1plusPrefetcherParameters
.
nEntries
)
(
0
until
l1plusPrefetcherParameters
.
nEntries
).
foreach
(
i
=>
ExcitingUtils
.
addSource
(
BoolStopWatch
(
start
=
io
.
mem_acquire
.
fire
()
&&
io
.
mem_acquire
.
bits
.
id
(
idWidth
-
1
,
0
)
===
i
.
U
,
stop
=
io
.
mem_grant
.
fire
()
&&
io
.
mem_grant
.
bits
.
id
(
idWidth
-
1
,
0
)
===
i
.
U
,
startHighPriority
=
true
),
"perfCntL1plusPrefetchPenaltyEntry"
+
Integer
.
toString
(
i
,
10
),
Perf
XSPerf
(
"L1+Prefetch_reqCnt"
,
io
.
mem_acquire
.
fire
())
def
idWidth
:
Int
=
log2Up
(
l1plusPrefetcherParameters
.
nEntries
)
(
0
until
l1plusPrefetcherParameters
.
nEntries
).
foreach
(
i
=>
XSPerf
(
"L1+Prefetch_penaltyEntry"
+
Integer
.
toString
(
i
,
10
),
BoolStopWatch
(
start
=
io
.
mem_acquire
.
fire
()
&&
io
.
mem_acquire
.
bits
.
id
(
idWidth
-
1
,
0
)
===
i
.
U
,
stop
=
io
.
mem_grant
.
fire
()
&&
io
.
mem_grant
.
bits
.
id
(
idWidth
-
1
,
0
)
===
i
.
U
,
startHighPriority
=
true
)
)
}
)
}
else
{
io
.
in
.
ready
:=
true
.
B
...
...
src/main/scala/xiangshan/cache/prefetch/L2Prefetcher.scala
浏览文件 @
49681eda
...
...
@@ -128,19 +128,16 @@ class L2PrefetcherImp(outer: L2Prefetcher) extends LazyModuleImp(outer) with Has
bus
.
e
.
valid
:=
false
.
B
bus
.
e
.
bits
:=
DontCare
if
(!
env
.
FPGAPlatform
&&
!
env
.
DualCore
)
{
ExcitingUtils
.
addSource
(
bus
.
a
.
fire
(),
"perfCntL2PrefetchReqCnt"
,
Perf
)
(
0
until
l2PrefetcherParameters
.
nEntries
).
foreach
(
i
=>
ExcitingUtils
.
addSource
(
BoolStopWatch
(
start
=
bus
.
a
.
fire
()
&&
bus
.
a
.
bits
.
source
(
l2PrefetcherParameters
.
totalWidth
-
1
,
0
)
===
i
.
U
,
stop
=
bus
.
d
.
fire
()
&&
bus
.
d
.
bits
.
source
(
l2PrefetcherParameters
.
totalWidth
-
1
,
0
)
===
i
.
U
,
startHighPriority
=
true
),
"perfCntL2PrefetchPenaltyEntry"
+
Integer
.
toString
(
i
,
10
),
Perf
XSPerf
(
"L2Prefetch_reqCnt"
,
bus
.
a
.
fire
())
(
0
until
l2PrefetcherParameters
.
nEntries
).
foreach
(
i
=>
XSPerf
(
"L2Prefetch_penaltyEntry"
+
Integer
.
toString
(
i
,
10
),
BoolStopWatch
(
start
=
bus
.
a
.
fire
()
&&
bus
.
a
.
bits
.
source
(
l2PrefetcherParameters
.
totalWidth
-
1
,
0
)
===
i
.
U
,
stop
=
bus
.
d
.
fire
()
&&
bus
.
d
.
bits
.
source
(
l2PrefetcherParameters
.
totalWidth
-
1
,
0
)
===
i
.
U
,
startHighPriority
=
true
)
)
}
)
}
src/main/scala/xiangshan/frontend/Bim.scala
浏览文件 @
49681eda
...
...
@@ -53,7 +53,8 @@ class BIM extends BasePredictor with BimParams {
io
.
resp
.
ctrs
:=
if2_bimRead
io
.
meta
.
ctrs
:=
if2_bimRead
val
u
=
io
.
update
.
bits
val
updateValid
=
RegNext
(
io
.
update
.
valid
)
val
u
=
RegNext
(
io
.
update
.
bits
)
val
updateRow
=
bimAddr
.
getBankIdx
(
u
.
ftqPC
)
...
...
@@ -76,7 +77,7 @@ class BIM extends BasePredictor with BimParams {
val
newCtrs
=
VecInit
((
0
until
BimBanks
).
map
(
b
=>
satUpdate
(
oldCtrs
(
b
),
2
,
newTakens
(
b
))))
// val oldSaturated = newCtr === oldCtr
val
needToUpdate
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
io
.
update
.
v
alid
&&
u
.
br_mask
(
i
)
&&
u
.
valids
(
i
)))
val
needToUpdate
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
updateV
alid
&&
u
.
br_mask
(
i
)
&&
u
.
valids
(
i
)))
when
(
reset
.
asBool
)
{
wrbypass_ctr_valids
.
foreach
(
_
.
foreach
(
_
:=
false
.
B
))}
...
...
@@ -104,7 +105,7 @@ class BIM extends BasePredictor with BimParams {
if
(
BPUDebug
&&
debug
)
{
XSDebug
(
doing_reset
,
"Reseting...\n"
)
XSDebug
(
"[update] v=%d pc=%x valids=%b, tgt=%x\n"
,
io
.
update
.
v
alid
,
u
.
ftqPC
,
u
.
valids
.
asUInt
,
u
.
target
)
XSDebug
(
"[update] v=%d pc=%x valids=%b, tgt=%x\n"
,
updateV
alid
,
u
.
ftqPC
,
u
.
valids
.
asUInt
,
u
.
target
)
XSDebug
(
"[update] brMask=%b, taken=%b isMisPred=%b\n"
,
u
.
br_mask
.
asUInt
,
newTakens
.
asUInt
,
u
.
mispred
.
asUInt
)
for
(
i
<-
0
until
BimBanks
)
{
...
...
src/main/scala/xiangshan/frontend/Btb.scala
浏览文件 @
49681eda
...
...
@@ -167,7 +167,9 @@ class BTB extends BasePredictor with BTBParams{
when
(
pd
.
isBr
)
{
t
:=
BTBtype
.
B
}
t
}
val
u
=
io
.
update
.
bits
val
do_update
=
RegNext
(
io
.
update
)
val
u
=
do_update
.
bits
val
cfi_pc
=
packetAligned
(
u
.
ftqPC
)
+
(
u
.
cfiIndex
.
bits
<<
instOffsetBits
)
val
new_target
=
u
.
target
...
...
@@ -188,7 +190,7 @@ class BTB extends BasePredictor with BTBParams{
val
dataWrite
=
BtbDataEntry
(
new_lower
,
new_extended
)
val
updateValid
=
io
.
update
.
valid
&&
updateTaken
val
updateValid
=
do_
update
.
valid
&&
updateTaken
// Update btb
require
(
isPow2
(
BtbBanks
))
// this is one hot, since each fetch bundle has at most 1 taken instruction
...
...
src/main/scala/xiangshan/frontend/Ibuffer.scala
浏览文件 @
49681eda
...
...
@@ -182,5 +182,5 @@ class Ibuffer extends XSModule with HasCircularQueuePtrHelper {
// )
// }
XSPerf
(
"utilization"
,
validEntries
)
XSPerf
(
"
ibuf_
utilization"
,
validEntries
)
}
src/main/scala/xiangshan/frontend/LoopPredictor.scala
浏览文件 @
49681eda
...
...
@@ -334,8 +334,9 @@ class LoopPredictor extends BasePredictor with LTBParams {
val
updateValid
=
io
.
update
.
valid
val
update
=
io
.
update
.
bits
val
redirectValid
=
io
.
redirect
.
valid
val
redirect
=
io
.
redirect
.
bits
.
cfiUpdate
val
do_redirect
=
RegNext
(
io
.
redirect
)
val
redirectValid
=
do_redirect
.
valid
val
redirect
=
do_redirect
.
bits
.
cfiUpdate
val
redirectPC
=
redirect
.
pc
val
redirectBank
=
ltbAddr
.
getBank
(
redirectPC
)
...
...
@@ -358,7 +359,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
ltbs
(
i
).
io
.
redirect
.
bits
.
specCnt
:=
redirect
.
specCnt
(
i
)
ltbs
(
i
).
io
.
redirect
.
bits
.
mispred
:=
redirect
.
isMisPred
ltbs
(
i
).
io
.
redirect
.
bits
.
taken
:=
redirect
.
taken
ltbs
(
i
).
io
.
redirect
.
bits
.
isReplay
:=
io
.
redirect
.
bits
.
flushItself
ltbs
(
i
).
io
.
redirect
.
bits
.
isReplay
:=
do_
redirect
.
bits
.
flushItself
ltbs
(
i
).
io
.
repair
:=
redirectValid
&&
redirectBank
=/=
i
.
U
}
...
...
@@ -379,9 +380,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
io
.
meta
.
specCnts
(
i
)
:=
ltbResps
(
i
).
specCnt
}
if
(!
env
.
FPGAPlatform
)
{
ExcitingUtils
.
addSource
(
io
.
resp
.
exit
.
reduce
(
_
||
_
),
"perfCntLoopExit"
,
Perf
)
}
XSPerf
(
"LoopExit"
,
io
.
resp
.
exit
.
reduce
(
_
||
_
))
if
(
BPUDebug
&&
debug
)
{
// debug info
...
...
@@ -391,7 +390,7 @@ class LoopPredictor extends BasePredictor with LTBParams {
XSDebug
(
"[IF4][req] inMask=%b\n"
,
inMask
)
XSDebug
(
"[IF4][req] updatePC=%x, updateValid=%d, isBr=%b\n"
,
update
.
ftqPC
,
updateValid
,
update
.
br_mask
.
asUInt
)
XSDebug
(
"[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n"
,
redirect
.
pc
,
redirectBank
,
redirectValid
,
redirect
.
pd
.
isBr
,
io
.
redirect
.
bits
.
flushItself
)
XSDebug
(
"[IF4][req] redirectPC=%x redirectBank=%d, redirectValid=%d, isBr=%d, isReplay=%d\n"
,
redirect
.
pc
,
redirectBank
,
redirectValid
,
redirect
.
pd
.
isBr
,
do_
redirect
.
bits
.
flushItself
)
XSDebug
(
"[IF4][req] isMisPred=%d\n"
,
redirect
.
isMisPred
)
XSDebug
(
redirectValid
,
"[redirect SpecCnt] "
)
...
...
src/main/scala/xiangshan/frontend/RAS.scala
浏览文件 @
49681eda
...
...
@@ -179,11 +179,12 @@ class RAS extends BasePredictor
spec_push
:=
!
spec_is_full
&&
io
.
callIdx
.
valid
&&
io
.
pc
.
valid
spec_pop
:=
!
spec_is_empty
&&
io
.
is_ret
&&
io
.
pc
.
valid
val
copy_valid
=
io
.
redirect
.
valid
val
recover_cfi
=
io
.
redirect
.
bits
.
cfiUpdate
val
redirect
=
RegNext
(
io
.
redirect
)
val
copy_valid
=
redirect
.
valid
val
recover_cfi
=
redirect
.
bits
.
cfiUpdate
val
retMissPred
=
copy_valid
&&
io
.
redirect
.
bits
.
level
===
0.
U
&&
recover_cfi
.
pd
.
isRet
val
callMissPred
=
copy_valid
&&
io
.
redirect
.
bits
.
level
===
0.
U
&&
recover_cfi
.
pd
.
isCall
val
retMissPred
=
copy_valid
&&
redirect
.
bits
.
level
===
0.
U
&&
recover_cfi
.
pd
.
isRet
val
callMissPred
=
copy_valid
&&
redirect
.
bits
.
level
===
0.
U
&&
recover_cfi
.
pd
.
isCall
// when we mispredict a call, we must redo a push operation
// similarly, when we mispredict a return, we should redo a pop
spec_ras
.
recover_valid
:=
copy_valid
...
...
@@ -215,7 +216,7 @@ class RAS extends BasePredictor
XSDebug
(
spec_push
,
"(spec_ras)push inAddr: 0x%x inCtr: %d | allocNewEntry:%d | sp:%d \n"
,
spec_new_addr
,
spec_debug
.
push_entry
.
ctr
,
spec_debug
.
alloc_new
,
spec_debug
.
sp
.
asUInt
)
XSDebug
(
spec_pop
,
"(spec_ras)pop outValid:%d outAddr: 0x%x \n"
,
io
.
out
.
valid
,
io
.
out
.
bits
.
target
)
val
redirectUpdate
=
io
.
redirect
.
bits
.
cfiUpdate
val
redirectUpdate
=
redirect
.
bits
.
cfiUpdate
XSDebug
(
"copyValid:%d recover(SP:%d retAddr:%x ctr:%d) \n"
,
copy_valid
,
redirectUpdate
.
rasSp
,
redirectUpdate
.
rasEntry
.
retAddr
,
redirectUpdate
.
rasEntry
.
ctr
)
}
...
...
src/main/scala/xiangshan/frontend/uBTB.scala
浏览文件 @
49681eda
...
...
@@ -12,8 +12,6 @@ trait MicroBTBPatameter{
val
nWays
=
16
val
lowerBitsSize
=
20
val
tagSize
=
20
val
extended_stat
=
false
}
@chiselName
...
...
@@ -62,231 +60,170 @@ class MicroBTB extends BasePredictor
val
tag
=
UInt
(
tagSize
.
W
)
}
class
MicroBTB
Entry
extends
XSBundle
class
MicroBTB
Data
extends
XSBundle
{
val
lower
=
UInt
(
lowerBitsSize
.
W
)
}
class
MetaOutput
extends
XSBundle
{
val
is_Br
=
Bool
()
class
ReadResp
extends
XSBundle
{
val
valid
=
Bool
()
val
taken
=
Bool
()
val
target
=
UInt
(
VAddrBits
.
W
)
val
is_RVC
=
Bool
()
val
pred
=
UInt
(
2.
W
)
val
is_Br
=
Bool
(
)
}
@chiselName
class
UBTBMetaBank
(
nWays
:
Int
)
extends
XSModule
{
class
UBTBBank
(
val
nWays
:
Int
)
extends
XSModule
with
HasIFUConst
{
val
io
=
IO
(
new
Bundle
{
val
wen
=
Input
(
Bool
())
val
wWay
=
Input
(
UInt
(
log2Up
(
nWays
).
W
))
val
wdata
=
Input
(
new
MicroBTBMeta
)
val
rtag
=
Input
(
UInt
(
tagSize
.
W
))
val
rdata
=
Output
(
new
MetaOutput
)
val
hit_and_taken
=
Output
(
Bool
())
val
hit_ohs
=
Output
(
Vec
(
nWays
,
Bool
()))
val
hit_way
=
Output
(
UInt
(
log2Up
(
nWays
).
W
))
val
allocatable_way
=
Valid
(
UInt
(
log2Up
(
nWays
).
W
))
val
rWay
=
Input
(
UInt
(
log2Up
(
nWays
).
W
))
val
rpred
=
Output
(
UInt
(
2.
W
))
})
val
mem
=
Mem
(
nWays
,
new
MicroBTBMeta
)
val
rentries
=
VecInit
((
0
until
nWays
)
map
(
i
=>
mem
(
i
)))
val
hit_ohs
=
VecInit
(
rentries
map
(
e
=>
e
.
valid
&&
e
.
tag
===
io
.
rtag
))
io
.
hit_and_taken
:=
VecInit
(
rentries
map
(
e
=>
e
.
valid
&&
e
.
tag
===
io
.
rtag
&&
e
.
pred
(
1
))).
asUInt
.
orR
val
hit_way
=
OHToUInt
(
hit_ohs
)
//val hit_entry = rentries(hit_way)
val
hit_entry
=
ParallelMux
(
hit_ohs
zip
rentries
)
val
read_pc
=
Flipped
(
Valid
(
UInt
(
VAddrBits
.
W
)))
val
read_resp
=
Output
(
new
ReadResp
)
val
read_hit
=
Output
(
Bool
())
val
to_write_way
=
Output
(
UInt
(
log2Ceil
(
nWays
).
W
))
io
.
hit_ohs
:=
hit_ohs
io
.
hit_way
:=
hit_way
io
.
rdata
.
is_Br
:=
hit_entry
.
is_Br
io
.
rdata
.
is_RVC
:=
hit_entry
.
is_RVC
io
.
rdata
.
pred
:=
hit_entry
.
pred
val
entry_emptys
=
VecInit
(
rentries
.
map
(
e
=>
!
e
.
valid
))
val
allocatable
=
ParallelOR
(
entry_emptys
)
io
.
allocatable_way
.
bits
:=
PriorityEncoder
(
entry_emptys
)
io
.
allocatable_way
.
valid
:=
allocatable
io
.
rpred
:=
rentries
(
io
.
rWay
).
pred
when
(
io
.
wen
)
{
mem
.
write
(
io
.
wWay
,
io
.
wdata
)
}
}
val
update_way
=
Input
(
UInt
(
log2Ceil
(
nWays
).
W
))
val
update_read_pred
=
Output
(
UInt
(
2.
W
))
@chiselName
class
UBTBDataBank
(
nWays
:
Int
)
extends
XSModule
{
val
io
=
IO
(
new
Bundle
{
val
wen
=
Input
(
Bool
())
val
wWay
=
Input
(
UInt
(
log2Up
(
nWays
).
W
))
val
wdata
=
Input
(
new
MicroBTBEntry
)
val
rOHs
=
Input
(
Vec
(
nWays
,
Bool
()))
val
rdata
=
Output
(
new
MicroBTBEntry
)
val
update_write_meta
=
Flipped
(
Valid
(
new
MicroBTBMeta
))
val
update_write_data
=
Flipped
(
Valid
(
new
MicroBTBData
))
})
val
me
m
=
Mem
(
nWays
,
new
MicroBTBEntry
)
val
rentries
=
VecInit
((
0
until
nWays
)
map
(
i
=>
mem
(
i
)
))
// io.rdata := rentries(io.rWay)
io
.
rdata
:=
ParallelMux
(
io
.
rOHs
zip
rentries
)
when
(
io
.
wen
)
{
mem
.
write
(
io
.
wWay
,
io
.
wdata
)
val
me
ta
=
Module
(
new
AsyncDataModuleTemplate
(
new
MicroBTBMeta
,
nWays
,
nWays
,
1
)
)
val
data
=
Module
(
new
AsyncDataModuleTemplate
(
new
MicroBTBData
,
nWays
,
nWays
,
1
))
for
(
w
<-
0
until
nWays
)
{
meta
.
io
.
raddr
(
w
)
:=
w
.
U
data
.
io
.
raddr
(
w
)
:=
w
.
U
}
}
meta
.
io
.
waddr
(
0
)
:=
io
.
update_way
meta
.
io
.
wen
(
0
)
:=
io
.
update_write_meta
.
valid
meta
.
io
.
wdata
(
0
)
:=
io
.
update_write_meta
.
bits
data
.
io
.
waddr
(
0
)
:=
io
.
update_way
data
.
io
.
wen
(
0
)
:=
io
.
update_write_data
.
valid
data
.
io
.
wdata
(
0
)
:=
io
.
update_write_data
.
bits
val
rmetas
=
meta
.
io
.
rdata
val
rdatas
=
data
.
io
.
rdata
val
packetAlignedPC
=
packetAligned
(
io
.
read_pc
.
bits
)
val
read_tag
=
getTag
(
io
.
read_pc
.
bits
)
val
hits
=
VecInit
(
rmetas
.
map
(
m
=>
m
.
valid
&&
m
.
tag
===
read_tag
))
val
takens
=
VecInit
(
rmetas
.
map
(
m
=>
m
.
pred
(
1
)))
val
hit_oh
=
hits
.
asUInt
val
hit_and_taken
=
VecInit
((
hits
zip
takens
)
map
{
case
(
h
,
t
)
=>
h
&&
t
}).
asUInt
.
orR
val
hit_meta
=
ParallelMux
(
hits
zip
rmetas
)
val
hit_data
=
ParallelMux
(
hits
zip
rdatas
)
val
target
=
Cat
(
io
.
read_pc
.
bits
(
VAddrBits
-
1
,
lowerBitsSize
+
instOffsetBits
),
hit_data
.
lower
,
0.
U
(
instOffsetBits
.
W
))
val
emptys
=
rmetas
.
map
(
m
=>
!
m
.
valid
)
val
allocatable
=
VecInit
(
emptys
).
asUInt
.
orR
val
empty_way
=
ParallelPriorityEncoder
(
emptys
)
val
hit_way
=
OHToUInt
(
hit_oh
)
val
random_way
=
LFSR64
()(
log2Ceil
(
nWays
)-
1
,
0
)
io
.
to_write_way
:=
Mux
(
hit_oh
.
orR
,
hit_way
,
Mux
(
allocatable
,
empty_way
,
random_way
))
val
ren
=
io
.
read_pc
.
valid
io
.
read_resp
.
valid
:=
ren
io
.
read_resp
.
is_RVC
:=
ren
&&
hit_meta
.
is_RVC
io
.
read_resp
.
is_Br
:=
ren
&&
hit_meta
.
is_Br
io
.
read_resp
.
taken
:=
ren
&&
hit_and_taken
io
.
read_resp
.
target
:=
target
io
.
read_hit
:=
ren
&&
hit_oh
.
orR
io
.
update_read_pred
:=
rmetas
(
io
.
update_way
).
pred
}
val
ubtbBanks
=
Seq
.
fill
(
PredictWidth
)(
Module
(
new
UBTBBank
(
nWays
)))
val
banks
=
VecInit
(
ubtbBanks
.
map
(
_
.
io
))
val
read_resps
=
VecInit
(
banks
.
map
(
b
=>
b
.
read_resp
))
val
metaBanks
=
Seq
.
fill
(
PredictWidth
)(
Module
(
new
UBTBMetaBank
(
nWays
)))
val
dataBanks
=
Seq
.
fill
(
PredictWidth
)(
Module
(
new
UBTBDataBank
(
nWays
)))
val
metas
=
VecInit
(
metaBanks
.
map
(
_
.
io
))
val
datas
=
VecInit
(
dataBanks
.
map
(
_
.
io
))
for
(
b
<-
0
until
PredictWidth
)
{
banks
(
b
).
read_pc
.
valid
:=
io
.
pc
.
valid
&&
io
.
inMask
(
b
)
banks
(
b
).
read_pc
.
bits
:=
io
.
pc
.
bits
out_ubtb_br_info
.
writeWay
(
b
)
:=
banks
(
b
).
to_write_way
out_ubtb_br_info
.
hits
(
b
)
:=
banks
(
b
).
read_hit
val
uBTBMeta
=
VecInit
(
metas
.
map
(
m
=>
m
.
rdata
))
val
uBTB
=
VecInit
(
datas
.
map
(
d
=>
d
.
rdata
))
//only when hit and instruction valid and entry valid can output data
io
.
out
.
targets
(
b
)
:=
read_resps
(
b
).
target
io
.
out
.
hits
(
b
)
:=
banks
(
b
).
read_hit
io
.
out
.
takens
(
b
)
:=
read_resps
(
b
).
taken
io
.
out
.
is_RVC
(
b
)
:=
read_resps
(
b
).
is_RVC
io
.
out
.
brMask
(
b
)
:=
read_resps
(
b
).
is_Br
}
val
do_reset
=
RegInit
(
true
.
B
)
val
reset_way
=
RegInit
(
0.
U
(
log2Ceil
(
nWays
).
W
))
when
(
do_reset
)
{
reset_way
:=
reset_way
+
1.
U
}
when
(
reset_way
===
(
nWays
-
1
).
U
)
{
do_reset
:=
false
.
B
}
//uBTB read
//tag is packet aligned
val
packetAlignedPC
=
packetAligned
(
io
.
pc
.
bits
)
val
read_valid
=
io
.
pc
.
valid
val
read_req_tag
=
getTag
(
packetAlignedPC
)
class
ReadRespEntry
extends
XSBundle
{
val
is_RVC
=
Bool
()
val
target
=
UInt
(
VAddrBits
.
W
)
val
valid
=
Bool
()
val
taken
=
Bool
()
val
is_Br
=
Bool
()
}
val
read_resp
=
Wire
(
Vec
(
PredictWidth
,
new
ReadRespEntry
))
(
0
until
PredictWidth
).
map
{
b
=>
metas
(
b
).
rtag
:=
read_req_tag
}
val
read_hit_ohs
=
(
0
until
PredictWidth
).
map
{
b
=>
metas
(
b
).
hit_ohs
}
val
read_hit_vec
=
VecInit
(
read_hit_ohs
.
map
{
oh
=>
ParallelOR
(
oh
).
asBool
})
val
read_hit_ways
=
(
0
until
PredictWidth
).
map
{
b
=>
metas
(
b
).
hit_way
}
(
0
until
PredictWidth
).
map
(
b
=>
datas
(
b
).
rOHs
:=
read_hit_ohs
(
b
))
val
uBTBMeta_resp
=
VecInit
((
0
until
PredictWidth
).
map
(
b
=>
metas
(
b
).
rdata
))
val
btb_resp
=
VecInit
((
0
until
PredictWidth
).
map
(
b
=>
datas
(
b
).
rdata
))
for
(
i
<-
0
until
PredictWidth
){
// do not need to decide whether to produce results\
read_resp
(
i
).
valid
:=
io
.
inMask
(
i
)
read_resp
(
i
).
taken
:=
read_resp
(
i
).
valid
&&
metas
(
i
).
hit_and_taken
read_resp
(
i
).
is_Br
:=
read_resp
(
i
).
valid
&&
uBTBMeta_resp
(
i
).
is_Br
read_resp
(
i
).
target
:=
Cat
(
io
.
pc
.
bits
(
VAddrBits
-
1
,
lowerBitsSize
+
instOffsetBits
),
btb_resp
(
i
).
asUInt
,
0.
U
(
instOffsetBits
.
W
))
read_resp
(
i
).
is_RVC
:=
read_resp
(
i
).
valid
&&
uBTBMeta_resp
(
i
).
is_RVC
out_ubtb_br_info
.
hits
(
i
)
:=
read_hit_vec
(
i
)
}
//TODO: way alloc algorithm
def
alloc_way
(
valids
:
UInt
,
meta_tags
:
UInt
,
req_tag
:
UInt
)
=
{
val
way
=
Wire
(
UInt
(
log2Up
(
BtbWays
).
W
))
val
all_valid
=
valids
.
andR
.
asBool
val
tags
=
Cat
(
meta_tags
,
req_tag
)
val
l
=
log2Ceil
(
nWays
)
val
nChunks
=
(
tags
.
getWidth
+
l
-
1
)
/
l
val
chunks
=
(
0
until
nChunks
)
map
{
i
=>
tags
(
min
((
i
+
1
)*
l
,
tags
.
getWidth
)-
1
,
i
*
l
)
}
way
:=
Mux
(
all_valid
,
chunks
.
reduce
(
_
^
_
),
PriorityEncoder
(~
valids
))
way
}
val
alloc_ways
=
(
0
until
PredictWidth
).
map
{
b
=>
Mux
(
metas
(
b
).
allocatable_way
.
valid
,
metas
(
b
).
allocatable_way
.
bits
,
LFSR64
()(
log2Ceil
(
nWays
)-
1
,
0
))}
(
0
until
PredictWidth
).
map
(
i
=>
out_ubtb_br_info
.
writeWay
(
i
)
:=
Mux
(
read_hit_vec
(
i
).
asBool
,
read_hit_ways
(
i
),
alloc_ways
(
i
)))
//response
//only when hit and instruction valid and entry valid can output data
for
(
i
<-
0
until
PredictWidth
)
{
io
.
out
.
targets
(
i
)
:=
read_resp
(
i
).
target
io
.
out
.
hits
(
i
)
:=
read_resp
(
i
).
valid
&&
read_hit_vec
(
i
)
io
.
out
.
takens
(
i
)
:=
read_resp
(
i
).
taken
io
.
out
.
is_RVC
(
i
)
:=
read_resp
(
i
).
is_RVC
io
.
out
.
brMask
(
i
)
:=
read_resp
(
i
).
is_Br
}
//uBTB update
//backend should send fetch pc to update
val
u
=
io
.
update
.
bits
val
u
=
RegNext
(
io
.
update
.
bits
)
val
update_valid
=
RegNext
(
io
.
update
.
valid
)
val
update_packet_pc
=
packetAligned
(
u
.
ftqPC
)
val
update_pcs
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
update_packet_pc
+
(
i
<<
instOffsetBits
).
U
))
val
update_write_ways
=
VecInit
(
u
.
metas
.
map
(
_
.
ubtbWriteWay
))
val
update_hits
=
u
.
metas
.
map
(
_
.
ubtbHits
)
val
update_takens
=
u
.
takens
val
update_bank
=
u
.
cfiIndex
.
bits
val
update_tag
=
getTag
(
update_packet_pc
)
val
update_target
=
u
.
target
val
update_target_lower
=
update_target
(
lowerBitsSize
-
1
+
instOffsetBits
,
instOffsetBits
)
val
update_target_lower
=
u
.
target
(
lowerBitsSize
-
1
+
instOffsetBits
,
instOffsetBits
)
// only when taken should we update target
val
entry_write_valid
=
io
.
update
.
valid
&&
u
.
valids
(
u
.
cfiIndex
.
bits
)
&&
u
.
takens
(
u
.
cfiIndex
.
bits
)
val
data_write_valids
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
update_valid
&&
u
.
valids
(
i
)
&&
u
.
takens
(
i
)))
val
meta_write_valids
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
io
.
update
.
valid
&&
u
.
valids
(
i
)
&&
(
u
.
br_mask
(
i
)
||
u
.
takens
(
i
))))
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
update_valid
&&
u
.
valids
(
i
)
&&
(
u
.
br_mask
(
i
)
||
u
.
takens
(
i
))))
val
new_preds
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
Mux
(!
update_hits
(
i
),
Mux
(
update_takens
(
i
),
3.
U
,
0.
U
),
satUpdate
(
banks
(
i
).
update_read_pred
,
2
,
update_takens
(
i
)))))
for
(
b
<-
0
until
PredictWidth
)
{
datas
(
b
).
wen
:=
do_reset
||
(
entry_write_valid
&&
b
.
U
===
update_bank
)
datas
(
b
).
wWay
:=
Mux
(
do_reset
,
reset_way
,
update_write_ways
(
u
.
cfiIndex
.
bits
))
datas
(
b
).
wdata
:=
Mux
(
do_reset
,
0.
U
.
asTypeOf
(
new
MicroBTBEntry
),
update_target_lower
.
asTypeOf
(
new
MicroBTBEntry
))
}
val
new_preds
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
Mux
(!
update_hits
(
i
),
Mux
(
update_takens
(
i
),
3.
U
,
0.
U
),
satUpdate
(
metas
(
i
).
rpred
,
2
,
update_takens
(
i
)))))
//write the uBTBMeta
(
0
until
PredictWidth
).
map
(
i
=>
metas
(
i
).
rWay
:=
update_write_ways
(
i
))
val
update_write_metas
=
Wire
(
Vec
(
PredictWidth
,
new
MicroBTBMeta
))
val
update_write_datas
=
Wire
(
Vec
(
PredictWidth
,
new
MicroBTBData
))
for
(
i
<-
0
until
PredictWidth
)
{
update_write_metas
(
i
).
is_Br
:=
u
.
br_mask
(
i
)
update_write_metas
(
i
).
is_RVC
:=
u
.
rvc_mask
(
i
)
update_write_metas
(
i
).
valid
:=
true
.
B
update_write_metas
(
i
).
tag
:=
update_tag
update_write_metas
(
i
).
pred
:=
new_preds
(
i
)
}
update_write_datas
(
i
).
lower
:=
update_target_lower
}
for
(
b
<-
0
until
PredictWidth
)
{
metas
(
b
).
wen
:=
do_reset
||
meta_write_valids
(
b
)
metas
(
b
).
wWay
:=
Mux
(
do_reset
,
reset_way
,
update_write_ways
(
b
))
metas
(
b
).
wdata
:=
Mux
(
do_reset
,
0.
U
.
asTypeOf
(
new
MicroBTBMeta
),
update_write_metas
(
b
))
banks
(
b
).
update_way
:=
update_write_ways
(
b
)
banks
(
b
).
update_write_meta
.
valid
:=
do_reset
||
meta_write_valids
(
b
)
banks
(
b
).
update_write_meta
.
bits
:=
Mux
(
do_reset
,
0.
U
.
asTypeOf
(
new
MicroBTBMeta
),
update_write_metas
(
b
))
banks
(
b
).
update_write_data
.
valid
:=
do_reset
||
data_write_valids
(
b
)
banks
(
b
).
update_write_data
.
bits
:=
Mux
(
do_reset
,
0.
U
.
asTypeOf
(
new
MicroBTBData
),
update_write_datas
(
b
))
}
if
(
BPUDebug
&&
debug
)
{
val
update_pcs
=
VecInit
((
0
until
PredictWidth
).
map
(
i
=>
update_packet_pc
+
(
i
<<
instOffsetBits
).
U
))
val
update_bank
=
u
.
cfiIndex
.
bits
val
read_valid
=
io
.
pc
.
valid
val
read_req_tag
=
getTag
(
io
.
pc
.
bits
)
val
read_hit_vec
=
VecInit
(
banks
.
map
(
b
=>
b
.
read_hit
))
val
read_hit_ways
=
VecInit
(
banks
.
map
(
b
=>
b
.
to_write_way
))
XSDebug
(
read_valid
,
"uBTB read req: pc:0x%x, tag:%x \n"
,
io
.
pc
.
bits
,
read_req_tag
)
XSDebug
(
read_valid
,
"uBTB read resp: read_hit_vec:%b, \n"
,
read_hit_vec
.
asUInt
)
for
(
i
<-
0
until
PredictWidth
)
{
XSDebug
(
read_valid
,
"bank(%d) hit:%d way:%d valid:%d is_RVC:%d taken:%d isBr:%d target:0x%x alloc_way:%d\n"
,
i
.
U
,
read_hit_vec
(
i
),
read_hit_ways
(
i
),
read_resp
(
i
).
valid
,
read_resp
(
i
).
is_RVC
,
read_resp
(
i
).
taken
,
read_resp
(
i
).
is_Br
,
read_resp
(
i
).
target
,
out_ubtb_br_info
.
writeWay
(
i
))
XSDebug
(
entry_write_valid
&&
(
i
.
U
===
update_bank
),
i
.
U
,
read_hit_vec
(
i
),
read_hit_ways
(
i
),
read_resp
s
(
i
).
valid
,
read_resps
(
i
).
is_RVC
,
read_resp
s
(
i
).
taken
,
read_resps
(
i
).
is_Br
,
read_resps
(
i
).
target
,
out_ubtb_br_info
.
writeWay
(
i
))
XSDebug
(
data_write_valids
(
i
),
"uBTB update data(%d): update | pc:0x%x | update hits:%b | update_write_way:%d | update_lower 0x%x\n "
,
i
.
U
,
update_pcs
(
i
),
update_hits
(
i
),
update_write_ways
(
i
),
update_target_lower
(
lowerBitsSize
-
1
,
0
))
XSDebug
(
meta_write_valids
(
i
),
"uBTB update meta(%d): update_taken:%d | old_pred:%b | new_pred:%b | br:%d | rvc:%d | update_tag:%x\n"
,
i
.
U
,
update_takens
(
i
),
metas
(
i
).
r
pred
,
new_preds
(
i
),
u
.
br_mask
(
i
),
u
.
rvc_mask
(
i
),
update_tag
)
i
.
U
,
update_takens
(
i
),
banks
(
i
).
update_read_
pred
,
new_preds
(
i
),
u
.
br_mask
(
i
),
u
.
rvc_mask
(
i
),
update_tag
)
}
}
if
(
extended_stat
)
{
val
high_identical
=
update_target
(
VAddrBits
-
1
,
lowerBitsSize
)
=/=
update_packet_pc
(
VAddrBits
-
1
,
lowerBitsSize
)
XSDebug
(
io
.
update
.
valid
,
"extended_stat: identical %d\n"
,
high_identical
)
}
//bypass:read-after-write
// for( b <- 0 until PredictWidth) {
// when(update_bank === b.U && meta_write_valid && read_valid
// && Mux(b.U < update_base_bank,update_tag===read_req_tag+1.U ,update_tag===read_req_tag)) //read and write is the same fetch-packet
// {
// io.out.targets(b) := u.target
// io.out.takens(b) := u.taken
// io.out.is_RVC(b) := u.pd.isRVC
// io.out.notTakens(b) := (u.pd.brType === BrType.branch) && (!io.out.takens(b))
// XSDebug("uBTB bypass hit! : hitpc:0x%x | hitbanck:%d | out_target:0x%x\n",io.pc.bits+(b<<1).asUInt(),b.U, io.out.targets(b))
// }
// }
}
\ No newline at end of file
src/main/scala/xiangshan/mem/lsqueue/LSQWrapper.scala
浏览文件 @
49681eda
...
...
@@ -29,7 +29,7 @@ class InflightBlockInfo extends XSBundle {
class
LsqEnqIO
extends
XSBundle
{
val
canAccept
=
Output
(
Bool
())
val
needAlloc
=
Vec
(
RenameWidth
,
Input
(
Bool
(
)))
val
needAlloc
=
Vec
(
RenameWidth
,
Input
(
UInt
(
2.
W
)))
val
req
=
Vec
(
RenameWidth
,
Flipped
(
ValidIO
(
new
MicroOp
)))
val
resp
=
Vec
(
RenameWidth
,
Output
(
new
LSIdx
))
}
...
...
@@ -75,15 +75,13 @@ class LsqWrappper extends XSModule with HasDCacheParameters {
loadQueue
.
io
.
enq
.
sqCanAccept
:=
storeQueue
.
io
.
enq
.
canAccept
storeQueue
.
io
.
enq
.
lqCanAccept
:=
loadQueue
.
io
.
enq
.
canAccept
for
(
i
<-
0
until
RenameWidth
)
{
val
isStore
=
CommitType
.
lsInstIsStore
(
io
.
enq
.
req
(
i
).
bits
.
ctrl
.
commitType
)
loadQueue
.
io
.
enq
.
needAlloc
(
i
)
:=
io
.
enq
.
needAlloc
(
i
)
&&
!
isStore
loadQueue
.
io
.
enq
.
req
(
i
).
valid
:=
!
isStore
&&
io
.
enq
.
req
(
i
).
valid
loadQueue
.
io
.
enq
.
needAlloc
(
i
)
:=
io
.
enq
.
needAlloc
(
i
)(
0
)
loadQueue
.
io
.
enq
.
req
(
i
).
valid
:=
io
.
enq
.
needAlloc
(
i
)(
0
)
&&
io
.
enq
.
req
(
i
).
valid
loadQueue
.
io
.
enq
.
req
(
i
).
bits
:=
io
.
enq
.
req
(
i
).
bits
storeQueue
.
io
.
enq
.
needAlloc
(
i
)
:=
io
.
enq
.
needAlloc
(
i
)
&&
isStore
storeQueue
.
io
.
enq
.
req
(
i
).
valid
:=
isStore
&&
io
.
enq
.
req
(
i
).
valid
storeQueue
.
io
.
enq
.
req
(
i
).
bits
:=
io
.
enq
.
req
(
i
).
bits
storeQueue
.
io
.
enq
.
needAlloc
(
i
)
:=
io
.
enq
.
needAlloc
(
i
)
(
1
)
storeQueue
.
io
.
enq
.
req
(
i
).
valid
:=
io
.
enq
.
needAlloc
(
i
)(
1
)
&&
io
.
enq
.
req
(
i
).
valid
storeQueue
.
io
.
enq
.
req
(
i
).
bits
:=
io
.
enq
.
req
(
i
).
bits
io
.
enq
.
resp
(
i
).
lqIdx
:=
loadQueue
.
io
.
enq
.
resp
(
i
)
io
.
enq
.
resp
(
i
).
sqIdx
:=
storeQueue
.
io
.
enq
.
resp
(
i
)
...
...
src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala
浏览文件 @
49681eda
...
...
@@ -30,7 +30,7 @@ trait HasLoadHelper { this: XSModule =>
LookupTree
(
uop
.
ctrl
.
fuOpType
,
List
(
LSUOpType
.
lb
->
SignExt
(
rdata
(
7
,
0
)
,
XLEN
),
LSUOpType
.
lh
->
SignExt
(
rdata
(
15
,
0
),
XLEN
),
LSUOpType
.
lw
->
Mux
(
fpWen
,
rdata
,
SignExt
(
rdata
(
31
,
0
),
XLEN
)),
LSUOpType
.
lw
->
Mux
(
fpWen
,
Cat
(
Fill
(
32
,
1.
U
(
1.
W
)),
rdata
(
31
,
0
))
,
SignExt
(
rdata
(
31
,
0
),
XLEN
)),
LSUOpType
.
ld
->
Mux
(
fpWen
,
rdata
,
SignExt
(
rdata
(
63
,
0
),
XLEN
)),
LSUOpType
.
lbu
->
ZeroExt
(
rdata
(
7
,
0
)
,
XLEN
),
LSUOpType
.
lhu
->
ZeroExt
(
rdata
(
15
,
0
),
XLEN
),
...
...
@@ -604,7 +604,8 @@ class LoadQueue extends XSModule
}
// Read vaddr for mem exception
vaddrModule
.
io
.
raddr
(
0
)
:=
deqPtr
+
io
.
roq
.
lcommit
// Note that both io.roq.lcommit and RegNext(io.roq.lcommit) should be taken into consideration
vaddrModule
.
io
.
raddr
(
0
)
:=
(
deqPtrExt
+
commitCount
+
io
.
roq
.
lcommit
).
value
io
.
exceptionAddr
.
vaddr
:=
vaddrModule
.
io
.
rdata
(
0
)
// misprediction recovery / exception redirect
...
...
src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
浏览文件 @
49681eda
...
...
@@ -104,7 +104,9 @@ class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueue
dataModule
.
io
.
raddr
(
i
)
:=
deqPtrExtNext
(
i
).
value
paddrModule
.
io
.
raddr
(
i
)
:=
deqPtrExtNext
(
i
).
value
}
vaddrModule
.
io
.
raddr
(
0
)
:=
cmtPtr
+
io
.
roq
.
scommit
// Note that both io.roq.scommit and RegNext(io.roq.scommit) should be taken into consideration
vaddrModule
.
io
.
raddr
(
0
)
:=
(
cmtPtrExt
(
0
)
+
commitCount
+
io
.
roq
.
scommit
).
value
/**
* Enqueue at dispatch
...
...
src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
浏览文件 @
49681eda
...
...
@@ -241,7 +241,6 @@ class LoadUnit extends XSModule with HasLoadHelper {
val
io
=
IO
(
new
Bundle
()
{
val
ldin
=
Flipped
(
Decoupled
(
new
ExuInput
))
val
ldout
=
Decoupled
(
new
ExuOutput
)
val
fpout
=
Decoupled
(
new
ExuOutput
)
val
redirect
=
Flipped
(
ValidIO
(
new
Redirect
))
val
flush
=
Input
(
Bool
())
val
tlbFeedback
=
ValidIO
(
new
TlbFeedback
)
...
...
@@ -304,53 +303,27 @@ class LoadUnit extends XSModule with HasLoadHelper {
// write to rob and writeback bus
val
s2_wb_valid
=
load_s2
.
io
.
out
.
valid
&&
!
load_s2
.
io
.
out
.
bits
.
miss
val
refillFpLoad
=
io
.
lsq
.
ldout
.
bits
.
uop
.
ctrl
.
fpWen
// Int load, if hit, will be writebacked at s2
val
intH
itLoadOut
=
Wire
(
Valid
(
new
ExuOutput
))
intHitLoadOut
.
valid
:=
s2_wb_valid
&&
!
load_s2
.
io
.
out
.
bits
.
uop
.
ctrl
.
fpWen
intH
itLoadOut
.
bits
.
uop
:=
load_s2
.
io
.
out
.
bits
.
uop
intH
itLoadOut
.
bits
.
data
:=
load_s2
.
io
.
out
.
bits
.
data
intH
itLoadOut
.
bits
.
redirectValid
:=
false
.
B
intH
itLoadOut
.
bits
.
redirect
:=
DontCare
intH
itLoadOut
.
bits
.
debug
.
isMMIO
:=
load_s2
.
io
.
out
.
bits
.
mmio
intH
itLoadOut
.
bits
.
debug
.
isPerfCnt
:=
false
.
B
intH
itLoadOut
.
bits
.
debug
.
paddr
:=
load_s2
.
io
.
out
.
bits
.
paddr
intH
itLoadOut
.
bits
.
fflags
:=
DontCare
val
h
itLoadOut
=
Wire
(
Valid
(
new
ExuOutput
))
hitLoadOut
.
valid
:=
s2_wb_valid
h
itLoadOut
.
bits
.
uop
:=
load_s2
.
io
.
out
.
bits
.
uop
h
itLoadOut
.
bits
.
data
:=
load_s2
.
io
.
out
.
bits
.
data
h
itLoadOut
.
bits
.
redirectValid
:=
false
.
B
h
itLoadOut
.
bits
.
redirect
:=
DontCare
h
itLoadOut
.
bits
.
debug
.
isMMIO
:=
load_s2
.
io
.
out
.
bits
.
mmio
h
itLoadOut
.
bits
.
debug
.
isPerfCnt
:=
false
.
B
h
itLoadOut
.
bits
.
debug
.
paddr
:=
load_s2
.
io
.
out
.
bits
.
paddr
h
itLoadOut
.
bits
.
fflags
:=
DontCare
load_s2
.
io
.
out
.
ready
:=
true
.
B
io
.
ldout
.
bits
:=
Mux
(
intHitLoadOut
.
valid
,
intH
itLoadOut
.
bits
,
io
.
lsq
.
ldout
.
bits
)
io
.
ldout
.
valid
:=
intHitLoadOut
.
valid
||
io
.
lsq
.
ldout
.
valid
&&
!
refillFpLoa
d
io
.
ldout
.
bits
:=
Mux
(
hitLoadOut
.
valid
,
h
itLoadOut
.
bits
,
io
.
lsq
.
ldout
.
bits
)
io
.
ldout
.
valid
:=
hitLoadOut
.
valid
||
io
.
lsq
.
ldout
.
vali
d
// Fp load, if hit, will be stored to reg at s2, then it will be recoded at s3, writebacked at s4
val
fpHitLoadOut
=
Wire
(
Valid
(
new
ExuOutput
))
fpHitLoadOut
.
valid
:=
s2_wb_valid
&&
load_s2
.
io
.
out
.
bits
.
uop
.
ctrl
.
fpWen
fpHitLoadOut
.
bits
:=
intHitLoadOut
.
bits
val
fpLoadUnRecodedReg
=
Reg
(
Valid
(
new
ExuOutput
))
fpLoadUnRecodedReg
.
valid
:=
fpHitLoadOut
.
valid
||
io
.
lsq
.
ldout
.
valid
&&
refillFpLoad
when
(
fpHitLoadOut
.
valid
||
io
.
lsq
.
ldout
.
valid
&&
refillFpLoad
){
fpLoadUnRecodedReg
.
bits
:=
Mux
(
fpHitLoadOut
.
valid
,
fpHitLoadOut
.
bits
,
io
.
lsq
.
ldout
.
bits
)
}
val
fpLoadRecodedReg
=
Reg
(
Valid
(
new
ExuOutput
))
when
(
fpLoadUnRecodedReg
.
valid
){
fpLoadRecodedReg
:=
fpLoadUnRecodedReg
fpLoadRecodedReg
.
bits
.
data
:=
fpRdataHelper
(
fpLoadUnRecodedReg
.
bits
.
uop
,
fpLoadUnRecodedReg
.
bits
.
data
)
// recode
}
fpLoadRecodedReg
.
valid
:=
fpLoadUnRecodedReg
.
valid
io
.
fpout
.
bits
:=
fpLoadRecodedReg
.
bits
io
.
fpout
.
valid
:=
fpLoadRecodedReg
.
valid
io
.
lsq
.
ldout
.
ready
:=
Mux
(
refillFpLoad
,
!
fpHitLoadOut
.
valid
,
!
intHitLoadOut
.
valid
)
io
.
lsq
.
ldout
.
ready
:=
!
hitLoadOut
.
valid
when
(
io
.
ldout
.
fire
()){
XSDebug
(
"ldout %x\n"
,
io
.
ldout
.
bits
.
uop
.
cf
.
pc
)
}
when
(
io
.
fpout
.
fire
()){
XSDebug
(
"fpout %x\n"
,
io
.
fpout
.
bits
.
uop
.
cf
.
pc
)
}
}
src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala
浏览文件 @
49681eda
...
...
@@ -37,9 +37,6 @@ class StoreUnit_S0 extends XSModule {
io
.
out
.
bits
.
vaddr
:=
saddr
io
.
out
.
bits
.
data
:=
genWdata
(
io
.
in
.
bits
.
src2
,
io
.
in
.
bits
.
uop
.
ctrl
.
fuOpType
(
1
,
0
))
when
(
io
.
in
.
bits
.
uop
.
ctrl
.
src2Type
===
SrcType
.
fp
){
io
.
out
.
bits
.
data
:=
io
.
in
.
bits
.
src2
}
// do not touch fp store raw data
io
.
out
.
bits
.
uop
:=
io
.
in
.
bits
.
uop
io
.
out
.
bits
.
miss
:=
DontCare
io
.
out
.
bits
.
rsIdx
:=
io
.
rsIdx
...
...
@@ -64,7 +61,6 @@ class StoreUnit_S1 extends XSModule {
val
io
=
IO
(
new
Bundle
()
{
val
in
=
Flipped
(
Decoupled
(
new
LsPipelineBundle
))
val
out
=
Decoupled
(
new
LsPipelineBundle
)
// val fp_out = Decoupled(new LsPipelineBundle)
val
lsq
=
ValidIO
(
new
LsPipelineBundle
)
val
dtlbResp
=
Flipped
(
DecoupledIO
(
new
TlbResp
))
val
tlbFeedback
=
ValidIO
(
new
TlbFeedback
)
...
...
@@ -92,7 +88,7 @@ class StoreUnit_S1 extends XSModule {
// get paddr from dtlb, check if rollback is needed
// writeback store inst to lsq
io
.
lsq
.
valid
:=
io
.
in
.
valid
&&
!
s1_tlb_miss
// TODO: && ! FP
io
.
lsq
.
valid
:=
io
.
in
.
valid
&&
!
s1_tlb_miss
io
.
lsq
.
bits
:=
io
.
in
.
bits
io
.
lsq
.
bits
.
paddr
:=
s1_paddr
io
.
lsq
.
bits
.
miss
:=
false
.
B
...
...
@@ -103,12 +99,6 @@ class StoreUnit_S1 extends XSModule {
// mmio inst with exception will be writebacked immediately
io
.
out
.
valid
:=
io
.
in
.
valid
&&
(!
io
.
out
.
bits
.
mmio
||
s1_exception
)
&&
!
s1_tlb_miss
io
.
out
.
bits
:=
io
.
lsq
.
bits
// encode data for fp store
when
(
io
.
in
.
bits
.
uop
.
ctrl
.
src2Type
===
SrcType
.
fp
){
io
.
lsq
.
bits
.
data
:=
genWdata
(
ieee
(
io
.
in
.
bits
.
data
),
io
.
in
.
bits
.
uop
.
ctrl
.
fuOpType
(
1
,
0
))
}
}
class
StoreUnit_S2
extends
XSModule
{
...
...
src/main/scala/xiangshan/mem/sbuffer/Sbuffer.scala
浏览文件 @
49681eda
...
...
@@ -484,5 +484,5 @@ class Sbuffer extends XSModule with HasSBufferConst {
XSDebug
(
line
.
valid
,
"[#%d line] Tag: %x, data: %x, mask: %x\n"
,
i
.
U
,
line
.
tag
,
line
.
data
.
asUInt
(),
line
.
mask
.
asUInt
())
}}
XSPerf
(
"waitResp"
,
waitingCacheLine
.
valid
)
XSPerf
(
"
sbuf_
waitResp"
,
waitingCacheLine
.
valid
)
}
src/test/csrc/emu.cpp
浏览文件 @
49681eda
...
...
@@ -574,6 +574,22 @@ uint64_t Emulator::execute(uint64_t max_cycle, uint64_t max_instr) {
}
}
// First-instruction commit watchdog.
// For every core that has not committed yet (!hascommit[i]), report and flag an
// abort once `lastcommit[i] - max_cycle` exceeds firstCommit_limit.
for (int i = 0; i < NumCore; i++) {
  if (lastcommit[i] - max_cycle > firstCommit_limit && !hascommit[i]) {
    // The format string names the cycle count first and the core index second,
    // so the arguments are passed in that order (they were previously swapped).
    eprintf("No instruction commits for %d cycles of core %d. Please check the first instruction.\n", firstCommit_limit, i);
    eprintf("Note: The first instruction may lie in 0x10000000 which may executes and commits after 500 cycles.\n");
    eprintf(" Or the first instruction may lie in 0x80000000 which may executes and commits after 2000 cycles.\n");
    // Pick the privilege-mode signal of the core being reported.
#ifdef DUALCORE
    int priviledgeMode = (i == 0) ? dut_ptr->io_difftest_priviledgeMode : dut_ptr->io_difftest2_priviledgeMode;
#else
    int priviledgeMode = dut_ptr->io_difftest_priviledgeMode;
#endif
    difftest_display(priviledgeMode, i);
    trapCode = STATE_ABORT;
  }
}
for
(
int
i
=
0
;
i
<
NumCore
;
i
++
)
{
#ifdef DUALCORE
int
first_instr_commit
=
(
i
==
0
)
?
dut_ptr
->
io_difftest_commit
&&
dut_ptr
->
io_difftest_thisPC
==
0x80000000u
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录