Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenXiangShan
nexus-am
提交
fcdbdc06
N
nexus-am
项目概览
OpenXiangShan
/
nexus-am
大约 1 年 前同步成功
通知
2
Star
21
Fork
25
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
N
nexus-am
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
fcdbdc06
编写于
3月 13, 2023
作者:
W
William Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
maprobe: add l2_l3_pressure_test & replacement_test
上级
d903857d
变更
7
隐藏空白更改
内联
并排
Showing
7 changed files
with
80 additions
and
14 deletions
+80
-14
apps/maprobe/Makefile
apps/maprobe/Makefile
+1
-1
apps/maprobe/bandwidth-test.c
apps/maprobe/bandwidth-test.c
+1
-1
apps/maprobe/common.c
apps/maprobe/common.c
+1
-1
apps/maprobe/include/maprobe.h
apps/maprobe/include/maprobe.h
+7
-2
apps/maprobe/latency-test.c
apps/maprobe/latency-test.c
+12
-7
apps/maprobe/main.c
apps/maprobe/main.c
+38
-2
apps/maprobe/replacement-test.c
apps/maprobe/replacement-test.c
+20
-0
未找到文件。
apps/maprobe/Makefile
浏览文件 @
fcdbdc06
NAME
=
maprobe
SRCS
=
common.c bitutils.c resultmat.c latency-test.c bandwidth-test.c main.c
SRCS
=
common.c bitutils.c resultmat.c latency-test.c bandwidth-test.c
replacement-test.c
main.c
include
$(AM_HOME)/Makefile.app
apps/maprobe/bandwidth-test.c
浏览文件 @
fcdbdc06
...
...
@@ -91,7 +91,7 @@ float test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv)
printf
(
"%ld, %f, %d, %ld, %ld
\n
"
,
size
,
(
float
)
perf
.
cycle
/
total_access
,
iter
,
total_access
,
perf
.
cycle
);
}
else
{
printf
(
"range %ldKB (%d iters) dcache linear (8Byte) store latency %f, throughput %f B/cycle (L1-L2 %f B/cycle) (%ld samples, %ld cycles), stride %dB
\n
"
,
size
/
KB
,
iter
,
(
float
)
perf
.
cycle
/
total_access
,
total_access
*
8
*
BYTE
/
(
float
)
perf
.
cycle
,
total_access
*
_PERF_CACHELINE_SIZE_BYTE
/
(
float
)
perf
.
cycle
,
total_access
,
perf
.
cycle
,
8
size
/
KB
,
iter
,
(
float
)
perf
.
cycle
/
total_access
,
total_access
*
8
*
BYTE
/
(
float
)
perf
.
cycle
,
total_access
*
_PERF_CACHELINE_SIZE_BYTE
/
(
float
)
perf
.
cycle
,
total_access
,
perf
.
cycle
,
_PERF_CACHELINE_SIZE_BYTE
);
}
_perf_g_total_samples
+=
total_access
;
...
...
apps/maprobe/common.c
浏览文件 @
fcdbdc06
...
...
@@ -6,8 +6,8 @@ uint64_t _perf_g_total_samples = 0;
void
_perf_start_timer
()
{
#ifndef PERF_SIM
perf
.
cycle
=
csr_read
(
CSR_MCYCLE
);
perf
.
instrcnt
=
csr_read
(
CSR_MINSTRET
);
perf
.
cycle
=
csr_read
(
CSR_MCYCLE
);
#endif
}
...
...
apps/maprobe/include/maprobe.h
浏览文件 @
fcdbdc06
...
...
@@ -31,13 +31,15 @@
#define _PERF_MEM_SIZE_BYTE (1024 * MB)
#define _PERF_L1_NUM_WAYS 4
#define _PERF_L1_NUM_SETS 256
#define _PERF_L2_NUM_WAYS 8
#define _PERF_L2_NUM_SLICES 4
//
#define _PERF_L2_NUM_SETS 512
#define _PERF_L2_NUM_SETS 512
#define _PERF_ADDR_STRIDE_L1_SAME_BANK _PERF_CACHELINE_SIZE_BYTE
#define _PERF_ADDR_STRIDE_L1_SAME_SET (_PERF_L1_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_L2_SAME_SLICE (_PERF_L2_NUM_SLICES * _PERF_CACHELINE_SIZE_BYTE)
// #define _PERF_ADDR_STRIDE_L2_SAME_SET (_PERF_L2_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_L1_SAME_SET (_PERF_L1_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_L2_SAME_SET (_PERF_L2_NUM_SLICES * _PERF_L2_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_NEXT_PAGE (_PERF_PAGE_SIZE_BYTE)
// probe const
...
...
@@ -70,6 +72,8 @@ extern uint64_t read_pointer_tracing_linklist(uint64_t base_addr, uint64_t num_v
extern
void
latency_test_warmup
(
uint64_t
base_addr
,
uint64_t
end_addr
);
extern
float
test_pointer_tracing_latency
(
uint64_t
size
,
int
step
,
int
iter
,
int
to_csv
);
extern
float
test_linear_access_latency
(
uint64_t
size
,
uint64_t
step
,
int
iter
,
int
to_csv
);
extern
float
test_linear_access_latency_simple
(
uint64_t
size
,
uint64_t
step
,
int
iter
,
int
to_csv
);
extern
float
test_linear_access_latency_batch8
(
uint64_t
size
,
uint64_t
step
,
int
iter
,
int
to_csv
);
extern
float
test_random_access_latency
(
uint64_t
num_access
,
uint64_t
test_range
,
uint64_t
test_align
,
int
pregen_addr
,
int
iter
,
int
to_csv
);
extern
float
test_same_address_load_latency
(
int
iter
,
int
to_csv
);
extern
float
test_read_after_write_latency
(
int
iter
,
int
to_csv
);
...
...
@@ -85,6 +89,7 @@ extern float test_l1_store_wcb_bandwidth(uint64_t size, int iter, int to_csv);
void
generate_linear_access_latency_matrix
();
void
generate_pointer_tracing_latency_matrix
();
void
generate_random_access_latency_matrix
();
void
generate_replacement_test_matrix
();
// legacy test
extern
void
legacy_test_mem_throughput
(
uint64_t
iter
);
...
...
apps/maprobe/latency-test.c
浏览文件 @
fcdbdc06
...
...
@@ -68,6 +68,7 @@ float test_pointer_tracing_latency(uint64_t size, int step, int iter, int to_csv
);
}
_perf_g_total_samples
+=
total_node
;
_perf_blackhole
(
result
);
return
acpa
;
}
...
...
@@ -95,6 +96,7 @@ float test_same_address_load_latency(int iter, int to_csv)
);
}
_perf_g_total_samples
+=
total_access
;
_perf_blackhole
(
result
);
return
acpa
;
}
...
...
@@ -106,8 +108,8 @@ float test_read_after_write_latency(int iter, int to_csv)
// _perf_print_timer();
_perf_start_timer
();
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
i
=
0
;
i
<
iter
;
i
++
)
{
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
result
+=
*
((
uint64_t
*
)
(
address
));
address
+=
sizeof
(
uint64_t
);
}
...
...
@@ -123,6 +125,7 @@ float test_read_after_write_latency(int iter, int to_csv)
);
}
_perf_g_total_samples
+=
total_access
;
_perf_blackhole
(
result
);
return
acpa
;
}
...
...
@@ -135,10 +138,10 @@ float test_linear_access_latency_simple(uint64_t size, uint64_t step, int iter,
// _perf_print_timer();
_perf_start_timer
();
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
i
=
0
;
i
<
iter
;
i
++
)
{
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
j
=
0
;
j
<
num_access
;
j
++
)
{
result
+=
*
((
uint64_t
*
)
(
address
));
result
+=
*
((
volatile
uint64_t
*
)
(
address
));
address
+=
step
;
}
}
...
...
@@ -154,6 +157,7 @@ float test_linear_access_latency_simple(uint64_t size, uint64_t step, int iter,
);
}
_perf_g_total_samples
+=
total_access
;
_perf_blackhole
(
result
);
return
acpa
;
}
...
...
@@ -177,8 +181,8 @@ float test_linear_access_latency_batch8(uint64_t size, uint64_t step, int iter,
// _perf_print_timer();
_perf_start_timer
();
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
i
=
0
;
i
<
iter
;
i
++
)
{
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
j
=
0
;
j
<
num_access
;
j
+=
8
)
{
register
uint64_t
access_addr_0
=
address
+
address_offset_0
;
register
uint64_t
access_addr_1
=
address
+
address_offset_1
;
...
...
@@ -234,8 +238,8 @@ float test_linear_write_latency_batch8(uint64_t size, uint64_t step, int iter, i
// _perf_print_timer();
_perf_start_timer
();
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
i
=
0
;
i
<
iter
;
i
++
)
{
uint64_t
address
=
_PERF_TEST_ADDR_BASE
;
for
(
int
j
=
0
;
j
<
num_access
;
j
+=
8
)
{
register
uint64_t
access_addr_0
=
address
+
address_offset_0
;
register
uint64_t
access_addr_1
=
address
+
address_offset_1
;
...
...
@@ -326,6 +330,7 @@ float test_random_access_latency(uint64_t num_access, uint64_t test_range, uint6
);
}
_perf_g_total_samples
+=
total_access
;
_perf_blackhole
(
result
);
return
acpa
;
}
...
...
@@ -395,9 +400,9 @@ void generate_pointer_tracing_latency_matrix()
void
generate_random_access_latency_matrix
()
{
#define RANDOM_ACCESS_MATRIX_SIZE_MAX_POW2_KB
10
#define RANDOM_ACCESS_MATRIX_SIZE_MAX_POW2_KB
6
// RANDOM_ACCESS_MATRIX_SIZE_MAX_POW2_KB 10: from 1KB to 512KB
#define RANDOM_ACCESS_MATRIX_ACCESS_MAX_POW2_KB
10
#define RANDOM_ACCESS_MATRIX_ACCESS_MAX_POW2_KB
6
// RANDOM_ACCESS_MATRIX_ACCESS_MAX_POW2_KB 10: from 1KB to 512KB
DEFINE_FLOAT_RESULT_MATRIX
(
random_access_latency
,
test_range_size_kb_pow2
,
RANDOM_ACCESS_MATRIX_SIZE_MAX_POW2_KB
,
access_size_kb_pow2
,
RANDOM_ACCESS_MATRIX_ACCESS_MAX_POW2_KB
);
FOR
(
x
,
RANDOM_ACCESS_MATRIX_SIZE_MAX_POW2_KB
)
{
random_access_latency_row_array
[
x
]
=
x
;
}
...
...
apps/maprobe/main.c
浏览文件 @
fcdbdc06
...
...
@@ -122,6 +122,9 @@ void typical_l1_access_test_set()
printf
(
"ideal write combine buffer bandwidth:
\n
"
);
test_l1_store_wcb_bandwidth
(
_PERF_L1_SIZE_BYTE
,
2
,
0
);
test_l1_store_wcb_bandwidth
(
_PERF_L1_SIZE_BYTE
,
5
,
0
);
printf
(
"replacement error penalty:
\n
"
);
test_linear_access_latency_simple
(
_PERF_ADDR_STRIDE_L1_SAME_SET
*
32
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
64
,
0
);
test_linear_access_latency_simple
(
_PERF_ADDR_STRIDE_L1_SAME_SET
*
32
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
64
,
0
);
}
// typical latency test for fast regression
...
...
@@ -169,6 +172,7 @@ void latency_test_example()
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
sizeof
(
uint64_t
),
5
,
0
);
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
sizeof
(
uint64_t
),
5
,
0
);
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
5
,
0
);
test_linear_access_latency_simple
(
_PERF_ADDR_STRIDE_L1_SAME_SET
*
4
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
8
,
0
);
test_random_access_latency
(
4096
,
1024
*
MB
,
_PERF_CACHELINE_SIZE_BYTE
,
0
,
1
,
0
);
test_random_access_latency
(
4096
,
1024
*
MB
,
_PERF_CACHELINE_SIZE_BYTE
,
1
,
1
,
0
);
test_same_address_load_latency
(
1024
,
0
);
...
...
@@ -176,6 +180,32 @@ void latency_test_example()
printf
(
"total samples: %ld
\n
"
,
_perf_g_total_samples
);
}
/*
 * Run test_linear_access_latency_simple() over a sweep of region sizes,
 * printing the current multiplier before each L2-stride measurement.
 *
 * Every call passes size = stride * multiplier, step = stride, iter = 64 and
 * to_csv = 0.
 *
 *   Phase 1: multiplier 1..15, _PERF_ADDR_STRIDE_L2_SAME_SET, two calls each.
 *   Phase 2: multiplier 16..512 (doubling), same stride, one call each.
 *   Phase 3: multiplier 32..512 (doubling), _PERF_ADDR_STRIDE_L1_SAME_SET,
 *            two calls each and no header line printed.
 */
void l2_l3_pressure_test()
{
    int ways;

    /* Phase 1: fine-grained sweep, each point measured twice. */
    ways = 1;
    while (ways < 16) {
        printf("ways accessed: %d\n", ways);
        test_linear_access_latency_simple(_PERF_ADDR_STRIDE_L2_SAME_SET * ways,
                                          _PERF_ADDR_STRIDE_L2_SAME_SET, 64, 0);
        test_linear_access_latency_simple(_PERF_ADDR_STRIDE_L2_SAME_SET * ways,
                                          _PERF_ADDR_STRIDE_L2_SAME_SET, 64, 0);
        ways++;
    }

    /* Phase 2: power-of-two sweep, each point measured once. */
    ways = 16;
    while (ways <= 512) {
        printf("ways accessed: %d\n", ways);
        // jump at i = 32
        test_linear_access_latency_simple(_PERF_ADDR_STRIDE_L2_SAME_SET * ways,
                                          _PERF_ADDR_STRIDE_L2_SAME_SET, 64, 0);
        ways *= 2;
    }

    /*
     * Phase 3: L1 same-set stride at 32, 64, 128, 256 and 512 strides, each
     * measured twice in a row.
     */
    // jump at i = 128
    for (int lines = 32; lines <= 512; lines *= 2) {
        test_linear_access_latency_simple(_PERF_ADDR_STRIDE_L1_SAME_SET * lines,
                                          _PERF_ADDR_STRIDE_L1_SAME_SET, 64, 0);
        test_linear_access_latency_simple(_PERF_ADDR_STRIDE_L1_SAME_SET * lines,
                                          _PERF_ADDR_STRIDE_L1_SAME_SET, 64, 0);
    }
}
void
legacy_latency_throughput_test
()
{
_perf_calibrate
();
...
...
@@ -194,17 +224,23 @@ void legacy_latency_throughput_test()
// test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE,2, 0);
// printf("MEM:\n");
// test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE*2, _PERF_CACHELINE_SIZE_BYTE,2, 0);
printf
(
"total samples: %ld
\n
"
,
_perf_g_total_samples
);
printf
(
"total sampl
8
es: %ld
\n
"
,
_perf_g_total_samples
);
}
int
main
()
{
l2_l3_pressure_test
();
return
0
;
generate_replacement_test_matrix
();
latency_test_example
();
generate_linear_access_latency_matrix
();
generate_pointer_tracing_latency_matrix
();
generate_random_access_latency_matrix
();
generate_replacement_test_matrix
();
// matrix_print_example();
latency_test_example
();
typical_latency_test
();
// pointer_tracing_graph();
// latency_test();
...
...
apps/maprobe/replacement-test.c
0 → 100644
浏览文件 @
fcdbdc06
#include "maprobe.h"
/*
 * Fill and print the "replacement_test" float result matrix.
 *
 * Row r is labelled r + 1 and is measured with
 * test_linear_access_latency_simple() using a size of (r + 1) L1 same-set
 * strides and a step of one such stride. Column 0 holds the result of the
 * warm-up run (64 iterations); the remaining columns hold the measured runs
 * (256 iterations for the first four rows, 64 afterwards).
 */
void generate_replacement_test_matrix()
{
#define REPLACEMENT_TEST_MAX_WAY 17 // number of rows; row r is labelled r + 1
#define REPLACEMENT_TEST_ITER 5 // 1 warmup + 4 test
    /* Need at least the warm-up column plus one measured column. */
    assert(REPLACEMENT_TEST_ITER >= 2);
    DEFINE_FLOAT_RESULT_MATRIX(replacement_test, num_way_accessed, REPLACEMENT_TEST_MAX_WAY, iter, REPLACEMENT_TEST_ITER);

    /* Column labels are simply the run index. */
    FOR(x, REPLACEMENT_TEST_ITER) {
        replacement_test_column_array[x] = x;
    }

    for (int row = 0; row < REPLACEMENT_TEST_MAX_WAY; row++) {
        const int num_ways = row + 1;
        replacement_test_row_array[row] = num_ways;

        const int warmup_iters = 64;
        int measured_iters = 64;
        if (row < 4) {
            measured_iters = 256;
        }

        /* Column 0 is the warm-up run; every later column is a measured run. */
        for (int col = 0; col < REPLACEMENT_TEST_ITER; col++) {
            const int iters = (col == 0) ? warmup_iters : measured_iters;
            replacement_test_result_array[row][col] =
                test_linear_access_latency_simple(_PERF_ADDR_STRIDE_L1_SAME_SET * num_ways,
                                                  _PERF_ADDR_STRIDE_L1_SAME_SET,
                                                  iters,
                                                  0);
        }
    }

    print_float_result_matrix(&replacement_test_matrix_meta);
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录