Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenXiangShan
nexus-am
提交
421b3b8f
N
nexus-am
项目概览
OpenXiangShan
/
nexus-am
大约 1 年 前同步成功
通知
2
Star
21
Fork
25
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
N
nexus-am
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
421b3b8f
编写于
3月 06, 2023
作者:
W
William Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
maprobe: add linear read, random read, l-l vio test
上级
4f4982b1
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
337 addition
and
33 deletion
+337
-33
apps/maprobe/common.c
apps/maprobe/common.c
+1
-0
apps/maprobe/include/maprobe.h
apps/maprobe/include/maprobe.h
+25
-10
apps/maprobe/latency-test.c
apps/maprobe/latency-test.c
+144
-12
apps/maprobe/main.c
apps/maprobe/main.c
+167
-11
未找到文件。
apps/maprobe/common.c
浏览文件 @
421b3b8f
#include "maprobe.h"
struct
perf
perf
;
uint64_t
_perf_g_total_samples
=
0
;
void
_perf_start_timer
()
{
...
...
apps/maprobe/include/maprobe.h
浏览文件 @
421b3b8f
...
...
@@ -22,12 +22,22 @@
// #define _PERF_TEST_ADDR_BASE 0x2000400000
#endif
#define _PERF_CACHELINE_SIZE_BYTE (64 * BYTE)
#define _PERF_L1_NOALIAS_SIZE_BYTE (32 * KB)
#define _PERF_PAGE_SIZE_BYTE (4 * KB)
#define _PERF_L1_NOALIAS_SIZE_BYTE (16 * KB)
#define _PERF_L1_SIZE_BYTE (64 * KB)
#define _PERF_L2_SIZE_BYTE (512 * KB)
#define _PERF_L3_SIZE_BYTE (2 * MB)
#define _PERF_L2_SIZE_BYTE (1 * MB)
#define _PERF_L3_SIZE_BYTE (6 * MB)
#define _PERF_MEM_SIZE_BYTE (1024 * MB)
#define _PERF_L1_NUM_WAYS 4
#define _PERF_SET_SIZE_BYTE (_PERF_L1_SIZE_BYTE / _PERF_L1_NUM_WAYS)
#define _PERF_L1_NUM_SETS 256
#define _PERF_L2_NUM_SLICES 4
// #define _PERF_L2_NUM_SETS 512
#define _PERF_ADDR_STRIDE_L1_SAME_BANK _PERF_CACHELINE_SIZE_BYTE
#define _PERF_ADDR_STRIDE_L1_SAME_SET (_PERF_L1_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_L2_SAME_SLICE (_PERF_L2_NUM_SLICES * _PERF_CACHELINE_SIZE_BYTE)
// #define _PERF_ADDR_STRIDE_L2_SAME_SET (_PERF_L2_NUM_SETS * _PERF_CACHELINE_SIZE_BYTE)
#define _PERF_ADDR_STRIDE_NEXT_PAGE (_PERF_PAGE_SIZE_BYTE)
// probe const
#define _PERF_BLACKHOLE _PERF_TEST_ADDR_BASE
...
...
@@ -42,9 +52,10 @@ struct perf
uint64_t
cycle
;
uint64_t
instrcnt
;
};
extern
struct
perf
perf
;
extern
uint64_t
_perf_g_total_samples
;
// common perf tools
extern
void
_perf_start_timer
();
extern
void
_perf_end_timer
();
...
...
@@ -53,11 +64,15 @@ extern void _perf_calibrate();
extern
void
_perf_blackhole
(
uint64_t
value
);
// latency test
extern
uint64_t
setup_
latency_test
_linklist
(
uint64_t
base_addr
,
uint64_t
end_addr
,
uint64_t
step
);
extern
uint64_t
read_
latency_test
_linklist
(
uint64_t
base_addr
,
uint64_t
num_valid_node
);
extern
uint64_t
setup_
pointer_tracing
_linklist
(
uint64_t
base_addr
,
uint64_t
end_addr
,
uint64_t
step
);
extern
uint64_t
read_
pointer_tracing
_linklist
(
uint64_t
base_addr
,
uint64_t
num_valid_node
);
extern
void
latency_test_warmup
(
uint64_t
base_addr
,
uint64_t
end_addr
);
extern
void
test_latency
(
uint64_t
size
,
int
iter
);
extern
void
test_mem_throughput
(
uint64_t
iter
);
extern
void
test_mem_throughput_same_set
(
uint64_t
iter
);
extern
void
test_pointer_tracing_latency
(
uint64_t
size
,
int
step
,
int
iter
,
int
to_csv
);
extern
void
test_linear_access_latency
(
uint64_t
size
,
uint64_t
step
,
int
iter
,
int
to_csv
);
extern
void
test_random_access_latency
(
uint64_t
num_access
,
uint64_t
test_range
,
uint64_t
test_align
,
int
pregen_addr
,
int
iter
,
int
to_csv
);
extern
void
test_same_address_load_latency
(
int
iter
,
int
to_csv
);
extern
void
legacy_test_mem_throughput
(
uint64_t
iter
);
extern
void
legacy_test_mem_throughput_same_set
(
uint64_t
iter
);
#endif
\ No newline at end of file
apps/maprobe/latency-test.c
浏览文件 @
421b3b8f
#include "maprobe.h"
uint64_t
setup_latency_test_linklist
(
uint64_t
base_addr
,
uint64_t
end_addr
,
uint64_t
step
)
// inline uint64_t get_next_linear_address(uint64_t current_addr, uint64_t step) {
// return current_addr + step;
// }
// Return a pseudo-random address in [base_addr, end_addr), rounded DOWN to
// a multiple of align.
//
// Assumes base_addr is itself align-aligned; an unaligned base could yield
// a result slightly below base_addr because of the floor-rounding.
// rand() % range carries the usual modulo bias, which is acceptable for a
// cache-probing workload.
//
// BUGFIX: guard against an empty range (end_addr == base_addr would make
// the modulo divide by zero — UB) and a zero align. Also made the function
// `static inline`: a plain C99 `inline` definition provides no external
// definition, which can leave an unresolved symbol at -O0.
static inline uint64_t generate_rand_address(uint64_t base_addr, uint64_t end_addr, uint64_t align)
{
    assert(end_addr > base_addr); // empty range would divide by zero below
    assert(align != 0);
    return (rand() % (end_addr - base_addr) + base_addr) / align * align;
}
// Fill dest[0..number) with pseudo-random, align-rounded addresses drawn
// from [base_addr, end_addr). Used to pre-generate targets so address
// generation cost is kept out of the timed loop.
void generate_rand_address_array(uint64_t *dest, uint64_t base_addr, uint64_t end_addr, uint64_t align, int number)
{
    for (int idx = 0; idx < number; idx++) {
        dest[idx] = generate_rand_address(base_addr, end_addr, align);
    }
}
// Thin wrapper: build the pointer-tracing linked list over
// [base_addr, end_addr) with the given stride and return the node count.
uint64_t generate_pointer_tracing_address(uint64_t base_addr, uint64_t end_addr, uint64_t step)
{
    uint64_t node_count = setup_pointer_tracing_linklist(base_addr, end_addr, step);
    return node_count;
}
uint64_t
setup_pointer_tracing_linklist
(
uint64_t
base_addr
,
uint64_t
end_addr
,
uint64_t
step
)
{
uint64_t
num_valid_node
=
0
;
assert
(
step
%
8
==
0
);
...
...
@@ -14,7 +32,7 @@ uint64_t setup_latency_test_linklist(uint64_t base_addr, uint64_t end_addr, uint
return
num_valid_node
;
}
uint64_t
read_
latency_test
_linklist
(
uint64_t
base_addr
,
uint64_t
num_valid_node
)
uint64_t
read_
pointer_tracing
_linklist
(
uint64_t
base_addr
,
uint64_t
num_valid_node
)
{
uint64_t
cur_addr
=
base_addr
;
for
(
int
i
=
0
;
i
<
num_valid_node
;
i
++
)
{
...
...
@@ -25,31 +43,145 @@ uint64_t read_latency_test_linklist(uint64_t base_addr, uint64_t num_valid_node)
void
latency_test_warmup
(
uint64_t
base_addr
,
uint64_t
end_addr
)
{
setup_
latency_test
_linklist
(
base_addr
,
end_addr
,
_PERF_CACHELINE_SIZE_BYTE
);
setup_
pointer_tracing
_linklist
(
base_addr
,
end_addr
,
_PERF_CACHELINE_SIZE_BYTE
);
}
void
test_
latency
(
uint64_t
size
,
int
iter
)
void
test_
pointer_tracing_latency
(
uint64_t
size
,
int
step
,
int
iter
,
int
to_csv
)
{
volatile
uint64_t
result
=
0
;
// make sure compiler will not opt read_latency_test_linklist
printf
(
"range 0x%xB (%d iters) latency test
\n
"
,
size
,
iter
);
// printf("pointer tracing latency test\n");
// printf("range (B), read latency, iters, samples, cycles\n");
volatile
uint64_t
result
=
0
;
// make sure compiler will not opt read_pointer_tracing_linklist
_perf_start_timer
();
uint64_t
nnode
=
setup_
latency_test_linklist
(
_PERF_TEST_ADDR_BASE
,
_PERF_TEST_ADDR_BASE
+
size
,
_PERF_CACHELINE_SIZE_BYTE
);
uint64_t
nnode
=
setup_
pointer_tracing_linklist
(
_PERF_TEST_ADDR_BASE
,
_PERF_TEST_ADDR_BASE
+
size
,
step
);
_perf_end_timer
();
uint64_t
total_node
=
nnode
*
iter
;
// _perf_print_timer();
_perf_start_timer
();
for
(
int
i
=
0
;
i
<
iter
;
i
++
)
{
result
+=
read_
latency_test
_linklist
(
_PERF_TEST_ADDR_BASE
,
nnode
);
result
+=
read_
pointer_tracing
_linklist
(
_PERF_TEST_ADDR_BASE
,
nnode
);
}
_perf_end_timer
();
// _perf_print_timer();
printf
(
"range 0x%xB (%d iters) read latency %f (%d samples)
\n
"
,
size
,
iter
,
(
float
)
perf
.
cycle
/
total_node
,
total_node
);
if
(
to_csv
)
{
printf
(
"%ld, %f, %d, %ld, %ld
\n
"
,
size
,
(
float
)
perf
.
cycle
/
total_node
,
iter
,
total_node
,
perf
.
cycle
);
}
else
{
printf
(
"range %ldKB (%d iters) pointer tracing read latency %f, throughput %f B/cycle (%ld samples, %ld cycles)
\n
"
,
size
/
KB
,
iter
,
(
float
)
perf
.
cycle
/
total_node
,
total_node
*
8
*
BYTE
/
(
float
)
perf
.
cycle
,
total_node
,
perf
.
cycle
);
}
_perf_blackhole
(
result
);
_perf_g_total_samples
+=
total_node
;
}
// Measure the latency of repeatedly loading the SAME address `iter` times
// (memory-disambiguation / store-to-load-forwarding style probe).
// to_csv selects machine-readable output.
// printf("same address load latency test\n");
// printf("range (B), read latency, iters, samples, cycles\n");
void test_same_address_load_latency(int iter, int to_csv)
{
    volatile uint64_t result = 0; // make sure compiler will not opt out the loads
    // _perf_print_timer();
    _perf_start_timer();
    uint64_t address = _PERF_TEST_ADDR_BASE;
    for (int i = 0; i < iter; i++) {
        result += *((uint64_t *)(address));
    }
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = iter;
    if (to_csv) {
        // BUGFIX: the range column was passed as the int literal 0 while the
        // format string expects %ld (64-bit) — a format/argument mismatch and
        // formally UB in variadic printf. Pass a 64-bit zero instead.
        printf("%ld, %f, %d, %ld, %ld\n", (uint64_t)0, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("same address read latency %f, throughput %f B/cycle (%ld samples, %ld cycles)\n",
               (float)perf.cycle / total_access,
               total_access * 8 * BYTE / (float)perf.cycle,
               total_access,
               perf.cycle);
    }
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
// Measure latency of a linear read sweep: size/step loads with a constant
// stride of `step` bytes, repeated `iter` times. to_csv selects
// machine-readable output.
// printf("stride %d linear access latency test\n", step);
// printf("range (B), read latency, iters, samples, cycles\n");
void test_linear_access_latency(uint64_t size, uint64_t step, int iter, int to_csv)
{
    volatile uint64_t result = 0; // make sure compiler will not opt out the loads
    uint64_t num_access = size / step;
    // _perf_print_timer();
    _perf_start_timer();
    uint64_t address = _PERF_TEST_ADDR_BASE;
    for (int i = 0; i < iter; i++) {
        // NOTE(review): `address` is NOT reset at the top of each iteration,
        // so iter > 1 keeps walking forward past `size` into fresh memory
        // rather than re-reading the same window — confirm this is intended.
        for (int j = 0; j < num_access; j++) {
            result += *((uint64_t *)(address));
            address += step;
        }
    }
    _perf_end_timer();
    // _perf_print_timer();
    uint64_t total_access = num_access * iter;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", size, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        // BUGFIX: `step` is uint64_t but was printed with %d — a
        // format/argument mismatch (UB in variadic printf). Use %ld.
        printf("range %ldKB (%d iters) linear read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), stride %ldB\n",
               size / KB,
               iter,
               (float)perf.cycle / total_access,
               total_access * 8 * BYTE / (float)perf.cycle,
               total_access,
               perf.cycle,
               step);
    }
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
// Measure random-access load latency over a window of `test_range` bytes.
// num_access loads per iteration, addresses rounded to `test_align`.
// pregen_addr != 0: addresses are generated up front into an array so the
// timed loop excludes rand() cost; otherwise addresses are generated inside
// the timed loop. to_csv selects machine-readable output.
// printf("align %d random access (cache line) latency test, %s\n",
//     test_align, pregen_addr ? "use pregen addr array" : "gen rand addr at run time");
// printf("range (B), read latency, iters, samples, cycles\n");
void test_random_access_latency(uint64_t num_access, uint64_t test_range, uint64_t test_align, int pregen_addr, int iter, int to_csv)
{
    volatile uint64_t result = 0; // keep the loads observable to the compiler
    // _perf_print_timer();
    // alloc memory for random access addr array and data
    assert(test_align >= 8 * BYTE);
    // assert(size >= test_align);
    // uint64_t num_access = size / test_align;

    if (pregen_addr) {
        // Address array lives at the base; the probed window starts after it.
        uint64_t test_array_base_addr = _PERF_TEST_ADDR_BASE + num_access * sizeof(uint64_t *);
        uint64_t address_array_base_addr = _PERF_TEST_ADDR_BASE;
        generate_rand_address_array((uint64_t *)address_array_base_addr,
                                    test_array_base_addr,
                                    test_array_base_addr + test_range,
                                    test_align,
                                    num_access);
        _perf_start_timer();
        for (int rep = 0; rep < iter; rep++) {
            for (int j = 0; j < num_access; j++) {
                // NOTE(review): this loads the address-array ENTRY itself
                // (a linear walk of the pregen array); the generated random
                // address is never dereferenced — confirm that is intended.
                result += *((uint64_t *)(address_array_base_addr + j * sizeof(uint64_t *)));
            }
        }
        _perf_end_timer();
    } else {
        _perf_start_timer();
        for (int rep = 0; rep < iter; rep++) {
            for (int j = 0; j < num_access; j++) {
                result += *((uint64_t *)(generate_rand_address(_PERF_TEST_ADDR_BASE,
                                                               _PERF_TEST_ADDR_BASE + test_range,
                                                               test_align)));
            }
        }
        _perf_end_timer();
    }
    // _perf_print_timer();

    uint64_t total_access = num_access * iter;
    if (to_csv) {
        printf("%ld, %f, %d, %ld, %ld\n", test_range, (float)perf.cycle / total_access, iter, total_access, perf.cycle);
    } else {
        printf("range %ldKB, access cover %ldKB (%d iters) random read latency %f, throughput %f B/cycle (%ld samples, %ld cycles), align %ldB, %s\n",
               test_range / KB,
               total_access * 8 * _PERF_CACHELINE_SIZE_BYTE / KB,
               iter,
               (float)perf.cycle / (total_access),
               total_access * 8 * BYTE / (float)perf.cycle,
               total_access,
               perf.cycle,
               test_align,
               pregen_addr ? "pregen addr" : "runtime addr");
    }
    _perf_blackhole(result);
    _perf_g_total_samples += total_access;
}
void
test_mem_throughput
(
uint64_t
iter
)
void
legacy_
test_mem_throughput
(
uint64_t
iter
)
{
uint64_t
remain
=
iter
;
uint64_t
result
=
0
;
...
...
@@ -64,7 +196,7 @@ void test_mem_throughput(uint64_t iter)
printf
(
"mem band width %f B/cycle (%d samples)
\n
"
,
(
float
)
iter
*
_PERF_CACHELINE_SIZE_BYTE
/
perf
.
cycle
,
iter
);
}
void
test_mem_throughput_same_set
(
uint64_t
iter
)
void
legacy_
test_mem_throughput_same_set
(
uint64_t
iter
)
{
uint64_t
remain
=
iter
;
uint64_t
result
=
0
;
...
...
@@ -72,7 +204,7 @@ void test_mem_throughput_same_set(uint64_t iter)
_perf_start_timer
();
while
(
remain
--
)
{
result
+=
*
(
uint64_t
*
)
access_addr
;
access_addr
+=
_PERF_
SET_SIZE_BYTE
;
access_addr
+=
_PERF_
ADDR_STRIDE_L1_SAME_SET
;
}
_perf_end_timer
();
*
(
uint64_t
*
)
_PERF_BLACKHOLE
=
result
;
...
...
apps/maprobe/main.c
浏览文件 @
421b3b8f
#include <klib.h>
#include "maprobe.h"
int
main
()
void
typical_linear_load_test_set
()
{
_perf_calibrate
();
printf
(
"------------- linear load test set -------------
\n
"
);
printf
(
"page size linear double word load:
\n
"
);
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
sizeof
(
uint64_t
),
1
,
0
);
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
sizeof
(
uint64_t
),
2
,
0
);
printf
(
"page size linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
1
,
0
);
test_linear_access_latency
(
_PERF_PAGE_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
printf
(
"dcache/2 linear double word load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
/
2
,
sizeof
(
uint64_t
),
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
/
2
,
sizeof
(
uint64_t
),
2
,
0
);
printf
(
"dcache/2 linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
printf
(
"dcache linear double word load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
sizeof
(
uint64_t
),
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
sizeof
(
uint64_t
),
2
,
0
);
printf
(
"dcache linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
printf
(
"L2 linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
printf
(
"L1 (L1 same set) linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
10
,
0
);
printf
(
"L2 (L1 same set) linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
1
,
0
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L1_SAME_SET
,
2
,
0
);
printf
(
"L1 (L2 same slice) linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L2_SAME_SLICE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L2_SAME_SLICE
,
2
,
0
);
printf
(
"L2 (L2 same slice) linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L2_SAME_SLICE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
,
_PERF_ADDR_STRIDE_L2_SAME_SLICE
,
2
,
0
);
printf
(
"L1 (page traverse) linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_ADDR_STRIDE_NEXT_PAGE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_ADDR_STRIDE_NEXT_PAGE
,
10
,
0
);
printf
(
"L2 (page traverse) linear cache line load:
\n
"
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
,
_PERF_ADDR_STRIDE_NEXT_PAGE
,
1
,
0
);
test_linear_access_latency
(
_PERF_L2_SIZE_BYTE
,
_PERF_ADDR_STRIDE_NEXT_PAGE
,
2
,
0
);
printf
(
"total samples: %ld
\n
"
,
_perf_g_total_samples
);
}
// Standard random-load regression set. Every case is run twice:
// pregen_addr=1 (addresses generated up front) then pregen_addr=0
// (addresses generated inside the timed loop).
void typical_random_load_test_set()
{
    printf("------------- random load test set -------------\n");

    printf("from page size random load (word):\n");
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, 8 * BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, 8 * BYTE, 0, 1, 0);

    printf("from page size random load (cache line):\n");
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);

    printf("from dcache/2 size random load (word):\n");
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE / 2, 8 * BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE / 2, 8 * BYTE, 0, 1, 0);

    printf("from dcache/2 size random load (cache line):\n");
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(1024, _PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);

    printf("from dcache size random load (word):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L1_SIZE_BYTE, 8 * BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L1_SIZE_BYTE, 8 * BYTE, 0, 1, 0);

    printf("from dcache size random load (cache line):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);

    printf("from l2 size random load (word):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L2_SIZE_BYTE, 8 * BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L2_SIZE_BYTE, 8 * BYTE, 0, 1, 0);

    printf("from l2 size random load (cache line):\n");
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_random_access_latency(_PERF_L1_SIZE_BYTE / _PERF_CACHELINE_SIZE_BYTE * 2, _PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);

    printf("total samples: %ld\n", _perf_g_total_samples);
}
// Standard pointer-tracing regression set: chase linked lists built at
// cacheline stride, then at page stride.
void typical_pointer_tracing_load_test_set()
{
    printf("------------- pointer tracing load test set -------------\n");

    printf("cacheline by cacheline tracing:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 2, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 1, 0);
    test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE / 2, _PERF_CACHELINE_SIZE_BYTE, 1, 0);

    printf("page by page, tracing:\n");
    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE * 2, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE / 2, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L1_SIZE_BYTE, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE / 2, _PERF_PAGE_SIZE_BYTE, 10, 0);
    test_pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_PAGE_SIZE_BYTE, 10, 0);

    printf("total samples: %ld\n", _perf_g_total_samples);
}
// Memory-disambiguation regression set: three identical same-address
// load-latency runs (repetition reduces noise).
void typical_memory_disambiuation_test_set()
{
    printf("------------- memory disambiuation test set -------------\n");
    printf("load from the same address:\n");
    for (int run = 0; run < 3; run++) {
        test_same_address_load_latency(1024, 0);
    }
    // more to be added
}
// Typical latency test for fast regression: reset the global sample
// counter, then run every standard test set in order.
void typical_latency_test()
{
    _perf_g_total_samples = 0;

    typical_linear_load_test_set();
    typical_random_load_test_set();
    typical_pointer_tracing_load_test_set();
    typical_memory_disambiuation_test_set();
}
// Emit CSV data points for a pointer-tracing latency graph: fine-grained
// 1KB steps up to 64KB, 64KB steps up to 1MB, then 1MB steps up to 8MB.
void pointer_tracing_graph()
{
    _perf_g_total_samples = 0;
    _perf_calibrate();
    printf("data for pointer tracing latency graph:\n");
    printf("range (B), read latency, iters, samples\n");

    // Small ranges: 2 iterations each for stability.
    for (int range = 1 * KB; range < 64 * KB; range += 1 * KB) {
        test_pointer_tracing_latency(range, _PERF_CACHELINE_SIZE_BYTE, 2, 1);
    }
    // Medium ranges: single iteration.
    for (int range = 64 * KB; range < 1024 * KB; range += 64 * KB) {
        test_pointer_tracing_latency(range, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
    }
    test_pointer_tracing_latency(1024 * KB, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
    // Large ranges up to 8MB.
    for (int range = 1 * MB; range < 8 * MB; range += 1 * MB) {
        test_pointer_tracing_latency(range, _PERF_CACHELINE_SIZE_BYTE, 1, 1);
    }

    printf("total samples: %ld\n", _perf_g_total_samples);
}
// A simple test set used to check that the probes themselves work:
// one run of each latency-test flavor.
void latency_test_example()
{
    _perf_calibrate();
    printf("latency test example:\n");

    test_pointer_tracing_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_linear_access_latency(_PERF_PAGE_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 5, 0);
    test_random_access_latency(4096, 1024 * MB, _PERF_CACHELINE_SIZE_BYTE, 0, 1, 0);
    test_random_access_latency(4096, 1024 * MB, _PERF_CACHELINE_SIZE_BYTE, 1, 1, 0);
    test_same_address_load_latency(1024, 0);

    printf("total samples: %ld\n", _perf_g_total_samples);
}
void
legacy_latency_throughput_test
()
{
_perf_calibrate
();
printf
(
"Memory throughput:
\n
"
);
test_mem_throughput
(
512
);
legacy_test_mem_throughput
(
1024
);
printf
(
"L1 latency:
\n
"
);
test_
latency
(
4
*
KB
,
5
);
test_
latency
(
_PERF_L1_NOALIAS_SIZE_BYTE
,
2
);
test_
latency
(
_PERF_L1_SIZE_BYTE
/
2
,
2
);
test_
latency
(
_PERF_L1_SIZE_BYTE
,
2
);
test_
pointer_tracing_latency
(
_PERF_PAGE_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
5
,
0
);
test_
pointer_tracing_latency
(
_PERF_L1_NOALIAS_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
test_
pointer_tracing_latency
(
_PERF_L1_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
test_
pointer_tracing_latency
(
_PERF_L1_SIZE_BYTE
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
printf
(
"L2 latency:
\n
"
);
test_
latency
(
_PERF_L2_SIZE_BYTE
/
2
,
2
);
// test_
latency(_PERF_L2_SIZE_BYTE, 2
);
test_
pointer_tracing_latency
(
_PERF_L2_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
// test_
pointer_tracing_latency(_PERF_L2_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE, 2, 0
);
printf
(
"L3 latency:
\n
"
);
test_
latency
(
_PERF_L3_SIZE_BYTE
/
2
,
2
);
// test_
latency(_PERF_L3_SIZE_BYTE,2
);
test_
pointer_tracing_latency
(
_PERF_L3_SIZE_BYTE
/
2
,
_PERF_CACHELINE_SIZE_BYTE
,
2
,
0
);
// test_
pointer_tracing_latency(_PERF_L3_SIZE_BYTE, _PERF_CACHELINE_SIZE_BYTE,2, 0
);
// printf("MEM:\n");
// test_latency(_PERF_L3_SIZE_BYTE*2,2);
// test_pointer_tracing_latency(_PERF_L3_SIZE_BYTE*2, _PERF_CACHELINE_SIZE_BYTE,2, 0);
printf
(
"total samples: %ld
\n
"
,
_perf_g_total_samples
);
}
// Entry point: run the sanity example followed by the fast regression set.
// Other suites are available but disabled by default.
int main()
{
    latency_test_example();
    typical_latency_test();

    // pointer_tracing_graph();
    // latency_test();
    // legacy_latency_throughput_test();

    return 0;
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录