Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenXiangShan
nexus-am
提交
c8d9202c
N
nexus-am
项目概览
OpenXiangShan
/
nexus-am
10 个月 前同步成功
通知
0
Star
21
Fork
25
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
N
nexus-am
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
c8d9202c
编写于
6月 16, 2022
作者:
W
William Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
apps: add basic latency and throughput test
上级
16f3b2bd
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
166 addition
and
0 deletion
+166
-0
apps/maprobe/Makefile
apps/maprobe/Makefile
+3
-0
apps/maprobe/include/maprobe.h
apps/maprobe/include/maprobe.h
+139
-0
apps/maprobe/maprobe.c
apps/maprobe/maprobe.c
+24
-0
未找到文件。
apps/maprobe/Makefile
0 → 100644
浏览文件 @
c8d9202c
# Application name.
# NOTE(review): presumably consumed by the included Makefile.app -- confirm.
NAME = maprobe
# Source files that make up the application.
SRCS = maprobe.c
# Pull in the shared application rules located under $(AM_HOME).
include $(AM_HOME)/Makefile.app
apps/maprobe/include/maprobe.h
0 → 100644
浏览文件 @
c8d9202c
// basic microarchitectural probe
#ifndef PROBE_H
#define PROBE_H
#include <klib.h>
#include <csr.h>
// perf const
// Size units expressed in bytes; each unit is 1024 times the previous one.
// NOTE(review): these expand to plain int expressions, so a product such as
// 2*GB would overflow wherever int is 32 bits wide.
#define BYTE (1)
#define KB (1024*BYTE)
#define MB (1024*KB)
#define GB (1024*MB)
// platform dependent const
// Alternative base address, currently disabled:
// #define _PERF_TEST_ADDR_BASE 0x80400000
// Start address of the memory region the tests in this header read and write.
#define _PERF_TEST_ADDR_BASE 0x2000400000
// Stride (in bytes) used between consecutive accesses / list nodes.
#define _PERF_CACHELINE_SIZE_BYTE (64 * BYTE)
// Range sizes (in bytes) used to pick latency-test working sets.
// NOTE(review): presumably these mirror the cache hierarchy of the target
// platform -- confirm against the hardware configuration.
#define _PERF_L1_NOALIAS_SIZE_BYTE (32 * KB)
#define _PERF_L1_SIZE_BYTE (128 * KB)
#define _PERF_L2_SIZE_BYTE (512 * KB)
#define _PERF_L3_SIZE_BYTE (2 * MB)
// probe const
// Address that _perf_blackhole() stores to. It is the same address as
// _PERF_TEST_ADDR_BASE, so a blackhole store overwrites the first 8 bytes of
// the test region.
#define _PERF_BLACKHOLE _PERF_TEST_ADDR_BASE
// Bookkeeping shared by the timer helpers and the calibration routine.
struct perf {
    // Calibration results, filled in at run time by _perf_calibrate().
    uint64_t csr_read_cycle; // mcycle delta between two back-to-back reads
    uint64_t csr_read_ninst; // minstret delta between two back-to-back reads

    // Timer state: holds the raw counter snapshot after _perf_start_timer()
    // and the elapsed delta after _perf_end_timer().
    uint64_t cycle;
    uint64_t instrcnt;
};

// The single instance used by every helper in this header.
struct perf perf;
// Begin a measurement: snapshot CSR_MCYCLE and CSR_MINSTRET into the global
// `perf`. The stored values are raw counter readings; _perf_end_timer()
// later replaces them with the elapsed deltas.
// The calibration values in `perf` are not applied here.
void _perf_start_timer()
{
    perf.cycle = csr_read(CSR_MCYCLE);
    perf.instrcnt = csr_read(CSR_MINSTRET);
}
// Finish a measurement: read CSR_MCYCLE and CSR_MINSTRET again and overwrite
// perf.cycle / perf.instrcnt with the difference from the values stored by
// _perf_start_timer(). Because the snapshot is overwritten in place,
// _perf_start_timer() must be called again before the next measurement.
void _perf_end_timer()
{
    perf.cycle = csr_read(CSR_MCYCLE) - perf.cycle;
    perf.instrcnt = csr_read(CSR_MINSTRET) - perf.instrcnt;
}
// Print the most recent measurement held in the global `perf`: elapsed
// cycles, elapsed instructions and their ratio (instructions per cycle).
// Intended to be called after _perf_end_timer().
void _perf_print_timer()
{
    // The counters are uint64_t; print them through a 64-bit-capable
    // conversion instead of %d so the argument types match the format.
    printf("cycle %lu inst %lu ipc %lf\n",
           (unsigned long)perf.cycle,
           (unsigned long)perf.instrcnt,
           (float)perf.instrcnt / perf.cycle);
}
// Measure the cost of reading the counters themselves and record it in the
// global `perf`:
//   perf.csr_read_cycle - CSR_MCYCLE delta between two back-to-back reads
//   perf.csr_read_ninst - CSR_MINSTRET delta between two back-to-back reads
// Both values are also printed.
void _perf_calibrate()
{
    // csr read delay
    // The two reads must stay adjacent: anything placed between them would
    // be counted as part of the read cost.
    uint64_t cycle_1 = csr_read(CSR_MCYCLE);
    uint64_t cycle_2 = csr_read(CSR_MCYCLE);
    perf.csr_read_cycle = cycle_2 - cycle_1;
    // uint64_t value: use a long conversion rather than %d.
    printf("perf_calibrate: csr_read_cycle %lu\n",
           (unsigned long)perf.csr_read_cycle);

    // csr read inst cost
    uint64_t inst_1 = csr_read(CSR_MINSTRET);
    uint64_t inst_2 = csr_read(CSR_MINSTRET);
    perf.csr_read_ninst = inst_2 - inst_1;
    printf("perf_calibrate: csr_read_ninst %lu\n",
           (unsigned long)perf.csr_read_ninst);
}
// Sink for computed values: stores `value` as a 64-bit word at the fixed
// address _PERF_BLACKHOLE so the result of a test has an observable use.
void _perf_blackhole(uint64_t value)
{
    uint64_t *sink = (uint64_t *)_PERF_BLACKHOLE;
    *sink = value;
}
// Build a forward-linked chain of 64-bit "next" pointers inside the address
// range [base_addr, end_addr).
//
// Starting at base_addr, every `step` bytes a uint64_t is written that holds
// the address of the following node (current address + step). The link of
// the last node therefore holds an address at or beyond end_addr.
//
// base_addr: address of the first node
// end_addr:  first address past the region
// step:      distance between nodes in bytes; must be a non-zero multiple of 8
//
// Returns the number of nodes written.
uint64_t setup_latency_test_linklist(uint64_t base_addr, uint64_t end_addr, uint64_t step)
{
    uint64_t num_valid_node = 0;
    assert(step % 8 == 0);
    assert(step >= 8);

    uint64_t cur_addr = base_addr;
    // Only create a node when its whole 8-byte link fits below end_addr, so a
    // region whose length is not a multiple of 8 is never written past its end.
    while (cur_addr < end_addr && end_addr - cur_addr >= sizeof(uint64_t)) {
        uint64_t next_addr = cur_addr + step;
        *((uint64_t *)cur_addr) = next_addr;
        num_valid_node++;
        // The next node would start at or after end_addr. Testing the
        // remaining distance (instead of next_addr) also stops cleanly if
        // cur_addr + step wrapped around.
        if (step >= end_addr - cur_addr) {
            break;
        }
        cur_addr = next_addr;
    }
    return num_valid_node;
}
// Walk a chain of 64-bit "next" pointers: starting at base_addr, load the
// uint64_t stored at the current address and use it as the next address,
// num_valid_node times. Each load depends on the previous one.
//
// base_addr:      address of the first node
// num_valid_node: number of links to follow
//
// Returns the address reached after the last load (base_addr itself when
// num_valid_node is 0).
uint64_t read_latency_test_linklist(uint64_t base_addr, uint64_t num_valid_node)
{
    uint64_t cur_addr = base_addr;
    // The counter has the same type as num_valid_node, so the comparison is
    // not mixed-sign and the index cannot overflow for counts above INT_MAX.
    for (uint64_t i = 0; i < num_valid_node; i++) {
        cur_addr = *(uint64_t *)cur_addr;
    }
    return cur_addr;
}
// Touch the range [base_addr, end_addr) by laying down a pointer chain with
// one node every _PERF_CACHELINE_SIZE_BYTE bytes. The node count reported by
// setup_latency_test_linklist() is not needed here and is dropped.
void warmup(uint64_t base_addr, uint64_t end_addr)
{
    const uint64_t stride = _PERF_CACHELINE_SIZE_BYTE;
    (void)setup_latency_test_linklist(base_addr, end_addr, stride);
}
// Measure the average number of cycles per dependent load over a pointer
// chain that covers `size` bytes starting at _PERF_TEST_ADDR_BASE.
//
// size: length in bytes of the range to cover (one node every
//       _PERF_CACHELINE_SIZE_BYTE bytes)
// iter: number of times the whole chain is walked
//
// Prints a header line before the run and the measured cycles-per-node with
// the sample count afterwards. The global `perf` is overwritten.
void test_latency(uint64_t size, int iter)
{
    // make sure compiler will not opt read_latency_test_linklist
    volatile uint64_t result = 0;

    // `size` is uint64_t, so it is printed through %lx rather than %x.
    printf("range 0x%lxB (%d iters) latency test\n", (unsigned long)size, iter);

    // Building the chain is timed as well, but that measurement is replaced
    // by the read pass below before anything is reported.
    _perf_start_timer();
    uint64_t nnode = setup_latency_test_linklist(_PERF_TEST_ADDR_BASE,
                                                 _PERF_TEST_ADDR_BASE + size,
                                                 _PERF_CACHELINE_SIZE_BYTE);
    _perf_end_timer();
    uint64_t total_node = nnode * iter;
    // _perf_print_timer();

    // Timed section: walk the chain `iter` times.
    _perf_start_timer();
    for (int i = 0; i < iter; i++) {
        result += read_latency_test_linklist(_PERF_TEST_ADDR_BASE, nnode);
    }
    _perf_end_timer();
    // _perf_print_timer();

    // `total_node` is uint64_t, so it is printed through %lu rather than %d.
    printf("range 0x%lxB (%d intrs) read latency %f (%lu samples)\n",
           (unsigned long)size, iter,
           (float)perf.cycle / total_node,
           (unsigned long)total_node);

    _perf_blackhole(result);
}
// Measure read bandwidth: perform `iter` 64-bit loads, one every
// _PERF_CACHELINE_SIZE_BYTE bytes starting at _PERF_TEST_ADDR_BASE, and
// report (iter * _PERF_CACHELINE_SIZE_BYTE) / elapsed cycles.
//
// iter: number of loads to issue
//
// The global `perf` is overwritten with the measurement.
void test_mem_throughput(uint64_t iter)
{
    uint64_t remain = iter;
    uint64_t result = 0;
    uint64_t access_addr = _PERF_TEST_ADDR_BASE;

    _perf_start_timer();
    while (remain--) {
        // Accumulate the loaded values so the loads have a use.
        result += *(uint64_t *)access_addr;
        access_addr += _PERF_CACHELINE_SIZE_BYTE;
    }
    _perf_end_timer();

    // Sink the sum through the shared helper, as test_latency() does.
    _perf_blackhole(result);

    // `iter` is uint64_t, so it is printed through %lu rather than %d.
    printf("mem band width %f B/cycle (%lu samples)\n",
           (float)iter * _PERF_CACHELINE_SIZE_BYTE / perf.cycle,
           (unsigned long)iter);
}
#endif
\ No newline at end of file
apps/maprobe/maprobe.c
0 → 100644
浏览文件 @
c8d9202c
#include <klib.h>
#include "maprobe.h"
// Entry point: calibrate the counter-read cost, then run the throughput test
// followed by the latency tests for a series of range sizes.
int main()
{
    // Ranges exercised under the "L1 latency" heading, in execution order.
    const struct {
        uint64_t size; // bytes covered by the pointer chain
        int iter;      // number of passes over the chain
    } l1_ranges[] = {
        { 4 * KB, 5 },
        { _PERF_L1_NOALIAS_SIZE_BYTE, 2 },
        { _PERF_L1_SIZE_BYTE / 2, 2 },
        { _PERF_L1_SIZE_BYTE, 2 },
    };
    const int n_l1_ranges = sizeof(l1_ranges) / sizeof(l1_ranges[0]);

    _perf_calibrate();

    printf("Memory throughput:\n");
    test_mem_throughput(512);

    printf("L1 latency:\n");
    for (int k = 0; k < n_l1_ranges; k++) {
        test_latency(l1_ranges[k].size, l1_ranges[k].iter);
    }

    printf("L2 latency:\n");
    test_latency(_PERF_L2_SIZE_BYTE / 2, 2);
    // disabled: test_latency(_PERF_L2_SIZE_BYTE, 2);

    printf("L3 latency:\n");
    test_latency(_PERF_L3_SIZE_BYTE / 2, 2);
    // disabled: test_latency(_PERF_L3_SIZE_BYTE,2);

    // disabled: printf("MEM:\n");
    // disabled: test_latency(_PERF_L3_SIZE_BYTE*2,2);

    return 0;
}
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录