Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
adf75a29
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
adf75a29
编写于
4月 21, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(dnn/cuda): add sass int4 128x128
GitOrigin-RevId: 1bc54821023814c3c80b5fd22c24ab0007dd6203
上级
8da2f698
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
48 additions
and
20 deletions
+48
-20
dnn/test/cuda/conv_test_utils.cpp
dnn/test/cuda/conv_test_utils.cpp
+45
-18
src/plugin/impl/opr_footprint.cpp
src/plugin/impl/opr_footprint.cpp
+3
-2
未找到文件。
dnn/test/cuda/conv_test_utils.cpp
浏览文件 @
adf75a29
...
...
@@ -260,7 +260,7 @@ void benchmark_target_algo_with_cudnn_tsc(
megdnn_assert
(
src_dtype
.
enumv
()
==
filter_dtype
.
enumv
());
CUBenchmarker
<
ConvBiasForward
>
benchmarker
(
handle
);
CUBenchmarker
<
ConvBiasForward
>
benchmarker_cudnn
(
handle
);
size_t
RUNS
=
10
00
;
size_t
RUNS
=
2
00
;
benchmarker
.
set_display
(
false
).
set_times
(
RUNS
);
benchmarker
.
set_dtype
(
0
,
src_dtype
)
.
set_dtype
(
1
,
filter_dtype
)
...
...
@@ -282,9 +282,6 @@ void benchmark_target_algo_with_cudnn_tsc(
.
set_dtype
(
2
,
change_cudnn_bias_dtype
)
.
set_dtype
(
3
,
change_cudnn_dst_dtype
)
.
set_dtype
(
4
,
change_cudnn_dst_dtype
);
benchmarker_cudnn
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBiasForward
>
(
change_cudnn_algo
));
}
else
{
benchmarker_cudnn
.
set_dtype
(
0
,
src_dtype
)
.
set_dtype
(
1
,
filter_dtype
)
...
...
@@ -391,13 +388,28 @@ void benchmark_target_algo_with_cudnn_tsc(
}
float
time_in_ms_cudnn
=
0
;
if
(
with_cudnn
)
{
time_in_ms_cudnn
=
benchmarker_cudnn
.
execs
(
{
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
{},
{}})
/
RUNS
;
if
(
change_cudnn_algo
)
{
time_in_ms_cudnn
=
algo_benchmark
<
ConvBiasForward
,
OprProxy
<
ConvBiasForward
>
,
CUTimer
>
(
benchmarker_cudnn
,
{
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
{},
{}},
change_cudnn_algo
)
/
RUNS
;
}
else
{
time_in_ms_cudnn
=
benchmarker_cudnn
.
execs
(
{
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
{},
{}})
/
RUNS
;
}
}
float
flo
=
2.0
*
arg
.
n
*
arg
.
co
*
ho
*
wo
*
arg
.
ci
*
arg
.
f
*
arg
.
f
/
...
...
@@ -432,13 +444,28 @@ void benchmark_target_algo_with_cudnn_tsc(
}
time_in_ms_cudnn
=
0
;
if
(
with_cudnn
)
{
time_in_ms_cudnn
=
benchmarker_cudnn
.
execs
(
{
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
get_tensor_shape
(
z
,
format_cudnn
),
{}})
/
RUNS
;
if
(
change_cudnn_algo
)
{
time_in_ms_cudnn
=
algo_benchmark
<
ConvBiasForward
,
OprProxy
<
ConvBiasForward
>
,
CUTimer
>
(
benchmarker_cudnn
,
{
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
get_tensor_shape
(
z
,
format_cudnn
),
{}},
change_cudnn_algo
)
/
RUNS
;
}
else
{
time_in_ms_cudnn
=
benchmarker_cudnn
.
execs
(
{
get_tensor_shape
(
src
,
format_cudnn
),
get_tensor_shape
(
filter
,
format_cudnn
),
get_tensor_shape
(
bias
,
format_cudnn
),
get_tensor_shape
(
z
,
format_cudnn
),
{}})
/
RUNS
;
}
}
printf
(
"src=%s, filter=%s, dst=%s, time(algo=%s)=%.2f %.2fTops, "
"time(cudnn)=%.2f %.2fTops, "
...
...
src/plugin/impl/opr_footprint.cpp
浏览文件 @
adf75a29
...
...
@@ -151,7 +151,7 @@ uint64_t eval_conv_computation(const TensorShape& src_shape,
"format should be NCHW4/NCHW4_NCHW/NCHW4_NCHW32"
);
packed_size
=
4
;
}
return
dst_shape
.
total_nr_elems
()
*
fh
*
fw
*
src_shape
[
1
]
*
4
/
group
*
return
dst_shape
.
total_nr_elems
()
*
fh
*
fw
*
src_shape
[
1
]
*
packed_size
/
group
*
2
;
};
auto
eval_conv_computation_chwn4
=
[
&
param
,
&
src_shape
,
&
filter_shape
,
...
...
@@ -178,7 +178,8 @@ uint64_t eval_conv_computation(const TensorShape& src_shape,
param
.
format
==
Param
::
Format
::
NCHW44
||
param
.
format
==
Param
::
Format
::
NCHW44_DOT
||
param
.
format
==
Param
::
Format
::
NCHW32
||
param
.
format
==
Param
::
Format
::
NCHW32_NCHW4
)
{
param
.
format
==
Param
::
Format
::
NCHW32_NCHW4
||
param
.
format
==
Param
::
Format
::
NCHW64
)
{
return
eval_conv_computation_nchwx
();
}
if
(
param
.
format
==
Param
::
Format
::
CHWN4
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录