Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
1b943807
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
410
Star
4707
Fork
583
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
1b943807
编写于
8月 01, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(dnn): fix reduce sum/mean error when b is large
GitOrigin-RevId: d1bae619b1835ebe7ef7656766700720c3a99d37
上级
c7a99098
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
410 addition
and
41 deletion
+410
-41
dnn/src/fallback/reduce/opr_impl.cpp
dnn/src/fallback/reduce/opr_impl.cpp
+63
-36
dnn/src/fallback/reduce/opr_impl.h
dnn/src/fallback/reduce/opr_impl.h
+3
-0
dnn/src/fallback/reduce/reducer.h
dnn/src/fallback/reduce/reducer.h
+272
-5
dnn/test/fallback/reduce.cpp
dnn/test/fallback/reduce.cpp
+72
-0
未找到文件。
dnn/src/fallback/reduce/opr_impl.cpp
浏览文件 @
1b943807
...
...
@@ -5,7 +5,6 @@
#include "midout.h"
#include "reducer.h"
#include "src/common/reduce_helper.h"
MIDOUT_DECL
(
megdnn_fb_reduce_op
)
MIDOUT_DECL
(
megdnn_fb_reduce_c
)
...
...
@@ -67,6 +66,27 @@ void reduce_exec(size_t A, size_t B, size_t C, Op op) MEGDNN_NOEXCEPT {
namespace
megdnn
{
namespace
fallback
{
/*!
 * \brief Workspace size, in bytes, requested for reducing \p src into \p dst.
 *
 * A dedicated size is returned only when all of the following hold:
 *   - the source dtype is Float32,
 *   - the reduce mode is MEAN, SUM or SUM_SQR,
 *   - reduce::get_ABC() yields C == 1 for the configured axis.
 * In that case the result is (B / 45 + 4 + 4) * sizeof(float).
 * Every other configuration defers to the naive implementation.
 *
 * \param src layout of the tensor being reduced
 * \param dst layout of the reduced result
 * \return required workspace size in bytes
 */
size_t ReduceImpl::get_workspace_in_bytes(
        const TensorLayout& src, const TensorLayout& dst) {
    MEGDNN_MARK_USED_VAR(src);
    MEGDNN_MARK_USED_VAR(dst);

    if (src.dtype.enumv() == DTypeEnum::Float32) {
        switch (param().mode) {
            case Mode::MEAN:
            case Mode::SUM:
            case Mode::SUM_SQR: {
                size_t A, B, C;
                reduce::get_ABC(src, A, B, C, param().axis);
                if (C == 1) {
                    // Original author's hint: "Using B = 247 as an example, you
                    // can understand why these parameters exist" (T = 3; X is
                    // 1 or 2, i.e. 0 < X < T).
                    // NOTE(review): the consumer of this buffer is not visible
                    // here -- confirm the sizing against the reducer kernels.
                    constexpr size_t kFullChunkDivisor = (60 * 3) / 4;  // T = 3
                    constexpr size_t kPartialChunkSlots = 4;  // 0 < X < T
                    constexpr size_t kRemainderSlots = 4;
                    const size_t float_count = B / kFullChunkDivisor +
                                               kPartialChunkSlots +
                                               kRemainderSlots;
                    return float_count * sizeof(float);
                }
                break;
            }
            default:
                break;
        }
    }

    // Anything that is not the Float32 / C == 1 sum-like case uses the naive
    // implementation's workspace requirement.
    return naive::ReduceForwardImpl::get_workspace_in_bytes(src, dst);
}
void
ReduceImpl
::
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
{
check_exec
(
src
.
layout
,
dst
.
layout
,
workspace
.
size
);
...
...
@@ -178,45 +198,52 @@ void ReduceImpl::exec_fallback(
}
bool
ReduceImpl
::
exec_optimized
(
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
)
{
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
{
size_t
A
,
B
,
C
;
reduce
::
get_ABC
(
src
.
layout
,
A
,
B
,
C
,
param
().
axis
);
bool
execed
=
false
;
using
Mode
=
param
::
Reduce
::
Mode
;
#define DISPATCH_FUNC(Reducer, dtype, ctype, comp_type) \
if (C == 1) { \
using _Reducer = Reducer<dtype, ctype, comp_type, true>; \
using _ReducerC1SmallB = Reducer<dtype, ctype, comp_type, false>; \
std::function<void(const ctype*, ctype*, DType, size_t, size_t, size_t)> \
do_reduce = Exec<_Reducer, true>::do_reduce; \
if (B == 2) \
do_reduce = ExecC1SmallB<_ReducerC1SmallB, ctype, 2>::do_reduce; \
if (B == 3) \
do_reduce = ExecC1SmallB<_ReducerC1SmallB, ctype, 3>::do_reduce; \
if (B == 4) \
do_reduce = ExecC1SmallB<_ReducerC1SmallB, ctype, 4>::do_reduce; \
MIDOUT_BEGIN( \
megdnn_fallback_reduce_optimized, ctype, dtype, comp_type, \
midout_iv(0)) { \
MEGDNN_DISPATCH_CPU_KERN_OPR(do_reduce( \
reinterpret_cast<ctype*>(src.raw_ptr()), \
reinterpret_cast<ctype*>(dst.raw_ptr()), src_type, A, B, C)); \
execed = true; \
} \
MIDOUT_END(); \
} else { \
using _Reducer = Reducer<dtype, ctype, comp_type, false>; \
std::function<void(const ctype*, ctype*, DType, size_t, size_t, size_t)> \
do_reduce = Exec<_Reducer, false>::do_reduce; \
MIDOUT_BEGIN( \
megdnn_fallback_reduce_optimized, ctype, dtype, comp_type, \
midout_iv(1)) { \
MEGDNN_DISPATCH_CPU_KERN_OPR(do_reduce( \
reinterpret_cast<ctype*>(src.raw_ptr()), \
reinterpret_cast<ctype*>(dst.raw_ptr()), src_type, A, B, C)); \
execed = true; \
} \
MIDOUT_END(); \
#define DISPATCH_FUNC(Reducer, dtype, ctype, comp_type) \
if (C == 1) { \
using _Reducer = Reducer<dtype, ctype, comp_type, true>; \
using _ReducerC1SmallB = Reducer<dtype, ctype, comp_type, false>; \
std::function<void( \
const ctype*, ctype*, DType, size_t, size_t, size_t, \
_megdnn_workspace)> \
do_reduce = Exec<_Reducer, true>::do_reduce; \
if (B == 2) \
do_reduce = ExecC1SmallB<_ReducerC1SmallB, ctype, 2>::do_reduce; \
if (B == 3) \
do_reduce = ExecC1SmallB<_ReducerC1SmallB, ctype, 3>::do_reduce; \
if (B == 4) \
do_reduce = ExecC1SmallB<_ReducerC1SmallB, ctype, 4>::do_reduce; \
MIDOUT_BEGIN( \
megdnn_fallback_reduce_optimized, ctype, dtype, comp_type, \
midout_iv(0)) { \
MEGDNN_DISPATCH_CPU_KERN_OPR(do_reduce( \
reinterpret_cast<ctype*>(src.raw_ptr()), \
reinterpret_cast<ctype*>(dst.raw_ptr()), src_type, A, B, C, \
workspace)); \
execed = true; \
} \
MIDOUT_END(); \
} else { \
using _Reducer = Reducer<dtype, ctype, comp_type, false>; \
std::function<void( \
const ctype*, ctype*, DType, size_t, size_t, size_t, \
_megdnn_workspace)> \
do_reduce = Exec<_Reducer, false>::do_reduce; \
MIDOUT_BEGIN( \
megdnn_fallback_reduce_optimized, ctype, dtype, comp_type, \
midout_iv(1)) { \
MEGDNN_DISPATCH_CPU_KERN_OPR(do_reduce( \
reinterpret_cast<ctype*>(src.raw_ptr()), \
reinterpret_cast<ctype*>(dst.raw_ptr()), src_type, A, B, C, \
workspace)); \
execed = true; \
} \
MIDOUT_END(); \
}
#define DISPATCH_MODE_QUANTIZED(dtype, ctype, comp_type) \
...
...
dnn/src/fallback/reduce/opr_impl.h
浏览文件 @
1b943807
#pragma once
#include "src/common/reduce_helper.h"
#include "src/naive/reduce/opr_impl.h"
namespace
megdnn
{
...
...
@@ -13,6 +14,8 @@ public:
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
);
// Declaration only; the definition is ReduceImpl::exec_fallback in
// dnn/src/fallback/reduce/opr_impl.cpp.
// NOTE(review): presumably the non-optimized execution path -- the body is not
// visible here, confirm before relying on that. The workspace parameter is
// intentionally left unnamed in this declaration.
void
exec_fallback
(
_megdnn_tensor_in
src
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
);
// Workspace size in bytes needed to reduce `src` into `dst`.
// For Float32 sources in SUM / MEAN / SUM_SQR mode whose reduce::get_ABC()
// decomposition has C == 1, the size is computed from B; every other case
// forwards to naive::ReduceForwardImpl::get_workspace_in_bytes().
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
dst
)
override
;
};
}
// namespace fallback
...
...
dnn/src/fallback/reduce/reducer.h
浏览文件 @
1b943807
此差异已折叠。
点击以展开。
dnn/test/fallback/reduce.cpp
浏览文件 @
1b943807
...
...
@@ -352,6 +352,78 @@ TEST_F(FALLBACK, BENCHMARK_REDUCE_VS_CONV) {
};
run
();
}
// Benchmark for the fallback Reduce operator along axis 0.
//
// Phase 1 runs SUM / MEAN / SUM_SQR over 1-D shapes {3 * 224 * i} for
// i in [224, 448) without printing anything.
// Phase 2 measures each of the three modes on a "small" (3 * 224 * 224) and a
// "large" (3 * 224 * 224 * 100) Float32 tensor and prints the value returned by
// execs() divided by RUNS.
TEST_F(FALLBACK, BENCHMARK_REDUCE) {
    auto run = [&]() {
        using Mode = param::Reduce::Mode;
        constexpr size_t RUNS = 100;

        Benchmarker<Reduce> benchmarker_reduce(handle());
        benchmarker_reduce.set_display(false);
        benchmarker_reduce.set_times(RUNS);

        TensorShape small{3 * 224 * 224};
        TensorShape large{3 * 224 * 224 * 100};

        param::Reduce param;
        param.axis = 0;

        // Switch the benchmarker over to another reduce mode.
        auto select_mode = [&](Mode mode) {
            param.mode = mode;
            benchmarker_reduce.set_param(param);
        };

        // Run the benchmark on a Float32 tensor of the given shape and return
        // the execs() result divided by RUNS.
        auto avg_ms = [&](const TensorShape& shape) {
            TensorLayout src(shape, dtype::Float32());
            return benchmarker_reduce.execs({src, {}}) / RUNS;
        };

        // Phase 1: sweep over many sizes; the timings are deliberately dropped.
        for (auto i = 224; i < 224 * 2; i++) {
            for (auto mode : {Mode::SUM, Mode::MEAN, Mode::SUM_SQR}) {
                select_mode(mode);
                auto unused_ms = benchmarker_reduce.execs({{3 * 224 * i}, {}}) / RUNS;
                static_cast<void>(unused_ms);
            }
        }

        // Phase 2: per mode, first the small tensor, then the large one.
        select_mode(Mode::SUM);
        printf("SUM\n");
        {
            auto reduce = avg_ms(small);
            printf("case 1: reduce use time %fms\n", reduce);
        }
        {
            auto reduce = avg_ms(large);
            printf("case 1: reduce use time %fms\n", reduce);
        }

        select_mode(Mode::MEAN);
        printf("MEAN\n");
        {
            auto reduce = avg_ms(small);
            printf("case 2: reduce use time %fms\n", reduce);
        }
        {
            auto reduce = avg_ms(large);
            printf("case 2: reduce use time %fms\n", reduce);
        }

        select_mode(Mode::SUM_SQR);
        printf("SUM_SQR\n");
        {
            auto reduce = avg_ms(small);
            printf("case 3: reduce use time %fms\n", reduce);
        }
        {
            auto reduce = avg_ms(large);
            printf("case 3: reduce use time %fms\n", reduce);
        }
    };
    run();
}
#endif
// vim: syntax=cpp.doxygen
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录