MegEngine 天元 / MegEngine

Commit d7cc4628
Authored on Sep 16, 2021 by Megvii Engine Team

perf(gopt): opt concat for OpenCL
GitOrigin-RevId: 9bb226d4b122bacaa9d7c1d69130bbc20eaed95e
Parent: 3f0bb47a

Showing 2 changed files with 133 additions and 1 deletion (+133 -1)

src/gopt/impl/inference.cpp   +62 -1
src/gopt/test/inference.cpp   +71 -0

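A note on the change before the diff itself: when a graph is converted to the OpenCL CD4 layout (NHWCD4, i.e. (N, H, C/4, W, 4) backed by IMAGE2D_PACK4), Concat was previously handled by the unconditional relayout_inp_to_chw fallback; this commit lets eligible concats stay in CD4 by remapping the concat axis. The snippet below is a minimal standalone sketch of that axis remapping in plain C++ rather than MegEngine API (the function name and the main() driver are illustrative only); it mirrors the axis_nchw_to_cd4_map lambda added in src/gopt/impl/inference.cpp.

// Minimal sketch (not MegEngine code): how a concat axis in NCHW (N, C, H, W)
// maps onto the CD4 layout (N, H, C/4, W, 4). Axis 3 (W) is not supported by
// the pass, which falls back to NCHW in that case.
#include <cstdio>
#include <stdexcept>

int axis_nchw_to_cd4(int nchw_axis) {
    switch (nchw_axis) {
        case 0:
            return 0;  // N stays the outermost axis
        case 1:
            return 2;  // C maps to the C/4 axis (the trailing 4 is the packed part)
        case 2:
            return 1;  // H moves in front of the channel-group axis
        default:
            throw std::invalid_argument("axis 3 (W) has no CD4 bypass");
    }
}

int main() {
    for (int axis = 0; axis < 3; ++axis) {
        std::printf("NCHW axis %d -> CD4 axis %d\n", axis, axis_nchw_to_cd4(axis));
    }
    return 0;
}
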
src/gopt/impl/inference.cpp
@@ -1589,6 +1589,67 @@ std::unique_ptr<ConvertFormatPass> ConvertFormatPass::make_nhwcd4_converter() {
             return new_opr;
         };
+    auto replace_concat_opr = [&relayout_inp_to_chw](
+                                      OperatorNodeBase* opr,
+                                      const VarNodeArray& new_inp) {
+        //! map nchw axis to CD4 axis(n h c/4 w 4)
+        auto axis_nchw_to_cd4_map = [=](int32_t org_axis) -> int32_t {
+            mgb_assert(org_axis >= 0 && org_axis <= 3);
+            int32_t ret = 0;
+            if (0 == org_axis) {
+                ret = 0;
+            } else if (1 == org_axis) {
+                ret = 2;
+            } else if (2 == org_axis) {
+                ret = 1;
+            } else if (3 == org_axis) {
+                mgb_throw(
+                        InternalError,
+                        "Do not support axis=3 for concat bypass for CD4!");
+            } else {
+                mgb_throw(
+                        InternalError,
+                        "Do not support axis for concat pass, may input is "
+                        "not NCHW format!");
+            }
+            return ret;
+        };
+        mgb_assert(opr->input().size() == new_inp.size());
+        auto nchw_axis = opr->cast_final_safe<opr::Concat>().param().axis;
+        if (nchw_axis < 0 || nchw_axis > 3) {
+            mgb_log_warn("concat pass fallback to relayout chw\n");
+            return relayout_inp_to_chw(opr, new_inp);
+        }
+        bool can_exec_cd4 = true;
+        //! only consider OpenCL CD4, if other backend has relayout performance
+        //! issue, may add other bypass format
+        for (size_t i = 0; i < opr->input().size(); i++) {
+            if (opr->input(i)->format().type() != TensorFormat::Type::DEFAULT ||
+                opr->input(i)->shape()[1] % 4 != 0 ||
+                new_inp[i]->shape().ndim != 5 ||
+                new_inp[i]->format().type() !=
+                        TensorFormat::Type::IMAGE2D_PACK4 ||
+                nchw_axis == 3) {
+                can_exec_cd4 = false;
+                break;
+            }
+        }
+        if (!can_exec_cd4) {
+            mgb_log_warn("concat pass fallback to relayout chw");
+            return relayout_inp_to_chw(opr, new_inp);
+        }
+        megdnn::param::Axis param;
+        //! now only support nchw bypass to CD4
+        mgb_log_warn("concat pass bypass to CD4");
+        param.axis = axis_nchw_to_cd4_map(nchw_axis);
+        return opr::Concat::make(VarNodeArrayView(new_inp), param, opr->config())
+                .node()
+                ->owner_opr();
+    };
     auto replace_elemwise_opr = [&relayout_inp_to_chw](
                                         OperatorNodeBase* opr,
                                         const VarNodeArray& new_inp) {
@@ -1654,7 +1715,7 @@ std::unique_ptr<ConvertFormatPass> ConvertFormatPass::make_nhwcd4_converter() {
     replace_func[opr::ConvolutionBackwardData::typeinfo()] = replace_deconv_opr;
     replace_func[opr::PoolingForward::typeinfo()] = replace_pooling_opr;
     replace_func[opr::Elemwise::typeinfo()] = replace_elemwise_opr;
-    replace_func[opr::Concat::typeinfo()] = relayout_inp_to_chw;
+    replace_func[opr::Concat::typeinfo()] = replace_concat_opr;
     replace_func[opr::Reshape::typeinfo()] = relayout_inp_to_chw;
     replace_func[opr::GetVarShape::typeinfo()] = relayout_inp_to_chw;
     replace_func[opr::Dimshuffle::typeinfo()] = relayout_inp_to_chw;
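The second hunk swaps the Concat handler from the unconditional relayout_inp_to_chw fallback to the new replace_concat_opr. Distilled from the per-input loop in the first hunk, the bypass is only taken when the concat axis and every input pass the checks below; this is a plain-C++ restatement for readability, with a hypothetical InputDesc struct standing in for the real VarNode/TensorFormat queries.

#include <cstddef>

// Hypothetical stand-in for the per-input state inspected by replace_concat_opr.
struct InputDesc {
    bool nchw_default_format;      // original input uses the DEFAULT (NCHW) format
    size_t channels;               // shape[1] of the original NCHW input
    size_t converted_ndim;         // ndim of the already-converted input
    bool converted_image2d_pack4;  // converted input is IMAGE2D_PACK4 (CD4)
};

// True when a concat with this axis and these inputs can stay in CD4.
bool can_bypass_to_cd4(const InputDesc* inputs, size_t n, int concat_axis) {
    if (concat_axis < 0 || concat_axis > 2)  // axis 3 (W) always falls back
        return false;
    for (size_t i = 0; i < n; ++i) {
        if (!inputs[i].nchw_default_format || inputs[i].channels % 4 != 0 ||
            inputs[i].converted_ndim != 5 || !inputs[i].converted_image2d_pack4) {
            return false;
        }
    }
    return true;
}

int main() {
    InputDesc ok{true, 12, 5, true};  // 12 channels: divisible by 4 -> bypass
    InputDesc bad{true, 6, 5, true};  // 6 channels: falls back to NCHW
    return (can_bypass_to_cd4(&ok, 1, 1) && !can_bypass_to_cd4(&bad, 1, 1)) ? 0 : 1;
}
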
src/gopt/test/inference.cpp
@@ -1591,6 +1591,77 @@ TEST(TestGoptInference, ConvertFormatPadIC) {
     MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
 }
 
+TEST(TestGoptInference, concatbypass) {
+    // hwcd4 is only supported in naive handle
+    NaiveMegDNNHandleScope naive_megdnn_handle;
+
+    HostTensorGenerator<> gen;
+    auto cn = CompNode::load("cpu0");
+    auto graph = ComputingGraph::make();
+    graph->options().graph_opt_level = 0;
+    auto mkcvar = [&](const char* name, const TensorShape& shp) {
+        return opr::SharedDeviceTensor::make(*graph, *gen(shp, cn)).rename(name);
+    };
+
+    auto host_inp1 = gen({1, 6, 16, 16}, cn), host_inp2 = gen({1, 6, 32, 32}, cn);
+    auto inp1 = opr::Host2DeviceCopy::make(*graph, host_inp1),
+         inp2 = opr::Host2DeviceCopy::make(*graph, host_inp2);
+
+    auto shape_tmp = mkcvar("tmp", {32, 32});
+    auto shape_of = opr::GetVarShape::make(shape_tmp);
+    opr::Resize::Param param_resize;
+    param_resize.format = opr::Resize::Param::Format::NCHW;
+    auto resize = opr::ResizeForward::make(inp1, shape_of, param_resize);
+    //! this concat should forward to chw
+    auto concat = opr::Concat::make({inp2, resize}, 1);
+
+    opr::Convolution::Param param;
+    param.pad_h = param.pad_w = 1;
+    param.sparse = opr::Convolution::Param::Sparse::DENSE;
+
+    auto w1 = mkcvar("w1", {12, 12, 3, 3});
+    auto w2 = mkcvar("w1", {12, 24, 3, 3});
+    auto y = opr::Convolution::make(concat, w1, param);
+    //! this concat should bypass CD4
+    y = opr::Concat::make({y, y}, 0);
+    y = opr::Convolution::make(y, w1, param);
+    //! this concat should bypass CD4
+    y = opr::Concat::make({y, y}, 1);
+    y = opr::Convolution::make(y, w2, param);
+    //! this concat should bypass CD4
+    y = opr::Concat::make({y, y}, 2);
+    y = opr::Convolution::make(y, w1, param);
+
+    SymbolVar y_opt;
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nhwcd4();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
+
+    HostTensorND host_y_opt, host_y;
+    auto func = graph->compile(
+            {make_callback_copy(y, host_y), make_callback_copy(y_opt, host_y_opt)});
+    size_t relayout_format_nr = 0;
+    auto cb = [&](cg::OperatorNodeBase* opr) {
+        if (opr->try_cast_final<opr::Convolution>()) {
+            auto conv_inputs = opr->input();
+            for (auto& input : conv_inputs) {
+                if (std::string::npos !=
+                    std::string(input->cname()).find("relayout_format")) {
+                    relayout_format_nr++;
+                }
+            }
+        }
+        return true;
+    };
+    func->iter_opr_seq(cb);
+    func->execute();
+    MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-3);
+    ASSERT_EQ(
+            opr::Convolution::Param::Format::NHWCD4,
+            find_opr<opr::Convolution>(y_opt).param().format);
+    ASSERT_EQ(1, relayout_format_nr);
+}
+
 TEST(TestGoptInference, ConvertBatchNormPass) {
     auto cn = CompNode::load("cpu0");
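
The assertions tie the test back to the pass: after optimize_for_inference with enable_nhwcd4(), the convolutions must run in NHWCD4, and exactly one convolution input is expected to come from a relayout_format conversion. The first concat mixes 6-channel inputs (6 is not a multiple of 4) with the output of an NCHW Resize, so it falls back to NCHW and its result has to be converted to CD4 before the convolution that consumes it; the three later concats see CD4 inputs with channel counts divisible by 4 and are bypassed, so no further data-input relayouts are needed.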