Commit 1fb7d34f
Authored May 06, 2020 by Megvii Engine Team

refactor(gopt): refactor layout transform for optimize for inference

GitOrigin-RevId: df4e8866d3a5032fea1712b830498c18bb6e57c3
Parent: 50d285fc
Showing 10 changed files with 253 additions and 228 deletions (+253 / -228)
python_module/megengine/_internal/__init__.py    +20   -5
python_module/src/swig/misc.i                    +13   -9
sdk/load-and-run/dump_with_testcase_mge.py        +2   -2
src/gopt/impl/framework.cpp                      +37  -41
src/gopt/impl/tensor_reformat.cpp                 +1   -1
src/gopt/include/megbrain/gopt/framework.h       +56   -0
src/gopt/include/megbrain/gopt/inference.h        +1  -34
src/gopt/test/inference.cpp                     +114 -128
src/opr/test/dnn/convolution.cpp                  +5   -4
src/opr/test/io.cpp                               +4   -4
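In short: the four per-format booleans on OptimizeForInferenceOptions (use_nhwcd4, use_tensor_core, use_nchw88, use_nchw44) collapse into a single LayoutTransform enum on a new OptimizeOptions base struct, which makes the layout transforms mutually exclusive by construction, and GraphOptimizer gains an apply_optimize_options() entry point that turns the options into concrete passes. The updated call sites all follow the pattern below (a representative sketch assembled from the test changes in this diff; the graph construction for y is elided):

    // New call pattern used throughout the updated tests.
    // enable_nchw2nchw32() replaces the old enable_use_tensor_core() boolean.
    auto options = gopt::OptimizeForInferenceOptions{};
    options.enable_fuse_conv_bias_nonlinearity().enable_nchw2nchw32();
    SymbolVar y_opt;
    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);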
python_module/megengine/_internal/__init__.py

@@ -539,7 +539,7 @@ def optimize_for_inference(
     f16_io_comp=False,
     use_nhwcd4=False,
     fuse_conv_bias_nonlinearity=False,
-    use_tensor_core=False,
+    use_nchw32=False,
     fuse_conv_bias_with_z=False,
     use_nchw88=False,
     use_nchw44=False

@@ -564,6 +564,8 @@ def optimize_for_inference(
         times.
     :param use_nchw44: whether to use NCHW44 tensor format. This maybe faster some
         times.
+    :param use_nchw32: whether to use NCHW32 tensor format. Mainly used for
+        nvidia tensorcore.
     :return: list of transformed vars corresponding to given output vars

@@ -575,15 +577,28 @@ def optimize_for_inference(
     for i in [
         "f16_io_f32_comp",
         "f16_io_comp",
-        "use_nhwcd4",
         "fuse_conv_bias_nonlinearity",
-        "use_tensor_core",
         "fuse_conv_bias_with_z",
-        "use_nchw88",
-        "use_nchw44",
     ]:
         if settings[i]:
             getattr(opt, "enable_{}".format(i))()
+    layout_tranform = None
+    for k, v in {
+        "use_nhwcd4": "nchw2nhwcd4",
+        "use_nchw32": "nchw2nchw32",
+        "use_nchw88": "nchw2nchw88",
+        "use_nchw44": "nchw2nchw44",
+    }.items():
+        if settings[k]:
+            assert (
+                not layout_tranform
+            ), "Only one layout transform supported, both {} and {}".format(
+                layout_tranform, k
+            )
+            getattr(opt, "enable_{}".format(v))()
+            layout_tranform = k
     vec = _detail._VectorSymbolVar()
     for i in output_vars:
         assert isinstance(i, _detail.SymbolVar), "bad var: {}".format(i)
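Note that this wrapper is the only place the mutual exclusion is checked: at the C++ level the layout setters simply assign the enum, so a later call silently overwrites an earlier one. A hand-written illustration of that last-wins behavior (not code from the commit):

    // Hand-written illustration: the enum setters overwrite each other,
    // which is why the Python layer asserts at most one use_* layout flag.
    auto opt = gopt::OptimizeForInferenceOptions{};
    opt.enable_nchw2nhwcd4();
    opt.enable_nchw2nchw44();                  // silently replaces NCHW2NHWCD4
    mgb_assert(opt.transform_nchw2nchw44());   // holds
    mgb_assert(!opt.transform_nchw2nhwcd4());  // the earlier choice is gone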
python_module/src/swig/misc.i

@@ -71,15 +71,19 @@ class _PersistentCache {
 };
 
 struct _OptimizeForInferenceOptions {
-#define SET(n) void enable_##n()
-    SET(f16_io_f32_comp);
-    SET(f16_io_comp);
-    SET(fuse_conv_bias_nonlinearity);
-    SET(use_nhwcd4);
-    SET(use_tensor_core);
-    SET(fuse_conv_bias_with_z);
-    SET(use_nchw88);
-    SET(use_nchw44);
+#define SET(n) void enable_##n();
+    SET(f16_io_f32_comp);
+    SET(f16_io_comp);
+    SET(fuse_conv_bias_nonlinearity);
+    SET(fuse_conv_bias_with_z);
+#undef SET
+#define SET(_trans, _trans_capital) \
+    void enable_##_trans();
+    SET(nchw2nhwcd4, NCHW2NHWCD4);
+    SET(nchw2nchw88, NCHW2NCHW88);
+    SET(nchw2nchw44, NCHW2NCHW44);
+    SET(nchw2nchw32, NCHW2NCHW32);
 #undef SET
 };
sdk/load-and-run/dump_with_testcase_mge.py

@@ -255,7 +255,7 @@ def optimize_for_inference(args, outputs):
         'enable_nchw88': 'use_nchw88',
         'enable_nchw44': 'use_nchw44',
         'enable_fuse_conv_bias_nonlinearity': 'fuse_conv_bias_nonlinearity',
-        'enable_tensorcore': 'use_tensor_core',
+        'enable_nchw32': 'use_nchw32',
         'enable_fuse_conv_bias_with_z': 'fuse_conv_bias_with_z',
     }
     kwargs = {}

@@ -393,7 +393,7 @@ def main():
         'for inference')
     parser.add_argument(
-        '--enable-tensorcore',
+        '--enable-nchw32',
         action='store_true',
         help='transform the model format from NCHW4 to NCHW32 '
         'for inference on nvidia TensoCore'
src/gopt/impl/framework.cpp

@@ -642,21 +642,6 @@ GraphOptimizer& GraphOptimizer::add_preset_passes(
     add_pass<ArithMulDistributePass>();
     add_pass<ReorderArithChainPass>(cv_type);
-    if (inference_opt) {
-        if (inference_opt->use_nhwcd4) {
-            add_pass(ConvertFormatPass::make_nhwcd4_converter());
-        }
-        if (inference_opt->f16_io_f32_comp) {
-            add_pass(ConvertF32ToF16Pass::make(true));
-        }
-        if (inference_opt->f16_io_comp) {
-            add_pass(ConvertF32ToF16Pass::make(false));
-        }
-        // fuse again after reordering
-        add_pass<ParamFusePass>();
-    }
     add_pass<ArithFusePass>();
     // reorder again because shapes of fused oprs might change
     add_pass<ReorderArithChainPass>(cv_type);

@@ -687,32 +672,7 @@ GraphOptimizer& GraphOptimizer::add_preset_passes(
     }
 #endif
-    if (inference_opt) {
-        if (inference_opt->fuse_conv_bias_nonlinearity)
-            add_pass<FuseConvBiasNonlinPass>();
-        if (inference_opt->fuse_conv_bias_with_z) {
-            mgb_assert(inference_opt->fuse_conv_bias_nonlinearity,
-                       "fuse conv bias with z input should fuse conv bias "
-                       "activation "
-                       "first");
-            add_pass<FuseConvBiasZPass>();
-        }
-        if (inference_opt->use_nchw88) {
-            add_pass(EnableNchwxxPass::make_nchwxx_converter(8));
-        }
-        if (inference_opt->use_nchw44) {
-            add_pass(EnableNchwxxPass::make_nchwxx_converter(4));
-        }
-        if (inference_opt->use_tensor_core) {
-            mgb_assert(inference_opt->fuse_conv_bias_nonlinearity,
-                       "enable tensor core should fuse conv bias activation "
-                       "first");
-            add_pass(EnableTensorCorePass::make_tensorcore_converter());
-            add_pass<ShuffleShuffleRemovePass>();
-            add_pass<RemoveRedundantTypeCvtPass>();
-        }
-        add_pass<ParamFusePass>();
-    }
+    apply_optimize_options(inference_opt);
     if (inference_opt) {
         // merge params to reduce loading time and graph overhead

@@ -739,6 +699,42 @@ VarNode* GraphOptimizer::var_replace_lookup(VarNode *var) {
     }
 }
+void GraphOptimizer::apply_optimize_options(
+        const OptimizeOptions* options) {
+    if (!options)
+        return;
+    if (options->f16_io_comp) {
+        add_pass(ConvertF32ToF16Pass::make(false));
+    }
+    if (options->f16_io_f32_comp) {
+        add_pass(ConvertF32ToF16Pass::make(true));
+    }
+    if (options->transform_nchw2nhwcd4()) {
+        add_pass(ConvertFormatPass::make_nhwcd4_converter());
+        add_pass<FuseConvBiasNonlinPass>();
+    }
+    if (options->transform_nchw2nchw88()) {
+        add_pass(EnableNchwxxPass::make_nchwxx_converter(8));
+    }
+    if (options->transform_nchw2nchw44()) {
+        add_pass(EnableNchwxxPass::make_nchwxx_converter(4));
+    }
+    if (options->transform_nchw2nchw32()) {
+        add_pass<FuseConvBiasNonlinPass>();
+        add_pass(EnableTensorCorePass::make_tensorcore_converter());
+        add_pass<ShuffleShuffleRemovePass>();
+        add_pass<RemoveRedundantTypeCvtPass>();
+    }
+    if (options->fuse_conv_bias_nonlinearity) {
+        add_pass<FuseConvBiasNonlinPass>();
+    }
+    if (options->fuse_conv_bias_with_z) {
+        add_pass<FuseConvBiasNonlinPass>();
+        add_pass<FuseConvBiasZPass>();
+    }
+    add_pass<ParamFusePass>();
+}

 /* ================ ConstVarPropogateBase ================ */
 ConstVarPropogateBase::AddOprResult ConstVarPropogateBase::add_opr(
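Two behavioral details are visible in the new apply_optimize_options(): each layout transform now adds the fusion passes it depends on itself (for example, nchw2nchw32 prepends FuseConvBiasNonlinPass where the old code merely asserted that the caller had requested it), and ParamFusePass always runs last. A hedged sketch of driving the new entry point directly; only apply_optimize_options() itself is introduced by this commit, and the surrounding GraphOptimizer usage is assumed from gopt conventions rather than shown in this diff:

    // Hedged sketch, not from the commit. Per the function above, this
    // registers FuseConvBiasNonlinPass (via the layout transform),
    // EnableTensorCorePass, ShuffleShuffleRemovePass,
    // RemoveRedundantTypeCvtPass, FuseConvBiasNonlinPass again (via the
    // boolean flag), and a final ParamFusePass, in that order.
    gopt::GraphOptimizer optimizer;
    gopt::OptimizeForInferenceOptions options;
    options.enable_fuse_conv_bias_nonlinearity().enable_nchw2nchw32();
    optimizer.apply_optimize_options(&options);  // nullptr would be a no-op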
src/gopt/impl/tensor_reformat.cpp

@@ -1770,7 +1770,7 @@ public:
         return reformat.node();
     };
 
     m_reformat[std::make_pair(TensorFormat::CHWN4, TensorFormat::NCHW4)] =
             [](VarNode* inp) -> VarNode* {
         megdnn::param::RelayoutFormat param;
src/gopt/include/megbrain/gopt/framework.h

@@ -377,6 +377,57 @@ namespace gopt {
         RecursiveSubGraphRewriteHelper(OptState& state);
     };
 
+    /**
+     * \brief common optimize options, it both can be used for optimize for
+     * inference in graph dump but also used in graph optimization in runtime.
+     */
+    struct OptimizeOptions {
+        //! whether to enable IO in float16 compute in float32
+        bool f16_io_f32_comp = false;
+        //! whether to enable tranform to pure float16 model
+        bool f16_io_comp = false;
+        //! whether to enable conv bias nonlinearity fusion
+        bool fuse_conv_bias_nonlinearity = false;
+        enum LayoutTransform : uint32_t {
+            DEFAULT,
+            NCHW2NHWCD4,  ///< compute using NHWCD4 tensor format
+            NCHW2NCHW88,  ///< compute using NCHW88 tensor format
+            NCHW2NCHW44,  ///< compute using NCHW44 tensor format
+            NCHW2NCHW32,  ///< compute using NCHW32 tensor format, used for
+                          ///< tensorcore
+        };
+        LayoutTransform layout_transform = LayoutTransform::DEFAULT;
+        //! fuse pattern like ReLU(conv_bias(x, w, b) + z) or conv_bias(x, w, b)
+        //! + z -> conv_bias(x, w, b, z)
+        bool fuse_conv_bias_with_z = false;
+
+#define SET(n)                          \
+    OptimizeOptions& enable_##n() {     \
+        n = true;                       \
+        return *this;                   \
+    }
+        SET(f16_io_f32_comp);
+        SET(f16_io_comp);
+        SET(fuse_conv_bias_nonlinearity);
+        SET(fuse_conv_bias_with_z);
+#undef SET
+#define SET(_trans, _trans_capital)                                     \
+    OptimizeOptions& enable_##_trans() {                                \
+        layout_transform = LayoutTransform::_trans_capital;             \
+        return *this;                                                   \
+    }                                                                   \
+    bool transform_##_trans() const {                                   \
+        return layout_transform == LayoutTransform::_trans_capital;     \
+    }
+        SET(nchw2nhwcd4, NCHW2NHWCD4);
+        SET(nchw2nchw88, NCHW2NCHW88);
+        SET(nchw2nchw44, NCHW2NCHW44);
+        SET(nchw2nchw32, NCHW2NCHW32);
+#undef SET
+    };
+
     /*!
      * \brief manage passes and their applying on graphs
      *

@@ -465,6 +516,11 @@ namespace gopt {
          * var_replace_map(var->owner_graph()) corresponding to var
          */
        static VarNode* var_replace_lookup(VarNode* var);
+
+        /**
+         * \brief apply optimize options
+         */
+        void apply_optimize_options(const OptimizeOptions* options);
     };
 
     /*!
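To make the macro block concrete, here is what one instantiation of the two-argument SET expands to (a manual expansion for illustration, not code from the commit):

    // SET(nchw2nchw88, NCHW2NCHW88) expands to:
    OptimizeOptions& enable_nchw2nchw88() {
        layout_transform = LayoutTransform::NCHW2NCHW88;
        return *this;
    }
    bool transform_nchw2nchw88() const {
        return layout_transform == LayoutTransform::NCHW2NCHW88;
    }

Because every enable_nchw2* setter writes the same layout_transform field, selecting a second layout transform replaces the first rather than combining with it.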
src/gopt/include/megbrain/gopt/inference.h

@@ -256,40 +256,7 @@ namespace gopt {
             size_t pack_c_size);
     };
 
-    struct OptimizeForInferenceOptions {
-        //! whether to enable IO in float16 compute in float32
-        bool f16_io_f32_comp = false;
-        //! whether to enable tranform to pure float16 model
-        bool f16_io_comp = false;
-        //! whether to enable conv bias nonlinearity fusion
-        bool fuse_conv_bias_nonlinearity = false;
-        //! whether to compute using NHWCD4 tensor format
-        bool use_nhwcd4 = false;
-        //! whether to compute using NCHW88 tensor format
-        bool use_nchw88 = false;
-        //! whether to compute using NCHW44 tensor format
-        bool use_nchw44 = false;
-        //! whether to enable tensor core
-        bool use_tensor_core = false;
-        //! fuse pattern like ReLU(conv_bias(x, w, b) + z) or conv_bias(x, w, b)
-        //! + z -> conv_bias(x, w, b, z)
-        bool fuse_conv_bias_with_z = false;
-
-#define SET(n)                                  \
-    OptimizeForInferenceOptions& enable_##n() { \
-        n = true;                               \
-        return *this;                           \
-    }
-        SET(f16_io_f32_comp);
-        SET(f16_io_comp);
-        SET(fuse_conv_bias_nonlinearity);
-        SET(use_nhwcd4);
-        SET(use_tensor_core);
-        SET(fuse_conv_bias_with_z);
-        SET(use_nchw88);
-        SET(use_nchw44);
-#undef SET
-    };
+    struct OptimizeForInferenceOptions : OptimizeOptions {};
 
     /*!
      * \brief optimize a computing graph for inference
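With OptimizeForInferenceOptions reduced to an empty subclass of OptimizeOptions, existing fields and enable_* setters keep working except the four removed use_* layout flags; only layout selection changes spelling. The test updates below all apply the same mechanical migration, shown here side by side (both forms taken from this diff):

    // Before: inline temporary with a boolean layout flag.
    unpack_vector(gopt::optimize_for_inference(
                          {y}, gopt::OptimizeForInferenceOptions{}
                                       .enable_use_nhwcd4()),
                  y_opt);
    // After: named options object with the enum-backed setter.
    auto options = gopt::OptimizeForInferenceOptions{};
    options.enable_nchw2nhwcd4();
    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);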
src/gopt/test/inference.cpp

@@ -635,10 +635,9 @@ TEST(TestGoptInference, Float16IOFloat32Compute) {
     y = opr::Concat::make({y, -y}, 0);
     y = opr::Reduce::make(y, {}, y.make_scalar(1));
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_f16_io_f32_comp()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_f16_io_f32_comp();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(y_opt.dtype(), dtype::Float32());
     HostTensorND host_y, host_y_opt;
@@ -683,10 +682,9 @@ TEST(TestGoptInference, Float16IOFloat32ComputeWarpPerspective) {
     TensorShape out_shp{20, 20};
     auto y = opr::WarpPerspective::make(a, mat, out_shp);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_f16_io_f32_comp()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_f16_io_f32_comp();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(y_opt.dtype(), dtype::Float32());
     HostTensorND host_y, host_y_opt;
     auto func = graph->compile({make_callback_copy(y, host_y),
@@ -723,10 +721,9 @@ TEST(TestGoptInference, Float16IOFloat32ComputeRemap) {
     auto map = opr::Host2DeviceCopy::make(*graph, map_host).rename("map");
     auto y = opr::Remap::make(a, map);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_f16_io_f32_comp()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_f16_io_f32_comp();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(y_opt.dtype(), dtype::Float32());
     HostTensorND host_y, host_y_opt;
     auto func = graph->compile({make_callback_copy(y, host_y),
@@ -770,10 +767,9 @@ TEST(TestGoptInference, Uint8IOFloat16ComputeWarpPerspective) {
     TensorShape out_shp{20, 20};
     auto y = opr::WarpPerspective::make(a, mat, out_shp);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_f16_io_comp()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_f16_io_comp();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(y_opt.dtype(), dtype::Uint8());
     HostTensorND host_y, host_y_opt;
     auto func = graph->compile({make_callback_copy(y, host_y),
@@ -801,10 +797,9 @@ TEST(TestGoptInference, Float32TOFloat16) {
         y = opr::Reduce::make(y, {}, y.make_scalar(1));
         SymbolVar y_opt;
-        unpack_vector(gopt::optimize_for_inference(
-                              {y}, gopt::OptimizeForInferenceOptions{}
-                                           .enable_f16_io_comp()),
-                      y_opt);
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_f16_io_comp();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
         return y_opt;
     };
@@ -857,10 +852,9 @@ TEST(TestGoptInference, Float32TOFloat16EndpointElemwise) {
         auto y = d0 + b;
         SymbolVar y_opt;
-        unpack_vector(gopt::optimize_for_inference(
-                              {y}, gopt::OptimizeForInferenceOptions{}
-                                           .enable_f16_io_comp()),
-                      y_opt);
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_f16_io_comp();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
         return y_opt;
     };
@@ -897,7 +891,7 @@ TEST(TestGoptInference, Float32TOFloat16EndpointElemwise) {
 TEST(TestGoptInference, Float32TOFloat16Linspace) {
     CompNode cn = CompNode::load("cpu0");
     HostTensorGenerator<> gen(0, 1, 0);
-    auto host_x = gen({3, 1}, cn);
+    auto host_x = gen({3, 1}, cn);
     auto graph = ComputingGraph::make();
     auto make_f32_to_f16_graph = [&]() {
@@ -916,10 +910,9 @@ TEST(TestGoptInference, Float32TOFloat16Linspace) {
         auto mm = opr::MatrixMul::make(x, y);
         SymbolVar mm_opt;
-        unpack_vector(gopt::optimize_for_inference(
-                              {mm}, gopt::OptimizeForInferenceOptions{}
-                                            .enable_f16_io_comp()),
-                      mm_opt);
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_f16_io_comp();
+        unpack_vector(gopt::optimize_for_inference({mm}, options), mm_opt);
         return mm_opt;
     };
@@ -998,11 +991,9 @@ TEST(TestGoptInference, ConvertFormatNHWCD4) {
     y = opr::Convolution::make(elem, w2, param);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nhwcd4()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(opr::Convolution::Param::Format::NHWCD4,
               find_opr<opr::Convolution>(y_opt).param().format);
@@ -1059,11 +1050,9 @@ TEST(TestGoptInference, ConvertFormatNHWCD4LOCAL) {
     y = opr::Convolution::make(group_local, w5, param);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nhwcd4()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(opr::Convolution::Param::Format::NHWCD4,
               find_opr<opr::Convolution>(y_opt).param().format);
@@ -1112,11 +1101,9 @@ TEST(TestGoptInference, ConvertFormatNHWCD4Deconv) {
     y = opr::ConvolutionBackwardData::make(w1, conv, param, {}, {});
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nhwcd4()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(opr::Convolution::Param::Format::NCHW,
               find_opr<opr::ConvolutionBackwardData>(y_opt).param().format);
@@ -1159,11 +1146,9 @@ TEST(TestGoptInference, ConvertFormatNHWCD4Qint8) {
             OperatorNodeConfig{dtype::QuantizedS8(0.2f)});
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nhwcd4()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(opr::ConvBias::Param::Format::NHWCD4,
               find_opr<opr::ConvBias>(y_opt).param().format);
@@ -1213,11 +1198,9 @@ TEST(TestGoptInference, ConvertFormatPadIC) {
     auto w1 = mkcvar("w1", {12, 12, 3, 3});
     auto y = opr::Convolution::make(concat, w1, param);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nhwcd4()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     HostTensorND host_y_opt, host_y;
     auto func = graph->compile({make_callback_copy(y, host_y),
@@ -1301,11 +1284,9 @@ TEST(TestGoptInference, ConvBiasNonlinearityFusePass) {
          opr::Elemwise::make({y_cut}, opr::Elemwise::Param::Mode::RELU),
     y_y = opr::Convolution::make(y_expand, w3, param), y = y_y + y_tmp;
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_use_nhwcd4()
-                                       .enable_fuse_conv_bias_nonlinearity()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4().enable_fuse_conv_bias_nonlinearity();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(3u, find_opr<opr::ConvBias>(y_opt).input().size());
     graph->compile({{y_opt, {}}})->to_json()
@@ -1533,15 +1514,16 @@ TEST(TestEnableTensorCore, SmallInputShape) {
     SymbolVar y_opt;
     SymbolVar y_no_tc;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()
-                                       .enable_use_tensor_core()),
-                  y_opt);
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()),
-                  y_no_tc);
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_nchw2nchw32().enable_fuse_conv_bias_nonlinearity();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
+    }
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_no_tc);
+    }
     auto nr_dimshuffle = find_opr_num<mgb::opr::Dimshuffle>(y_opt);
     ASSERT_EQ(2u, nr_dimshuffle);
     HostTensorND host_y, host_y_opt;
@@ -1597,15 +1579,16 @@ TEST(TestEnableTensorCore, ConvBiasWithZ) {
     SymbolVar y_opt;
     SymbolVar y_no_tc;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()
-                                       .enable_use_tensor_core()),
-                  y_opt);
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()),
-                  y_no_tc);
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity().enable_nchw2nchw32();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
+    }
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_no_tc);
+    }
     HostTensorND host_y, host_y_opt;
     auto func = graph->compile({make_callback_copy(y_no_tc, host_y),
                                 make_callback_copy(y_opt, host_y_opt)});
@@ -1664,15 +1647,16 @@ TEST(TestGoptInference, EnableTensorCore) {
     y4 = opr::TypeCvt::make(y4, dtype::Float32());
     SymbolVar y_opt;
     SymbolVar y_no_tc;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y4}, gopt::OptimizeForInferenceOptions{}
-                                        .enable_fuse_conv_bias_nonlinearity()
-                                        .enable_use_tensor_core()),
-                  y_opt);
-    unpack_vector(gopt::optimize_for_inference(
-                          {y4}, gopt::OptimizeForInferenceOptions{}
-                                        .enable_fuse_conv_bias_nonlinearity()),
-                  y_no_tc);
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity().enable_nchw2nchw32();
+        unpack_vector(gopt::optimize_for_inference({y4}, options), y_opt);
+    }
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity().enable_nchw2nchw32();
+        unpack_vector(gopt::optimize_for_inference({y4}, options), y_no_tc);
+    }
     auto nr_dimshuffle = find_opr_num<mgb::opr::Dimshuffle>(y_opt);
     ASSERT_EQ(3u, nr_dimshuffle);
     graph->compile({{y_opt, {}}})
@@ -1763,15 +1747,17 @@ TEST(FuseConvBiasZPass, BlockFuse) {
     SymbolVar z_fuse;
     SymbolVar z_nonfuse;
-    unpack_vector(gopt::optimize_for_inference(
-                          {z}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()
-                                       .enable_fuse_conv_bias_with_z()),
-                  z_fuse);
-    unpack_vector(gopt::optimize_for_inference(
-                          {z4}, gopt::OptimizeForInferenceOptions{}
-                                        .enable_fuse_conv_bias_nonlinearity()),
-                  z_nonfuse);
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity()
+                .enable_fuse_conv_bias_with_z();
+        unpack_vector(gopt::optimize_for_inference({z}, options), z_fuse);
+    }
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity();
+        unpack_vector(gopt::optimize_for_inference({z4}, options), z_nonfuse);
+    }
     auto nr_elem_multi_type = find_opr_num<mgb::opr::ElemwiseMultiType>(z_fuse);
     MGB_MARK_USED_VAR(nr_elem_multi_type);
     ASSERT_EQ(1u, nr_elem_multi_type);
@@ -1867,15 +1853,16 @@ TEST(TestEnableTensorCore, ShuffleMerge) {
     SymbolVar y_opt;
     SymbolVar y_no_tc;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()
-                                       .enable_use_tensor_core()),
-                  y_opt);
-    unpack_vector(gopt::optimize_for_inference(
-                          {y}, gopt::OptimizeForInferenceOptions{}
-                                       .enable_fuse_conv_bias_nonlinearity()),
-                  y_no_tc);
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity().enable_nchw2nchw32();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
+    }
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_fuse_conv_bias_nonlinearity();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_no_tc);
+    }
     auto nr_dimshuffle = find_opr_num<mgb::opr::Dimshuffle>(y_opt);
     ASSERT_EQ(3u, nr_dimshuffle);
     HostTensorND host_y, host_y_opt;
@@ -1932,13 +1919,13 @@ TEST(FuseConvBiasZPass, Basic) {
              opr::ElemwiseMultiType::Param::Mode::QFUSE_ADD_RELU}) {
         auto y1 = opr::ElemwiseMultiType::make(
                 {y, b1}, {mode}, OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
-        unpack_vector(gopt::optimize_for_inference(
-                              {y1}, gopt::OptimizeForInferenceOptions{}
-                                            .enable_fuse_conv_bias_nonlinearity()
-                                            .enable_fuse_conv_bias_with_z()
-                                            .enable_use_tensor_core()),
-                      y_opt);
+        {
+            auto options = gopt::OptimizeForInferenceOptions{};
+            options.enable_fuse_conv_bias_nonlinearity()
+                    .enable_fuse_conv_bias_with_z()
+                    .enable_nchw2nchw32();
+            unpack_vector(gopt::optimize_for_inference({y1}, options), y_opt);
+        }
         auto nr_elemwisemultitype = find_opr_num<opr::ElemwiseMultiType>(y_opt);
         if (mode == opr::ElemwiseMultiType::Param::Mode::QMUL) {
             ASSERT_NE(0u, nr_elemwisemultitype);
@@ -1949,13 +1936,14 @@ TEST(FuseConvBiasZPass, Basic) {
         auto y2 = opr::ElemwiseMultiType::make(
                 {y1, b2}, {mode}, OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
-        unpack_vector(gopt::optimize_for_inference(
-                              {y2}, gopt::OptimizeForInferenceOptions{}
-                                            .enable_fuse_conv_bias_nonlinearity()
-                                            .enable_fuse_conv_bias_with_z()
-                                            .enable_use_tensor_core()),
-                      y_opt);
+        {
+            auto options = gopt::OptimizeForInferenceOptions{};
+            options.enable_fuse_conv_bias_nonlinearity()
+                    .enable_fuse_conv_bias_with_z()
+                    .enable_nchw2nchw32();
+            unpack_vector(gopt::optimize_for_inference({y2}, options), y_opt);
+        }
         auto nr_elemwisemultitype = find_opr_num<opr::ElemwiseMultiType>(y_opt);
         ASSERT_NE(0u, nr_elemwisemultitype);
@@ -2401,11 +2389,11 @@ TEST(TestGoptInference, ConvertFormatNCHW88) {
     y = opr::ConvBias::make(conv5, w6, b6, param_conv_bias);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nchw88()),
-                  y_opt);
+    {
+        auto options = gopt::OptimizeForInferenceOptions{};
+        options.enable_nchw2nchw88();
+        unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
+    }
     ASSERT_EQ(opr::ConvBias::Param::Format::NCHW88,
               find_opr<opr::ConvBias>(y_opt).param().format);
@@ -2483,11 +2471,9 @@ TEST(TestGoptInference, ConvertFormatNCHW44) {
     y = opr::ConvBias::make(conv5, w6, b6, param_conv_bias);
     SymbolVar y_opt;
-    unpack_vector(gopt::optimize_for_inference(
-                          {y},
-                          gopt::OptimizeForInferenceOptions{}.enable_use_nchw44()),
-                  y_opt);
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nchw44();
+    unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
     ASSERT_EQ(opr::ConvBias::Param::Format::NCHW44,
               find_opr<opr::ConvBias>(y_opt).param().format);
src/opr/test/dnn/convolution.cpp

@@ -495,7 +495,7 @@ TEST(TestOprDNN, ConvolutionBackwardFilter) {
             Param{Mode::CROSS_CORRELATION, PH, PW, SH, SW});
     dest[0] = *out;
 };
 #define get_shp(N, P, S, F) ((N + 2 * P - F) / S + 1)
 #define inp_tensor(N, IC, OC, IH, IW, FH, FW) \
     { TensorShape{N, IC, IH, IW}, \

@@ -1282,9 +1282,10 @@ TEST(TestOprDNN, ConvBiasINT8x8xX_NCHW4) {
                 *graph, inp[i]);
     }
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_fuse_conv_bias_nonlinearity();
     auto y = gopt::optimize_for_inference(
-            {make_graph(inputs)[0]},
-            gopt::OptimizeForInferenceOptions{}.enable_fuse_conv_bias_nonlinearity())[0];
-            //gopt::OptimizeForInferenceOptions{})[0];
+            {make_graph(inputs)[0]},
+            options)[0];
     auto func = graph->compile({make_callback_copy(y, dest[0])});
     func->execute();
     func->wait();

@@ -1720,7 +1721,7 @@ TEST(TestOprDNN, DeformableConvForward) {
         }
     };
     //! generate offset to avoid value near integer
-    /// because bilinear function is not derivable over there
+    /// because bilinear function is not derivable over there
     checker.set_input_generator(2, gen_off);
     checker.set_input_dtype(0, dtype::Float32());
     checker.set_input_dtype(1, dtype::Float32());
src/opr/test/io.cpp

@@ -500,10 +500,10 @@ TEST(TestOprIO, MultipleDeviceTensorWithFormatHolderCpu) {
         conv2 = opr::Convolution::make(conv1, w2, param);
     auto y = opr::Elemwise::make({conv2}, opr::Elemwise::Param::Mode::RELU);
-    SymbolVar y_opt = gopt::optimize_for_inference(
-                              {y}, gopt::OptimizeForInferenceOptions{}
-                                           .enable_use_nhwcd4())[0]
-                              .rename("out");
+    auto options = gopt::OptimizeForInferenceOptions{};
+    options.enable_nchw2nhwcd4();
+    SymbolVar y_opt =
+            gopt::optimize_for_inference({y}, options)[0].rename("out");
     auto dumper = serialization::GraphDumper::make(
             serialization::OutputFile::make_fs(fname.c_str()));