Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
add3a1bc
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
add3a1bc
编写于
9月 24, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb/opr): add weight preprocess option
GitOrigin-RevId: 9d83a174fad2c4d6a1bca86c8597c8b6b4544376
上级
ee2e2b3c
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
62 addition
and
0 deletion
+62
-0
sdk/load-and-run/src/mgblar.cpp
sdk/load-and-run/src/mgblar.cpp
+25
-0
src/core/include/megbrain/graph/cg.h
src/core/include/megbrain/graph/cg.h
+4
-0
src/opr/impl/dnn/convolution.cpp
src/opr/impl/dnn/convolution.cpp
+3
-0
src/opr/test/dnn/convolution.cpp
src/opr/test/dnn/convolution.cpp
+30
-0
未找到文件。
sdk/load-and-run/src/mgblar.cpp
浏览文件 @
add3a1bc
...
@@ -194,6 +194,26 @@ R"__usage__(
...
@@ -194,6 +194,26 @@ R"__usage__(
Execute operators with kernels implemented in MegDNN with CHWN4 tensor format. Can only be used
Execute operators with kernels implemented in MegDNN with CHWN4 tensor format. Can only be used
on Nvidia GPUs, whose compute capability is above 6.1.
on Nvidia GPUs, whose compute capability is above 6.1.
)__usage__"
)__usage__"
R"__usage__(
--enable-nchw44
Execute operators with kernels implemented in MegDNN with NCHW44 tensor format. This can only
be used on arm of armv7 and arm64, support data tyep of float32, qint8 and int8x8x16.
)__usage__"
R"__usage__(
--enable-nhw88
Execute operators with kernels implemented in MegDNN with NCHW88 tensor format. This can only
be used on x86 with data type float.
)__usage__"
R"__usage__(
--enable-nhw44-dot
Execute operators with kernels implemented in MegDNN with NCHW44-DOT tensor format. This Can
only be used on arm32 and arm64 with dot-product supported, and only support qint8 model
)__usage__"
R"__usage__(
--weight-preprocess
Execute operators with weight preprocess, which can optimize the operator execution time with
algo of winograd, im2col ,etc., but it may consume more memory.
)__usage__"
;
;
...
@@ -1226,6 +1246,11 @@ Args Args::from_argv(int argc, char **argv) {
...
@@ -1226,6 +1246,11 @@ Args Args::from_argv(int argc, char **argv) {
graph_opt
.
graph_opt
.
weight_winograd_transform
=
true
;
graph_opt
.
graph_opt
.
weight_winograd_transform
=
true
;
continue
;
continue
;
}
}
if
(
!
strcmp
(
argv
[
i
],
"--weight-preprocess"
))
{
mgb_log_warn
(
"enable weight-preprocess optimization"
);
graph_opt
.
graph_opt
.
enable_weight_preprocess
();
continue
;
}
fprintf
(
stderr
,
"invalid arg: %s
\n
"
,
argv
[
i
]);
fprintf
(
stderr
,
"invalid arg: %s
\n
"
,
argv
[
i
]);
ret
.
args_parse_ret
=
-
1
;
ret
.
args_parse_ret
=
-
1
;
...
...
src/core/include/megbrain/graph/cg.h
浏览文件 @
add3a1bc
...
@@ -97,6 +97,9 @@ struct GraphCommonOptimizeOptions {
...
@@ -97,6 +97,9 @@ struct GraphCommonOptimizeOptions {
bool
fuse_conv_bias_with_z
=
false
;
bool
fuse_conv_bias_with_z
=
false
;
//! whether to enable fast-run profiled winograd opr replace
//! whether to enable fast-run profiled winograd opr replace
bool
weight_winograd_transform
=
false
;
bool
weight_winograd_transform
=
false
;
//! whether to enable weight preprocess, if enabled it may use more
//! memory, default disable now
bool
weight_preprocess
=
false
;
enum
LayoutTransform
:
uint32_t
{
enum
LayoutTransform
:
uint32_t
{
DEFAULT
,
DEFAULT
,
NCHW4
,
///< compute using NCHW4 tensor format
NCHW4
,
///< compute using NCHW4 tensor format
...
@@ -127,6 +130,7 @@ struct GraphCommonOptimizeOptions {
...
@@ -127,6 +130,7 @@ struct GraphCommonOptimizeOptions {
SET
(
fuse_conv_bias_nonlinearity
);
SET
(
fuse_conv_bias_nonlinearity
);
SET
(
fuse_conv_bias_with_z
);
SET
(
fuse_conv_bias_with_z
);
SET
(
weight_winograd_transform
);
SET
(
weight_winograd_transform
);
SET
(
weight_preprocess
);
#undef SET
#undef SET
#define SET(_trans, _trans_capital) \
#define SET(_trans, _trans_capital) \
GraphCommonOptimizeOptions& enable_##_trans() { \
GraphCommonOptimizeOptions& enable_##_trans() { \
...
...
src/opr/impl/dnn/convolution.cpp
浏览文件 @
add3a1bc
...
@@ -963,6 +963,9 @@ void mixin::WeightPreprocessExecutor::record_preprocessed_weight(
...
@@ -963,6 +963,9 @@ void mixin::WeightPreprocessExecutor::record_preprocessed_weight(
bool
mixin
::
WeightPreprocessExecutor
::
mixin_allow_weight_preprocess
(
bool
mixin
::
WeightPreprocessExecutor
::
mixin_allow_weight_preprocess
(
const
cg
::
OperatorNodeBase
&
opr
)
const
{
const
cg
::
OperatorNodeBase
&
opr
)
const
{
if
(
!
opr
.
owner_graph
()
->
options
().
graph_opt
.
weight_preprocess
)
{
return
false
;
}
if
(
!
opr
.
input
(
1
)
->
contain_flag
(
VarNode
::
Flag
::
PERSISTENT_DEVICE_VALUE
))
if
(
!
opr
.
input
(
1
)
->
contain_flag
(
VarNode
::
Flag
::
PERSISTENT_DEVICE_VALUE
))
return
false
;
return
false
;
if
(
cg
::
is_const_var_value
(
opr
.
input
(
1
)))
if
(
cg
::
is_const_var_value
(
opr
.
input
(
1
)))
...
...
src/opr/test/dnn/convolution.cpp
浏览文件 @
add3a1bc
...
@@ -2225,6 +2225,7 @@ protected:
...
@@ -2225,6 +2225,7 @@ protected:
iw
=
ih
;
iw
=
ih
;
comp_node
=
CompNode
::
load
(
"cpux"
);
comp_node
=
CompNode
::
load
(
"cpux"
);
graph
=
ComputingGraph
::
make
();
graph
=
ComputingGraph
::
make
();
graph
->
options
().
graph_opt
.
weight_preprocess
=
is_weight_preprocess
();
TensorShape
x_shape
{
1
,
ic
,
ih
,
iw
},
w_shape
{
oc
,
ic
,
fh
,
fh
};
TensorShape
x_shape
{
1
,
ic
,
ih
,
iw
},
w_shape
{
oc
,
ic
,
fh
,
fh
};
x_host
=
std
::
make_shared
<
HostTensorND
>
(
comp_node
,
x_shape
);
x_host
=
std
::
make_shared
<
HostTensorND
>
(
comp_node
,
x_shape
);
auto
x
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
x_host
);
auto
x
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
x_host
);
...
@@ -2247,6 +2248,8 @@ protected:
...
@@ -2247,6 +2248,8 @@ protected:
void
run
()
{
func
->
execute
().
wait
();
}
void
run
()
{
func
->
execute
().
wait
();
}
virtual
bool
is_weight_preprocess
()
{
return
true
;
}
void
TearDown
()
override
{
void
TearDown
()
override
{
func
.
reset
();
func
.
reset
();
// Triggers mock check
// Triggers mock check
...
@@ -2346,6 +2349,33 @@ TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) {
...
@@ -2346,6 +2349,33 @@ TEST_F(TestWeightPreprocess, PreprocessCalledOnlyOnce) {
}
}
}
}
class
TestNoWeightPreprocess
:
public
TestWeightPreprocess
{
bool
is_weight_preprocess
()
override
{
return
false
;
}
};
TEST_F
(
TestNoWeightPreprocess
,
NoPreprocess
)
{
using
::
testing
::
_
;
using
::
testing
::
Return
;
auto
&
mock
=
mock_conv
();
MockAlgorithm
algo
;
EXPECT_CALL
(
mock
,
get_algorithm_heuristic
(
_
,
_
,
_
,
_
,
_
))
.
WillRepeatedly
(
Return
(
&
algo
));
EXPECT_CALL
(
mock
,
get_workspace_in_bytes
(
_
,
_
,
_
,
_
))
.
WillRepeatedly
(
Return
(
0
));
EXPECT_CALL
(
mock
,
get_preprocess_workspace_in_bytes
(
_
,
_
,
_
))
.
WillRepeatedly
(
Return
(
0
));
{
::
testing
::
InSequence
seq
;
// Return empty preprocess filters, indicating no need to preprocess
EXPECT_CALL
(
mock
,
deduce_preprocessed_filter_layout
(
_
,
_
,
_
)).
Times
(
0
);
EXPECT_CALL
(
mock
,
exec_preprocess
(
_
,
_
,
_
,
_
,
_
)).
Times
(
0
);
EXPECT_CALL
(
mock
,
exec
(
_
,
_
,
_
,
nullptr
,
_
));
run
();
}
}
}
// anonymous namespace
}
// anonymous namespace
#endif
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录