MegEngine 天元 / MegEngine

Commit 12dc36a6
Authored Jun 10, 2020 by Megvii Engine Team; committed by Xu Xinran on Jun 19, 2020

feat(mgb/gopt): add interface to reproducible

GitOrigin-RevId: f341bea40b6e52f4598640b81b477184d8473421
Parent: cc4e1dfd
Showing 3 changed files with 74 additions and 44 deletions (+74 -44):

    sdk/load-and-run/src/mgblar.cpp             +26  -2
    src/gopt/impl/inference.cpp                 +33  -40
    src/gopt/include/megbrain/gopt/inference.h  +15  -2
sdk/load-and-run/src/mgblar.cpp

@@ -14,6 +14,7 @@
 #include "./json_loader.h"
 #include "./npy.h"
 #include "megbrain/opr/dnn/convolution.h"
+#include "megbrain/utils/debug.h"
 #include "megbrain/serialization/serializer.h"
 #include "megbrain/serialization/extern_c_opr.h"
@@ -144,6 +145,10 @@ R"__usage__(
 R"__usage__(
 --fast-run-algo-policy <path>
   It will read the cache file before profile, and save new fastrun in cache file.
+--reproducible
+  Enable choose algo which is reproducible. It mainly used for cudnn algos.
+  See https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#reproducibility
+  for more details.
 --wait-gdb
   Print PID and wait for a line from stdin before starting execution. Useful
   for waiting for gdb attach.
@@ -467,6 +472,7 @@ struct Args {
 #if MGB_ENABLE_FASTRUN
     bool use_fast_run = false;
 #endif
+    bool reproducible = false;
     std::string fast_run_cache_path;
     bool copy_to_host = false;
     int nr_run = 10;
@@ -647,10 +653,24 @@ void run_test_st(Args &env) {
     }
     mgb::gopt::set_opr_algo_workspace_limit_inplace(
             vars, env.workspace_limit);
+    using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
+    S strategy = S::HEURISTIC;
 #if MGB_ENABLE_FASTRUN
-    if (env.use_fast_run)
-        mgb::gopt::enable_opr_algo_profiling_inplace(vars);
+    if (env.use_fast_run) {
+        if (env.reproducible) {
+            strategy = S::PROFILE_REPRODUCIBLE;
+        } else {
+            strategy = S::PROFILE;
+        }
+    } else if (env.reproducible) {
+        strategy = S::HEURISTIC_REPRODUCIBLE;
+    }
+#else
+    if (env.reproducible) {
+        strategy = S::HEURISTIC_REPRODUCIBLE;
+    }
 #endif
+    mgb::gopt::modify_opr_algo_strategy_inplace(vars, strategy);
     if (!env.fast_run_cache_path.empty()) {
 #if MGB_ENABLE_FASTRUN
         if (!access(env.fast_run_cache_path.c_str(), F_OK)) {
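The run_test_st changes above reduce to a four-way mapping from the two flags to an execution strategy. The following standalone sketch restates that mapping for readability; the pick_strategy helper is hypothetical (not part of the commit) and assumes a build with MGB_ENABLE_FASTRUN, since modify_opr_algo_strategy_inplace rejects the PROFILE strategies otherwise.

// Hypothetical restatement of the flag-to-strategy mapping in run_test_st();
// not part of this commit.
#include "megbrain/opr/dnn/convolution.h"

static mgb::opr::mixin::Convolution::ExecutionPolicy::Strategy pick_strategy(
        bool use_fast_run, bool reproducible) {
    using S = mgb::opr::mixin::Convolution::ExecutionPolicy::Strategy;
    if (use_fast_run) {
        // fast-run profiling requested: restrict to reproducible algos if asked
        return reproducible ? S::PROFILE_REPRODUCIBLE : S::PROFILE;
    }
    // no profiling: heuristic choice, optionally restricted to reproducible algos
    return reproducible ? S::HEURISTIC_REPRODUCIBLE : S::HEURISTIC;
}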
@@ -1149,6 +1169,10 @@ Args Args::from_argv(int argc, char **argv) {
             ret.fast_run_cache_path = argv[i];
             continue;
         }
+        if (!strcmp(argv[i], "--reproducible")) {
+            ret.reproducible = true;
+            continue;
+        }
         if (!strcmp(argv[i], "--const-shape")) {
             ret.load_config.const_var_shape = true;
             continue;
src/gopt/impl/inference.cpp

@@ -104,25 +104,21 @@ SymbolVarArray gopt::optimize_for_inference(
 }
 
 namespace {
-void modify_conv_policy(opr::mixin::Convolution& conv,
-                        megdnn::param::ExecutionPolicy::Strategy strategy) {
+void modify_conv_strategy(
+        opr::mixin::Convolution& conv,
+        opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
     auto policy = conv.execution_policy_transient();
     policy.strategy = strategy;
     conv.set_execution_policy(policy);
 }
 
 template <typename Opr>
-void inplace_conv_opr_profile_modifier(OperatorNodeBase& opr) {
-    modify_conv_policy(
-            opr.cast_final_safe<Opr>(),
-            opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE);
-}
-
-template <typename Opr>
-void inplace_conv_opr_profile_cache_modifier(OperatorNodeBase& opr) {
-    modify_conv_policy(
-            opr.cast_final_safe<Opr>(),
-            opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE_HEURISTIC);
+void inplace_conv_opr_modifier(
+        OperatorNodeBase& opr,
+        opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
+    modify_conv_strategy(opr.cast_final_safe<Opr>(), strategy);
 }
 
 void modify_conv_policy_workspace_limit(opr::mixin::Convolution& conv,
@@ -150,12 +146,20 @@ void inplace_conv_opr_workspace_limit_modifier(OperatorNodeBase& opr,
     cb(DeformableConvBackwardFilter), cb(DeformableConvBackwardData), \
     cb(BatchConvBiasForward),
 
-void gopt::enable_opr_algo_profiling_inplace(
-        const VarNodeArrayView& dest_vars) {
-#if MGB_ENABLE_FASTRUN
-    static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&)>
-            modifiers = {
-#define CONV(t) {opr::t::typeinfo(), &inplace_conv_opr_profile_modifier<opr::t>}
+void gopt::modify_opr_algo_strategy_inplace(
+        const VarNodeArrayView& dest_vars,
+        opr::mixin::Convolution::ExecutionPolicy::Strategy strategy) {
+#if !MGB_ENABLE_FASTRUN
+    using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
+    if (strategy == S::PROFILE || strategy == S::PROFILE_REPRODUCIBLE) {
+        mgb_throw(MegBrainError, "fastrun is disabled at compile time");
+    }
+#endif
+    const ThinHashMap<Typeinfo*, std::function<void(OperatorNodeBase&)>>
+            modifiers = {
+#define CONV(t) \
+    {opr::t::typeinfo(), std::bind(inplace_conv_opr_modifier<opr::t>, \
+                                   std::placeholders::_1, strategy)}
             MGB_FOREACH_FASTRUN_OPR(CONV)
 #undef CONV
             };
@@ -171,34 +175,23 @@ void gopt::enable_opr_algo_profiling_inplace(
     for (auto i : dest_vars) {
         dep_iter.add(i);
     }
-#else
-    mgb_throw(MegBrainError, "fastrun is disabled at compile time");
-#endif
 }
 
-void gopt::enable_opr_use_profiling_cache_inplace(
+void gopt::enable_opr_algo_profiling_inplace(
         const VarNodeArrayView& dest_vars) {
-    static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&)>
-            modifiers = {
-#define CONV(t) \
-    {opr::t::typeinfo(), &inplace_conv_opr_profile_cache_modifier<opr::t>}
-            MGB_FOREACH_FASTRUN_OPR(CONV)
-#undef CONV
-            };
-
-    auto on_opr = [&](OperatorNodeBase* opr) {
-        auto iter = modifiers.find(opr->dyn_typeinfo());
-        if (iter != modifiers.end()) {
-            iter->second(*opr);
-        }
-    };
-
-    cg::DepOprIter dep_iter{on_opr};
-    for (auto i : dest_vars) {
-        dep_iter.add(i);
-    }
+    modify_opr_algo_strategy_inplace(
+            dest_vars,
+            opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE);
+}
+
+void gopt::enable_opr_use_profiling_cache_inplace(
+        const VarNodeArrayView& dest_vars) {
+    modify_opr_algo_strategy_inplace(
+            dest_vars,
+            opr::mixin::Convolution::ExecutionPolicy::Strategy::PROFILE_HEURISTIC);
 }
 
 void gopt::set_opr_algo_workspace_limit_inplace(
         const VarNodeArrayView& dest_vars, size_t workspace_limit) {
     static const ThinHashMap<Typeinfo*, void (*)(OperatorNodeBase&, size_t)>
src/gopt/include/megbrain/gopt/inference.h

@@ -13,6 +13,7 @@
 #include "megbrain/gopt/framework.h"
 #include "megbrain/graph/cg.h"
+#include "megbrain/opr/dnn/convolution.h"
 
 namespace mgb {
 namespace gopt {
@@ -302,6 +303,17 @@ namespace gopt {
             const SymbolVarArray& dest_vars,
             const OptimizeForInferenceOptions& opt = {});
 
+    /*!
+     * \brief modify execution strategy for oprs with multiple
+     * algorithms
+     *
+     * This would modify the operators inplace. It can be used for implement
+     * the fast-run mode.
+     */
+    void modify_opr_algo_strategy_inplace(
+            const VarNodeArrayView& dest_vars,
+            opr::mixin::Convolution::ExecutionPolicy::Strategy strategy);
+
     /*!
      * \brief enable PROFILE execution strategy for oprs with multiple
      * algorithms
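The declaration above is the new entry point that the rest of this commit funnels through. A minimal caller sketch follows; the request_reproducible helper, its placement inside namespace mgb (to mirror the header's unqualified type names), and the choice of strategies are illustrative assumptions, not part of the commit.

// Illustrative caller of the new interface; not part of MegEngine.
#include "megbrain/gopt/inference.h"

namespace mgb {

// Ask every fastrun-capable operator that dest_vars depend on to use only
// reproducible algorithms. PROFILE_REPRODUCIBLE also profiles candidates and
// needs MGB_ENABLE_FASTRUN; HEURISTIC_REPRODUCIBLE works without fastrun.
void request_reproducible(const VarNodeArrayView& dest_vars, bool profile) {
    using S = opr::mixin::Convolution::ExecutionPolicy::Strategy;
    auto strategy = profile ? S::PROFILE_REPRODUCIBLE : S::HEURISTIC_REPRODUCIBLE;
    gopt::modify_opr_algo_strategy_inplace(dest_vars, strategy);
}

}  // namespace mgb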
@@ -315,7 +327,7 @@ namespace gopt {
     void enable_opr_algo_profiling_inplace(const VarNodeArrayView& dest_vars);
 
     /*!
-     * \brief enable opr try profiling cache first, if failed, then try
+     * \brief enable opr try profiling cache first, if failed, fallback to
      * heuristic
      *
      * This would modify the operators inplace. It is usually used to enable
@@ -324,7 +336,8 @@ namespace gopt {
      * You may want to implement TimedFuncInvoker::ForkExecImpl and/or
      * PersistentCache for better performance in an SDK.
      */
-    void enable_opr_use_profiling_cache_inplace(const VarNodeArrayView& dest_vars);
+    void enable_opr_use_profiling_cache_inplace(
+            const VarNodeArrayView& dest_vars);
 
     /*!
      * \brief set workspace_limit for execution strategy for oprs with multiple