Commit 12a3ef8d
Authored Mar 09, 2022 by Megvii Engine Team

refactor(fastrun): decouple fastrun from computing graph
GitOrigin-RevId: 27abd222950e36287784e58ad56d6819e9c4cb69
Parent 0a6f4a88

Showing 13 changed files with 43 additions and 1913 deletions (+43 −1913)
src/CMakeLists.txt                                             +3   -1
src/core/include/megbrain/utils/invoke.h                       +0   -0
src/gopt/impl/global_layout_transform/opr_format_modifier.cpp  +1   -1
src/gopt/impl/inference.cpp                                    +1   -1
src/opr/impl/blas.cpp                                          +0   -1
src/opr/impl/dnn/convolution.cpp                               +1   -1
src/opr/impl/internal/megdnn_opr_wrapper.cpp                   +0   -48
src/opr/impl/search_policy/algo_chooser.cpp                    +25  -1105
src/opr/impl/search_policy/profiler.cpp                        +0   -413
src/opr/impl/search_policy/workspace_need_limit_getter.inl     +2   -2
src/opr/include/megbrain/opr/internal/megdnn_opr_wrapper.h     +1   -37
src/opr/include/megbrain/opr/search_policy/algo_chooser.h      +9   -138
src/opr/include/megbrain/opr/search_policy/profiler.h          +0   -165
src/CMakeLists.txt

@@ -18,6 +18,7 @@ file(
     opr/impl/nvof/*.cpp
     plugin/impl/*.cpp
     serialization/impl/*.cpp
+    rdnn/impl/*.cpp
     core/impl/*.inl
     gopt/impl/*.inl
     opr/impl/*.inl

@@ -53,7 +54,8 @@ set(MGB_INC
     ${CMAKE_CURRENT_LIST_DIR}/gopt/include
     ${CMAKE_CURRENT_LIST_DIR}/opr/include
     ${CMAKE_CURRENT_LIST_DIR}/plugin/include
-    ${CMAKE_CURRENT_LIST_DIR}/serialization/include)
+    ${CMAKE_CURRENT_LIST_DIR}/serialization/include
+    ${CMAKE_CURRENT_LIST_DIR}/rdnn/include)

 if(MGE_WITH_JIT)
   list(APPEND MGB_INC ${CMAKE_CURRENT_LIST_DIR}/jit/include)
src/opr/impl/internal/invoke.h → src/core/include/megbrain/utils/invoke.h
File moved.
src/gopt/impl/global_layout_transform/opr_format_modifier.cpp

@@ -183,7 +183,7 @@ struct OprWithPolicyMaker<opr::BatchConvBiasForward>
         MakeOprWithPolicyCaller4<megdnn::BatchConvBiasForward>,
         megdnn::param::BatchConvBias> {};

-#include "../../opr/impl/internal/invoke.h"
+#include "megbrain/utils/invoke.h"

 template <typename Opr>
 struct MultiAlgoOprTrait;
src/gopt/impl/inference.cpp

@@ -23,8 +23,8 @@
 #include "megbrain/opr/imgproc.h"
 #include "megbrain/opr/misc.h"
 #include "megbrain/opr/nn_int.h"
+#include "megbrain/opr/search_policy/algo_chooser.h"
 #include "megbrain/opr/search_policy/algo_chooser_helper.h"
-#include "megbrain/opr/search_policy/profiler.h"
 #include "megbrain/opr/tensor_gen.h"
 #include "megbrain/opr/tensor_manip.h"
 #include "megbrain/opr/utility.h"
src/opr/impl/blas.cpp

@@ -19,7 +19,6 @@
 #include "megbrain/opr/tensor_manip.h"
 #include "megbrain/opr/search_policy/algo_chooser.h"
-#include "megbrain/opr/search_policy/profiler.h"
 #include "./internal/megdnn_opr_wrapper.inl"
 #include "./search_policy/workspace_need_limit_getter.inl"
src/opr/impl/dnn/convolution.cpp

@@ -18,11 +18,11 @@
 #include "megbrain/graph/grad_impl.h"
 #include "megbrain/system.h"
 #include "megbrain/utils/hash_ct.h"
+#include "megbrain/utils/invoke.h"
 #include "megbrain/utils/timer.h"
 #include "megdnn/oprs/utils.h"
-#include "../internal/invoke.h"
 #include "../internal/megdnn_opr_wrapper.inl"
 #include "../search_policy/workspace_need_limit_getter.inl"
src/opr/impl/internal/megdnn_opr_wrapper.cpp

@@ -25,26 +25,6 @@ using namespace mixin;
 /* ================== global functions ================== */
 namespace {
-template <class Opr>
-class MegDNNGlobalOprContainer final : public UserDataContainer::UserData {
-    MGB_TYPEINFO_OBJ_DECL;
-
-    std::shared_ptr<megdnn::Handle> m_megdnn_handle;
-    std::unique_ptr<Opr> m_opr;
-
-public:
-    MegDNNGlobalOprContainer(CompNode cn)
-            : m_megdnn_handle{get_megdnn_handle_shared(cn)},
-              m_opr{m_megdnn_handle->create_operator<Opr>()} {
-        mgb_assert(m_opr->is_thread_safe());
-    }
-
-    Opr* get() const { return m_opr.get(); }
-};
-
-template <class Opr>
-MGB_TYPEINFO_OBJ_IMPL(MegDNNGlobalOprContainer<Opr>);
-
 class TempStorageContainer final : public UserDataContainer::UserData {
     MGB_TYPEINFO_OBJ_DECL;

@@ -55,34 +35,6 @@ public:
 MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer);
 }  // anonymous namespace

-std::shared_ptr<megdnn::Handle> intl::get_megdnn_handle_shared(CompNode comp_node) {
-    auto& handle = MegDNNHandle::get(CompNodeEnv::from_comp_node(comp_node));
-    return {handle.shared_from_this(), handle.handle()};
-}
-
-megdnn::Handle* intl::get_megdnn_handle(CompNode comp_node) {
-    return MegDNNHandle::get(CompNodeEnv::from_comp_node(comp_node)).handle();
-}
-
-template <typename Opr>
-Opr* intl::get_megdnn_global_opr(CompNode comp_node) {
-    using T = MegDNNGlobalOprContainer<Opr>;
-    auto maker = [comp_node]() { return std::make_shared<T>(comp_node); };
-    return CompNodeEnv::from_comp_node(comp_node).get_user_data<T>(maker).get();
-}
-
-namespace mgb {
-namespace opr {
-namespace intl {
-#define INST(o) template o* get_megdnn_global_opr<o>(CompNode)
-INST(megdnn::AddUpdate);
-INST(megdnn::Relayout);
-INST(megdnn::Checksum);
-#undef INST
-}  // namespace intl
-}  // namespace opr
-}  // namespace mgb
-
 DeviceTensorStorage& intl::get_temp_storage(ComputingGraph& graph, CompNode comp_node) {
     auto container = graph.options().user_data.get_user_data_or_create<TempStorageContainer>();
src/opr/impl/search_policy/algo_chooser.cpp
This diff is collapsed.
src/opr/impl/search_policy/profiler.cpp
deleted (100644 → 0)

/**
 * \file src/opr/impl/search_policy/profile.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#include "megbrain/opr/search_policy/profiler.h"

#include "../internal/invoke.h"
#include "../internal/megdnn_opr_wrapper.inl"
#include "megdnn/handle.h"
#include "megdnn/oprs/base.h"

#if MGB_ROCM
#include "hcc_detail/hcc_defs_prologue.h"
#include "megcore_rocm.h"
#endif

//! TODO: some megdnn oprs have to be known here when midout.h is produced;
//! fix it if there is a more graceful way.
#include "megdnn/oprs.h"

#include "midout.h"
MIDOUT_DECL(megbrain_opr_profile)
#define MIDOUT_B(...) MIDOUT_BEGIN(megbrain_opr_profile, __VA_ARGS__) {
#define MIDOUT_E \
    }            \
    MIDOUT_END();
namespace {
std::string serialize_policy(const megdnn::ExecutionPolicy& policy) {
    std::string ret;
    //! serialize AlgorithmDesc
    megdnn::Algorithm::serialize_write_pod(policy.algo.handle_type, ret);
    megdnn::Algorithm::serialize_write_pod(policy.algo.type, ret);
    uint32_t param_size = policy.algo.param.size();
    uint32_t name_size = policy.algo.name.size();
    megdnn::Algorithm::serialize_write_pod<uint32_t>(param_size, ret);
    megdnn::Algorithm::serialize_write_pod<uint32_t>(name_size, ret);
    ret += policy.algo.param;
    ret += policy.algo.name;
    //! serialize sub_policy
    uint32_t size = policy.sub_policy.size();
    megdnn::Algorithm::serialize_write_pod(size, ret);
    for (auto&& sub : policy.sub_policy) {
        ret += serialize_policy(sub);
    }
    return ret;
}

megdnn::ExecutionPolicy deserialize_policy(
        const char* buf, uint32_t size, uint32_t& offset) {
    megdnn::ExecutionPolicy ret;
#define cb(_val, _type)                                                 \
    _val = megdnn::Algorithm::deserialize_read_pod<_type>(buf, offset); \
    offset += sizeof(_val)

    cb(ret.algo.handle_type, megdnn::Handle::HandleType);
    cb(ret.algo.type, uint32_t);

    uint32_t param_size = 0;
    uint32_t name_size = 0;
    cb(param_size, uint32_t);
    cb(name_size, uint32_t);

    if (param_size > 0) {
        ret.algo.param = std::string(buf + offset, param_size);
        offset += param_size;
    }
    if (name_size > 0) {
        ret.algo.name = std::string(buf + offset, name_size);
        offset += name_size;
    }

    uint32_t nr_policy = 0;
    cb(nr_policy, uint32_t);
#undef cb

    for (uint32_t i = 0; i < nr_policy; i++) {
        ret.sub_policy.push_back(deserialize_policy(buf, size, offset));
    }
    return ret;
}
}  // namespace
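
Taken together, serialize_policy and deserialize_policy define a length-prefixed, recursive wire format. As a reading aid only (this sketch is not part of the original file), the blob written by serialize_policy is laid out as:

// [algo.handle_type]       POD, via serialize_write_pod
// [algo.type]              POD
// [param_size : u32]       length prefix
// [name_size  : u32]       length prefix
// [param bytes]            raw string payload
// [name bytes]             raw string payload
// [nr_sub_policy : u32]    count, then each sub-policy serialized
//                          recursively in the same format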
namespace mgb {
namespace opr {

#define APPLY(statement, ...)                               \
    mgb::apply(                                             \
            [&](const auto&... args) { return statement; }, \
            std::tuple_cat(__VA_ARGS__))
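
APPLY is what lets the profiler below invoke arity-generic megdnn exec() signatures: all argument tuples are concatenated and expanded into one parameter pack. A minimal, self-contained C++17 sketch of the same pattern, with std::apply standing in for mgb::apply (an assumption for illustration; the real mgb::apply lives in megbrain):

#include <cstdio>
#include <tuple>

int main() {
    // The generic lambda receives the concatenated pack, mirroring
    // [&](const auto&... args) { return statement; } in APPLY above.
    auto call = [](const auto&... args) { return (args + ... + 0); };
    int sum = std::apply(call, std::tuple_cat(std::make_tuple(1, 2), std::make_tuple(3)));
    std::printf("sum = %d\n", sum);  // prints: sum = 6
}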
////////////// TimedProfiler::Param::ExecutionPolicyBlob //////////////////////

template <typename Opr>
typename TimedProfiler<Opr>::Param::ExecutionPolicyBlob
TimedProfiler<Opr>::Param::ExecutionPolicyBlob::serialize(
        const megdnn::ExecutionPolicy& policy) {
    ExecutionPolicyBlob ret;
    std::string serialize_bin = serialize_policy(policy);
    mgb_assert(serialize_bin.size() < MAX_SIZE_IN_BYTES);
    memcpy(ret.data, serialize_bin.data(), serialize_bin.size());
    ret.size = serialize_bin.size();
    return ret;
}

template <typename Opr>
megdnn::ExecutionPolicy TimedProfiler<Opr>::Param::ExecutionPolicyBlob::deserialize()
        const {
    uint32_t offset = 0;
    auto&& ret = deserialize_policy(data, size, offset);
    mgb_assert(offset == size);
    return std::move(ret);
}

#define INST(Opr)                                                             \
    template typename TimedProfiler<megdnn::Opr>::Param::ExecutionPolicyBlob \
    TimedProfiler<megdnn::Opr>::Param::ExecutionPolicyBlob::serialize(       \
            const megdnn::ExecutionPolicy& policy);                          \
    template megdnn::ExecutionPolicy                                         \
    TimedProfiler<megdnn::Opr>::Param::ExecutionPolicyBlob::deserialize() const;
MGB_FOREACH_FASTRUN_OPR(INST)
#undef INST
////////////////// TimedProfiler //////////////////////////////

template <typename Opr>
const double TimedProfiler<Opr>::timeout_setting =
        TimedProfiler<Opr>::init_timeout_setting();

template <typename Opr>
double TimedProfiler<Opr>::init_timeout_setting() {
#if MGB_ENABLE_FASTRUN
    sys::TimedFuncInvoker::ins().register_func(
            AlgoChooserFuncId<Opr>::ID, &TimedProfiler<Opr>::prof_impl,
            &TimedProfiler<Opr>::prof_init_device);
    auto to_set = MGB_GETENV("MGB_CONV_PROFILING_TIMEOUT");
    if (to_set)
        return std::stod(to_set);
#endif
    return 0;
}
template <typename Opr>
void TimedProfiler<Opr>::preprocess(
        const TensorLayoutArray&, const megdnn::SmallVector<DeviceTensorND>&,
        intl::UniqPtrWithCN<Opr>&, megdnn::Workspace&,
        std::array<TensorLayout, arity>&, std::array<DeviceTensorND, arity_in>&,
        PreprocessFilter<Opr>&) {
    // Opr is neither convbias nor convolution. This function does nothing.
}
//! convbias
template <>
void TimedProfiler<megdnn::ConvBias>::preprocess(
        const TensorLayoutArray& preprocessed_layout,
        const SmallVector<DeviceTensorND>& flt_val,
        intl::UniqPtrWithCN<megdnn::ConvBias>& megdnn_opr,
        megdnn::Workspace& mdn_workspace, std::array<TensorLayout, arity>& layouts,
        std::array<DeviceTensorND, arity_in>& inp_val,
        PreprocessFilter<megdnn::ConvBias>& prep_flt) {
    if (!preprocessed_layout.empty()) {
        auto&& pf = prep_flt;
        pf.algorithm_id = nullptr;
        pf.tensors.resize(flt_val.size());
        for (size_t i = 0; i < flt_val.size(); i++) {
            pf.tensors[i] = flt_val[i].as_megdnn();
        }
        APPLY(megdnn_opr->exec_preprocess(args..., &pf, mdn_workspace),
              std::forward_as_tuple(
                      layouts[0], inp_val[1].as_megdnn(), inp_val[2].as_megdnn()),
              array_skip<arity_in - 1>(layouts));
    }
}

//! convolution
template <>
void TimedProfiler<megdnn::ConvolutionForward>::preprocess(
        const TensorLayoutArray& preprocessed_layout,
        const megdnn::SmallVector<DeviceTensorND>& flt_val,
        intl::UniqPtrWithCN<megdnn::ConvolutionForward>& megdnn_opr,
        megdnn::Workspace& mdn_workspace, std::array<TensorLayout, arity>& layouts,
        std::array<DeviceTensorND, arity_in>& inp_val,
        PreprocessFilter<megdnn::ConvolutionForward>& prep_flt) {
    if (!preprocessed_layout.empty()) {
        auto&& pf = prep_flt;
        pf.algorithm_id = nullptr;
        pf.tensors.resize(flt_val.size());
        for (size_t i = 0; i < flt_val.size(); i++) {
            pf.tensors[i] = flt_val[i].as_megdnn();
        }
        APPLY(megdnn_opr->exec_preprocess(args..., &pf, mdn_workspace),
              std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()),
              array_skip<2>(layouts));
    }
}
template <typename Opr>
typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
        const TParam& raw_param) {
    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl")))
#if MGB_ROCM
    bool miopen_algo_search_enabled;
    megcore::getMIOpenAlgoSearchStatus(&miopen_algo_search_enabled);
    mgb_assert(miopen_algo_search_enabled, "MIOpen algo search not enabled");
#endif
    auto&& param = raw_param.as_single_pod<Param>();
    CompNode cn = CompNode::load(param.comp_node_physical, param.comp_node_logical);
    auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn);
    std::array<TensorLayout, arity> layouts;

    auto from_enum = [&](DTypeEnum enumv) -> DType {
        switch (enumv) {
#define cb(_dt)                  \
    case DTypeTrait<_dt>::enumv: \
        return _dt(1.0f, static_cast<uint8_t>(0))
            cb(dtype::Quantized8Asymm);
            cb(dtype::Quantized4Asymm);
#undef cb
#define cb(_dt)                  \
    case DTypeTrait<_dt>::enumv: \
        return _dt(1.0f)
            cb(dtype::QuantizedS8);
            cb(dtype::QuantizedS16);
            cb(dtype::QuantizedS32);
            cb(dtype::QuantizedS4);
            default:
                return DType::from_enum(enumv);
#undef cb
        }
    };
    for (int i = 0; i < arity; ++i) {
        layouts[i] = {param.shapes[i], from_enum(param.dtypes[i])};
    }

    megdnn_opr->param() = param.opr_param;
    megdnn_opr->execution_policy() = param.execution_policy.deserialize();

    // Allocate preprocessed weight buffers.
    TensorLayoutArray preprocessed_layout;
    if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
        if (param.allow_weight_preprocess) {
            preprocessed_layout = APPLY(
                    _(megdnn_opr)->deduce_preprocessed_filter_layout(args...), layouts);
        }
    });

    {
        // first allocate a whole chunk to avoid memory fragmentation (here we
        // rely on memory allocator to reuse memory)
        auto align = cn.get_mem_addr_alignment();
        size_t tot_size = align;
        for (int i = 0; i < arity; ++i) {
            tot_size += layouts[i].span().high_byte + align;
        }
        for (const auto& layout : preprocessed_layout) {
            tot_size += layout.span().high_byte + align;
        }
        tot_size += param.workspace;
        DeviceTensorStorage storage{cn};
        storage.ensure_size(tot_size);
    }

    // allocate input and output memory
    std::array<DeviceTensorND, arity_in> inp_val;
    std::array<DeviceTensorND, arity_out> out_val;
    DeviceTensorND workspace;
    for (int i = 0; i < arity_in; ++i) {
        inp_val[i].comp_node(cn).dtype(layouts[i].dtype).resize(layouts[i]);
    }
    for (int i = 0; i < arity_out; ++i) {
        out_val[i]
                .comp_node(cn)
                .dtype(layouts[arity_in + i].dtype)
                .resize(layouts[arity_in + i]);
    }
    megdnn::Workspace mdn_workspace;

    // allocate workspace
    if (param.workspace) {
        workspace.comp_node(cn).dtype(dtype::Byte()).resize({param.workspace});
        mdn_workspace.size = param.workspace;
        mdn_workspace.raw_ptr = workspace.raw_ptr();
    }

    // allocate storage for preprocessed filter
    SmallVector<DeviceTensorND> flt_val(preprocessed_layout.size());
    for (size_t i = 0; i < preprocessed_layout.size(); i++) {
        flt_val[i] = {
                cn, preprocessed_layout[i], preprocessed_layout[i].dtype,
                preprocessed_layout[i].format};
    }

    for (int i = 0; i < arity_in; ++i) {
        fill_zero_dev_tensor(inp_val[i]);
    }

    PreprocessFilter<Opr> prep_flt;
    preprocess(
            preprocessed_layout, flt_val, megdnn_opr, mdn_workspace, layouts, inp_val,
            prep_flt);

    RealTimer timer;
    auto ev_start = cn.create_event(CompNode::Event::NEED_TIMER),
         ev_end = cn.create_event(CompNode::Event::NEED_TIMER);
    ev_start->record();
    if_constexpr<opr_supports_preprocess<Opr>()>(
            [&](auto _) {
                auto&& opr = _(megdnn_opr);
                PreprocessFilter<Opr>* pf =
                        preprocessed_layout.empty() ? nullptr : &prep_flt;
                APPLY(opr->exec(args.as_megdnn()..., pf, mdn_workspace), inp_val,
                      out_val);
            },
            /* else */
            [&](auto _) {
                APPLY(_(megdnn_opr)->exec(args.as_megdnn()..., mdn_workspace), inp_val,
                      out_val);
            });
    ev_end->record();

    megdnn::Algorithm* algo =
            megdnn_opr->get_algorithm_from_desc(megdnn_opr->execution_policy().algo);
    mgb_assert(algo);
    double next_report_time = 0.5;
    while (!ev_end->finished()) {
        if (timer.get_secs() >= next_report_time) {
#if MGB_ENABLE_GETENV
            mgb_log_warn(
                    "profiling conv algo %s already took %.3f/%.3f secs"
                    " (limit can be set by MGB_CONV_PROFILING_TIMEOUT) ",
                    algo->name(), timer.get_secs(), param.actual_timeout);
#else
            mgb_log_warn(
                    "profiling conv algo %s already took %.3f/%.3f secs",
                    algo->name(), timer.get_secs(), param.actual_timeout);
#endif
            next_report_time = timer.get_secs() + 1;
        }
        using namespace std::literals;
#if !__DEPLOY_ON_XP_SP2__
        std::this_thread::sleep_for(1000us);
#endif
    }
    // release all free blocks owned by child process,
    // in order to avoid main process running out of memory
    cn.try_coalesce_all_free_memory();

    mgb_assert(ev_start->finished());
    return TResult::from_pod(Result{ev_start->elapsed_time_until(*ev_end)});
    MIDOUT_E
};
template <typename Opr>
Maybe<typename TimedProfiler<Opr>::Result> TimedProfiler<Opr>::profile(
        const Param& param, double& timeout) {
    mgb_assert(timeout >= 0);
    if (!timeout) {
        timeout = timeout_setting;
    } else if (timeout_setting) {
        timeout = std::min(timeout, timeout_setting);
    }
    param.actual_timeout = timeout ? timeout : std::numeric_limits<double>::infinity();
    auto res = sys::TimedFuncInvoker::ins().invoke(
            AlgoChooserFuncId<Opr>::ID, TParam::from_pod(const_cast<Param&>(param)),
            timeout);
    if (res.valid())
        return res.val().template as_single_pod<Result>();
    return None;
}
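
The clamping at the top of profile() combines the caller's timeout with the global timeout_setting read from MGB_CONV_PROFILING_TIMEOUT. A worked illustration, assuming timeout_setting == 2.0:

// caller timeout == 0    ->  timeout = 2.0              (global setting used)
// caller timeout == 5.0  ->  timeout = min(5.0, 2.0) = 2.0
// caller timeout == 1.0  ->  timeout = min(1.0, 2.0) = 1.0
// timeout == 0 and timeout_setting == 0
//                        ->  actual_timeout = +inf      (profiling unlimited)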
template <typename Opr>
void TimedProfiler<Opr>::prof_init_device(const TParam& raw_param) {
    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device")))
#if MGB_ROCM
    megcore::enableMIOpenAlgoSearch(true);
#endif
    auto&& param = raw_param.as_single_pod<Param>();
    CompNode cn = CompNode::load(param.comp_node_physical, param.comp_node_logical);
    // wait for cuda init, so its time does not get accounted in timeout
    cn.sync();
    MIDOUT_E
}

#define INST(Opr)                                                              \
    template const double TimedProfiler<megdnn::Opr>::timeout_setting;        \
    template double TimedProfiler<megdnn::Opr>::init_timeout_setting();       \
    template typename TimedProfiler<megdnn::Opr>::TResult                     \
    TimedProfiler<megdnn::Opr>::prof_impl(const TParam& raw_param);           \
    template Maybe<typename TimedProfiler<megdnn::Opr>::Result>               \
    TimedProfiler<megdnn::Opr>::profile(const Param& param, double& timeout); \
    template void TimedProfiler<megdnn::Opr>::prof_init_device(const TParam& raw_param);
MGB_FOREACH_FASTRUN_OPR(INST)
#undef INST

}  // namespace opr
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/opr/impl/search_policy/workspace_need_limit_getter.inl

@@ -12,7 +12,7 @@
 #pragma once

-#include "megbrain/opr/search_policy/profiler.h"
+#include "megbrain/opr/search_policy/algo_chooser.h"

 #include "../internal/megdnn_opr_wrapper.inl"

@@ -25,7 +25,7 @@ namespace intl {
     struct AutoAddWorkspaceNeedLimitGetter<megdnn::_Opr> { \
         static constexpr bool val = true;                  \
     };
-MGB_FOREACH_FASTRUN_OPR(cb)
+DNN_FOREACH_FASTRUN_OPR(cb)
 #undef cb
src/opr/include/megbrain/opr/internal/megdnn_opr_wrapper.h

@@ -13,6 +13,7 @@
 #include "megbrain/graph.h"
 #include "megbrain/opr/internal/mixin_base.h"
+#include "megbrain/rdnn/management.h"
 #include "megdnn/handle.h"
@@ -20,43 +21,6 @@ namespace mgb {
 namespace opr {
 namespace intl {
-//! get megdnn handle from comp node
-MGE_WIN_DECLSPEC_FUC megdnn::Handle* get_megdnn_handle(CompNode comp_node);
-MGE_WIN_DECLSPEC_FUC std::shared_ptr<megdnn::Handle> get_megdnn_handle_shared(
-        CompNode comp_node);
-
-/*!
- * \brief get global megdnn operator associated with a computing node
- * \tparam Opr megdnn operator class, must be one of:
- *      * AddUpdate
- *      * Relayout
- *      * Checksum
- */
-template <typename Opr>
-MGE_WIN_DECLSPEC_FUC Opr* get_megdnn_global_opr(CompNode comp_node);
-
-template <class Obj>
-class UniqPtrWithCN : public std::unique_ptr<Obj> {
-    CompNode m_cn;
-
-public:
-    UniqPtrWithCN() = default;
-
-    template <class RObj>
-    UniqPtrWithCN(UniqPtrWithCN<RObj>&& o)
-            : std::unique_ptr<Obj>(std::move(o)), m_cn(o.comp_node()) {}
-
-    UniqPtrWithCN(std::unique_ptr<Obj> ptr, CompNode cn)
-            : std::unique_ptr<Obj>{std::move(ptr)}, m_cn{cn} {}
-
-    CompNode comp_node() const { return m_cn; }
-};
-
-//! create megdnn opr from megdnn handle in a CompNode
-template <class Opr>
-UniqPtrWithCN<Opr> create_megdnn_opr(CompNode comp_node) {
-    return {get_megdnn_handle(comp_node)->create_operator<Opr>(), comp_node};
-}
 /*!
  * \brief get temporary storage for oprs
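
These declarations do not simply vanish: the hunk above adds megbrain/rdnn/management.h, which presumably now hosts them in the graph-independent rdnn component. For orientation, the removed UniqPtrWithCN is just a unique_ptr tagged with the CompNode it was created on, used as in prof_impl earlier in this commit (sketch, not from the original sources):

// auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn);  // UniqPtrWithCN<Opr>
// megdnn_opr.comp_node();   // CompNode the operator was created on
// megdnn_opr->param();      // ordinary unique_ptr access is unchanged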
src/opr/include/megbrain/opr/search_policy/algo_chooser.h

@@ -19,7 +19,7 @@
 #include "megbrain/opr/dnn/convolution.h"
 #include "megbrain/opr/dnn/pooling.h"
 #include "megbrain/opr/search_policy/algo_chooser_helper.h"
-#include "megbrain/opr/search_policy/profiler.h"
+#include "megbrain/rdnn/algo_chooser.h"
 #include "megdnn/oprs/base.h"

 template <class MegDNNOpr>
@@ -31,18 +31,13 @@ struct MegDNNOpr2MGBOpr;
     using MGBOpr = mgb::opr::_Opr; \
     };
-MGB_FOREACH_FASTRUN_OPR(cb)
+DNN_FOREACH_FASTRUN_OPR(cb)
 #undef cb

-namespace mgb {
-//! define logical operation of megdnn::param::ExecutionPolicy::Strategy::Enum
-//! and megdnn::detail::AlgoAttribute enum
-using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy;
-using AlgoAttribute = megdnn::AlgoAttribute;
+#define MGB_FOREACH_FASTRUN_OPR(cb) DNN_FOREACH_FASTRUN_OPR(cb)

+namespace mgb {
 namespace opr {

 /* =================== AlgoChooser =================== */
@@ -56,138 +51,14 @@ namespace opr {
  * \tparam Opr megdnn operator impl
  */
 template <typename Opr>
-class AlgoChooser {
-    static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
-    static constexpr int arity_out = OprArityTrait<Opr>::arity_out;
-    static constexpr int arity = OprArityTrait<Opr>::arity;
-
-    using ImplAlgo = typename Opr::AlgorithmInfo;
-    using ImplAlgoDesc = typename Opr::AlgorithmInfo::Desc;
-    using ImplExecutionPolicy = megdnn::ExecutionPolicy;
+class AlgoChooser : public rdnn::AlgoChooser<Opr> {
+    using Base = rdnn::AlgoChooser<Opr>;
+    using MGBOpr = typename MegDNNOpr2MGBOpr<Opr>::MGBOpr;
+    using ImplExecutionPolicy = typename Base::ImplExecutionPolicy;

 public:
-    using FixedTensorLayouts = std::array<TensorLayout, arity>;
-    class AlgoChooserHelper {
-        //! fastrun layouts
-        FixedTensorLayouts m_fastrun_layouts;
-        //! layouts used when get and set cache item
-        FixedTensorLayouts m_incache_layouts;
-        Opr* m_dnn_opr;
-        std::string m_param;
-        const cg::OperatorNodeBase* m_base_mgb_opr;
-        CompNode m_cn;
-        megdnn::param::ExecutionPolicy m_execution_policy;
-        bool m_allow_weight_preprocess;
-
-    public:
-        AlgoChooserHelper(
-                const FixedTensorLayouts& layouts, Opr* megdnn_opr,
-                const std::string& param_str, const cg::OperatorNodeBase* mgb_opr,
-                const CompNode& cn,
-                const megdnn::param::ExecutionPolicy& execution_policy,
-                bool allow_weight_preprocess);
-
-        Opr* megdnn_opr() const { return m_dnn_opr; }
-        const cg::OperatorNodeBase* mgb_opr() const { return m_base_mgb_opr; }
-        const TensorLayout& inp_layout(size_t idx) const {
-            return m_fastrun_layouts[idx];
-        }
-        cg::ComputingGraph* owner_graph() const {
-            return m_base_mgb_opr->owner_graph();
-        }
-        const megdnn::param::ExecutionPolicy& execution_policy() const {
-            return m_execution_policy;
-        }
-        CompNode comp_node() const { return m_cn; }
-        const std::string& param() const { return m_param; }
-        bool allow_weight_preprocess() const { return m_allow_weight_preprocess; }
-        megdnn::Algorithm* get_algorithm_from_desc(
-                const megdnn::Algorithm::Info::Desc& desc) const {
-            return m_dnn_opr->get_algorithm_from_desc(desc);
-        }
-        const FixedTensorLayouts& fastrun_layouts() const { return m_fastrun_layouts; }
-        const FixedTensorLayouts& incache_layouts() const { return m_incache_layouts; }
-
-        //! construct algo chain by heuristic
-        ImplExecutionPolicy choose_by_heuristic(
-                const ExecutionStrategy& selected_strategy) const;
-
-        //! construct algo chain by profiling
-        ImplExecutionPolicy choose_by_profile(
-                const ExecutionStrategy& selected_strategy, bool enable_update) const;
-
-        //! get all profile algorithm from cache, return invalid if not exists
-        std::pair<ImplAlgoDesc, Maybe<AlgoChooserProfileCache::Result>>
-        get_profile_result_from_cache(
-                const ExecutionStrategy& selected_strategy) const;
-
-        /**
-         * \brief construct execution policy from cache or heuristic.
-         *
-         * \param selected_strategy select algo which matched this strategy
-         * \param[in,out] policy execution policy
-         * \param retrive_from_cache retrieve algo from cache if set True, get
-         *     from heuristic otherwise.
-         * \param allow_log print warning info if set True, no warning log
-         *     otherwise.
-         */
-        void construct_execution_policy(
-                const ExecutionStrategy& selected_strategy,
-                ImplExecutionPolicy& policy, bool retrive_from_cache = true,
-                bool allow_log = true) const;
-
-        //! get workspace size required for specific execution policy
-        size_t get_workspace_size_bytes(
-                const ImplExecutionPolicy& policy,
-                const FixedTensorLayouts& layouts = {}) const;
-
-        //! get all candidate algos, and the one choose_by_heuristic() chooses
-        //! is put first
-        std::vector<ImplAlgo> get_all_candidates() const;
-
-        /*!
-         * \brief profile a single algorithm
-         *
-         * This is actually a wrapper that constructs param and calls
-         * TimedProfiler<Opr>::profile for the actual profiling
-         *
-         * \param[in,out] timeout set the timeout, and return the actual
-         *     timeout used during profiling
-         */
-        Maybe<AlgoChooserProfileCache::ResultEntry> profile_single_algo(
-                const ImplExecutionPolicy& policy, double& timeout) const;
-
-        //! profile and save to cache
-        void profile(const ExecutionStrategy& selected_strategy) const;
-
-        /**
-         * \brief extract algo attribute from execution strategy and graph
-         *     option.
-         *
-         * \param strategy select algo which matched this strategy
-         * \return pair<positive_attr, negative_attr>
-         */
-        std::pair<AlgoAttribute, AlgoAttribute> extract_algo_attribute(
-                const ExecutionStrategy& strategy) const;
-
-    private:
-        Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter(
-                const FixedTensorLayouts& layouts = {}) const;
-    };
-
-    template <typename U>
-    friend class AlgoChooser;
-
-private:
-    //! entrance for getting algorithm according to execution strategy
-    static ImplExecutionPolicy get_policy(const AlgoChooserHelper& helper);
-
-public:
+    using AlgoChooserHelper = typename Base::AlgoChooserHelper;
+    using FixedTensorLayouts = typename Base::FixedTensorLayouts;
+
     /*!
      * \brief setup algorithm and return workspace size
      */
src/opr/include/megbrain/opr/search_policy/profiler.h
deleted (100644 → 0)

/**
 * \file src/opr/include/megbrain/opr/search_policy/profile.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */
#pragma once

#include "megbrain/comp_node.h"
#include "megbrain/opr/internal/megdnn_opr_wrapper.h"
#include "megbrain/system.h"
#include "megbrain/tensor.h"
#include "megbrain/utils/hash_ct.h"
#include "megbrain/utils/timer.h"
#include "megdnn/basic_types.h"
#include "megdnn/oprs.h"

namespace mgb {
namespace opr {

// clang-format off
#define MGB_FOREACH_FASTRUN_OPR(cb)  \
    cb(ConvolutionForward)           \
    cb(ConvBiasForward)              \
    cb(ConvolutionBackwardData)      \
    cb(ConvolutionBackwardFilter)    \
    cb(Convolution3DForward)         \
    cb(Convolution3DBackwardData)    \
    cb(Convolution3DBackwardFilter)  \
    cb(LocalShareForward)            \
    cb(LocalShareBackwardData)       \
    cb(LocalShareBackwardFilter)     \
    cb(DeformableConvForward)        \
    cb(DeformableConvBackwardFilter) \
    cb(DeformableConvBackwardData)   \
    cb(BatchConvBiasForward)         \
    cb(MatrixMul)                    \
    cb(BatchedMatrixMul)             \
    cb(PoolingForward)               \
    cb(PoolingBackward)
// clang-format on
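
MGB_FOREACH_FASTRUN_OPR is an X-macro: each cb(...) is expanded once per fastrun operator. A minimal illustration of applying a callback (hypothetical snippet, not from the original header):

// Count the fastrun oprs by expanding the X-macro with a trivial callback:
#define COUNT_CB(_opr) +1
constexpr int NR_FASTRUN_OPR = 0 MGB_FOREACH_FASTRUN_OPR(COUNT_CB);  // == 18
#undef COUNT_CB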
template <typename Opr>
constexpr bool opr_supports_preprocess() {
    return std::is_same<Opr, megdnn::ConvolutionForward>::value ||
           std::is_same<Opr, megdnn::ConvBias>::value;
}

template <typename Opr>
constexpr bool opr_contain_bias() {
    return std::is_same<Opr, megdnn::ConvBias>::value;
}

//! matmul and batchedMatrixMul
template <typename Opr>
constexpr bool is_matmul() {
    return std::is_same<Opr, megdnn::MatrixMul>::value ||
           std::is_same<Opr, megdnn::BatchedMatrixMul>::value;
}

template <typename Opr, bool has_prep>
struct PreprocessFilterImpl {
    using T = union {};
};

template <typename Opr>
struct PreprocessFilterImpl<Opr, true> {
    using T = typename Opr::PreprocessedFilter;
};

template <typename Opr>
using PreprocessFilter =
        typename PreprocessFilterImpl<Opr, opr_supports_preprocess<Opr>()>::T;

template <typename Opr>
struct AlgoChooserFuncId {};

#define DEF_FUNC_ID(func)                                                           \
    template <>                                                                     \
    struct AlgoChooserFuncId<megdnn::func> {                                        \
        __attribute__((unused)) static constexpr sys::TimedFuncInvoker::FuncId ID = \
                static_cast<sys::TimedFuncInvoker::FuncId>(                         \
                        MGB_HASH_STR("megdnn::" #func));                            \
    };

MGB_FOREACH_FASTRUN_OPR(DEF_FUNC_ID)

#undef DEF_FUNC_ID
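
Each fastrun opr thereby gets a stable FuncId derived from a compile-time hash of its qualified name; for example, DEF_FUNC_ID(MatrixMul) expands to roughly (illustration only):

// struct AlgoChooserFuncId<megdnn::MatrixMul> {
//     static constexpr sys::TimedFuncInvoker::FuncId ID =
//             static_cast<sys::TimedFuncInvoker::FuncId>(
//                     MGB_HASH_STR("megdnn::MatrixMul"));
// };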
/* =================== TimedProfiler =================== */

/*!
 * \brief profile a megdnn opr conv with given param
 *
 * This class only provides static methods, and the entry point is
 * TimedProfiler::profile; it would run the profiler in a timed environment by
 * sys::TimedFuncInvoker
 *
 * \tparam Opr megdnn opr impl
 */
template <typename Opr>
class TimedProfiler {
    static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
    static constexpr int arity_out = OprArityTrait<Opr>::arity_out;
    static constexpr int arity = OprArityTrait<Opr>::arity;

    using TensorShapeArray = std::array<megdnn::TensorShape, arity>;

public:
    struct Param {
        struct ExecutionPolicyBlob {
            //! enlarge the max size if needed
            constexpr static size_t MAX_SIZE_IN_BYTES = 10240;
            char data[MAX_SIZE_IN_BYTES];
            uint32_t size;

            static ExecutionPolicyBlob serialize(const megdnn::ExecutionPolicy& policy);
            megdnn::ExecutionPolicy deserialize() const;
        };
        ExecutionPolicyBlob execution_policy;
        size_t workspace;
        megdnn::DTypeEnum dtypes[arity];
        CompNode::Locator comp_node_physical, comp_node_logical;
        TensorShapeArray shapes;
        typename Opr::Param opr_param;
        bool allow_weight_preprocess;

        //! filled by profile()
        mutable double actual_timeout;
    };

    struct Result {
        double time;
    };

    static Maybe<Result> profile(const Param& param, double& timeout);

private:
    using TParam = sys::TimedFuncInvoker::Param;
    using TResult = sys::TimedFuncInvoker::Result;

    static const double timeout_setting;

    static double init_timeout_setting();
    static void preprocess(
            const megdnn::TensorLayoutArray& preprocessed_layout,
            const SmallVector<DeviceTensorND>& flt_val,
            intl::UniqPtrWithCN<Opr>& megdnn_opr, megdnn::Workspace& mdn_workspace,
            std::array<TensorLayout, arity>& layouts,
            std::array<DeviceTensorND, arity_in>& inp_val,
            PreprocessFilter<Opr>& prep_flt);
    static TResult prof_impl(const TParam& raw_param);
    static void prof_init_device(const TParam& raw_param);
};

}  // namespace opr
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
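
As the class comment says, TimedProfiler::profile is the entry point and runs prof_impl through sys::TimedFuncInvoker. A hypothetical call, assuming a fully populated Param p (sketch, not from the original sources):

// double timeout = 0;  // 0 -> fall back to MGB_CONV_PROFILING_TIMEOUT, if set
// auto res = TimedProfiler<megdnn::ConvolutionForward>::profile(p, timeout);
// if (res.valid())
//     printf("algo time: %.3f\n", res.val().time);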