Commit 12a3ef8d authored by Megvii Engine Team

refactor(fastrun): decouple fastrun from computing graph

GitOrigin-RevId: 27abd222950e36287784e58ad56d6819e9c4cb69
Parent 0a6f4a88
......@@ -18,6 +18,7 @@ file(
opr/impl/nvof/*.cpp
plugin/impl/*.cpp
serialization/impl/*.cpp
rdnn/impl/*.cpp
core/impl/*.inl
gopt/impl/*.inl
opr/impl/*.inl
......@@ -53,7 +54,8 @@ set(MGB_INC
${CMAKE_CURRENT_LIST_DIR}/gopt/include
${CMAKE_CURRENT_LIST_DIR}/opr/include
${CMAKE_CURRENT_LIST_DIR}/plugin/include
${CMAKE_CURRENT_LIST_DIR}/serialization/include)
${CMAKE_CURRENT_LIST_DIR}/serialization/include
${CMAKE_CURRENT_LIST_DIR}/rdnn/include)
if(MGE_WITH_JIT)
list(APPEND MGB_INC ${CMAKE_CURRENT_LIST_DIR}/jit/include)
......
......@@ -183,7 +183,7 @@ struct OprWithPolicyMaker<opr::BatchConvBiasForward>
MakeOprWithPolicyCaller4<megdnn::BatchConvBiasForward>,
megdnn::param::BatchConvBias> {};
#include "../../opr/impl/internal/invoke.h"
#include "megbrain/utils/invoke.h"
template <typename Opr>
struct MultiAlgoOprTrait;
......
......@@ -23,8 +23,8 @@
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/misc.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/search_policy/algo_chooser.h"
#include "megbrain/opr/search_policy/algo_chooser_helper.h"
#include "megbrain/opr/search_policy/profiler.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
......
......@@ -19,7 +19,6 @@
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/search_policy/algo_chooser.h"
#include "megbrain/opr/search_policy/profiler.h"
#include "./internal/megdnn_opr_wrapper.inl"
#include "./search_policy/workspace_need_limit_getter.inl"
......
......@@ -18,11 +18,11 @@
#include "megbrain/graph/grad_impl.h"
#include "megbrain/system.h"
#include "megbrain/utils/hash_ct.h"
#include "megbrain/utils/invoke.h"
#include "megbrain/utils/timer.h"
#include "megdnn/oprs/utils.h"
#include "../internal/invoke.h"
#include "../internal/megdnn_opr_wrapper.inl"
#include "../search_policy/workspace_need_limit_getter.inl"
......
......@@ -25,26 +25,6 @@ using namespace mixin;
/* ================== global functions ================== */
namespace {
template <class Opr>
class MegDNNGlobalOprContainer final : public UserDataContainer::UserData {
MGB_TYPEINFO_OBJ_DECL;
std::shared_ptr<megdnn::Handle> m_megdnn_handle;
std::unique_ptr<Opr> m_opr;
public:
MegDNNGlobalOprContainer(CompNode cn)
: m_megdnn_handle{get_megdnn_handle_shared(cn)},
m_opr{m_megdnn_handle->create_operator<Opr>()} {
mgb_assert(m_opr->is_thread_safe());
}
Opr* get() const { return m_opr.get(); }
};
template <class Opr>
MGB_TYPEINFO_OBJ_IMPL(MegDNNGlobalOprContainer<Opr>);
class TempStorageContainer final : public UserDataContainer::UserData {
MGB_TYPEINFO_OBJ_DECL;
......@@ -55,34 +35,6 @@ public:
MGB_TYPEINFO_OBJ_IMPL(TempStorageContainer);
} // anonymous namespace
std::shared_ptr<megdnn::Handle> intl::get_megdnn_handle_shared(CompNode comp_node) {
auto& handle = MegDNNHandle::get(CompNodeEnv::from_comp_node(comp_node));
return {handle.shared_from_this(), handle.handle()};
}
megdnn::Handle* intl::get_megdnn_handle(CompNode comp_node) {
return MegDNNHandle::get(CompNodeEnv::from_comp_node(comp_node)).handle();
}
template <typename Opr>
Opr* intl::get_megdnn_global_opr(CompNode comp_node) {
using T = MegDNNGlobalOprContainer<Opr>;
auto maker = [comp_node]() { return std::make_shared<T>(comp_node); };
return CompNodeEnv::from_comp_node(comp_node).get_user_data<T>(maker).get();
}
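// Usage sketch (hypothetical call site, not part of the original file): the
// container above caches one opr instance per comp node, so repeated calls
// return the same pointer. Only the INST'ed oprs below are available.
//
//     auto* add_update = intl::get_megdnn_global_opr<megdnn::AddUpdate>(cn);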
namespace mgb {
namespace opr {
namespace intl {
#define INST(o) template o* get_megdnn_global_opr<o>(CompNode)
INST(megdnn::AddUpdate);
INST(megdnn::Relayout);
INST(megdnn::Checksum);
#undef INST
} // namespace intl
} // namespace opr
} // namespace mgb
DeviceTensorStorage& intl::get_temp_storage(ComputingGraph& graph, CompNode comp_node) {
auto container =
graph.options().user_data.get_user_data_or_create<TempStorageContainer>();
......
/**
* \file src/opr/impl/search_policy/profiler.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/opr/search_policy/profiler.h"
#include "../internal/invoke.h"
#include "../internal/megdnn_opr_wrapper.inl"
#include "megdnn/handle.h"
#include "megdnn/oprs/base.h"
#if MGB_ROCM
#include "hcc_detail/hcc_defs_prologue.h"
#include "megcore_rocm.h"
#endif
//! TODO: some megdnn oprs have to be known here when midout.h produces its
//! instrumentation; fix it if there is a more graceful way.
#include "megdnn/oprs.h"
#include "midout.h"
MIDOUT_DECL(megbrain_opr_profile)
#define MIDOUT_B(...) MIDOUT_BEGIN(megbrain_opr_profile, __VA_ARGS__) {
#define MIDOUT_E \
} \
MIDOUT_END();
namespace {
std::string serialize_policy(const megdnn::ExecutionPolicy& policy) {
std::string ret;
//! serialize AlgorithmDesc
megdnn::Algorithm::serialize_write_pod(policy.algo.handle_type, ret);
megdnn::Algorithm::serialize_write_pod(policy.algo.type, ret);
uint32_t param_size = policy.algo.param.size();
uint32_t name_size = policy.algo.name.size();
megdnn::Algorithm::serialize_write_pod<uint32_t>(param_size, ret);
megdnn::Algorithm::serialize_write_pod<uint32_t>(name_size, ret);
ret += policy.algo.param;
ret += policy.algo.name;
//! serialize sub_policy
uint32_t size = policy.sub_policy.size();
megdnn::Algorithm::serialize_write_pod(size, ret);
for (auto&& sub : policy.sub_policy) {
ret += serialize_policy(sub);
}
return ret;
}
megdnn::ExecutionPolicy deserialize_policy(
const char* buf, uint32_t size, uint32_t& offset) {
megdnn::ExecutionPolicy ret;
#define cb(_val, _type) \
_val = megdnn::Algorithm::deserialize_read_pod<_type>(buf, offset); \
offset += sizeof(_val)
cb(ret.algo.handle_type, megdnn::Handle::HandleType);
cb(ret.algo.type, uint32_t);
uint32_t param_size = 0;
uint32_t name_size = 0;
cb(param_size, uint32_t);
cb(name_size, uint32_t);
if (param_size > 0) {
ret.algo.param = std::string(buf + offset, param_size);
offset += param_size;
}
if (name_size > 0) {
ret.algo.name = std::string(buf + offset, name_size);
offset += name_size;
}
uint32_t nr_policy = 0;
cb(nr_policy, uint32_t);
#undef cb
for (uint32_t i = 0; i < nr_policy; i++) {
ret.sub_policy.push_back(deserialize_policy(buf, size, offset));
}
return ret;
}
} // namespace
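// Illustrative round-trip sketch (not part of the original file): the two
// helpers above recurse over sub_policy, so nested policies survive the
// round trip. All values below are hypothetical placeholders.
//
//     megdnn::ExecutionPolicy root, sub;
//     root.algo.name = "ALGO_A";   // hypothetical algo name
//     sub.algo.name = "ALGO_SUB";  // hypothetical sub-algo name
//     root.sub_policy.push_back(sub);
//     std::string buf = serialize_policy(root);
//     uint32_t offset = 0;
//     auto restored = deserialize_policy(
//             buf.data(), static_cast<uint32_t>(buf.size()), offset);
//     // restored.sub_policy[0].algo.name == "ALGO_SUB"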
namespace mgb {
namespace opr {
#define APPLY(statement, ...) \
mgb::apply( \
[&](const auto&... args) { return statement; }, \
std::tuple_cat(__VA_ARGS__))
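// How APPLY unpacks (sketch, assuming mgb::apply behaves like std::apply):
// the trailing arguments are tuple_cat'ed and expanded into the lambda's
// parameter pack `args`, so e.g.
//
//     std::array<DeviceTensorND, 2> inp;  // hypothetical
//     std::array<DeviceTensorND, 1> out;  // hypothetical
//     APPLY(opr->exec(args.as_megdnn()..., ws), inp, out);
//
// is roughly equivalent to
//
//     opr->exec(inp[0].as_megdnn(), inp[1].as_megdnn(),
//               out[0].as_megdnn(), ws);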
////////////// TimedProfiler::Param::ExecutionPolicyBlob //////////////////////
template <typename Opr>
typename TimedProfiler<Opr>::Param::ExecutionPolicyBlob TimedProfiler<Opr>::Param::
ExecutionPolicyBlob::serialize(const megdnn::ExecutionPolicy& policy) {
ExecutionPolicyBlob ret;
std::string serialize_bin = serialize_policy(policy);
mgb_assert(serialize_bin.size() < MAX_SIZE_IN_BYTES);
memcpy(ret.data, serialize_bin.data(), serialize_bin.size());
ret.size = serialize_bin.size();
return ret;
}
template <typename Opr>
megdnn::ExecutionPolicy TimedProfiler<Opr>::Param::ExecutionPolicyBlob::deserialize()
const {
uint32_t offset = 0;
auto&& ret = deserialize_policy(data, size, offset);
mgb_assert(offset == size);
return std::move(ret);
}
#define INST(Opr) \
template typename TimedProfiler<megdnn::Opr>::Param::ExecutionPolicyBlob \
TimedProfiler<megdnn::Opr>::Param::ExecutionPolicyBlob::serialize( \
const megdnn::ExecutionPolicy& policy); \
template megdnn::ExecutionPolicy \
TimedProfiler<megdnn::Opr>::Param::ExecutionPolicyBlob::deserialize() const;
MGB_FOREACH_FASTRUN_OPR(INST)
#undef INST
////////////////// TimedProfiler //////////////////////////////
template <typename Opr>
const double TimedProfiler<Opr>::timeout_setting =
TimedProfiler<Opr>::init_timeout_setting();
template <typename Opr>
double TimedProfiler<Opr>::init_timeout_setting() {
#if MGB_ENABLE_FASTRUN
sys::TimedFuncInvoker::ins().register_func(
AlgoChooserFuncId<Opr>::ID, &TimedProfiler<Opr>::prof_impl,
&TimedProfiler<Opr>::prof_init_device);
auto to_set = MGB_GETENV("MGB_CONV_PROFILING_TIMEOUT");
if (to_set)
return std::stod(to_set);
#endif
return 0;
}
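// Note (illustrative): running the process with MGB_CONV_PROFILING_TIMEOUT=10
// makes init_timeout_setting() return 10.0, capping every fastrun profiling
// run at roughly ten seconds.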
template <typename Opr>
void TimedProfiler<Opr>::preprocess(
const TensorLayoutArray&, const megdnn::SmallVector<DeviceTensorND>&,
intl::UniqPtrWithCN<Opr>&, megdnn::Workspace&, std::array<TensorLayout, arity>&,
std::array<DeviceTensorND, arity_in>&, PreprocessFilter<Opr>&) {
// Opr is neither ConvBias nor Convolution; this function does nothing.
}
//! convbias
template <>
void TimedProfiler<megdnn::ConvBias>::preprocess(
const TensorLayoutArray& preprocessed_layout,
const SmallVector<DeviceTensorND>& flt_val,
intl::UniqPtrWithCN<megdnn::ConvBias>& megdnn_opr,
megdnn::Workspace& mdn_workspace, std::array<TensorLayout, arity>& layouts,
std::array<DeviceTensorND, arity_in>& inp_val,
PreprocessFilter<megdnn::ConvBias>& prep_flt) {
if (!preprocessed_layout.empty()) {
auto&& pf = prep_flt;
pf.algorithm_id = nullptr;
pf.tensors.resize(flt_val.size());
for (size_t i = 0; i < flt_val.size(); i++) {
pf.tensors[i] = flt_val[i].as_megdnn();
}
APPLY(megdnn_opr->exec_preprocess(args..., &pf, mdn_workspace),
std::forward_as_tuple(
layouts[0], inp_val[1].as_megdnn(), inp_val[2].as_megdnn()),
array_skip<arity_in - 1>(layouts));
}
}
//! convolution
template <>
void TimedProfiler<megdnn::ConvolutionForward>::preprocess(
const TensorLayoutArray& preprocessed_layout,
const megdnn::SmallVector<DeviceTensorND>& flt_val,
intl::UniqPtrWithCN<megdnn::ConvolutionForward>& megdnn_opr,
megdnn::Workspace& mdn_workspace, std::array<TensorLayout, arity>& layouts,
std::array<DeviceTensorND, arity_in>& inp_val,
PreprocessFilter<megdnn::ConvolutionForward>& prep_flt) {
if (!preprocessed_layout.empty()) {
auto&& pf = prep_flt;
pf.algorithm_id = nullptr;
pf.tensors.resize(flt_val.size());
for (size_t i = 0; i < flt_val.size(); i++) {
pf.tensors[i] = flt_val[i].as_megdnn();
}
APPLY(megdnn_opr->exec_preprocess(args..., &pf, mdn_workspace),
std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()),
array_skip<2>(layouts));
}
}
template <typename Opr>
typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
const TParam& raw_param) {
MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl")))
#if MGB_ROCM
bool miopen_algo_search_enabled;
megcore::getMIOpenAlgoSearchStatus(&miopen_algo_search_enabled);
mgb_assert(miopen_algo_search_enabled, "MIOpen algo search not enabled");
#endif
auto&& param = raw_param.as_single_pod<Param>();
CompNode cn = CompNode::load(param.comp_node_physical, param.comp_node_logical);
auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn);
std::array<TensorLayout, arity> layouts;
auto from_enum = [&](DTypeEnum enumv) -> DType {
switch (enumv) {
#define cb(_dt) \
case DTypeTrait<_dt>::enumv: \
return _dt(1.0f, static_cast<uint8_t>(0))
cb(dtype::Quantized8Asymm);
cb(dtype::Quantized4Asymm);
#undef cb
#define cb(_dt) \
case DTypeTrait<_dt>::enumv: \
return _dt(1.0f)
cb(dtype::QuantizedS8);
cb(dtype::QuantizedS16);
cb(dtype::QuantizedS32);
cb(dtype::QuantizedS4);
default:
return DType::from_enum(enumv);
#undef cb
}
};
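// e.g. from_enum(DTypeEnum::QuantizedS8) yields dtype::QuantizedS8(1.0f);
// profiling only needs well-formed layouts, so dummy scale/zero-point
// values suffice for the quantized dtypes.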
for (int i = 0; i < arity; ++i) {
layouts[i] = {param.shapes[i], from_enum(param.dtypes[i])};
}
megdnn_opr->param() = param.opr_param;
megdnn_opr->execution_policy() = param.execution_policy.deserialize();
// deduce layouts of the preprocessed weights (buffers are allocated below)
TensorLayoutArray preprocessed_layout;
if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
if (param.allow_weight_preprocess) {
preprocessed_layout = APPLY(
_(megdnn_opr)->deduce_preprocessed_filter_layout(args...), layouts);
}
});
{
// first allocate a whole chunk to avoid memory fragmentation (here we
// rely on the memory allocator to reuse memory)
auto align = cn.get_mem_addr_alignment();
size_t tot_size = align;
for (int i = 0; i < arity; ++i) {
tot_size += layouts[i].span().high_byte + align;
}
for (const auto& layout : preprocessed_layout) {
tot_size += layout.span().high_byte + align;
}
tot_size += param.workspace;
DeviceTensorStorage storage{cn};
storage.ensure_size(tot_size);
}
// allocate input and output memory
std::array<DeviceTensorND, arity_in> inp_val;
std::array<DeviceTensorND, arity_out> out_val;
DeviceTensorND workspace;
for (int i = 0; i < arity_in; ++i) {
inp_val[i].comp_node(cn).dtype(layouts[i].dtype).resize(layouts[i]);
}
for (int i = 0; i < arity_out; ++i) {
out_val[i]
.comp_node(cn)
.dtype(layouts[arity_in + i].dtype)
.resize(layouts[arity_in + i]);
}
megdnn::Workspace mdn_workspace;
// allocate workspace
if (param.workspace) {
workspace.comp_node(cn).dtype(dtype::Byte()).resize({param.workspace});
mdn_workspace.size = param.workspace;
mdn_workspace.raw_ptr = workspace.raw_ptr();
}
// allocate storage for preprocessed filter
SmallVector<DeviceTensorND> flt_val(preprocessed_layout.size());
for (size_t i = 0; i < preprocessed_layout.size(); i++) {
flt_val[i] = {
cn, preprocessed_layout[i], preprocessed_layout[i].dtype,
preprocessed_layout[i].format};
}
for (int i = 0; i < arity_in; ++i) {
fill_zero_dev_tensor(inp_val[i]);
}
PreprocessFilter<Opr> prep_flt;
preprocess(
preprocessed_layout, flt_val, megdnn_opr, mdn_workspace, layouts, inp_val,
prep_flt);
RealTimer timer;
auto ev_start = cn.create_event(CompNode::Event::NEED_TIMER),
ev_end = cn.create_event(CompNode::Event::NEED_TIMER);
ev_start->record();
if_constexpr<opr_supports_preprocess<Opr>()>(
[&](auto _) {
auto&& opr = _(megdnn_opr);
PreprocessFilter<Opr>* pf =
preprocessed_layout.empty() ? nullptr : &prep_flt;
APPLY(opr->exec(args.as_megdnn()..., pf, mdn_workspace), inp_val,
out_val);
},
/* else */
[&](auto _) {
APPLY(_(megdnn_opr)->exec(args.as_megdnn()..., mdn_workspace), inp_val,
out_val);
});
ev_end->record();
megdnn::Algorithm* algo =
megdnn_opr->get_algorithm_from_desc(megdnn_opr->execution_policy().algo);
mgb_assert(algo);
double next_report_time = 0.5;
while (!ev_end->finished()) {
if (timer.get_secs() >= next_report_time) {
#if MGB_ENABLE_GETENV
mgb_log_warn(
"profiling conv algo %s already took %.3f/%.3f secs"
" (limit can be set by MGB_CONV_PROFILING_TIMEOUT) ",
algo->name(), timer.get_secs(), param.actual_timeout);
#else
mgb_log_warn(
"profiling conv algo %s already took %.3f/%.3f secs", algo->name(),
timer.get_secs(), param.actual_timeout);
#endif
next_report_time = timer.get_secs() + 1;
}
using namespace std::literals;
#if !__DEPLOY_ON_XP_SP2__
std::this_thread::sleep_for(1000us);
#endif
}
// release all free blocks owned by the child process,
// in order to avoid the main process running out of memory
cn.try_coalesce_all_free_memory();
mgb_assert(ev_start->finished());
return TResult::from_pod(Result{ev_start->elapsed_time_until(*ev_end)});
MIDOUT_E
};
template <typename Opr>
Maybe<typename TimedProfiler<Opr>::Result> TimedProfiler<Opr>::profile(
const Param& param, double& timeout) {
mgb_assert(timeout >= 0);
if (!timeout) {
timeout = timeout_setting;
} else if (timeout_setting) {
timeout = std::min(timeout, timeout_setting);
}
param.actual_timeout = timeout ? timeout : std::numeric_limits<double>::infinity();
auto res = sys::TimedFuncInvoker::ins().invoke(
AlgoChooserFuncId<Opr>::ID, TParam::from_pod(const_cast<Param&>(param)),
timeout);
if (res.valid())
return res.val().template as_single_pod<Result>();
return None;
}
template <typename Opr>
void TimedProfiler<Opr>::prof_init_device(const TParam& raw_param) {
MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device")))
#if MGB_ROCM
megcore::enableMIOpenAlgoSearch(true);
#endif
auto&& param = raw_param.as_single_pod<Param>();
CompNode cn = CompNode::load(param.comp_node_physical, param.comp_node_logical);
// wait for cuda init, so that its time is not accounted in the timeout
cn.sync();
MIDOUT_E
}
#define INST(Opr) \
template const double TimedProfiler<megdnn::Opr>::timeout_setting; \
template double TimedProfiler<megdnn::Opr>::init_timeout_setting(); \
template typename TimedProfiler<megdnn::Opr>::TResult \
TimedProfiler<megdnn::Opr>::prof_impl(const TParam& raw_param); \
template Maybe<typename TimedProfiler<megdnn::Opr>::Result> \
TimedProfiler<megdnn::Opr>::profile(const Param& param, double& timeout); \
template void TimedProfiler<megdnn::Opr>::prof_init_device(const TParam& raw_param);
MGB_FOREACH_FASTRUN_OPR(INST)
#undef INST
} // namespace opr
} // namespace mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -12,7 +12,7 @@
#pragma once
#include "megbrain/opr/search_policy/profiler.h"
#include "megbrain/opr/search_policy/algo_chooser.h"
#include "../internal/megdnn_opr_wrapper.inl"
......@@ -25,7 +25,7 @@ namespace intl {
struct AutoAddWorkspaceNeedLimitGetter<megdnn::_Opr> { \
static constexpr bool val = true; \
};
MGB_FOREACH_FASTRUN_OPR(cb)
DNN_FOREACH_FASTRUN_OPR(cb)
#undef cb
......
......@@ -13,6 +13,7 @@
#include "megbrain/graph.h"
#include "megbrain/opr/internal/mixin_base.h"
#include "megbrain/rdnn/management.h"
#include "megdnn/handle.h"
......@@ -20,43 +21,6 @@ namespace mgb {
namespace opr {
namespace intl {
//! get megdnn handle from comp node
MGE_WIN_DECLSPEC_FUC megdnn::Handle* get_megdnn_handle(CompNode comp_node);
MGE_WIN_DECLSPEC_FUC std::shared_ptr<megdnn::Handle> get_megdnn_handle_shared(
CompNode comp_node);
/*!
* \brief get global megdnn operator associated with a computing node
* \tparam Opr megdnn operator class, must be one of:
* * AddUpdate
* * Relayout
* * Checksum
*/
template <typename Opr>
MGE_WIN_DECLSPEC_FUC Opr* get_megdnn_global_opr(CompNode comp_node);
template <class Obj>
class UniqPtrWithCN : public std::unique_ptr<Obj> {
CompNode m_cn;
public:
UniqPtrWithCN() = default;
template <class RObj>
UniqPtrWithCN(UniqPtrWithCN<RObj>&& o)
: std::unique_ptr<Obj>(std::move(o)), m_cn(o.comp_node()) {}
UniqPtrWithCN(std::unique_ptr<Obj> ptr, CompNode cn)
: std::unique_ptr<Obj>{std::move(ptr)}, m_cn{cn} {}
CompNode comp_node() const { return m_cn; }
};
//! create a megdnn opr from the megdnn handle of a CompNode
template <class Opr>
UniqPtrWithCN<Opr> create_megdnn_opr(CompNode comp_node) {
return {get_megdnn_handle(comp_node)->create_operator<Opr>(), comp_node};
}
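// Usage sketch (hypothetical; src/dst stand for real megdnn::TensorNDs):
// UniqPtrWithCN keeps the plain unique_ptr interface while remembering
// the comp node the opr was created on.
//
//     auto relayout = create_megdnn_opr<megdnn::Relayout>(cn);
//     mgb_assert(relayout.comp_node() == cn);
//     relayout->exec(src, dst);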
/*!
* \brief get temporary storage for oprs
......
......@@ -19,7 +19,7 @@
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/search_policy/algo_chooser_helper.h"
#include "megbrain/opr/search_policy/profiler.h"
#include "megbrain/rdnn/algo_chooser.h"
#include "megdnn/oprs/base.h"
template <class MegDNNOpr>
......@@ -31,18 +31,13 @@ struct MegDNNOpr2MGBOpr;
using MGBOpr = mgb::opr::_Opr; \
};
MGB_FOREACH_FASTRUN_OPR(cb)
DNN_FOREACH_FASTRUN_OPR(cb)
#undef cb
namespace mgb {
//! define logical operations on megdnn::param::ExecutionPolicy::Strategy::Enum
//! and the megdnn::detail::AlgoAttribute enum
using ExecutionStrategy = megdnn::param::ExecutionPolicy::Strategy;
using AlgoAttribute = megdnn::AlgoAttribute;
#define MGB_FOREACH_FASTRUN_OPR(cb) DNN_FOREACH_FASTRUN_OPR(cb)
namespace mgb {
namespace opr {
/* =================== AlgoChooser =================== */
......@@ -56,138 +51,14 @@ namespace opr {
* \tparam Opr megdnn operator impl
*/
template <typename Opr>
class AlgoChooser {
static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
static constexpr int arity_out = OprArityTrait<Opr>::arity_out;
static constexpr int arity = OprArityTrait<Opr>::arity;
using ImplAlgo = typename Opr::AlgorithmInfo;
using ImplAlgoDesc = typename Opr::AlgorithmInfo::Desc;
using ImplExecutionPolicy = megdnn::ExecutionPolicy;
class AlgoChooser : public rdnn::AlgoChooser<Opr> {
using Base = rdnn::AlgoChooser<Opr>;
using MGBOpr = typename MegDNNOpr2MGBOpr<Opr>::MGBOpr;
using ImplExecutionPolicy = typename Base::ImplExecutionPolicy;
public:
using FixedTensorLayouts = std::array<TensorLayout, arity>;
class AlgoChooserHelper {
//! fastrun layouts
FixedTensorLayouts m_fastrun_layouts;
//! layouts used when get and set cache item
FixedTensorLayouts m_incache_layouts;
Opr* m_dnn_opr;
std::string m_param;
const cg::OperatorNodeBase* m_base_mgb_opr;
CompNode m_cn;
megdnn::param::ExecutionPolicy m_execution_policy;
bool m_allow_weight_preprocess;
public:
AlgoChooserHelper(
const FixedTensorLayouts& layouts, Opr* megdnn_opr,
const std::string& param_str, const cg::OperatorNodeBase* mgb_opr,
const CompNode& cn,
const megdnn::param::ExecutionPolicy& execution_policy,
bool allow_weight_preprocess);
Opr* megdnn_opr() const { return m_dnn_opr; }
const cg::OperatorNodeBase* mgb_opr() const { return m_base_mgb_opr; }
const TensorLayout& inp_layout(size_t idx) const {
return m_fastrun_layouts[idx];
}
cg::ComputingGraph* owner_graph() const {
return m_base_mgb_opr->owner_graph();
}
const megdnn::param::ExecutionPolicy& execution_policy() const {
return m_execution_policy;
}
CompNode comp_node() const { return m_cn; }
const std::string& param() const { return m_param; }
bool allow_weight_preprocess() const { return m_allow_weight_preprocess; }
megdnn::Algorithm* get_algorithm_from_desc(
const megdnn::Algorithm::Info::Desc& desc) const {
return m_dnn_opr->get_algorithm_from_desc(desc);
}
const FixedTensorLayouts& fastrun_layouts() const { return m_fastrun_layouts; }
const FixedTensorLayouts& incache_layouts() const { return m_incache_layouts; }
//! construct algo chain by heuristic
ImplExecutionPolicy choose_by_heuristic(
const ExecutionStrategy& selected_strategy) const;
//! construct algo chain by profiling
ImplExecutionPolicy choose_by_profile(
const ExecutionStrategy& selected_strategy, bool enable_update) const;
//! get all profiled algorithms from the cache; return invalid if none exist
std::pair<ImplAlgoDesc, Maybe<AlgoChooserProfileCache::Result>>
get_profile_result_from_cache(const ExecutionStrategy& selected_strategy) const;
/**
* \brief construct execution policy from cache or heuristic.
*
* \param selected_strategy select algo which matched this strategy
* \param[in,out] policy execution policy
* \param retrive_from_cache retrieve the algo from the cache if set to
* true, get it from heuristic otherwise.
* \param allow_log no warning log is printed if set to true, warning
* info is printed otherwise.
*/
void construct_execution_policy(
const ExecutionStrategy& selected_strategy, ImplExecutionPolicy& policy,
bool retrive_from_cache = true, bool allow_log = true) const;
//! get workspace size required for specific execution policy
size_t get_workspace_size_bytes(
const ImplExecutionPolicy& policy,
const FixedTensorLayouts& layouts = {}) const;
//! get all candidate algos; the one chosen by choose_by_heuristic() is
//! put first
std::vector<ImplAlgo> get_all_candidates() const;
/*!
* \brief profile a single algorithm
*
* This is actually a wrapper that constructs the param and calls
* TimedProfiler<Opr>::profile for the actual profiling
*
* \param[in,out] timeout set the timeout, and return the actual
* timeout used during profiling
*/
Maybe<AlgoChooserProfileCache::ResultEntry> profile_single_algo(
const ImplExecutionPolicy& policy, double& timeout) const;
//! profile and save to cache
void profile(const ExecutionStrategy& selected_strategy) const;
/**
* \brief extract algo attribute from execution strategy and graph
* option.
*
* \param strategy select algo which matched this strategy
* \return pair<positive_attr, negative_attr>
*/
std::pair<AlgoAttribute, AlgoAttribute> extract_algo_attribute(
const ExecutionStrategy& strategy) const;
private:
Maybe<PreprocessFilter<Opr>> construct_fake_preprocess_filter(
const FixedTensorLayouts& layouts = {}) const;
};
template <typename U>
friend class AlgoChooser;
private:
//! entrance for getting algorithm according to execution strategy
static ImplExecutionPolicy get_policy(const AlgoChooserHelper& helper);
public:
using AlgoChooserHelper = typename Base::AlgoChooserHelper;
using FixedTensorLayouts = typename Base::FixedTensorLayouts;
/*!
* \brief setup algorithm and return workspace size
*/
......
/**
* \file src/opr/include/megbrain/opr/search_policy/profiler.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megbrain/comp_node.h"
#include "megbrain/opr/internal/megdnn_opr_wrapper.h"
#include "megbrain/system.h"
#include "megbrain/tensor.h"
#include "megbrain/utils/hash_ct.h"
#include "megbrain/utils/timer.h"
#include "megdnn/basic_types.h"
#include "megdnn/oprs.h"
namespace mgb {
namespace opr {
// clang-format off
#define MGB_FOREACH_FASTRUN_OPR(cb) \
cb(ConvolutionForward) \
cb(ConvBiasForward) \
cb(ConvolutionBackwardData) \
cb(ConvolutionBackwardFilter) \
cb(Convolution3DForward) \
cb(Convolution3DBackwardData) \
cb(Convolution3DBackwardFilter) \
cb(LocalShareForward) \
cb(LocalShareBackwardData) \
cb(LocalShareBackwardFilter) \
cb(DeformableConvForward) \
cb(DeformableConvBackwardFilter) \
cb(DeformableConvBackwardData) \
cb(BatchConvBiasForward) \
cb(MatrixMul) \
cb(BatchedMatrixMul) \
cb(PoolingForward) \
cb(PoolingBackward)
// clang-format on
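// Expansion sketch: each cb receives a bare opr name, so a hypothetical
//     #define cb(_opr) void profile_##_opr();
//     MGB_FOREACH_FASTRUN_OPR(cb)
// declares profile_ConvolutionForward(), profile_ConvBiasForward(), ...,
// one declaration per fastrun opr listed above.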
template <typename Opr>
constexpr bool opr_supports_preprocess() {
return std::is_same<Opr, megdnn::ConvolutionForward>::value ||
std::is_same<Opr, megdnn::ConvBias>::value;
}
template <typename Opr>
constexpr bool opr_contain_bias() {
return std::is_same<Opr, megdnn::ConvBias>::value;
}
//! MatrixMul and BatchedMatrixMul
template <typename Opr>
constexpr bool is_matmul() {
return std::is_same<Opr, megdnn::MatrixMul>::value ||
std::is_same<Opr, megdnn::BatchedMatrixMul>::value;
}
template <typename Opr, bool has_prep>
struct PreprocessFilterImpl {
using T = union {};
};
template <typename Opr>
struct PreprocessFilterImpl<Opr, true> {
using T = typename Opr::PreprocessedFilter;
};
template <typename Opr>
using PreprocessFilter =
typename PreprocessFilterImpl<Opr, opr_supports_preprocess<Opr>()>::T;
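// Selection sketch: for oprs that support weight preprocessing the alias
// resolves to the real filter type, otherwise to the empty placeholder
// union, e.g.
//
//     static_assert(
//             std::is_same<PreprocessFilter<megdnn::ConvBias>,
//                          megdnn::ConvBias::PreprocessedFilter>::value,
//             "illustrative only");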
template <typename Opr>
struct AlgoChooserFuncId {};
#define DEF_FUNC_ID(func) \
template <> \
struct AlgoChooserFuncId<megdnn::func> { \
__attribute__((unused)) static constexpr sys::TimedFuncInvoker::FuncId ID = \
static_cast<sys::TimedFuncInvoker::FuncId>( \
MGB_HASH_STR("megdnn::" #func)); \
};
MGB_FOREACH_FASTRUN_OPR(DEF_FUNC_ID)
#undef DEF_FUNC_ID
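// Sketch: each generated ID is a compile-time hash of the opr name, e.g.
//
//     constexpr auto id = AlgoChooserFuncId<megdnn::ConvolutionForward>::ID;
//     // == FuncId(MGB_HASH_STR("megdnn::ConvolutionForward"))
//
// so the registration in init_timeout_setting() and the lookup in profile()
// agree on the same function id across processes.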
/* =================== TimedProfiler =================== */
/*!
* \brief profile a megdnn conv opr with a given param
*
* This class only provides static methods, and the entry point is
* TimedProfiler::profile; it runs the profiler in a timed environment
* via sys::TimedFuncInvoker
*
* \tparam Opr megdnn opr impl
*/
template <typename Opr>
class TimedProfiler {
static constexpr int arity_in = OprArityTrait<Opr>::arity_in;
static constexpr int arity_out = OprArityTrait<Opr>::arity_out;
static constexpr int arity = OprArityTrait<Opr>::arity;
using TensorShapeArray = std::array<megdnn::TensorShape, arity>;
public:
struct Param {
struct ExecutionPolicyBlob {
//! enlarge the max size if needed
constexpr static size_t MAX_SIZE_IN_BYTES = 10240;
char data[MAX_SIZE_IN_BYTES];
uint32_t size;
static ExecutionPolicyBlob serialize(const megdnn::ExecutionPolicy& policy);
megdnn::ExecutionPolicy deserialize() const;
};
ExecutionPolicyBlob execution_policy;
size_t workspace;
megdnn::DTypeEnum dtypes[arity];
CompNode::Locator comp_node_physical, comp_node_logical;
TensorShapeArray shapes;
typename Opr::Param opr_param;
bool allow_weight_preprocess;
//! filled by profile()
mutable double actual_timeout;
};
struct Result {
double time;
};
static Maybe<Result> profile(const Param& param, double& timeout);
private:
using TParam = sys::TimedFuncInvoker::Param;
using TResult = sys::TimedFuncInvoker::Result;
static const double timeout_setting;
static double init_timeout_setting();
static void preprocess(
const megdnn::TensorLayoutArray& preprocessed_layout,
const SmallVector<DeviceTensorND>& flt_val,
intl::UniqPtrWithCN<Opr>& megdnn_opr, megdnn::Workspace& mdn_workspace,
std::array<TensorLayout, arity>& layouts,
std::array<DeviceTensorND, arity_in>& inp_val,
PreprocessFilter<Opr>& prep_flt);
static TResult prof_impl(const TParam& raw_param);
static void prof_init_device(const TParam& raw_param);
};
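// Minimal usage sketch (all values hypothetical; real callers such as
// AlgoChooser fill Param from the actual tensor layouts and policy):
//
//     using Prof = TimedProfiler<megdnn::ConvolutionForward>;
//     Prof::Param param{};  // shapes/dtypes/opr_param omitted here
//     param.workspace = 0;
//     param.allow_weight_preprocess = false;
//     double timeout = 0;   // 0: fall back to the global setting
//     auto res = Prof::profile(param, timeout);
//     if (res.valid())
//         mgb_log("profiled time: %g", res.val().time);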
} // namespace opr
} // namespace mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}