Commit 5d637d07 authored by Megvii Engine Team

refactor(mgb): code refactor of fast run

GitOrigin-RevId: 2c4b8e06bb3c4b4cb0228ee28988c2371455b1b0
Parent f6bd4f59
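Net effect of this commit: the per-operator context class `AlgoChooser<Opr>::ExeContext` becomes `AlgoChooser<Opr>::AlgoChooserHelper`, and the static entry points `profile()` and `choose_by_profile()` that used to take that context as a parameter become const member functions of the helper; only `get_policy()` and `setup_algo()` remain on `AlgoChooser`. A minimal sketch of the call-shape change, using invented stand-in types rather than the real MegBrain API:

```cpp
#include <cstdio>

// Illustrative stand-ins only; the real classes carry layouts, the megdnn
// operator, the comp node and the execution policy.
namespace before {
struct ExeContext {};
// behaviour lived in static members: AlgoChooser<Opr>::profile(ctx, ...)
void profile(ExeContext& /*ctx*/, int /*strategy*/) {
    std::printf("old: AlgoChooser<Opr>::profile(ctx, strategy)\n");
}
}  // namespace before

namespace after {
struct AlgoChooserHelper {
    // behaviour now lives on the helper: helper.profile(...)
    void profile(int /*strategy*/) const {
        std::printf("new: helper.profile(strategy)\n");
    }
};
}  // namespace after

int main() {
    before::ExeContext ctx;
    before::profile(ctx, 0);
    after::AlgoChooserHelper helper;
    helper.profile(0);
}
```

This removes the `ctx` argument threading: the recursive sub-operator handling can now construct a sub-helper and call the same member on it directly.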
@@ -243,31 +243,33 @@ typename opr::AlgoChooser<Opr>::FixedTensorLayouts to_fixed_layouts(
  */
 template <typename Opr>
 std::vector<megdnn::Algorithm::SearchItem> flatten_search_space(
-        const typename opr::AlgoChooser<Opr>::ExeContext& ctx,
+        const typename opr::AlgoChooser<Opr>::AlgoChooserHelper& helper,
         CircularDepsChecker& checker) {
     auto&& search_item = megdnn::Algorithm::SearchItem{
-            OprTypeFromOprTrait<Opr>::opr_type, ctx.param(),
-            to_layout_array<Opr>(ctx.layouts())};
+            OprTypeFromOprTrait<Opr>::opr_type, helper.param(),
+            to_layout_array<Opr>(helper.layouts())};
     checker.put(search_item);
     std::vector<megdnn::Algorithm::SearchItem> ret;
-    for (auto algo_info : ctx.get_all_candidates()) {
-        megdnn::Algorithm* algo = ctx.get_algorithm_from_desc(algo_info.desc);
+    for (auto algo_info : helper.get_all_candidates()) {
+        megdnn::Algorithm* algo =
+                helper.get_algorithm_from_desc(algo_info.desc);
         mgb_assert(algo, "Unknown algo description");
         std::vector<megdnn::Algorithm::SearchItem>&& sub_items =
-                algo->get_subopr_list(to_layout_array<Opr>(ctx.layouts()),
-                                      ctx.megdnn_opr());
+                algo->get_subopr_list(to_layout_array<Opr>(helper.layouts()),
+                                      helper.megdnn_opr());
         FOREACH_OPR_TYPE_DISPATCH(sub_items, {
             auto&& megdnn_opr =
-                    opr::intl::create_megdnn_opr<_Opr>(ctx.comp_node());
+                    opr::intl::create_megdnn_opr<_Opr>(helper.comp_node());
             megdnn_opr->param() =
                     Algorithm::deserialize_read_pod<typename _Opr::Param>(
                             _item.param);
-            typename opr::AlgoChooser<_Opr>::ExeContext sub_ctx(
+            typename opr::AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
                     to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
-                    _item.param, ctx.mgb_opr(), ctx.comp_node(),
-                    ctx.execution_policy(), ctx.allow_weight_preprocess());
-            auto space = flatten_search_space<_Opr>(sub_ctx, checker);
+                    _item.param, helper.mgb_opr(), helper.comp_node(),
+                    helper.execution_policy(),
+                    helper.allow_weight_preprocess());
+            auto space = flatten_search_space<_Opr>(sub_helper, checker);
             ret.insert(ret.end(), space.begin(), space.end());
         });
     }
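flatten_search_space() recurses through every candidate algorithm's get_subopr_list(), and CircularDepsChecker (defined earlier in this file, outside the excerpt) aborts if the same (opr_type, param, layouts) item is ever revisited. An illustrative sketch of that guard, with invented minimal types in place of megdnn::Algorithm::SearchItem:

```cpp
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical stand-in for megdnn::Algorithm::SearchItem.
struct SearchItem {
    std::string opr_type, param, layouts;
    std::vector<SearchItem> subs;  // sub-operators the algorithm dispatches to
    std::string key() const { return opr_type + '|' + param + '|' + layouts; }
};

// Sketch of the checker: remember every serialized item ever seen and
// fail fast on a repeat, which would mean an endless recursion.
struct CircularDepsChecker {
    std::set<std::string> seen;
    void put(const SearchItem& item) {
        if (!seen.insert(item.key()).second)
            throw std::runtime_error("circular dependency in search space");
    }
};

std::vector<SearchItem> flatten_search_space(const SearchItem& item,
                                             CircularDepsChecker& checker) {
    checker.put(item);
    std::vector<SearchItem> ret;
    for (const SearchItem& sub : item.subs) {
        auto space = flatten_search_space(sub, checker);  // depth first
        ret.insert(ret.end(), space.begin(), space.end());
    }
    ret.push_back(item);  // sub-operators come before their parent
    return ret;
}
```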
@@ -280,255 +282,113 @@ std::vector<megdnn::Algorithm::SearchItem> flatten_search_space(
 namespace mgb {
 namespace opr {
 
+///////////////////////////// AlgoChooserHelper //////////////////////////
 template <typename Opr>
-void AlgoChooser<Opr>::profile(ExeContext& ctx,
-                               ExecutionStrategy selected_strategy) {
-    if (ctx.get_profile_result_from_cache(selected_strategy).valid())
-        return;
-    AlgoChooserProfileCache::Result prof_rst;
-
-    auto target_attr = ctx.extract_algo_attribute(selected_strategy);
-    std::string layouts_str =
-            format_fixlayouts<Opr>(ctx.layouts(), arity_in, arity_out);
-    double cur_timeout = 0;
-
-    auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
-            ctx.owner_graph(), ctx.comp_node(),
-            ctx.execution_policy().workspace_limit);
-    RealTimer timer;
-    for (auto algo : ctx.get_all_candidates()) {
-        Maybe<AlgoChooserProfileCache::ResultEntry> cur_rst;
-        ImplExecutionPolicy policy;
-        policy.algo = algo.desc;
-
-        //! check negative attribute : skip negative attribute
-        auto palgo = ctx.megdnn_opr()->get_algorithm_from_desc(policy.algo);
-        if (palgo->contain_attribute_any(target_attr.second)) {
-            mgb_log_debug(
-                    "skip algo %s with attribute(%s), which is not match the "
-                    "profile strategy required contain attribute(%s) and not "
-                    "contain attribute(%s).",
-                    algo.desc.name.c_str(),
-                    Algorithm::attribute_str(palgo->attribute()).c_str(),
-                    Algorithm::attribute_str(target_attr.first).c_str(),
-                    Algorithm::attribute_str(target_attr.second).c_str());
-            continue;
-        }
-
-        //! check workspace limit
-        ctx.construct_execution_policy(selected_strategy, policy);
-        if (ctx.get_workspace_size_bytes(policy) >= workspace_limit) {
-            continue;
-        }
-
-        std::string msg = ssprintf("profiling %s algorithm %s %s",
-                                   ctx.mgb_opr()->dyn_typeinfo()->name,
-                                   algo.desc.name.c_str(), layouts_str.c_str());
-        timer.reset();
-        MGB_TRY { cur_rst = ctx.profile_single_algo(policy, cur_timeout); }
-        MGB_CATCH(std::exception & exc, {
-            mgb_log_warn("caught exception during %s: %s", msg.c_str(),
-                         exc.what());
-            continue;
-        })
-        MGB_CATCH(..., {
-            mgb_log_warn("caught exception during %s", msg.c_str());
-            continue;
-        })
-        if (!cur_rst.valid()) {
-            mgb_log_warn("timeout when %s; timeout setting: %.3fsec",
-                         msg.c_str(), cur_timeout);
-            continue;
-        }
-        if (!cur_timeout) {
-            cur_timeout = timer.get_secs() + TIMEOUT_TOLERANCE;
-        } else {
-            cur_timeout =
-                    std::min(cur_timeout, timer.get_secs() + TIMEOUT_TOLERANCE);
-        }
-        auto&& rst = cur_rst.val();
-        mgb_log_debug("%s: workspace: %zu; time: %.3gsec", msg.c_str(),
-                      rst.workspace, rst.time);
-        prof_rst.push_back(rst);
-    }
-    std::string msg = ssprintf(
-            "no usable %s algorithm %s without attribute(%s) or could not meet "
-            "workspace limite requirement(%zu)",
-            ctx.mgb_opr()->dyn_typeinfo()->name, layouts_str.c_str(),
-            Algorithm::attribute_str(target_attr.second).c_str(),
-            workspace_limit);
-    mgb_assert(!prof_rst.empty(), "%s", msg.c_str());
-
-    FixedTensorLayouts origin_layouts = ctx.layouts();
-    typename Opr::Param origin_param = ctx.megdnn_opr()->param();
-    AlgoChooserProfileCache::Key cache_key{origin_layouts.data(),
-                                           origin_layouts.size(), &origin_param,
-                                           sizeof(origin_param)};
-
-    AlgoChooserProfileCache cache(ctx.comp_node(),
-                                  profile_name(ctx.megdnn_opr()).c_str());
-    cache.put(cache_key, prof_rst);
-}
+AlgoChooser<Opr>::AlgoChooserHelper::AlgoChooserHelper(
+        const FixedTensorLayouts& layouts, Opr* megdnn_opr,
+        const std::string& param_str, const cg::OperatorNodeBase* mgb_opr,
+        const CompNode& cn,
+        const megdnn::param::ExecutionPolicy& execution_policy,
+        bool allow_weight_preprocess)
+        : m_layouts{layouts},
+          m_megdnn_opr{megdnn_opr},
+          m_param{param_str},
+          m_base_mgb_opr{mgb_opr},
+          m_cn{cn},
+          m_execution_policy{execution_policy},
+          m_allow_weight_preprocess{allow_weight_preprocess} {
+    mgb_assert(m_layouts.size() == layouts.size());
+    static_assert(std::tuple_size<FixedTensorLayouts>::value == 3 ||
+                          std::tuple_size<FixedTensorLayouts>::value == 5 ||
+                          std::tuple_size<FixedTensorLayouts>::value == 8,
+                  "Convolution AlgoChooser assumes arity = 3 , 5 or 8 (for "
+                  "deformable conv)");
+}
+
+template <typename Opr>
+typename AlgoChooser<Opr>::ImplExecutionPolicy
+AlgoChooser<Opr>::AlgoChooserHelper::choose_by_heuristic(
+        const ExecutionStrategy& selected_strategy) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("choose_by_heuristic")))
+    ImplExecutionPolicy policy;
+    auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
+            owner_graph(), m_cn, m_execution_policy.workspace_limit);
+    auto attr = extract_algo_attribute(selected_strategy);
+    policy.algo =
+            APPLY(m_megdnn_opr->get_algorithm_info_heuristic(
+                          args..., workspace_limit, attr.first, attr.second),
+                  m_layouts)
+                    .desc;
+
+    Algorithm* algo = m_megdnn_opr->get_algorithm_from_desc(policy.algo);
+    mgb_assert(algo, "Unknown algo description");
+    std::vector<Algorithm::SearchItem>&& sub_items = algo->get_subopr_list(
+            to_layout_array<Opr>(m_layouts), m_megdnn_opr);
+
+    FOREACH_OPR_TYPE_DISPATCH(sub_items, {
+        auto&& megdnn_opr = intl::create_megdnn_opr<_Opr>(m_cn);
+        megdnn_opr->param() =
+                Algorithm::deserialize_read_pod<typename _Opr::Param>(
+                        _item.param);
+        typename AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
+                to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
+                _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
+                m_allow_weight_preprocess);
+        policy.sub_policy.push_back(
+                sub_helper.choose_by_heuristic(selected_strategy));
+    });
+
+    return policy;
+    MIDOUT_E
+}
 
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplExecutionPolicy
-AlgoChooser<Opr>::choose_by_profile(ExeContext& ctx,
-                                    ExecutionStrategy selected_strategy,
-                                    bool enable_update) {
-    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("AlgoChooser::choose_by_profile")))
-    if (ctx.owner_graph()->options().no_profiling_on_shape_change) {
-        auto policy = ctx.megdnn_opr()->execution_policy();
-        if (policy.algo.valid()){
+AlgoChooser<Opr>::AlgoChooserHelper::choose_by_profile(
+        const ExecutionStrategy& selected_strategy, bool enable_update) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("choose_by_profile")))
+    if (owner_graph()->options().no_profiling_on_shape_change) {
+        auto policy = m_megdnn_opr->execution_policy();
+        if (policy.algo.valid()) {
             return policy;
         }
         if (!algo_usable_on_shape_change<Opr>()) {
             mgb_log_warn(
                     "choose algo by heuristic, which may cause performance "
                     "regression.");
-            return ctx.choose_by_heuristic(selected_strategy);
+            return choose_by_heuristic(selected_strategy);
         }
     }
 
     if (enable_update) {
         CircularDepsChecker circular_deps_checker;
         auto&& search_items =
-                flatten_search_space<Opr>(ctx, circular_deps_checker);
+                flatten_search_space<Opr>(*this, circular_deps_checker);
         FOREACH_OPR_TYPE_DISPATCH(search_items, {
-            auto&& megdnn_opr = intl::create_megdnn_opr<_Opr>(ctx.comp_node());
+            auto&& megdnn_opr = intl::create_megdnn_opr<_Opr>(m_cn);
             megdnn_opr->param() =
                     Algorithm::deserialize_read_pod<typename _Opr::Param>(
                             _item.param);
-            typename AlgoChooser<_Opr>::ExeContext sub_ctx(
+            typename AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
                     to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
-                    _item.param, ctx.mgb_opr(), ctx.comp_node(),
-                    ctx.execution_policy(), ctx.allow_weight_preprocess());
-            AlgoChooser<_Opr>::profile(sub_ctx, selected_strategy);
+                    _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
+                    m_allow_weight_preprocess);
+            sub_helper.profile(selected_strategy);
         });
     }
     typename AlgoChooser<Opr>::ImplExecutionPolicy policy;
-    ctx.construct_execution_policy(selected_strategy, policy);
+    construct_execution_policy(selected_strategy, true, policy);
     return policy;
     MIDOUT_E
 }
 
-template <typename Opr>
-size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
-                                    Opr* megdnn_opr, const MGBOpr* mgb_opr,
-                                    bool allow_weight_preprocess) {
-    if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) {
-        return 0;
-    }
-
-    std::string param_str;
-    Algorithm::serialize_write_pod(megdnn_opr->param(), param_str);
-    ExeContext ctx(layouts, megdnn_opr, param_str, mgb_opr,
-                   mgb_opr->comp_node(), mgb_opr->execution_policy(),
-                   allow_weight_preprocess);
-
-    ImplExecutionPolicy policy;
-    if (auto algo_choose_hook = mgb_opr->algo_chooser()) {
-        policy = algo_choose_hook(mgb_opr);
-        ctx.construct_execution_policy((ExecutionStrategy::HEURISTIC |
-                                        ExecutionStrategy::REPRODUCIBLE),
-                                       policy, false);
-    }
-    if (!policy.algo.valid()) {
-        policy = get_policy(ctx);
-    }
-    size_t workspace = ctx.get_workspace_size_bytes(policy);
-
-    std::string ret;
-    ret.append(mgb_opr->dyn_typeinfo()->name);
-    ret += format_fixlayouts<Opr>(layouts, arity_in, arity_out);
-    Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo);
-    mgb_assert(palgo, "Unknown algo description");
-    ret.append("): algo=" + std::string(palgo->name()));
-    ret.append(ssprintf(" workspace=%.2fMiB attirbute(%s)",
-                        workspace / (1024 * 1024.0),
-                        Algorithm::attribute_str(palgo->attribute()).c_str()));
-    mgb_log_debug("%s", ret.c_str());
-
-    megdnn_opr->execution_policy() = policy;
-    return workspace;
-}
-
-template <typename Opr>
-typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy(
-        ExeContext& ctx) {
-    MGB_MARK_USED_VAR(TIMEOUT_TOLERANCE);
-    auto opr_strategy = ctx.execution_policy().strategy;
-    if ((opr_strategy & ExecutionStrategy::HEURISTIC) &&
-        (opr_strategy & ExecutionStrategy::PROFILE)) {
-        ImplExecutionPolicy policy =
-                choose_by_profile(ctx, opr_strategy, false);
-        if (!policy.algo.valid())
-            policy = ctx.choose_by_heuristic(opr_strategy);
-        return policy;
-    } else if (!static_cast<int>(opr_strategy) ||
-               (opr_strategy & ExecutionStrategy::HEURISTIC)) {
-        return ctx.choose_by_heuristic(opr_strategy);
-    }
-#if MGB_ENABLE_FASTRUN
-    else if (opr_strategy & ExecutionStrategy::PROFILE) {
-        return choose_by_profile(ctx, opr_strategy);
-    }
-#endif
-    else {
-        mgb_throw(GraphError, "bad ExecutionPolicy strategy");
-    }
-}
-
-#define INST(Opr)                                                         \
-    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy                \
-    AlgoChooser<megdnn::Opr>::get_policy(ExeContext& ctx);                \
-    template void AlgoChooser<megdnn::Opr>::profile(ExeContext& ctx,      \
-                                                    ExecutionStrategy);   \
-    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy                \
-    AlgoChooser<megdnn::Opr>::choose_by_profile(                          \
-            ExeContext& ctx, ExecutionStrategy, bool enable_update);      \
-    template size_t AlgoChooser<megdnn::Opr>::setup_algo(                 \
-            const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr,   \
-            const MGBOpr* mgb_opr, bool allow_weight_preprocess);
-
-MGB_FOREACH_FASTRUN_OPR(INST)
-#undef INST
-
-//////////////////////////////// ExeContext /////////////////////////////
-
-template <typename Opr>
-AlgoChooser<Opr>::ExeContext::ExeContext(
-        const FixedTensorLayouts& layouts, Opr* megdnn_opr,
-        const std::string& param_str, const cg::OperatorNodeBase* mgb_opr,
-        const CompNode& cn,
-        const megdnn::param::ExecutionPolicy& execution_policy,
-        bool allow_weight_preprocess)
-        : m_layouts{layouts},
-          m_megdnn_opr{megdnn_opr},
-          m_param{param_str},
-          m_base_mgb_opr{mgb_opr},
-          m_cn{cn},
-          m_execution_policy{execution_policy},
-          m_allow_weight_preprocess{allow_weight_preprocess} {
-    mgb_assert(m_layouts.size() == layouts.size());
-    static_assert(std::tuple_size<FixedTensorLayouts>::value == 3 ||
-                          std::tuple_size<FixedTensorLayouts>::value == 5 ||
-                          std::tuple_size<FixedTensorLayouts>::value == 8,
-                  "Convolution AlgoChooser assumes arity = 3 , 5 or 8 (for "
-                  "deformable conv)");
-}
 
 template <typename Opr>
 typename AlgoChooser<Opr>::ImplAlgo
-AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
-        ExecutionStrategy selected_strategy) const {
-    MIDOUT_B(Opr,
-             midout_iv(MGB_HASH_STR(
-                     "AlgoChooser::ExeContext::get_profile_result_from_cache")))
-    AlgoChooserProfileCache cache(m_cn,
-                                  profile_name(m_megdnn_opr).c_str());
+AlgoChooser<Opr>::AlgoChooserHelper::get_profile_result_from_cache(
+        const ExecutionStrategy& selected_strategy) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("get_profile_result_from_cache")))
+    AlgoChooserProfileCache cache(m_cn, profile_name(m_megdnn_opr).c_str());
 
     typename Opr::Param origin_param = m_megdnn_opr->param();
     AlgoChooserProfileCache::Key cache_key{m_layouts.data(), m_layouts.size(),
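Worth noting in the hunk above: choose_by_heuristic() fills policy.sub_policy by building a sub-helper per entry of get_subopr_list() and recursing, so the returned ImplExecutionPolicy is a tree that mirrors the operator's sub-operator tree. A stripped-down model of that recursive shape (Policy and Node here are hypothetical, not the real types):

```cpp
#include <cstdio>
#include <vector>

// Simplified stand-in for ImplExecutionPolicy: one algorithm id plus one
// policy per sub-operator, recursively.
struct Policy {
    int algo = -1;
    std::vector<Policy> sub_policy;
};

// Simplified stand-in for an operator with sub-operators.
struct Node {
    int preferred_algo;
    std::vector<Node> sub_oprs;
};

// Mirrors the shape of AlgoChooserHelper::choose_by_heuristic: pick an
// algo for this node, then recurse into every sub-operator.
Policy choose_by_heuristic(const Node& node) {
    Policy policy;
    policy.algo = node.preferred_algo;
    for (const Node& sub : node.sub_oprs)
        policy.sub_policy.push_back(choose_by_heuristic(sub));
    return policy;
}

int main() {
    // e.g. a conv whose chosen algo dispatches to im2col + matmul sub-oprs
    Node conv{7, {Node{3, {}}, Node{5, {}}}};
    Policy p = choose_by_heuristic(conv);
    std::printf("algo=%d with %zu sub policies\n", p.algo,
                p.sub_policy.size());
}
```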
@@ -538,23 +398,22 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
         return {};
     auto&& prof = rst.val();
+    if (prof.empty())
+        return {};
 
     std::unordered_map<std::string, ImplAlgo> algo_map;
     for (auto i : get_all_candidates()) {
         auto ins = algo_map.emplace(i.desc.name.c_str(), i);
         mgb_assert(ins.second, "duplicated algo name: %s", i.desc.name.c_str());
     }
-    if (prof.empty())
-        return {};
 
     auto target_attr = extract_algo_attribute(selected_strategy);
     bool skip_by_negative = false;
     for (auto&& i : prof) {
         auto attr_of_algo =
                 static_cast<megdnn::Algorithm::Attribute>(i.attribute);
         bool contain_attr_all_positive =
-                (target_attr.first ==
-                 (attr_of_algo & target_attr.first));
+                (target_attr.first == (attr_of_algo & target_attr.first));
         bool contain_attr_any_negative =
                 static_cast<bool>(attr_of_algo & target_attr.second);
         if (contain_attr_all_positive) {
@@ -578,13 +437,14 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
     if (skip_by_negative) {
         mgb_log_error(
-                "No usable algo. Only naive algos are available, but negative "
-                "strategy is %s.",
+                "No usable algo. There are algos matching the positive "
+                "strategy(%s), but they are filtered by the negative strategy(%s).",
+                Algorithm::attribute_str(target_attr.first).c_str(),
                 Algorithm::attribute_str(target_attr.second).c_str());
     } else {
         mgb_log_error(
                 "No usable algo. algos read from cache could not satisfy "
-                "attribute with %s",
+                "positive strategy(%s)",
                 Algorithm::attribute_str(target_attr.first).c_str());
     }
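The filtering above reduces to two bitmask tests: target_attr.first holds the attribute bits a cached algorithm must contain, target_attr.second the bits it must not contain. A self-contained sketch of exactly that test (the enum values are placeholders, not megdnn's real AlgoAttribute bits):

```cpp
#include <cstdint>
#include <cstdio>

// Placeholder attribute bits, loosely modelled on megdnn::AlgoAttribute.
enum Attr : uint32_t {
    REPRODUCIBLE = 1u << 0,
    NAIVE = 1u << 1,
};

// positive: bits the algo must contain; negative: bits it must not contain.
bool usable(uint32_t algo_attr, uint32_t positive, uint32_t negative) {
    bool contain_attr_all_positive = (positive == (algo_attr & positive));
    bool contain_attr_any_negative = (algo_attr & negative) != 0;
    return contain_attr_all_positive && !contain_attr_any_negative;
}

int main() {
    uint32_t algo = REPRODUCIBLE | NAIVE;
    std::printf("%d\n", usable(algo, REPRODUCIBLE, 0));      // 1: accepted
    std::printf("%d\n", usable(algo, REPRODUCIBLE, NAIVE));  // 0: filtered
}
```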
@@ -593,75 +453,10 @@ AlgoChooser<Opr>::ExeContext::get_profile_result_from_cache(
 }
 
 template <typename Opr>
-typename AlgoChooser<Opr>::ImplExecutionPolicy
-AlgoChooser<Opr>::ExeContext::choose_by_heuristic(
-        ExecutionStrategy selected_strategy) const {
-    if (m_execution_policy.workspace_limit !=
-        std::numeric_limits<decltype(
-                m_execution_policy.workspace_limit)>::max()) {
-        mgb_log_warn(
-                "workspace_limit should not be setted if choose algo by "
-                "heuristic");
-    }
-    auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
-            owner_graph(), m_cn, m_execution_policy.workspace_limit);
-    auto attr = extract_algo_attribute(selected_strategy);
-    ImplExecutionPolicy policy;
-    policy.algo =
-            APPLY(m_megdnn_opr->get_algorithm_info_heuristic(
-                          args..., workspace_limit, attr.first, attr.second),
-                  m_layouts)
-                    .desc;
-
-    Algorithm* algo = m_megdnn_opr->get_algorithm_from_desc(policy.algo);
-    mgb_assert(algo, "Unknown algo description");
-    std::vector<Algorithm::SearchItem>&& sub_items = algo->get_subopr_list(
-            to_layout_array<Opr>(m_layouts), m_megdnn_opr);
-
-    FOREACH_OPR_TYPE_DISPATCH(sub_items, {
-        auto&& megdnn_opr = intl::create_megdnn_opr<_Opr>(m_cn);
-        megdnn_opr->param() =
-                Algorithm::deserialize_read_pod<typename _Opr::Param>(
-                        _item.param);
-        typename AlgoChooser<_Opr>::ExeContext sub_ctx(
-                to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
-                _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
-                m_allow_weight_preprocess);
-        policy.sub_policy.push_back(
-                sub_ctx.choose_by_heuristic(selected_strategy));
-    });
-
-    return policy;
-}
-
-template <typename Opr>
-std::vector<typename AlgoChooser<Opr>::ImplAlgo>
-AlgoChooser<Opr>::ExeContext::get_all_candidates() const {
-    auto heu = choose_by_heuristic(ExecutionStrategy::HEURISTIC);
-    auto&& ret = APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
-    bool found = false;
-    for (size_t i = 0; i < ret.size(); ++i) {
-        if (ret[i].desc == heu.algo) {
-            found = true;
-            std::swap(ret[i], ret[0]);
-            break;
-        }
-    }
-    Algorithm* palgo = m_megdnn_opr->get_algorithm_from_desc(heu.algo);
-    mgb_assert(palgo, "Unknown algo description");
-    mgb_assert(found,
-               "algo %s got by heuristic not found in "
-               "candidate list",
-               palgo->name());
-    return std::move(ret);
-}
-
-template <typename Opr>
-void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
-        ExecutionStrategy selected_strategy,
-        typename AlgoChooser<Opr>::ImplExecutionPolicy& policy,
-        bool retrive_from_cache) const {
+void AlgoChooser<Opr>::AlgoChooserHelper::construct_execution_policy(
+        const ExecutionStrategy& selected_strategy, bool retrive_from_cache,
+        typename AlgoChooser<Opr>::ImplExecutionPolicy& policy) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("construct_execution_policy")))
     if (!policy.algo.valid()) {
         if (retrive_from_cache) {
             policy.algo = get_profile_result_from_cache(selected_strategy).desc;
@@ -712,26 +507,28 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
         megdnn_opr->param() =
                 Algorithm::deserialize_read_pod<typename _Opr::Param>(
                         _item.param);
-        typename AlgoChooser<_Opr>::ExeContext sub_ctx(
+        typename AlgoChooser<_Opr>::AlgoChooserHelper sub_helper(
                 to_fixed_layouts<_Opr>(_item.layouts), megdnn_opr.get(),
                 _item.param, m_base_mgb_opr, m_cn, m_execution_policy,
                 m_allow_weight_preprocess);
         policy.sub_policy.push_back({});
-        sub_ctx.construct_execution_policy(selected_strategy,
-                                           policy.sub_policy.back(),
-                                           retrive_from_cache);
+        sub_helper.construct_execution_policy(selected_strategy,
+                                              retrive_from_cache,
+                                              policy.sub_policy.back());
         if (!policy.sub_policy.back().algo.valid()) {
-            // means sub_ctx.construct_execution_policy fails. clean up
+            // means sub_helper.construct_execution_policy fails. clean up
             // policy.algo and return
             policy = {};
             return;
         }
     });
+    MIDOUT_E
 }
 
 template <typename Opr>
-size_t AlgoChooser<Opr>::ExeContext::get_workspace_size_bytes(
+size_t AlgoChooser<Opr>::AlgoChooserHelper::get_workspace_size_bytes(
         const ImplExecutionPolicy& policy) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("get_workspace_size_bytes")))
     m_megdnn_opr->execution_policy() = policy;
     size_t result;
     if_constexpr<opr_supports_preprocess<Opr>()>(
@@ -752,12 +549,40 @@ size_t AlgoChooser<Opr>::ExeContext::get_workspace_size_bytes(
                         m_layouts);
             });
     return result;
+    MIDOUT_E
 }
 
+template <typename Opr>
+std::vector<typename AlgoChooser<Opr>::ImplAlgo>
+AlgoChooser<Opr>::AlgoChooserHelper::get_all_candidates() const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("get_all_candidates")))
+    auto heu = choose_by_heuristic(m_execution_policy.strategy);
+    auto&& ret =
+            APPLY(m_megdnn_opr->get_all_algorithms_info(args...), m_layouts);
+    bool found = false;
+    for (size_t i = 0; i < ret.size(); ++i) {
+        if (ret[i].desc == heu.algo) {
+            found = true;
+            std::swap(ret[i], ret[0]);
+            break;
+        }
+    }
+    Algorithm* palgo = m_megdnn_opr->get_algorithm_from_desc(heu.algo);
+    mgb_assert(palgo, "Unknown algo description");
+    mgb_assert(found,
+               "algo %s got by heuristic not found in "
+               "candidate list",
+               palgo->name());
+    return std::move(ret);
+    MIDOUT_E
+}
+
 template <typename Opr>
 Maybe<AlgoChooserProfileCache::ResultEntry>
-AlgoChooser<Opr>::ExeContext::profile_single_algo(
+AlgoChooser<Opr>::AlgoChooserHelper::profile_single_algo(
         const ImplExecutionPolicy& policy, double& timeout) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("profile_single_algo")))
     typename TimedProfiler<Opr>::Param param;
     // force check copy size <= dest len-1 from gcc8 for safe
     param.execution_policy =
@@ -791,14 +616,103 @@ AlgoChooser<Opr>::ExeContext::profile_single_algo(
     if (!rst.valid())
         return None;
     return AlgoChooserProfileCache::ResultEntry{
-            palgo->name(),
-            static_cast<uint32_t>(palgo->attribute()),
+            palgo->name(), static_cast<uint32_t>(palgo->attribute()),
             rst.val().time, param.workspace};
+    MIDOUT_E
 }
 
+template <typename Opr>
+void AlgoChooser<Opr>::AlgoChooserHelper::profile(
+        const ExecutionStrategy& selected_strategy) const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("profile")))
+    if (get_profile_result_from_cache(selected_strategy).valid())
+        return;
+    AlgoChooserProfileCache::Result prof_rst;
+
+    auto target_attr = extract_algo_attribute(selected_strategy);
+    std::string layouts_str =
+            format_fixlayouts<Opr>(m_layouts, arity_in, arity_out);
+    double cur_timeout = 0;
+
+    auto workspace_limit = WorkspaceLimitGetter::get_workspace_limit(
+            owner_graph(), m_cn, m_execution_policy.workspace_limit);
+    RealTimer timer;
+    for (auto algo : get_all_candidates()) {
+        Maybe<AlgoChooserProfileCache::ResultEntry> cur_rst;
+        ImplExecutionPolicy policy;
+        policy.algo = algo.desc;
+
+        //! check negative attribute : skip negative attribute
+        auto palgo = m_megdnn_opr->get_algorithm_from_desc(policy.algo);
+        if (palgo->contain_attribute_any(target_attr.second)) {
+            mgb_log_debug(
+                    "skip algo %s, which violates the profile strategy "
+                    "requirement 'not contain attribute(%s)'.",
+                    algo.desc.name.c_str(),
+                    Algorithm::attribute_str(target_attr.second).c_str());
+            continue;
+        }
+
+        //! check workspace limit
+        construct_execution_policy(selected_strategy, true, policy);
+        if (get_workspace_size_bytes(policy) >= workspace_limit) {
+            continue;
+        }
+
+        std::string msg = ssprintf("profiling %s algorithm %s %s",
+                                   m_base_mgb_opr->dyn_typeinfo()->name,
+                                   algo.desc.name.c_str(), layouts_str.c_str());
+        timer.reset();
+        MGB_TRY { cur_rst = profile_single_algo(policy, cur_timeout); }
+        MGB_CATCH(std::exception & exc, {
+            mgb_log_warn("caught exception during %s: %s", msg.c_str(),
+                         exc.what());
+            continue;
+        })
+        MGB_CATCH(..., {
+            mgb_log_warn("caught exception during %s", msg.c_str());
+            continue;
+        })
+        if (!cur_rst.valid()) {
+            mgb_log_warn("timeout when %s; timeout setting: %.3fsec",
+                         msg.c_str(), cur_timeout);
+            continue;
+        }
+        if (!cur_timeout) {
+            cur_timeout = timer.get_secs() + TIMEOUT_TOLERANCE;
+        } else {
+            cur_timeout =
+                    std::min(cur_timeout, timer.get_secs() + TIMEOUT_TOLERANCE);
+        }
+        auto&& rst = cur_rst.val();
+        mgb_log_debug("%s: workspace: %zu; time: %.3gsec", msg.c_str(),
+                      rst.workspace, rst.time);
+        prof_rst.push_back(rst);
+    }
+    std::string msg = ssprintf(
+            "no usable %s algorithm %s without attribute(%s) or could not meet "
+            "workspace limit requirement(%zu)",
+            m_base_mgb_opr->dyn_typeinfo()->name, layouts_str.c_str(),
+            Algorithm::attribute_str(target_attr.second).c_str(),
+            workspace_limit);
+    mgb_assert(!prof_rst.empty(), "%s", msg.c_str());
+
+    FixedTensorLayouts origin_layouts = m_layouts;
+    typename Opr::Param origin_param = m_megdnn_opr->param();
+    AlgoChooserProfileCache::Key cache_key{origin_layouts.data(),
+                                           origin_layouts.size(), &origin_param,
+                                           sizeof(origin_param)};
+
+    AlgoChooserProfileCache cache(m_cn, profile_name(m_megdnn_opr).c_str());
+    cache.put(cache_key, prof_rst);
+    MIDOUT_E
+}
+
 template <typename Opr>
 Maybe<PreprocessFilter<Opr>>
-AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
+AlgoChooser<Opr>::AlgoChooserHelper::construct_fake_preprocess_filter() const {
+    MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("construct_fake_preprocess_filter")))
     Maybe<PreprocessFilter<Opr>> result = None;
     if_constexpr<opr_supports_preprocess<Opr>()>([&](auto _) {
         if (!m_allow_weight_preprocess)
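One detail of the profiling loop above that is easy to miss: cur_timeout starts at 0 (no limit), and after every successful measurement it is tightened to the best observed time plus TIMEOUT_TOLERANCE, so clearly slower candidates are aborted early instead of running to completion. A toy illustration of the tightening rule (the tolerance value here is assumed, not MegBrain's actual constant):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

constexpr double TIMEOUT_TOLERANCE = 0.1;  // seconds; assumed value

int main() {
    // pretend runtimes of successive candidate algorithms, in seconds
    std::vector<double> measured = {0.50, 0.20, 0.80, 0.15};
    double cur_timeout = 0;  // 0 means "no limit yet"
    for (double t : measured) {
        if (cur_timeout && t >= cur_timeout) {
            std::printf("%.2fs candidate: killed at timeout %.2fs\n", t,
                        cur_timeout);
            continue;  // the profiler would abort this run early
        }
        // never wait much longer than the best run seen so far
        cur_timeout = cur_timeout
                              ? std::min(cur_timeout, t + TIMEOUT_TOLERANCE)
                              : t + TIMEOUT_TOLERANCE;
        std::printf("%.2fs candidate: kept, timeout now %.2fs\n", t,
                    cur_timeout);
    }
}
```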
@@ -830,11 +744,12 @@ AlgoChooser<Opr>::ExeContext::construct_fake_preprocess_filter() const {
         }
     });
     return result;
+    MIDOUT_E
 }
 
 template <typename Opr>
 std::pair<AlgoAttribute, AlgoAttribute>
-AlgoChooser<Opr>::ExeContext::extract_algo_attribute(
+AlgoChooser<Opr>::AlgoChooserHelper::extract_algo_attribute(
         const ExecutionStrategy& strategy) const {
     std::pair<AlgoAttribute, AlgoAttribute> ret =
             std::make_pair(AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT);
@@ -851,41 +766,128 @@ AlgoChooser<Opr>::ExeContext::extract_algo_attribute(
 }
 
 #define INST(Opr)                                                             \
-    template AlgoChooser<megdnn::Opr>::ExeContext::ExeContext(                \
+    template AlgoChooser<megdnn::Opr>::AlgoChooserHelper::AlgoChooserHelper(  \
             const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr,       \
             const std::string& param_str, const cg::OperatorNodeBase* mgb_opr, \
             const CompNode& cn,                                               \
             const megdnn::param::ExecutionPolicy& execution_policy,           \
             bool allow_weight_preprocess);                                    \
     template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy           \
-    AlgoChooser<megdnn::Opr>::ExeContext::choose_by_heuristic(                \
-            ExecutionStrategy select_strategy) const;                         \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::choose_by_heuristic(         \
+            const ExecutionStrategy& select_strategy) const;                  \
+    template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy           \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::choose_by_profile(           \
+            const ExecutionStrategy& select_strategy, bool enable_update)     \
+            const;                                                            \
     template typename AlgoChooser<megdnn::Opr>::ImplAlgo                      \
-    AlgoChooser<megdnn::Opr>::ExeContext::get_profile_result_from_cache(      \
-            ExecutionStrategy select_strategy) const;                         \
-    template std::vector<typename AlgoChooser<megdnn::Opr>::ImplAlgo>         \
-    AlgoChooser<megdnn::Opr>::ExeContext::get_all_candidates() const;         \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::                             \
+            get_profile_result_from_cache(                                    \
+                    const ExecutionStrategy& select_strategy) const;          \
+    template void                                                             \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::construct_execution_policy(  \
+            const ExecutionStrategy& select_strategy, bool retrive_from_cache, \
+            typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy)   \
+            const;                                                            \
     template size_t                                                           \
-    AlgoChooser<megdnn::Opr>::ExeContext::get_workspace_size_bytes(           \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::get_workspace_size_bytes(    \
             const typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy&     \
                     policy) const;                                            \
-    template void                                                             \
-    AlgoChooser<megdnn::Opr>::ExeContext::construct_execution_policy(         \
-            ExecutionStrategy select_strategy,                                \
-            typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy& policy,   \
-            bool retrive_from_cache) const;                                   \
+    template std::vector<typename AlgoChooser<megdnn::Opr>::ImplAlgo>         \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::get_all_candidates() const;  \
     template Maybe<AlgoChooserProfileCache::ResultEntry>                      \
-    AlgoChooser<megdnn::Opr>::ExeContext::profile_single_algo(                \
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::profile_single_algo(         \
            const typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy&      \
                    policy,                                                    \
            double& timeout) const;                                            \
     template std::pair<AlgoAttribute, AlgoAttribute>                          \
-    AlgoChooser<megdnn::Opr>::ExeContext::extract_algo_attribute(             \
-            const ExecutionStrategy& strategy) const;
+    AlgoChooser<megdnn::Opr>::AlgoChooserHelper::extract_algo_attribute(      \
+            const ExecutionStrategy& strategy) const;                         \
+    template void AlgoChooser<megdnn::Opr>::AlgoChooserHelper::profile(       \
+            const ExecutionStrategy& selected_strategy) const;
 
 MGB_FOREACH_FASTRUN_OPR(INST)
 #undef INST
 
+//////////////////////////////// AlgoChooser /////////////////////////////
+template <typename Opr>
+typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::get_policy(
+        const AlgoChooserHelper& helper) {
+    auto opr_strategy = helper.execution_policy().strategy;
+    if (opr_strategy & ExecutionStrategy::HEURISTIC) {
+        if (opr_strategy & ExecutionStrategy::PROFILE) {
+            //! this strategy will choose from the cache first, then choose by
+            //! heuristic if that fails.
+            ImplExecutionPolicy policy =
+                    helper.choose_by_profile(opr_strategy, false);
+            if (!policy.algo.valid()) {
+                policy = helper.choose_by_heuristic(opr_strategy);
+            }
+            return policy;
+        } else {
+            return helper.choose_by_heuristic(opr_strategy);
+        }
+    }
+#if MGB_ENABLE_FASTRUN
+    else if (opr_strategy & ExecutionStrategy::PROFILE) {
+        return helper.choose_by_profile(opr_strategy, true);
+    }
+#endif
+    else {
+        mgb_throw(GraphError, "bad ExecutionPolicy strategy");
+    }
+}
+
+template <typename Opr>
+size_t AlgoChooser<Opr>::setup_algo(const FixedTensorLayouts& layouts,
+                                    Opr* megdnn_opr, const MGBOpr* mgb_opr,
+                                    bool allow_weight_preprocess) {
+    if (WorkspaceLimitGetter::is_prealloc_run(mgb_opr->owner_graph())) {
+        return 0;
+    }
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(megdnn_opr->param(), param_str);
+    AlgoChooserHelper helper(layouts, megdnn_opr, param_str, mgb_opr,
+                             mgb_opr->comp_node(), mgb_opr->execution_policy(),
+                             allow_weight_preprocess);
+
+    ImplExecutionPolicy policy;
+    if (auto algo_choose_hook = mgb_opr->algo_chooser()) {
+        policy = algo_choose_hook(mgb_opr);
+        auto strategy =
+                ExecutionStrategy::HEURISTIC | ExecutionStrategy::REPRODUCIBLE;
+        helper.construct_execution_policy(strategy, false, policy);
+    }
+    if (!policy.algo.valid()) {
+        policy = get_policy(helper);
+    }
+    size_t workspace = helper.get_workspace_size_bytes(policy);
+
+    std::string ret;
+    ret.append(mgb_opr->dyn_typeinfo()->name);
+    ret += format_fixlayouts<Opr>(layouts, arity_in, arity_out);
+    Algorithm* palgo = megdnn_opr->get_algorithm_from_desc(policy.algo);
+    mgb_assert(palgo, "Unknown algo description");
+    ret.append("): algo=" + std::string(palgo->name()));
+    ret.append(ssprintf(" workspace=%.2fMiB attribute=%d",
+                        workspace / (1024 * 1024.0),
+                        static_cast<uint32_t>(palgo->attribute())));
+    mgb_log_debug("%s", ret.c_str());
+
+    megdnn_opr->execution_policy() = policy;
+    return workspace;
+}
+
+#define INST(Opr)                                                            \
+    template AlgoChooser<megdnn::Opr>::ImplExecutionPolicy                   \
+    AlgoChooser<megdnn::Opr>::get_policy(const AlgoChooserHelper& proxy);    \
+    template size_t AlgoChooser<megdnn::Opr>::setup_algo(                    \
+            const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr,      \
+            const MGBOpr* mgb_opr, bool allow_weight_preprocess);
+
+MGB_FOREACH_FASTRUN_OPR(INST)
+#undef INST
+
 } // namespace opr
 } // namespace mgb
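The strategy dispatch in get_policy() above: HEURISTIC alone chooses purely by heuristic; HEURISTIC|PROFILE reuses cached profiling results (choose_by_profile with enable_update set to false) and falls back to the heuristic if the cache has nothing, without launching new profiling runs; PROFILE alone, available when MGB_ENABLE_FASTRUN is on, runs the full fast-run pipeline and updates the cache. A compact model of the dispatch (simplified flags, not the real megdnn enum):

```cpp
#include <cstdio>

// Simplified flag model of megdnn's ExecutionStrategy.
enum Strategy : unsigned { HEURISTIC = 1u << 0, PROFILE = 1u << 1 };

const char* describe(unsigned s) {
    if (s & HEURISTIC) {
        if (s & PROFILE)
            return "cache lookup first, heuristic fallback, no new profiling";
        return "pure heuristic";
    }
    if (s & PROFILE)
        return "full fast-run: profile every candidate and update the cache";
    return "invalid strategy";
}

int main() {
    std::printf("%s\n", describe(HEURISTIC));
    std::printf("%s\n", describe(HEURISTIC | PROFILE));
    std::printf("%s\n", describe(PROFILE));
}
```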
......
@@ -66,7 +66,7 @@ class AlgoChooser {
 public:
     using FixedTensorLayouts = std::array<TensorLayout, arity>;
 
-    class ExeContext {
+    class AlgoChooserHelper {
         FixedTensorLayouts m_layouts;
         Opr* m_megdnn_opr;
         std::string m_param;
@@ -76,7 +76,8 @@ public:
         bool m_allow_weight_preprocess;
 
     public:
-        ExeContext(const FixedTensorLayouts& layouts, Opr* megdnn_opr,
+        AlgoChooserHelper(
+                const FixedTensorLayouts& layouts, Opr* megdnn_opr,
                 const std::string& param_str,
                 const cg::OperatorNodeBase* mgb_opr, const CompNode& cn,
                 const megdnn::param::ExecutionPolicy& execution_policy,
@@ -84,14 +85,14 @@ public:
         Opr* megdnn_opr() const { return m_megdnn_opr; }
 
+        const cg::OperatorNodeBase* mgb_opr() const { return m_base_mgb_opr; }
+
         const TensorLayout& inp_layout(size_t idx) const {
             return m_layouts[idx];
         }
         cg::ComputingGraph* owner_graph() const {
             return m_base_mgb_opr->owner_graph();
         }
-        const cg::OperatorNodeBase* mgb_opr() const { return m_base_mgb_opr; }
         const megdnn::param::ExecutionPolicy& execution_policy() const {
             return m_execution_policy;
         }
@@ -109,17 +110,40 @@ public:
         const FixedTensorLayouts& layouts() const { return m_layouts; }
 
         //! construct algo chain by heuristic
         ImplExecutionPolicy choose_by_heuristic(
-                ExecutionStrategy selected_strategy) const;
+                const ExecutionStrategy& selected_strategy) const;
 
-        //! get all candidate algos, and the one choose_by_heuristic() is
-        //! put first
-        std::vector<ImplAlgo> get_all_candidates() const;
+        //! construct algo chain by profiling
+        ImplExecutionPolicy choose_by_profile(
+                const ExecutionStrategy& selected_strategy,
+                bool enable_update) const;
+
+        //! get the profiling result from cache; returns invalid if it does
+        //! not exist
+        ImplAlgo get_profile_result_from_cache(
+                const ExecutionStrategy& selected_strategy) const;
+
+        /**
+         * \brief construct execution policy from cache or heuristic.
+         *
+         * \param selected_strategy select algo which matched this strategy
+         * \param[in,out] policy execution policy
+         * \param retrive_from_cache retrieve the algo from cache if set to
+         *     True, from the heuristic otherwise.
+         * \note the policy will be cleaned up when construction fails.
+         */
+        void construct_execution_policy(
+                const ExecutionStrategy& selected_strategy,
+                bool retrive_from_cache, ImplExecutionPolicy& policy) const;
 
         //! get workspace size required for specific execution policy
         size_t get_workspace_size_bytes(
                 const ImplExecutionPolicy& policy) const;
+
+        //! get all candidate algos, and the one choose_by_heuristic() is
+        //! put first
+        std::vector<ImplAlgo> get_all_candidates() const;
 
         /*!
          * \brief profile a single algorithm
          *
@@ -132,22 +156,8 @@ public:
         Maybe<AlgoChooserProfileCache::ResultEntry> profile_single_algo(
                 const ImplExecutionPolicy& policy, double& timeout) const;
 
-        //! get all profile algorithm from cache, return invalid if not exists
-        ImplAlgo get_profile_result_from_cache(
-                ExecutionStrategy selected_strategy) const;
-
-        /**
-         * \brief construct execution policy from cache or heuristic.
-         *
-         * \param selected_strategy select algo which matched this strategy
-         * \param [out] policy execution policy
-         * \param retrive_from_cache retrive algo from cache if set True, get
-         *     from heuristic otherwise.
-         * \note When contruction fail, the policy will be cleaned.
-         */
-        void construct_execution_policy(ExecutionStrategy selected_strategy,
-                                        ImplExecutionPolicy& policy,
-                                        bool retrive_from_cache = true) const;
+        //! profile and save to cache
+        void profile(const ExecutionStrategy& selected_strategy) const;
 
         /**
          * \brief extract algo attribute from execution strategy and graph
@@ -168,14 +178,7 @@ public:
 private:
     //! entrance for getting algorithm according to execution strategy
-    static ImplExecutionPolicy get_policy(ExeContext& ctx);
-
-    //! profile and save to cache
-    static void profile(ExeContext& ctx, ExecutionStrategy selected_strategy);
-
-    static ImplExecutionPolicy choose_by_profile(
-            ExeContext& ctx, ExecutionStrategy selected_strategy,
-            bool enable_update = true);
+    static ImplExecutionPolicy get_policy(const AlgoChooserHelper& helper);
 
 public:
     /*!
......