Commit 50ea5ae8 authored by Megvii Engine Team

feat(mgb/gopt): add dynamic programming solver

GitOrigin-RevId: 595392ec89e3723fa702efdbf695f5bd04bec95a
Parent c14e5719
This diff is collapsed.
/**
* \file src/gopt/impl/layout_transform_context.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "./utils.h"
#include "megbrain/gopt/global_layout_transform.h"
using namespace mgb;
using namespace gopt;
/* ================= LayoutTransformContext ==================*/
LayoutTransformContext& LayoutTransformContext::add_opr_config(
Typeinfo* opr, OprFormat opr_format) {
auto& dispatchers = m_opr_configs[opr];
dispatchers[opr_format] =
OprTensorFormatsConfiguration::find_dispatcher_by_type_format(
opr, opr_format);
return *this;
}
LayoutTransformContext& LayoutTransformContext::add_opr_config(
Typeinfo* opr, SmallVector<OprFormat> opr_formats) {
auto& dispatchers = m_opr_configs[opr];
for (auto opr_fmt : opr_formats) {
dispatchers[opr_fmt] =
OprTensorFormatsConfiguration::find_dispatcher_by_type_format(
opr, opr_fmt);
}
return *this;
}
// vim: syntax=cpp.doxygen
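As an aside, the two overloads above are designed to be chained when registering the formats a backend supports. A minimal usage sketch, assuming ctx is a LayoutTransformContext (the operator types and formats here are illustrative only; see the make_ctx() helper in the tests below for a real registration):

// Hypothetical registration sketch, not part of this commit.
ctx.add_opr_config(opr::ConvBiasForward::typeinfo(), OprFormat::NHWC)
   .add_opr_config(opr::PoolingForward::typeinfo(),
                   {OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::NHWC});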
......@@ -17,6 +17,7 @@
#include "megbrain/graph/event.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/io.h"
#include "megbrain/plugin/base.h"
#include "megbrain/serialization/sereg.h"
......@@ -265,6 +266,10 @@ ProfilerImpl::OperatorNodeRecord ProfilerImpl::profile_operator(
record.opr = opr;
auto& costs = record.costs;
for (auto&& i : available_configs) {
/// XXXX remove later
if (i.opr_format == OprFormat::NCHW &&
opr->input(0)->dtype().enumv() != DTypeEnum::Float32)
continue;
costs[i.opr_format] = profile_operator(opr, base_config, i);
}
return record;
......@@ -414,37 +419,42 @@ ProfilerImpl::ProfilingResult ProfilerImpl::profile(
cb(Resize, 1),
#undef cb
};
static const ThinHashSet<Typeinfo*> skip_opr_types = {
TypeCvt::typeinfo(), Elemwise::typeinfo(),
ElemwiseMultiType::typeinfo()};
ThinHashSet<VarNode*> vars;
ThinHashSet<OperatorNodeBase*> oprs;
{
auto cb = [&cvprop, &vars, &oprs](OperatorNodeBase* opr) {
if (cvprop.is_const(opr))
return;
oprs.insert(opr);
auto find = format_aware_input_tensors.find(opr->dyn_typeinfo());
if (find == format_aware_input_tensors.end()) {
for (auto&& i : opr->input()) {
if (!cvprop.is_const(i)) {
vars.insert(i);
}
ThinHashSet<OperatorNodeBase*> skip_oprs;
for (auto&& opr : problem.graph_partition().all_oprs()) {
if (cvprop.is_const(opr))
continue;
bool skip = true;
for (auto&& i : opr->input()) {
skip &= problem.graph_partition().input().count(i) > 0 ||
skip_oprs.count(i->owner_opr()) > 0;
}
skip &= skip_opr_types.count(opr->dyn_typeinfo());
if (skip)
skip_oprs.insert(opr);
oprs.insert(opr);
auto find = format_aware_input_tensors.find(opr->dyn_typeinfo());
if (find == format_aware_input_tensors.end()) {
for (auto&& i : opr->input()) {
if (!cvprop.is_const(i)) {
vars.insert(i);
}
} else {
size_t nr_input_tensor =
std::min(find->second, opr->input().size());
for (size_t i = 0; i < nr_input_tensor; ++i) {
if (!cvprop.is_const(opr->input(i))) {
vars.insert(opr->input(i));
}
}
} else {
size_t nr_input_tensor =
std::min(find->second, opr->input().size());
for (size_t i = 0; i < nr_input_tensor; ++i) {
if (!cvprop.is_const(opr->input(i))) {
vars.insert(opr->input(i));
}
}
vars.insert(opr->output(0));
};
DepOprIter iter{cb};
for (auto&& i : problem.graph_partition().input()) {
iter.set_visited(i->owner_opr());
}
for (auto&& o : problem.graph_partition().output()) {
iter.add(o->owner_opr());
for (auto&& ov : opr->usable_output()) {
vars.insert(ov);
}
}
......@@ -462,8 +472,14 @@ ProfilerImpl::ProfilingResult ProfilerImpl::profile(
auto&& opr_configs = problem.opr_configs();
auto find = opr_configs.find(opr->dyn_typeinfo());
if (find == opr_configs.end()) {
opr_record[opr] = profile_operator(opr, base_format,
available_tensor_formats);
if (skip_oprs.count(opr) > 0) {
SmallVector<TensorFormats> tensor_formats = {base_format};
opr_record[opr] =
profile_operator(opr, base_format, tensor_formats);
} else {
opr_record[opr] = profile_operator(opr, base_format,
available_tensor_formats);
}
} else {
auto&& dispatchers = find->second;
SmallVector<OprTensorFormatsConfiguration> configs;
......
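The hunk above changes how format-agnostic operators are profiled: an operator whose type is in skip_opr_types and whose inputs all come from the partition boundary or from other already-skipped operators is later profiled only in the base tensor format, which shrinks the profiling space. A restated sketch of that predicate (the helper name is_skippable is illustrative and not part of the source):

// Sketch only: restates the skip rule from the hunk above.
bool is_skippable(cg::OperatorNodeBase* opr,
                  const ThinHashSet<Typeinfo*>& skip_opr_types,
                  const ThinHashSet<VarNode*>& partition_inputs,
                  const ThinHashSet<cg::OperatorNodeBase*>& skip_oprs) {
    if (!skip_opr_types.count(opr->dyn_typeinfo()))
        return false;  // format-aware oprs are always fully profiled
    for (auto&& i : opr->input()) {
        if (!partition_inputs.count(i) && !skip_oprs.count(i->owner_opr()))
            return false;  // some input is produced inside the partition by a non-skipped opr
    }
    return true;
}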
/**
* \file src/gopt/impl/profiling_based_solver.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/gopt/global_layout_transform.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
using namespace mgb;
using namespace gopt;
using namespace opr;
/* =================== ProfilingBasedSolver ======================*/
ProfilingBasedSolver::ProfilingBasedSolver(
std::unique_ptr<ProfilerBase> profiler)
: m_profiler{std::move(profiler)} {
static const ThinHashSet<Typeinfo*> format_aware_oprs = {
#define cb(_Opr) _Opr::typeinfo()
cb(Convolution),
cb(ConvBiasForward),
cb(ConvolutionBackwardData),
cb(PoolingForward),
cb(WarpPerspective),
cb(Resize),
#undef cb
};
m_graph_partition_filter = [](const GraphPartition& partition) {
bool has_format_aware_opr = false;
for (auto&& opr : partition.all_oprs()) {
if (!has_format_aware_opr &&
format_aware_oprs.count(opr->dyn_typeinfo())) {
has_format_aware_opr = true;
break;
}
}
return has_format_aware_opr;
};
}
ProfilingBasedSolver::Solution ProfilingBasedSolver::solve(
const Problem& problem) const {
const auto& partition = problem.graph_partition();
if (!m_graph_partition_filter(partition))
return Solution{};
return do_solve(problem);
}
// vim: syntax=cpp.doxygen
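For callers that want a different profiling scope, the second constructor accepts a custom GraphPartitionFilter. A hedged sketch (the two-operator threshold is made up purely for illustration):

// Only spend profiling time on partitions with at least two operators (illustrative).
auto solver = std::make_unique<DynamicProgrammingSolver>(
        ProfilerBase::make_profiler(),
        [](const GraphPartition& partition) {
            return partition.all_oprs().size() >= 2;
        });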
......@@ -11,9 +11,9 @@
*/
#include "megbrain/gopt/reformat_manager.h"
#include "./utils.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/utils/arith_helper.h"
#include "./utils.h"
using namespace mgb;
using namespace gopt;
......@@ -87,21 +87,6 @@ bool ReformatManager::ReformatKey::Equal::operator()(
lhs.attribute == rhs.attribute;
}
ReformatManager::ReformatKey&
ReformatManager::ReformatKey::deduce_reformat_dtype_enum(const DType& dt) {
static const ThinHashSet<std::pair<TensorFormats, TensorFormats>> set = {
{TensorFormats::NCHW, TensorFormats::NCHWc64},
{TensorFormats::NCHWc64, TensorFormats::NCHW},
{TensorFormats::NCHW, TensorFormats::NHWC},
{TensorFormats::NHWC, TensorFormats::NCHW}};
if (set.count({input_format, output_format}) > 0 &&
(dt.enumv() == DTypeEnum::QuantizedS4 ||
dt.enumv() == DTypeEnum::Quantized4Asymm)) {
input_dtype = output_dtype = dt.enumv();
}
return *this;
}
/* =================== ReformatManager ====================*/
ReformatManager::ReformatManager() {
using Attribute = ReformatKey::Attribute;
......@@ -378,7 +363,7 @@ ReformatManager::ReformatImpl ReformatManager::auto_aligned_reformat_featrue(
divup(orig_channel, input_alignment) * input_alignment;
size_t aligned_out_channel =
divup(orig_channel, output_alignment) * output_alignment;
size_t common_alignment = input_alignment * output_alignment /
gcd(input_alignment, output_alignment);
size_t aligned_channel =
divup(orig_channel, common_alignment) * common_alignment;
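For reference, the common alignment computed above is simply the least common multiple of the input and output alignments, and the channel count is rounded up to it. A worked example under assumed values (not taken from the source; std::gcd requires a C++17 standard library):

#include <numeric>  // std::gcd
// Assumed values: input_alignment = 4, output_alignment = 32, orig_channel = 24.
static_assert(4 * 32 / std::gcd(4, 32) == 32, "common_alignment = lcm(4, 32) = 32");
static_assert((24 + 32 - 1) / 32 * 32 == 32, "aligned_channel = divup(24, 32) * 32 = 32");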
......@@ -427,11 +412,11 @@ ReformatManager::ReformatImpl ReformatManager::auto_aligned_reformat_weight(
for (size_t i = 0; i < input_shape.ndim; ++i) {
if (input_shape[i].name() == Dimension::Name::C &&
input_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
in_channels = orig_var->shape()[i];
in_channels = orig_var->shape()[i] * input_shape[i].stride();
input_channel_idx = i;
mgb_assert(input_shape[i].stride() == 1,
"unsupport weight format(got:%s)",
input_shape.to_string().c_str());
// mgb_assert(input_shape[i].stride() == 1,
// "unsupport weight format(got:%s)",
// input_shape.to_string().c_str());
} else if ((input_shape[i].name() == Dimension::Name::K ||
input_shape[i].name() == Dimension::Name::N) &&
input_shape[i].extent() == Dimension::UNDETERMINED_EXTENT) {
......@@ -536,7 +521,8 @@ TensorShape mgb::gopt::make_aligned_tensor_shape(const VarNode* var,
"formats(var:%s;shp:%s;fmt:%s)",
var->cname(), oshp.to_string().c_str(),
orig_shape.to_string().c_str());
if (oshp.is_scalar()) return oshp;
if (oshp.is_scalar())
return oshp;
TensorShape tshp;
ThinHashMap<Dimension::Name, int> name2dominant;
for (size_t i = 0; i < orig_shape.ndim; ++i) {
......@@ -597,4 +583,32 @@ TensorShape mgb::gopt::make_aligned_weight_shape(const VarNode* var,
return tshp;
}
ReformatManager::AlignmentDesc mgb::gopt::make_aligned_desc(
TensorFormats weight_format, TensorFormats out_feature_format) {
using AlignmentDesc = ReformatManager::AlignmentDesc;
using Name = Dimension::Name;
auto weight_shape = tensor_formats_to_named_tensor_shape(weight_format);
auto out_shape = tensor_formats_to_named_tensor_shape(out_feature_format);
size_t out_channel_alignment = 1;
for (size_t i = 0; i < out_shape.ndim; ++i) {
auto name = out_shape[i].name();
auto extent = out_shape[i].extent();
if ((name == Name::C || name == Name::K) &&
extent == Dimension::UNDETERMINED_EXTENT) {
out_channel_alignment = out_shape[i].stride();
break;
}
}
Name out_channel_name;
for (size_t i = 0; i < weight_shape.ndim; ++i) {
auto name = weight_shape[i].name();
auto extent = weight_shape[i].extent();
if ((name == Name::N || name == Name::K) &&
extent == Dimension::UNDETERMINED_EXTENT) {
out_channel_name = name;
}
}
return AlignmentDesc{out_channel_name, out_channel_alignment};
}
// vim: syntax=cpp.doxygen
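The new make_aligned_desc helper (declared in the reformat_manager.h diff further down) pairs the out-channel dimension name of the weight format (N or K) with the channel alignment implied by the output feature format's channel stride. A hedged example, assuming a TensorFormats::KCRS enumerator for OIHW-style weights:

// Illustrative only: an OIHW-style weight feeding an NCHWc64 feature map yields
// AlignmentDesc{Name::K, 64}, i.e. the K (output-channel) dim must be padded to 64.
auto desc = mgb::gopt::make_aligned_desc(TensorFormats::KCRS, TensorFormats::NCHWc64);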
......@@ -304,10 +304,15 @@ std::vector<GraphPartition> SubGraphExtractor::extract(
}
}
partition->opr_set().insert(opr);
partition->all_oprs().push_back(opr);
for (const auto& i : opr->input())
partition->input().insert(i);
}
}
for (auto&& partition : partitions) {
auto& all_oprs = partition.all_oprs();
std::reverse(all_oprs.begin(), all_oprs.end());
}
return partitions;
}
......
......@@ -36,6 +36,28 @@ static inline const char* opr_format_to_string(
#undef cb
}
static inline TensorFormats opr_format_to_tensor_formats(
OprTensorFormatsConfiguration::OprFormat opr_format) {
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
switch (opr_format) {
case OprFormat::NCHW:
return TensorFormats::NCHW;
case OprFormat::NHWC:
return TensorFormats::NHWC;
case OprFormat::NCHW4:
return TensorFormats::NCHWc4;
case OprFormat::NCHW32:
return TensorFormats::NCHWc32;
case OprFormat::NCHW64:
return TensorFormats::NCHWc64;
case OprFormat::CHWN4:
return TensorFormats::CHWNc4;
default:
mgb_throw(AssertionError, "format(%s) is not supported",
opr_format_to_string(opr_format));
};
}
static inline megdnn::NamedTensorShape tensor_formats_to_named_tensor_shape(
TensorFormats format) {
switch (format) {
......
......@@ -11,6 +11,7 @@
*/
#pragma once
#include "megbrain/gopt/framework.h"
#include "megbrain/gopt/reformat_manager.h"
#include "megbrain/gopt/subgraph_extractor.h"
#include "megbrain/opr/dnn/convolution.h"
......@@ -41,14 +42,16 @@ struct OprTensorFormatsConfiguration {
/*!
* \brief A structure that describes the global layout transform problem
*/
class Problem {
class LayoutTransformContext {
public:
using OprList = SubGraphExtractor::OprList;
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
using OprTensorFormatsDispatcher =
OprTensorFormatsConfiguration::OprTensorFormatsDispatcher;
using OprConfigTrait =
ThinHashMap<Typeinfo*,
ThinHashMap<OprFormat, OprTensorFormatsDispatcher*>>;
using ReformatAttribute = ReformatManager::ReformatKey::Attribute;
struct Attribute {
OprFormat base_opr_format; /// the base opr format indicates that the
/// network to be optimized is constructed
......@@ -62,58 +65,110 @@ public:
/// (like elemwise, elemwise multi type,
/// typecvt etc.) are built in the base
/// tensor format.
ReformatAttribute
reformat_attribute; /// additional reformat attribute, which
/// indicates whether to pad the nhwc layout
/// automatically, or to enable the nhwcd4 format
/// on the OpenCL platform so that image objects
/// can be used
};
Problem(const GraphPartition& graph_partition,
const SmallVector<TensorFormats>& available_tensor_formats,
const OprConfigTrait& opr_config, const Attribute& attribute)
: m_graph_partition{graph_partition},
m_available_tensor_formats{available_tensor_formats},
m_opr_configs{opr_config},
LayoutTransformContext() = delete;
LayoutTransformContext(OprList opr_list,
SmallVector<TensorFormats> available_tensor_formats,
Attribute attribute)
: m_opr_list{std::move(opr_list)},
m_available_tensor_formats{std::move(available_tensor_formats)},
m_attribute{attribute} {}
LayoutTransformContext(OprList opr_list,
SmallVector<TensorFormats> available_tensor_formats,
OprConfigTrait opr_configs, Attribute attribute)
: m_opr_list{std::move(opr_list)},
m_available_tensor_formats{std::move(available_tensor_formats)},
m_opr_configs{std::move(opr_configs)},
m_attribute{attribute} {}
const OprList& opr_list() const { return m_opr_list; }
const SmallVector<TensorFormats>& available_tensor_formats() const {
return m_available_tensor_formats;
}
const OprConfigTrait& opr_configs() const { return m_opr_configs; }
Attribute attribute() const { return m_attribute; }
/*!
* \brief add an op format configuration for a particular operator type
* \param opr runtime typeinfo of the operator
* \param opr_format the opr format configuration to be enabled in the
* layout transform problem
*/
LayoutTransformContext& add_opr_config(Typeinfo* opr, OprFormat opr_format);
/*!
* \brief add a vector of op format configurations for a particular operator
* type
* \param opr runtime typeinfo of the operator
* \param opr_formats the opr format configurations to be enabled in the
* layout transform problem
*/
LayoutTransformContext& add_opr_config(Typeinfo* opr,
SmallVector<OprFormat> opr_formats);
private:
OprList m_opr_list; /// supported operator list
SmallVector<TensorFormats>
m_available_tensor_formats; /// the available tensor formats, used
/// for format agnostic operators (like
/// elemwise, elemwise multi type,
/// typecvt, etc.)
OprConfigTrait m_opr_configs; /// the available opr format configurations,
/// used for format aware operators (like
/// conv, deconv, conv_bias, etc.)
Attribute m_attribute; /// the extra attributes to describe the problem
};
class Problem {
public:
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
using OprTensorFormatsDispatcher =
OprTensorFormatsConfiguration::OprTensorFormatsDispatcher;
using OprConfigTrait = LayoutTransformContext::OprConfigTrait;
using Attribute = LayoutTransformContext::Attribute;
Problem(const GraphPartition& graph_partition,
const LayoutTransformContext& ctx)
: m_graph_partition{graph_partition}, m_ctx{ctx} {}
~Problem() noexcept = default;
const GraphPartition& graph_partition() const { return m_graph_partition; }
const OprConfigTrait& opr_configs() const { return m_opr_configs; }
const OprConfigTrait& opr_configs() const { return m_ctx.opr_configs(); }
const SmallVector<TensorFormats>& available_tensor_formats() const {
return m_available_tensor_formats;
return m_ctx.available_tensor_formats();
}
TensorFormats base_format() const {
return m_attribute.base_tensor_formats;
return m_ctx.attribute().base_tensor_formats;
}
/*!
* \brief return the tensor formats configuration of an operator in the
* default op format
*/
OprTensorFormatsConfiguration base_config(
const cg::OperatorNodeBase* opr) const {
auto _ = OprTensorFormatsConfiguration::find_dispatcher_by_type_format(
opr->dyn_typeinfo(), m_attribute.base_opr_format);
opr->dyn_typeinfo(), m_ctx.attribute().base_opr_format);
auto rst = (*_)(opr);
if (rst.valid())
return rst.val();
OprTensorFormatsConfiguration config;
config.typeinfo = opr->dyn_typeinfo();
config.opr_format = m_attribute.base_opr_format;
config.opr_format = m_ctx.attribute().base_opr_format;
for (const auto& i : opr->input()) {
config.input_dtypes.emplace_back(i->dtype().enumv());
config.input_tensor_formats.emplace_back(
m_attribute.base_tensor_formats);
config.input_tensor_formats.emplace_back(base_format());
config.input_tensor_types.emplace_back(TensorType::FEATURE);
}
config.output_dtypes.emplace_back(opr->output(0)->dtype().enumv());
config.output_tensor_formats.emplace_back(
m_attribute.base_tensor_formats);
config.output_tensor_formats.emplace_back(base_format());
return config;
}
private:
const GraphPartition& m_graph_partition; /// the graph partition
const SmallVector<TensorFormats>&
m_available_tensor_formats; /// the available tensor formats, used
/// for format agnostic operators (like
/// elemwise, elemwise multi type,
/// typecvt, etc.
const OprConfigTrait&
m_opr_configs; /// the available opr format configurations, used
/// for format aware operators (like conv, deconv,
/// conv_bias, etc.
Attribute m_attribute; /// the extra attributes to describe the problem
const LayoutTransformContext& m_ctx;
};
/*!
......@@ -170,6 +225,92 @@ public:
static std::unique_ptr<ProfilerBase> make_profiler();
};
/*!
* \brief abstract solver
*/
class SolverBase {
public:
using OprFormat = Problem::OprFormat;
using Solution = ThinHashMap<cg::OperatorNodeBase*, OprFormat>;
SolverBase() = default;
virtual ~SolverBase() = default;
/*!
* \brief solve the given problem
*/
virtual Solution solve(const Problem& problem) const = 0;
/*!
* \brief check whether the given problem can be solved by the
* algorithm(i.e. solver).
*/
virtual bool can_solve(const Problem& problem) const = 0;
};
/*!
* \brief solvers that first collect the costs of operators in different opr
* formats and the costs of the layout transforms of var nodes, using a
* user-provided profiler on the target device. This profiling step can be
* time-consuming.
*/
class ProfilingBasedSolver : public SolverBase {
public:
using GraphPartitionFilter =
thin_function<bool(const GraphPartition& graph_partition)>;
ProfilingBasedSolver(std::unique_ptr<ProfilerBase> profiler);
/*!
* \note some graph partitions (for example, partitions without format-aware
* operators like conv, deconv, warp, resize, etc.) will be filtered out by
* the GraphPartitionFilter, which reduces the profiling time. */
ProfilingBasedSolver(std::unique_ptr<ProfilerBase> profiler,
GraphPartitionFilter graph_partition_filter)
: m_profiler{std::move(profiler)},
m_graph_partition_filter{std::move(graph_partition_filter)} {}
virtual ~ProfilingBasedSolver() = default;
Solution solve(const Problem& problem) const override;
virtual Solution do_solve(const Problem& problem) const = 0;
protected:
std::unique_ptr<ProfilerBase> m_profiler;
private:
GraphPartitionFilter m_graph_partition_filter;
};
/*!
* \brief A solver that solves the layout selection problem using a dynamic
* programming algorithm (Markov decision process).
*/
class DynamicProgrammingSolver final : public ProfilingBasedSolver {
public:
DynamicProgrammingSolver(std::unique_ptr<ProfilerBase> profiler)
: ProfilingBasedSolver(std::move(profiler)){};
DynamicProgrammingSolver(std::unique_ptr<ProfilerBase> profiler,
GraphPartitionFilter graph_partition_filter)
: ProfilingBasedSolver(std::move(profiler),
std::move(graph_partition_filter)){};
~DynamicProgrammingSolver() noexcept = default;
Solution do_solve(const Problem& problem) const override;
bool can_solve(const Problem& problem) const override;
private:
class Impl;
};
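The solver implementation itself lives in the collapsed file at the top of this commit, so as a rough orientation only, here is a toy, chain-shaped version of the recurrence such a solver evaluates: the state is the tensor format chosen after each operator, and each step adds the operator's cost in that format plus the cost of reformatting from the previous format (all names and numbers below are made up and this is not the real Impl):

#include <algorithm>
#include <array>
#include <cstdio>
#include <limits>
#include <vector>

int main() {
    constexpr int NR_FORMAT = 3;  // e.g. NCHW, NHWC, NCHWc4 (illustrative)
    // opr_cost[k][f]: profiled cost of the k-th operator when run in format f
    std::vector<std::array<double, NR_FORMAT>> opr_cost = {
            {1.0, 0.6, 0.7}, {0.9, 0.5, 0.8}, {1.2, 0.7, 0.6}};
    // reformat_cost[g][f]: cost of converting a var from format g to format f
    const double reformat_cost[NR_FORMAT][NR_FORMAT] = {
            {0.0, 0.2, 0.2}, {0.2, 0.0, 0.3}, {0.2, 0.3, 0.0}};
    std::array<double, NR_FORMAT> best;
    for (int f = 0; f < NR_FORMAT; ++f)
        best[f] = opr_cost[0][f];  // base case: first operator, no reformat yet
    for (int k = 1; k < (int)opr_cost.size(); ++k) {
        std::array<double, NR_FORMAT> next;
        for (int f = 0; f < NR_FORMAT; ++f) {
            double m = std::numeric_limits<double>::max();
            for (int g = 0; g < NR_FORMAT; ++g)
                m = std::min(m, best[g] + reformat_cost[g][f]);
            next[f] = m + opr_cost[k][f];
        }
        best = next;
    }
    // the real solver also records the argmins so the chosen formats can be recovered
    printf("minimal total cost: %.2f\n",
           *std::min_element(best.begin(), best.end()));
    return 0;
}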
/*!
* \brief A layout transform pass, which converts each operator's format to the
* optimal format using the results of the solver.
*/
class LayoutTransformPass final : public Pass {
public:
const char* name() const override { return "layout assignment pass"; }
void apply(OptState& opt) const override;
LayoutTransformPass(std::unique_ptr<LayoutTransformContext> ctx,
std::unique_ptr<SolverBase> solver)
: m_ctx{std::move(ctx)}, m_solver{std::move(solver)} {}
private:
std::unique_ptr<LayoutTransformContext> m_ctx;
std::unique_ptr<SolverBase> m_solver;
};
} // namespace gopt
} // namespace mgb
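Putting the new pieces together, the intended flow is roughly: extract partitions, wrap each one in a Problem against a shared LayoutTransformContext, and ask a solver for the per-operator formats. A hedged sketch (ctx construction is elided; the make_ctx() helper in the tests below is one concrete example, and output_var stands for any endpoint SymbolVar of the network):

SubGraphExtractor extractor(ctx->opr_list());
auto partitions = extractor.extract({output_var});
auto solver = std::make_unique<DynamicProgrammingSolver>(ProfilerBase::make_profiler());
for (auto&& partition : partitions) {
    Problem problem(partition, *ctx);
    if (!solver->can_solve(problem))
        continue;
    auto solution = solver->solve(problem);  // maps each operator to its chosen OprFormat
}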
......
......@@ -84,7 +84,7 @@ public:
output_dtype{DTypeEnum::Float32},
attribute{Attribute::DEFAULT} {}
ReformatKey(TensorFormats input_format_, TensorFormats output_format_,
Attribute attribute_ = Attribute::DEFAULT,
Attribute attribute_,
DTypeEnum input_dtype_ = DTypeEnum::Float32,
DTypeEnum output_dtype_ = DTypeEnum::Float32)
: input_format{input_format_},
......@@ -92,6 +92,15 @@ public:
input_dtype{input_dtype_},
output_dtype{output_dtype_},
attribute{attribute_} {}
ReformatKey(TensorFormats input_format_, TensorFormats output_format_,
DTypeEnum input_dtype_ = DTypeEnum::Float32,
DTypeEnum output_dtype_ = DTypeEnum::Float32,
Attribute attribute_ = Attribute::DEFAULT)
: input_format{input_format_},
output_format{output_format_},
input_dtype{input_dtype_},
output_dtype{output_dtype_},
attribute{attribute_} {}
struct Hash {
size_t operator()(const ReformatKey& key) const;
};
......@@ -99,7 +108,6 @@ public:
bool operator()(const ReformatKey& lhs,
const ReformatKey& rhs) const;
};
ReformatKey& deduce_reformat_dtype_enum(const DType& dt);
};
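The constructor added above lets callers specify the dtypes without spelling out an attribute. A small hedged example (the concrete formats and dtypes are chosen for illustration only):

// NCHW -> NCHWc64 reformat of a 4-bit quantized tensor; attribute defaults to DEFAULT.
ReformatManager::ReformatKey key{
        TensorFormats::NCHW, TensorFormats::NCHWc64,
        DTypeEnum::QuantizedS4, DTypeEnum::QuantizedS4};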
using ReformatCache =
std::unordered_map<ReformatKey, ReformatImpl, ReformatKey::Hash,
......@@ -130,6 +138,9 @@ TensorShape make_aligned_weight_shape(const VarNode* var,
TensorFormats orig_formats,
TensorFormats target_formats,
TensorFormats extra_formats);
ReformatManager::AlignmentDesc make_aligned_desc(
TensorFormats weight_format, TensorFormats out_feature_format);
} // namespace gopt
} // namespace mgb
......
......@@ -20,6 +20,7 @@ class GraphPartition {
public:
using VarNodeSet = ThinHashSet<VarNode*>;
using OperatorNodeSet = ThinHashSet<cg::OperatorNodeBase*>;
using OperatorNodeList = std::vector<cg::OperatorNodeBase*>;
class InputPlaceholder;
......@@ -32,15 +33,18 @@ public:
const OperatorNodeSet& opr_set() const { return m_opr_set; }
const VarNodeSet& input() const { return m_inputs; }
const VarNodeSet& output() const { return m_outputs; }
const OperatorNodeList& all_oprs() const { return m_oprs; }
OperatorNodeSet& opr_set() { return m_opr_set; }
OperatorNodeList& all_oprs() { return m_oprs; }
VarNodeSet& input() { return m_inputs; }
VarNodeSet& output() { return m_outputs; }
private:
std::pair<VarNodeArray, VarNodeArray> replace_graph_by_placeholder() const;
OperatorNodeSet m_opr_set;
OperatorNodeList m_oprs;
VarNodeSet m_inputs;
VarNodeSet m_outputs;
std::pair<VarNodeArray, VarNodeArray> replace_graph_by_placeholder() const;
};
class SubGraphExtractor {
......
......@@ -10,6 +10,7 @@
* implied.
*/
#include "megbrain/plugin/profiler.h"
#include "./helper.h"
#include "megbrain/gopt/global_layout_transform.h"
#include "megbrain/gopt/inference.h"
......@@ -22,123 +23,59 @@ using namespace mgb;
using namespace gopt;
using namespace serialization;
#if MGB_CUDA
namespace {
class LayoutTransformContext : public NonCopyableObj {
public:
using OprList = SubGraphExtractor::OprList;
using OprFormat = Problem::OprFormat;
using OprConfigTrait = Problem::OprConfigTrait;
LayoutTransformContext() = delete;
LayoutTransformContext(OprList opr_list,
SmallVector<TensorFormats> available_tensor_formats,
OprConfigTrait opr_configs)
: m_opr_list{std::move(opr_list)},
m_available_tensor_formats{std::move(available_tensor_formats)},
m_opr_configs{std::move(opr_configs)} {}
const OprList& opr_list() const { return m_opr_list; }
const SmallVector<TensorFormats>& available_tensor_formats() const {
return m_available_tensor_formats;
}
const OprConfigTrait& opr_configs() const { return m_opr_configs; }
static std::unique_ptr<LayoutTransformContext> make() {
OprList opr_list = {
opr::ConvBiasForward::typeinfo(),
opr::ConvolutionForward::typeinfo(),
opr::ConvolutionBackwardData::typeinfo(),
opr::ElemwiseMultiType::typeinfo(),
opr::Elemwise::typeinfo(),
opr::TypeCvt::typeinfo(),
opr::PoolingForward::typeinfo(),
opr::WarpPerspectiveForward::typeinfo(),
};
OprConfigTrait opr_configs;
{
auto& dispatchers = opr_configs[opr::ConvBias::typeinfo()];
#define cb(_fmt) \
dispatchers[OprFormat::_fmt] = \
OprTensorFormatsConfiguration::find_dispatcher_by_type_format( \
opr::ConvBias::typeinfo(), OprFormat::_fmt);
cb(NCHW4);
cb(NCHW32);
cb(NHWC);
cb(NCHW64);
cb(CHWN4);
#undef cb
}
{
auto& dispatchers =
opr_configs[opr::ConvolutionBackwardData::typeinfo()];
#define cb(_fmt) \
dispatchers[OprFormat::_fmt] = \
OprTensorFormatsConfiguration::find_dispatcher_by_type_format( \
opr::ConvolutionBackwardData::typeinfo(), \
OprFormat::_fmt);
cb(NCHW4);
#undef cb
}
{
auto& dispatchers =
opr_configs[opr::ConvolutionForward::typeinfo()];
#define cb(_fmt) \
dispatchers[OprFormat::_fmt] = \
OprTensorFormatsConfiguration::find_dispatcher_by_type_format( \
opr::ConvolutionForward::typeinfo(), OprFormat::_fmt);
cb(NCHW4);
#undef cb
}
{
auto& dispatchers = opr_configs[opr::PoolingForward::typeinfo()];
#define cb(_fmt) \
dispatchers[OprFormat::_fmt] = \
OprTensorFormatsConfiguration::find_dispatcher_by_type_format( \
opr::PoolingForward::typeinfo(), OprFormat::_fmt);
cb(NCHW4);
cb(NCHW32);
cb(NHWC);
cb(NCHW64);
cb(CHWN4);
#undef cb
}
{
auto& dispatchers =
opr_configs[opr::WarpPerspectiveForward::typeinfo()];
#define cb(_fmt) \
dispatchers[OprFormat::_fmt] = \
OprTensorFormatsConfiguration::find_dispatcher_by_type_format( \
opr::WarpPerspectiveForward::typeinfo(), OprFormat::_fmt);
cb(NHWC);
cb(NCHW4);
cb(NCHW64);
#undef cb
}
SmallVector<TensorFormats> available_tensor_formats = {
TensorFormats::NHWC, TensorFormats::NCHWc4,
TensorFormats::NCHWc32, TensorFormats::NCHWc64};
return std::make_unique<LayoutTransformContext>(
std::move(opr_list), std::move(available_tensor_formats),
std::move(opr_configs));
}
std::unique_ptr<LayoutTransformContext> make_ctx() {
using OprFormat = LayoutTransformContext::OprFormat;
using OprList = LayoutTransformContext::OprList;
using ReformatAttribute = LayoutTransformContext::ReformatAttribute;
using Attribute = LayoutTransformContext::Attribute;
OprList opr_list = {
opr::ConvBiasForward::typeinfo(),
opr::ConvolutionForward::typeinfo(),
opr::ConvolutionBackwardData::typeinfo(),
opr::ElemwiseMultiType::typeinfo(),
opr::Elemwise::typeinfo(),
opr::TypeCvt::typeinfo(),
opr::PoolingForward::typeinfo(),
opr::WarpPerspectiveForward::typeinfo(),
};
private:
OprList m_opr_list;
SmallVector<TensorFormats> m_available_tensor_formats;
OprConfigTrait m_opr_configs;
};
}; // namespace
SmallVector<TensorFormats> available_tensor_formats = {
TensorFormats::NCHW, TensorFormats::NHWC,
TensorFormats::NCHWc4, TensorFormats::NCHWc32,
TensorFormats::NCHWc64, TensorFormats::CHWNc4};
Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW,
ReformatAttribute::DEFAULT};
auto ctx = std::make_unique<LayoutTransformContext>(
std::move(opr_list), std::move(available_tensor_formats),
attribute);
ctx->add_opr_config(
opr::ConvBiasForward::typeinfo(),
{OprFormat::NCHW, OprFormat::NHWC, OprFormat::NCHW4,
OprFormat::NCHW32, OprFormat::NCHW64, OprFormat::CHWN4})
.add_opr_config(opr::ConvolutionForward::typeinfo(),
{OprFormat::NCHW, OprFormat::NCHW4})
.add_opr_config(opr::ConvolutionBackwardData::typeinfo(),
{OprFormat::NCHW, OprFormat::NCHW4})
.add_opr_config(
opr::PoolingForward::typeinfo(),
{OprFormat::NCHW4, OprFormat::NCHW32, OprFormat::NHWC,
OprFormat::NCHW64, OprFormat::CHWN4})
.add_opr_config(
opr::WarpPerspectiveForward::typeinfo(),
{OprFormat::NHWC, OprFormat::NCHW4, OprFormat::NCHW64});
return ctx;
}
} // namespace
#if MGB_CUDA
#if CUDA_VERSION >= 10020
TEST(TestProfiler, Conv) {
REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0");
cn.activate();
REQUIRE_CUDA_COMPUTE_CAPABILITY_EQ(7, 5);
auto ctx = LayoutTransformContext::make();
auto ctx = make_ctx();
HostTensorGenerator<dtype::Int8> gen;
auto graph = ComputingGraph::make();
......@@ -177,14 +114,10 @@ TEST(TestProfiler, Conv) {
using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
S strategy = S::PROFILE;
gopt::modify_opr_algo_strategy_inplace({c2}, strategy);
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
SubGraphExtractor extractor(ctx->opr_list());
auto partitions = extractor.extract({c2});
ASSERT_EQ(partitions.size(), 1u);
using Attribute = Problem::Attribute;
Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW};
Problem problem(partitions[0], ctx->available_tensor_formats(),
ctx->opr_configs(), attribute);
Problem problem(partitions[0], *ctx);
auto profiler = ProfilerBase::make_profiler();
auto rst = profiler->profile(problem);
const auto& opr_rst = rst.opr_record;
......@@ -204,7 +137,7 @@ TEST(TestProfiler, Deconv) {
auto cn = CompNode::load("gpu0");
cn.activate();
REQUIRE_CUDA_COMPUTE_CAPABILITY_EQ(7, 5);
auto ctx = LayoutTransformContext::make();
auto ctx = make_ctx();
HostTensorGenerator<dtype::Int8> gen;
auto graph = ComputingGraph::make();
......@@ -238,14 +171,10 @@ TEST(TestProfiler, Deconv) {
using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
S strategy = S::PROFILE;
gopt::modify_opr_algo_strategy_inplace({c2}, strategy);
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
SubGraphExtractor extractor(ctx->opr_list());
auto partitions = extractor.extract({c2});
ASSERT_EQ(partitions.size(), 1u);
using Attribute = Problem::Attribute;
Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW};
Problem problem(partitions[0], ctx->available_tensor_formats(),
ctx->opr_configs(), attribute);
Problem problem(partitions[0], *ctx);
auto profiler = ProfilerBase::make_profiler();
auto rst = profiler->profile(problem);
const auto& opr_rst = rst.opr_record;
......@@ -262,7 +191,7 @@ TEST(TestProfiler, Warp) {
auto cn = CompNode::load("gpu0");
cn.activate();
REQUIRE_CUDA_COMPUTE_CAPABILITY_EQ(7, 5);
auto ctx = LayoutTransformContext::make();
auto ctx = make_ctx();
constexpr size_t INP_H = 10, INP_W = 10, N = 16;
......@@ -307,14 +236,9 @@ TEST(TestProfiler, Warp) {
using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
S strategy = S::PROFILE;
gopt::modify_opr_algo_strategy_inplace({w1}, strategy);
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
SubGraphExtractor extractor(ctx->opr_list());
auto partitions = extractor.extract({w1});
ASSERT_EQ(partitions.size(), 1u);
using Attribute = Problem::Attribute;
Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW};
Problem problem(partitions[0], ctx->available_tensor_formats(),
ctx->opr_configs(), attribute);
Problem problem(partitions[0], *ctx);
auto profiler = ProfilerBase::make_profiler();
auto rst = profiler->profile(problem);
const auto& opr_rst = rst.opr_record;
......@@ -330,7 +254,7 @@ TEST(TestProfiler, Pooling) {
auto cn = CompNode::load("gpu0");
cn.activate();
REQUIRE_CUDA_COMPUTE_CAPABILITY_EQ(7, 5);
auto ctx = LayoutTransformContext::make();
auto ctx = make_ctx();
HostTensorGenerator<dtype::Int8> gen;
auto graph = ComputingGraph::make();
......@@ -353,14 +277,10 @@ TEST(TestProfiler, Pooling) {
using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
S strategy = S::PROFILE;
gopt::modify_opr_algo_strategy_inplace({p2}, strategy);
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
SubGraphExtractor extractor(ctx->opr_list());
auto partitions = extractor.extract({p2});
ASSERT_EQ(partitions.size(), 1u);
using Attribute = Problem::Attribute;
Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW};
Problem problem(partitions[0], ctx->available_tensor_formats(),
ctx->opr_configs(), attribute);
Problem problem(partitions[0], *ctx);
auto profiler = ProfilerBase::make_profiler();
auto rst = profiler->profile(problem);
const auto& opr_rst = rst.opr_record;
......@@ -373,8 +293,7 @@ TEST(TestProfiler, Elemwise) {
REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0");
cn.activate();
REQUIRE_CUDA_COMPUTE_CAPABILITY_EQ(7, 5);
auto ctx = LayoutTransformContext::make();
auto ctx = make_ctx();
HostTensorGenerator<dtype::Int8> gen;
auto graph = ComputingGraph::make();
......@@ -403,14 +322,10 @@ TEST(TestProfiler, Elemwise) {
OperatorNodeConfig(
dtype::Quantized4Asymm(13.f, static_cast<uint8_t>(4))));
using OprFormat = OprTensorFormatsConfiguration::OprFormat;
SubGraphExtractor extractor(ctx->opr_list());
auto partitions = extractor.extract({q4e});
ASSERT_EQ(partitions.size(), 1u);
using Attribute = Problem::Attribute;
Attribute attribute = {OprFormat::NCHW, TensorFormats::NCHW};
Problem problem(partitions[0], ctx->available_tensor_formats(),
ctx->opr_configs(), attribute);
Problem problem(partitions[0], *ctx);
auto profiler = ProfilerBase::make_profiler();
auto rst = profiler->profile(problem);
const auto& opr_rst = rst.opr_record;
......@@ -423,7 +338,6 @@ TEST(TestProfiler, Elemwise) {
EXPECT_TRUE(var_rst.count(q8a.node()) > 0);
EXPECT_TRUE(var_rst.count(q8b.node()) > 0);
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -447,6 +447,7 @@ TEST(TestReformatManager, AutoAlignedFeatureProfiling) {
for (size_t i = 0; i < RUNS; ++i)
func->execute();
double time_profiler = profiler->duration() * 1e6;
printf("time: %f, %f\n", time_cuda_evt, time_profiler);
MGB_CUDA_CHECK(cudaEventDestroy(evt0));
MGB_CUDA_CHECK(cudaEventDestroy(evt1));
}
......