提交 8a3eb05a 编写于 作者: M Megvii Engine Team

refactor(mgb/gopt): refactor tensor reformat opt pass

GitOrigin-RevId: a1b1e89b76e4fbdca4f481156bb8af6cae8fe4d8
上级 c33126ab
...@@ -120,10 +120,6 @@ Dimension Dimension::operator/(const Dimension& rhs) const { ...@@ -120,10 +120,6 @@ Dimension Dimension::operator/(const Dimension& rhs) const {
static_cast<char>(m_name), static_cast<char>(rhs.m_name)); static_cast<char>(m_name), static_cast<char>(rhs.m_name));
if (operator==(rhs)) if (operator==(rhs))
return Dimension(m_name, 1, 1); return Dimension(m_name, 1, 1);
megdnn_assert(
!(*this < rhs),
"Divisor must be smaller than dividend(dividend:%s, divisor:%s)",
to_string().c_str(), rhs.to_string().c_str());
if (m_stride == rhs.m_stride) { if (m_stride == rhs.m_stride) {
if (m_extent == UNDETERMINED_EXTENT) { if (m_extent == UNDETERMINED_EXTENT) {
megdnn_assert(rhs.m_extent != UNDETERMINED_EXTENT, megdnn_assert(rhs.m_extent != UNDETERMINED_EXTENT,
......
/**
* \file src/gopt/impl/folding_conv_dimshuffle.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/gopt/inference.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
#include "megbrain/serialization/opr_shallow_copy.h"
#include "megdnn/opr_param_defs.h"
#include "megbrain/opr/internal/megdnn_opr_wrapper.h"
#include "megbrain/utils/hash_ct.h"
#include "midout.h"
#include "megbrain/gopt/reformat_manager.h"
#if CUDA_VERSION >= 10020
// Declare the midout tag used by the MIDOUT_B / MIDOUT_E regions of this file.
MIDOUT_DECL(megbrain_folding_conv_dimshuffle)
// MIDOUT_B(tag): expands to MIDOUT_BEGIN for the tag declared above, keyed by
// MGB_HASH_STR(tag), and leaves a `{` open; it must be paired with MIDOUT_E.
#define MIDOUT_B(tag) \
MIDOUT_BEGIN(megbrain_folding_conv_dimshuffle, \
midout_iv(MGB_HASH_STR(tag))) {
// MIDOUT_E: closes the brace opened by MIDOUT_B and emits MIDOUT_END().
#define MIDOUT_E \
} \
MIDOUT_END();
using namespace mgb;
using namespace gopt;
// Shorthand for the key type used to look up reformat builders below.
using ReformatKey = ReformatManager::ReformatKey;
/* ==================== FoldingConvBiasDimshufflePass ================= */
//! Human-readable name of this optimization pass.
const char* FoldingConvBiasDimshufflePass::name() const {
    const char* pass_name = mgb_cstr_log("folding conv bias dimshuffle pass");
    return pass_name;
}
/**
 * \brief Fold a quantized ConvBias and the conversion operators consuming its
 * output into a single ConvBias that writes the target layout directly.
 *
 * Four operator chains are recognised (listed from the matched operator back
 * to the ConvBias):
 *   - TypeCvt(QuantizedS8 -> Float32) <- Reshape <- Dimshuffle <- ConvBias
 *     (NCHW4), replaced by ConvBias with format NCHW4_NCHW;
 *   - Reshape <- Dimshuffle <- Reshape <- ConvBias(NCHW4), replaced by
 *     ConvBias with format NCHW4_NCHW32;
 *   - Reshape <- Dimshuffle <- TypeCvt(QuantizedS8 -> 4-bit quantized) <-
 *     ConvBias(NCHW4), replaced by ConvBias with format NCHW4_NHWC;
 *   - Reshape <- Dimshuffle <- Reshape <- ConvBias(NCHW32), replaced by
 *     ConvBias with format NCHW32_NCHW4.
 *
 * A chain is folded only if every operator that reads a device value of one of
 * its intermediate operators is itself part of the chain. Operators that match
 * no chain are passed through rewriter.auto_replace_outputs().
 *
 * \param opt optimizer state whose graph is rewritten in place
 */
void FoldingConvBiasDimshufflePass::apply(OptState& opt) const {
    MIDOUT_B("FoldingConvBiasDimshufflePass::apply");
    using DepType = cg::OperatorNodeProp::DepType;
    using OprSet = ThinHashSet<OperatorNodeBase*>;

    // producer operator -> list of (consumer operator, dependency type); only
    // producers whose type can appear inside a chain are recorded
    ThinHashMap<OperatorNodeBase*,
                SmallVector<std::pair<OperatorNodeBase*, DepType>>>
            readers;
    static const ThinHashSet<Typeinfo*> opr_type_list = {
            opr::TypeCvt::typeinfo(), opr::Dimshuffle::typeinfo(),
            opr::Reshape::typeinfo(), opr::ConvBias::typeinfo()};
    opt.graph().iter([&readers](OperatorNodeBase* consumer) {
        for (auto&& dep : consumer->node_prop().dep_map()) {
            auto producer = dep.first->owner_opr();
            if (opr_type_list.count(producer->dyn_typeinfo())) {
                readers[producer].emplace_back(consumer, dep.second);
            }
        }
    });

    auto rewriter = opt.graph().make_rewriter();

    // add every operator reading a device value of \p producer to \p sink
    auto watch_readers = [&readers](OperatorNodeBase* producer, OprSet& sink) {
        for (auto&& dep : readers[producer]) {
            if (dep.second & DepType::DEV_VALUE) {
                sink.insert(dep.first);
            }
        }
    };
    // true iff each operator in \p consumers also belongs to \p chain
    auto chain_is_closed = [](const OprSet& chain, OprSet& consumers) {
        for (auto consumer : consumers) {
            if (chain.count(consumer) == 0) {
                return false;
            }
        }
        return true;
    };

    auto try_conv_dimshuffle_reshape_typecvt =
            [&rewriter, &watch_readers,
             &chain_is_closed](OperatorNodeBase* opr) {
                OprSet chain, consumers;
                // check typecvt
                auto typecvt = try_cast_as_op<opr::TypeCvt>(opr);
                if (!typecvt)
                    return false;
                auto cvt_src = typecvt->input(0)->dtype(),
                     cvt_dst = typecvt->output(0)->dtype();
                if (cvt_src.enumv() != DTypeEnum::QuantizedS8 ||
                    cvt_dst.enumv() != DTypeEnum::Float32)
                    return false;
                chain.insert(opr);
                // check reshape
                auto reshape = try_cast_as_op<opr::Reshape>(
                        typecvt->input(0)->owner_opr());
                if (!reshape)
                    return false;
                chain.insert(reshape);
                watch_readers(reshape, consumers);
                // check shuffle
                auto shuffle = try_cast_as_op<opr::Dimshuffle>(
                        reshape->input(0)->owner_opr());
                if (!shuffle)
                    return false;
                auto&& param = shuffle->param();
                if (param.pattern_len != 5)
                    return false;
                const int nchw4_to_nchw[] = {0, 1, 4, 2, 3};
                for (int axis = 0; axis < 5; ++axis) {
                    if (param.pattern[axis] != nchw4_to_nchw[axis])
                        return false;
                }
                // the shape is inspected only after the pattern matched
                if (shuffle->input(0)->shape()[4] != 4)
                    return false;
                chain.insert(shuffle);
                watch_readers(shuffle, consumers);
                // check conv bias
                auto conv_bias = try_cast_as_op<opr::ConvBias>(
                        shuffle->input(0)->owner_opr());
                if (!conv_bias)
                    return false;
                auto conv_in_dtype = conv_bias->input(0)->dtype();
                if (conv_in_dtype.enumv() != DTypeEnum::QuantizedS8 ||
                    conv_bias->param().format !=
                            megdnn::param::ConvBias::Format::NCHW4)
                    return false;
                if (conv_bias->input().size() != 3)
                    return false;
                chain.insert(conv_bias);
                watch_readers(conv_bias, consumers);
                if (!chain_is_closed(chain, consumers))
                    return false;

                auto src = rewriter.get_var(conv_bias->input(0)),
                     filter = rewriter.get_var(conv_bias->input(1)),
                     bias = rewriter.get_var(conv_bias->input(2));
                auto new_bias = ReformatManager::instance().get(ReformatKey{
                        TensorFormats::NCHWc4, TensorFormats::NCHW})({bias});
                new_bias =
                        opr::TypeCvt::make(new_bias, dtype::Float32()).node();
                auto new_param = conv_bias->param();
                new_param.format = megdnn::param::ConvBias::Format::NCHW4_NCHW;
                auto conv_bias_shuffle = opr::ConvBias::make(
                        src, filter, new_bias, new_param,
                        conv_bias->execution_policy(),
                        OperatorNodeConfig{dtype::Float32()});
                rewriter.replace_var(
                        opr->output(0), conv_bias_shuffle.node(),
                        mgb_cstr_log("replace conv_bias + typecvt + "
                                     "dimshuffle + "
                                     "reshape to conv_bias(NCHW4_NCHW)"));
                return true;
            };

    auto try_conv_reformat_nchw42nchw32 =
            [&rewriter, &watch_readers,
             &chain_is_closed](OperatorNodeBase* opr) {
                OprSet chain, consumers;
                // check reshape
                auto reshape1 = try_cast_as_op<opr::Reshape>(opr);
                if (!reshape1)
                    return false;
                chain.insert(opr);
                // check dimshuffle
                auto shuffle = try_cast_as_op<opr::Dimshuffle>(
                        reshape1->input(0)->owner_opr());
                if (!shuffle)
                    return false;
                auto&& param = shuffle->param();
                if (param.pattern_len != 6)
                    return false;
                const int nchw4_to_nchw32[] = {0, 1, 3, 4, 2, 5};
                for (int axis = 0; axis < 6; ++axis) {
                    if (param.pattern[axis] != nchw4_to_nchw32[axis])
                        return false;
                }
                if (shuffle->output(0)->shape()[5] != 4 ||
                    shuffle->output(0)->shape()[4] != 8)
                    return false;
                chain.insert(shuffle);
                watch_readers(shuffle, consumers);
                // check reshape
                auto reshape2 = try_cast_as_op<opr::Reshape>(
                        shuffle->input(0)->owner_opr());
                if (!reshape2)
                    return false;
                chain.insert(reshape2);
                watch_readers(reshape2, consumers);
                // check conv bias
                auto conv_bias = try_cast_as_op<opr::ConvBias>(
                        reshape2->input(0)->owner_opr());
                if (!conv_bias)
                    return false;
                auto conv_in_dtype = conv_bias->input(0)->dtype();
                if (conv_in_dtype.enumv() != DTypeEnum::QuantizedS8 ||
                    conv_bias->param().format !=
                            megdnn::param::ConvBias::Format::NCHW4)
                    return false;
                if (conv_bias->input().size() != 3)
                    return false;
                chain.insert(conv_bias);
                watch_readers(conv_bias, consumers);
                if (!chain_is_closed(chain, consumers))
                    return false;

                auto src = rewriter.get_var(conv_bias->input(0)),
                     filter = rewriter.get_var(conv_bias->input(1)),
                     bias = rewriter.get_var(conv_bias->input(2));
                auto new_bias = ReformatManager::instance().get(ReformatKey{
                        TensorFormats::NCHWc4, TensorFormats::NCHWc32})({bias});
                auto new_param = conv_bias->param();
                new_param.format =
                        megdnn::param::ConvBias::Format::NCHW4_NCHW32;
                auto conv_bias_shuffle = opr::ConvBias::make(
                        src, filter, new_bias, new_param,
                        conv_bias->execution_policy(), conv_bias->config());
                rewriter.replace_var(
                        opr->output(0), conv_bias_shuffle.node(),
                        mgb_cstr_log("replace conv_bias + "
                                     "reformat to conv_bias(NCHW4_NCHW32)"));
                return true;
            };

    auto try_conv_reformat_nchw42nhwc =
            [&rewriter, &watch_readers,
             &chain_is_closed](OperatorNodeBase* opr) {
                OprSet chain, consumers;
                // check reshape
                auto reshape = try_cast_as_op<opr::Reshape>(opr);
                if (!reshape)
                    return false;
                chain.insert(opr);
                // check dimshuffle
                auto shuffle = try_cast_as_op<opr::Dimshuffle>(
                        reshape->input(0)->owner_opr());
                if (!shuffle)
                    return false;
                auto&& param = shuffle->param();
                if (param.pattern_len != 5)
                    return false;
                const int nchw4_to_nhwc[] = {0, 2, 3, 1, 4};
                for (int axis = 0; axis < 5; ++axis) {
                    if (param.pattern[axis] != nchw4_to_nhwc[axis])
                        return false;
                }
                if (shuffle->output(0)->shape()[4] != 4)
                    return false;
                chain.insert(shuffle);
                watch_readers(shuffle, consumers);
                // check typecvt
                auto typecvt = try_cast_as_op<opr::TypeCvt>(
                        shuffle->input(0)->owner_opr());
                if (!typecvt)
                    return false;
                auto in_dtype = typecvt->input(0)->dtype(),
                     out_dtype = typecvt->output(0)->dtype();
                if (in_dtype.enumv() != DTypeEnum::QuantizedS8 ||
                    (out_dtype.enumv() != DTypeEnum::QuantizedS4 &&
                     out_dtype.enumv() != DTypeEnum::Quantized4Asymm))
                    return false;
                chain.insert(typecvt);
                watch_readers(typecvt, consumers);
                // check conv bias
                auto conv_bias = try_cast_as_op<opr::ConvBias>(
                        typecvt->input(0)->owner_opr());
                if (!conv_bias)
                    return false;
                auto conv_in_dtype = conv_bias->input(0)->dtype();
                if (conv_in_dtype.enumv() != DTypeEnum::QuantizedS8 ||
                    conv_bias->param().format !=
                            megdnn::param::ConvBias::Format::NCHW4)
                    return false;
                if (conv_bias->input().size() != 3)
                    return false;
                chain.insert(conv_bias);
                watch_readers(conv_bias, consumers);
                if (!chain_is_closed(chain, consumers))
                    return false;

                auto src = rewriter.get_var(conv_bias->input(0)),
                     filter = rewriter.get_var(conv_bias->input(1)),
                     bias = rewriter.get_var(conv_bias->input(2));
                auto new_bias = ReformatManager::instance().get(ReformatKey{
                        TensorFormats::NCHWc4, TensorFormats::NHWC})({bias});
                auto new_param = conv_bias->param();
                new_param.format = megdnn::param::ConvBias::Format::NCHW4_NHWC;
                // the fused operator emits the dtype the TypeCvt produced
                auto conv_bias_shuffle = opr::ConvBias::make(
                        src, filter, new_bias, new_param,
                        conv_bias->execution_policy(),
                        OperatorNodeConfig{out_dtype});
                rewriter.replace_var(
                        opr->output(0), conv_bias_shuffle.node(),
                        mgb_cstr_log("replace conv_bias + "
                                     "reformat to conv_bias(NCHW4_NHWC)"));
                return true;
            };

    auto try_conv_reformat_nchw322nchw4 =
            [&rewriter, &watch_readers,
             &chain_is_closed](OperatorNodeBase* opr) {
                OprSet chain, consumers;
                // check reshape
                auto reshape1 = try_cast_as_op<opr::Reshape>(opr);
                if (!reshape1)
                    return false;
                chain.insert(opr);
                // check dimshuffle
                auto shuffle = try_cast_as_op<opr::Dimshuffle>(
                        reshape1->input(0)->owner_opr());
                if (!shuffle)
                    return false;
                auto&& param = shuffle->param();
                if (param.pattern_len != 6)
                    return false;
                const int nchw32_to_nchw4[] = {0, 1, 4, 2, 3, 5};
                for (int axis = 0; axis < 6; ++axis) {
                    if (param.pattern[axis] != nchw32_to_nchw4[axis])
                        return false;
                }
                if (shuffle->input(0)->shape()[5] != 4 ||
                    shuffle->input(0)->shape()[4] != 8)
                    return false;
                chain.insert(shuffle);
                watch_readers(shuffle, consumers);
                // check reshape
                auto reshape2 = try_cast_as_op<opr::Reshape>(
                        shuffle->input(0)->owner_opr());
                if (!reshape2)
                    return false;
                chain.insert(reshape2);
                watch_readers(reshape2, consumers);
                // check conv bias
                auto conv_bias = try_cast_as_op<opr::ConvBias>(
                        reshape2->input(0)->owner_opr());
                if (!conv_bias)
                    return false;
                auto conv_in_dtype = conv_bias->input(0)->dtype();
                if (conv_in_dtype.enumv() != DTypeEnum::QuantizedS8 ||
                    conv_bias->param().format !=
                            megdnn::param::ConvBias::Format::NCHW32)
                    return false;
                if (conv_bias->input().size() != 3)
                    return false;
                chain.insert(conv_bias);
                watch_readers(conv_bias, consumers);
                if (!chain_is_closed(chain, consumers))
                    return false;

                auto src = rewriter.get_var(conv_bias->input(0)),
                     filter = rewriter.get_var(conv_bias->input(1)),
                     bias = rewriter.get_var(conv_bias->input(2));
                auto new_bias = ReformatManager::instance().get(ReformatKey{
                        TensorFormats::NCHWc32, TensorFormats::NCHWc4})({bias});
                auto new_param = conv_bias->param();
                new_param.format =
                        megdnn::param::ConvBias::Format::NCHW32_NCHW4;
                auto conv_bias_shuffle = opr::ConvBias::make(
                        src, filter, new_bias, new_param,
                        conv_bias->execution_policy(), conv_bias->config());
                rewriter.replace_var(
                        opr->output(0), conv_bias_shuffle.node(),
                        mgb_cstr_log("replace conv_bias + "
                                     "reformat to conv_bias(NCHW32_NCHW4)"));
                return true;
            };
    MGB_MARK_USED_VAR(try_conv_reformat_nchw322nchw4);
    MGB_MARK_USED_VAR(try_conv_reformat_nchw42nchw32);

    // matchers are tried in order and evaluation stops at the first success
    auto on_opr = [&try_conv_dimshuffle_reshape_typecvt,
                   &try_conv_reformat_nchw42nchw32,
                   &try_conv_reformat_nchw42nhwc,
                   &try_conv_reformat_nchw322nchw4,
                   &rewriter](OperatorNodeBase* opr) {
        const bool folded = try_conv_dimshuffle_reshape_typecvt(opr) ||
                            try_conv_reformat_nchw42nchw32(opr) ||
                            try_conv_reformat_nchw42nhwc(opr) ||
                            try_conv_reformat_nchw322nchw4(opr);
        if (!folded) {
            rewriter.auto_replace_outputs(opr);
        }
    };
    opt.graph().iter(on_opr);
    rewriter.apply_inplace();
    MIDOUT_E
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
此差异已折叠。
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
*/ */
#include "megbrain/gopt/reformat_manager.h" #include "megbrain/gopt/reformat_manager.h"
#include <numeric>
#include "megbrain/opr/tensor_manip.h" #include "megbrain/opr/tensor_manip.h"
using namespace mgb; using namespace mgb;
...@@ -65,6 +64,10 @@ NamedTensorShape tensor_formats_to_named_tensor_shape(TensorFormats format) { ...@@ -65,6 +64,10 @@ NamedTensorShape tensor_formats_to_named_tensor_shape(TensorFormats format) {
return {{"C//8"}, {"C%1"}, {"C%1"}, {"R"}, {"S"}, {"C%8"}}; return {{"C//8"}, {"C%1"}, {"C%1"}, {"R"}, {"S"}, {"C%8"}};
case TensorFormats::KRSCk8: case TensorFormats::KRSCk8:
return {{"K//8"}, {"R"}, {"S"}, {"C"}, {"K%8"}}; return {{"K//8"}, {"R"}, {"S"}, {"C"}, {"K%8"}};
case TensorFormats::KCRSc4:
return {{"K"}, {"C//4"}, {"R"}, {"S"}, {"C%4"}};
case TensorFormats::GKCRSc4:
return {{"G"}, {"K"}, {"C//4"}, {"R"}, {"S"}, {"C%4"}};
case TensorFormats::KCRS: case TensorFormats::KCRS:
return {{"K"}, {"C"}, {"R"}, {"S"}}; return {{"K"}, {"C"}, {"R"}, {"S"}};
case TensorFormats::GKCRS: case TensorFormats::GKCRS:
...@@ -130,70 +133,40 @@ bool ReformatManager::ReformatKey::Equal::operator()( ...@@ -130,70 +133,40 @@ bool ReformatManager::ReformatKey::Equal::operator()(
lhs.attribute == rhs.attribute; lhs.attribute == rhs.attribute;
} }
ReformatManager::ReformatKey&
ReformatManager::ReformatKey::deduce_reformat_dtype_enum(const DType& dt) {
static const ThinHashSet<std::pair<TensorFormats, TensorFormats>> set = {
{TensorFormats::NCHW, TensorFormats::NCHWc64},
{TensorFormats::NCHWc64, TensorFormats::NCHW},
{TensorFormats::NCHW, TensorFormats::NHWC},
{TensorFormats::NHWC, TensorFormats::NCHW}};
if (set.count({input_format, output_format}) > 0 &&
(dt.enumv() == DTypeEnum::QuantizedS4 ||
dt.enumv() == DTypeEnum::Quantized4Asymm)) {
input_dtype = output_dtype = dt.enumv();
}
return *this;
}
// =================== ReformatManager ====================*/ // =================== ReformatManager ====================*/
#define FOREACH_FEATURE_TENSOR_FORMATS(cb) \
cb(NCHW) cb(NHWC) cb(NCHWc4) cb(NCHWc8) cb(NCHWc32) cb(NCHWc64) cb(CHWNc4) \
cb(NHCWc4)
#define FOREACH_WEIGHT_TENSOR_FORMATS(cb) \
cb(KRSCk4) cb(KRSCk4c4) cb(KCRSk4c4) cb(KCRSc4k4) cb(KCRSc8k8) cb(KRSCk8) \
cb(GKRSCk4) cb(GKRSCk4c4) cb(GKCRSc4k4) cb(GKCRSk4c4) \
cb(GKCRSc8k8) cb(C11RSc4) cb(C11RSc8)
ReformatManager::ReformatManager() { ReformatManager::ReformatManager() {
static constexpr TensorFormats feature_tensor_formats[] = { using Attribute = ReformatKey::Attribute;
#define cb(_fmt) TensorFormats::_fmt, {
FOREACH_FEATURE_TENSOR_FORMATS(cb) auto i = TensorFormats::NCHWc4, o = TensorFormats::CHWNc4;
#undef cb auto&& impl1 = [](const VarNodeArray& vars) {
}; return opr::RelayoutFormat::make(
static constexpr int nr_feature_tensor_formats = vars[0],
sizeof(feature_tensor_formats) / sizeof(TensorFormats); megdnn::param::RelayoutFormat::Mode::NCHW4_CHWN4)
for (int i = 0; i < nr_feature_tensor_formats; ++i) { .node();
for (int o = 0; o < nr_feature_tensor_formats; ++o) { };
if (i == o) m_cache.emplace(ReformatKey{i, o}, impl1);
continue; auto&& impl2 = [](const VarNodeArray& vars) {
NamedTensorShape input_shape = tensor_formats_to_named_tensor_shape( return opr::RelayoutFormat::make(
feature_tensor_formats[i]); vars[0],
NamedTensorShape output_shape = megdnn::param::RelayoutFormat::Mode::CHWN4_NCHW4)
tensor_formats_to_named_tensor_shape( .node();
feature_tensor_formats[o]); };
auto impl = std::get<0>( m_cache.emplace(ReformatKey{o, i}, impl2);
ReformatEmitter{input_shape, output_shape}.emit());
m_cache.emplace(ReformatKey{feature_tensor_formats[i],
feature_tensor_formats[o]},
impl);
}
}
static constexpr TensorFormats default_weight_tensor_formats =
TensorFormats::KCRS;
static constexpr TensorFormats default_group_conv_weight_tensor_formats =
TensorFormats::GKCRS;
static constexpr TensorFormats default_chan_conv_weight_tensor_formats =
TensorFormats::C11RS;
static constexpr TensorFormats weight_tensor_formats[] = {
#define cb(_fmt) TensorFormats::_fmt,
FOREACH_WEIGHT_TENSOR_FORMATS(cb)
#undef cb
};
static constexpr int nr_weight_tensor_formats =
sizeof(weight_tensor_formats) / sizeof(TensorFormats);
using Name = megdnn::Dimension::Name;
for (int o = 0; o < nr_weight_tensor_formats; ++o) {
NamedTensorShape output_shape =
tensor_formats_to_named_tensor_shape(weight_tensor_formats[o]);
TensorFormats input_format;
if (output_shape[0].name() == Name::G) {
input_format = default_group_conv_weight_tensor_formats;
} else if (output_shape[0].name() == Name::C) {
input_format = default_chan_conv_weight_tensor_formats;
} else {
mgb_assert(output_shape[0].name() == Name::K);
input_format = default_weight_tensor_formats;
}
NamedTensorShape input_shape =
tensor_formats_to_named_tensor_shape(input_format);
auto impl =
std::get<0>(ReformatEmitter{input_shape, output_shape}.emit());
m_cache.emplace(ReformatKey{input_format, weight_tensor_formats[o]},
impl);
} }
{ {
auto i = TensorFormats::NCHW, o = TensorFormats::NCHWc4; auto i = TensorFormats::NCHW, o = TensorFormats::NCHWc4;
...@@ -206,7 +179,7 @@ ReformatManager::ReformatManager() { ...@@ -206,7 +179,7 @@ ReformatManager::ReformatManager() {
m_cache.emplace(ReformatKey{i, o, Attribute::IC_SMALL}, impl); m_cache.emplace(ReformatKey{i, o, Attribute::IC_SMALL}, impl);
} }
{ {
auto i = TensorFormats::KCRS, o = TensorFormats::KCRSc4k4; auto i = TensorFormats::KCRS, o = TensorFormats::KCRSc4;
auto&& impl = [](const VarNodeArray& vars) { auto&& impl = [](const VarNodeArray& vars) {
return opr::RelayoutFormat::make( return opr::RelayoutFormat::make(
vars[0], vars[0],
...@@ -238,7 +211,7 @@ ReformatManager::ReformatManager() { ...@@ -238,7 +211,7 @@ ReformatManager::ReformatManager() {
auto&& impl = [](const VarNodeArray& vars) { auto&& impl = [](const VarNodeArray& vars) {
return opr::RelayoutFormat::make( return opr::RelayoutFormat::make(
vars[0], vars[0],
megdnn::param::RelayoutFormat::Mode::NCHW_NCHW64) megdnn::param::RelayoutFormat::Mode::NCHW64_NCHW)
.node(); .node();
}; };
m_cache.emplace( m_cache.emplace(
...@@ -272,7 +245,7 @@ ReformatManager::ReformatManager() { ...@@ -272,7 +245,7 @@ ReformatManager::ReformatManager() {
auto&& impl = [](const VarNodeArray& vars) { auto&& impl = [](const VarNodeArray& vars) {
return opr::RelayoutFormat::make( return opr::RelayoutFormat::make(
vars[0], vars[0],
megdnn::param::RelayoutFormat::Mode::NCHW_NHWC) megdnn::param::RelayoutFormat::Mode::NHWC_NCHW)
.node(); .node();
}; };
m_cache.emplace( m_cache.emplace(
...@@ -371,14 +344,23 @@ ReformatManager::ReformatManager() { ...@@ -371,14 +344,23 @@ ReformatManager::ReformatManager() {
impl); impl);
} }
} }
#undef FOREACH_FEATURE_TENSOR_FORMATS
#undef FOREACH_WEIGHT_TENSOR_FORMATS
const ReformatManager::ReformatImpl& ReformatManager::get( ReformatManager::ReformatImpl ReformatManager::get(
const ReformatKey& key) const { const ReformatKey& key) const {
using Attribute = ReformatKey::Attribute;
MGB_TRY { MGB_TRY {
auto&& impl = m_cache.at(key); auto find = m_cache.find(key);
return impl; if (find != m_cache.end()) {
auto rst = find->second;
return rst;
}
mgb_assert(key.attribute == Attribute::DEFAULT);
auto&& i = key.input_format;
auto&& o = key.output_format;
auto ishp = tensor_formats_to_named_tensor_shape(i);
auto oshp = tensor_formats_to_named_tensor_shape(o);
auto builder = std::get<0>(ReformatEmitter{ishp, oshp}.emit());
return builder;
} }
MGB_CATCH(std::exception & exc, { MGB_CATCH(std::exception & exc, {
mgb_log_error( mgb_log_error(
...@@ -390,10 +372,7 @@ const ReformatManager::ReformatImpl& ReformatManager::get( ...@@ -390,10 +372,7 @@ const ReformatManager::ReformatImpl& ReformatManager::get(
} }
const ReformatManager& ReformatManager::instance() { const ReformatManager& ReformatManager::instance() {
static ReformatManager* inst = nullptr; static ReformatManager inst;
if (inst == nullptr) { return inst;
inst = new ReformatManager();
}
return *inst;
} }
// vim: syntax=cpp.doxygen // vim: syntax=cpp.doxygen
此差异已折叠。
...@@ -227,6 +227,7 @@ namespace gopt { ...@@ -227,6 +227,7 @@ namespace gopt {
VarReplaceCheckFlag m_var_replace_check_flag = VarReplaceCheckFlag m_var_replace_check_flag =
VarReplaceCheckFlag::CHECK_ALL; VarReplaceCheckFlag::CHECK_ALL;
class RelayoutPlaceholder; class RelayoutPlaceholder;
friend class ShuffleShuffleRemovePass;
public: public:
TensorReformatPass& set_var_replace_check_flag(VarReplaceCheckFlag flag) { TensorReformatPass& set_var_replace_check_flag(VarReplaceCheckFlag flag) {
......
...@@ -49,10 +49,14 @@ enum class TensorFormats : uint32_t { ...@@ -49,10 +49,14 @@ enum class TensorFormats : uint32_t {
KRSCk8 = 21, ///< [K/8, R, S, C, K%8] KRSCk8 = 21, ///< [K/8, R, S, C, K%8]
// NCHW4
KCRSc4 = 22, ///< [K, C/4, R, S, C%4]
GKCRSc4 = 23, ///< [G, K, C/4, R, S, C%4]
// default weight format // default weight format
KCRS = 22, ///< [K, C, R, S] KCRS = 24, ///< [K, C, R, S]
GKCRS = 23, ///< [G, K, C, R, S] GKCRS = 25, ///< [G, K, C, R, S]
C11RS = 24, ///< [C, 1, 1, R, S] C11RS = 26, ///< [C, 1, 1, R, S]
}; };
class ReformatManager : public NonCopyableObj { class ReformatManager : public NonCopyableObj {
...@@ -60,16 +64,20 @@ class ReformatManager : public NonCopyableObj { ...@@ -60,16 +64,20 @@ class ReformatManager : public NonCopyableObj {
public: public:
using ReformatImpl = thin_function<VarNode*(const VarNodeArray&)>; using ReformatImpl = thin_function<VarNode*(const VarNodeArray&)>;
enum class Attribute : uint32_t {
DEFAULT = 0,
IMAGE2D = 1 << 0,
IC_SMALL = 1 << 1,
};
struct ReformatKey { struct ReformatKey {
enum class Attribute : uint32_t {
DEFAULT = 0,
IMAGE2D = 1 << 0,
IC_SMALL = 1 << 1,
};
TensorFormats input_format, output_format; TensorFormats input_format, output_format;
DTypeEnum input_dtype, output_dtype; DTypeEnum input_dtype, output_dtype;
Attribute attribute; Attribute attribute;
std::string to_string() const; std::string to_string() const;
ReformatKey()
: input_dtype{DTypeEnum::Float32},
output_dtype{DTypeEnum::Float32},
attribute{Attribute::DEFAULT} {}
ReformatKey(TensorFormats input_format_, TensorFormats output_format_, ReformatKey(TensorFormats input_format_, TensorFormats output_format_,
Attribute attribute_ = Attribute::DEFAULT, Attribute attribute_ = Attribute::DEFAULT,
DTypeEnum input_dtype_ = DTypeEnum::Float32, DTypeEnum input_dtype_ = DTypeEnum::Float32,
...@@ -86,11 +94,13 @@ public: ...@@ -86,11 +94,13 @@ public:
bool operator()(const ReformatKey& lhs, bool operator()(const ReformatKey& lhs,
const ReformatKey& rhs) const; const ReformatKey& rhs) const;
}; };
ReformatKey& deduce_reformat_dtype_enum(const DType& dt);
}; };
using ReformatCache = using ReformatCache =
std::unordered_map<ReformatKey, ReformatImpl, ReformatKey::Hash, std::unordered_map<ReformatKey, ReformatImpl, ReformatKey::Hash,
ReformatKey::Equal>; ReformatKey::Equal>;
const ReformatImpl& get(const ReformatKey& key) const; ReformatImpl get(const ReformatKey& key) const;
ReformatImpl get(ReformatKey&& key) const { return get(key); }
static const ReformatManager& instance(); static const ReformatManager& instance();
private: private:
......
/**
* \file src/gopt/test/reformat_manager.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "./helper.h"
#include "megbrain/gopt/reformat_manager.h"
#include "megbrain/opr/tensor_manip.h"
using namespace mgb;
using namespace gopt;
// Checks that the NHWC -> NCHWc64 builder returned by ReformatManager yields
// the same tensor as a hand-written Reshape + Dimshuffle, and that the built
// graph contains exactly one GetVarShape and one Reshape operator.
TEST(TestReformatManager, Feature) {
    constexpr size_t N = 16, C = 128, H = 7, W = 7;
    HostTensorGenerator<> gen;
    using ReformatKey = ReformatManager::ReformatKey;
    ReformatKey key{TensorFormats::NHWC, TensorFormats::NCHWc64};
    auto reformat = ReformatManager::instance().get(key);
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;

    // reference implementation: split the last axis by 64, then move the
    // outer part of that split in front of the two spatial axes
    auto reference = [](VarNode* inp) {
        SymbolVar var{inp};
        auto shape_of = opr::GetVarShape::make(var);
        auto scalar = [&var](int v) { return var.make_scalar(v); };
        auto dim = [&shape_of, &scalar](int idx) {
            return opr::IndexAt::make(shape_of, {{0, scalar(idx)}});
        };
        auto target_shape = opr::Concat::make(
                {dim(0), dim(1), dim(2), dim(3) / 64, scalar(64)}, 0);
        auto reshaped = opr::Reshape::make(var, target_shape);
        return opr::Dimshuffle::make(reshaped, {0, 3, 1, 2, 4});
    };
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name);
    };

    auto x = mkvar("x", {N, H, W, C});
    auto y1 = SymbolVar(reformat({x.node()}));
    auto y2 = reference(x.node());

    size_t nr_shapeof = 0;
    size_t nr_reshape = 0;
    cg::DepOprIter{[&nr_shapeof, &nr_reshape](cg::OperatorNodeBase* o) {
        if (o->same_type<opr::GetVarShape>())
            ++nr_shapeof;
        if (o->same_type<opr::Reshape>())
            ++nr_reshape;
    }}
            .add(y1.node()->owner_opr());
    ASSERT_EQ(nr_shapeof, 1);
    ASSERT_EQ(nr_reshape, 1);

    HostTensorND t1, t2;
    // the first function is executed before the second one is compiled
    auto func1 = graph->compile({make_callback_copy(y1, t1)});
    func1->execute();
    auto func2 = graph->compile({make_callback_copy(y2, t2)});
    func2->execute();
    MGB_ASSERT_TENSOR_EQ(t1, t2);
}
// Checks that the GKCRS -> GKCRSk4c4 builder returned by ReformatManager
// yields the same tensor as a hand-written Reshape + Dimshuffle + Reshape, and
// that the built graph contains exactly one GetVarShape and one Reshape.
TEST(TestReformatManager, Weight) {
    constexpr size_t G = 8, K = 128, C = 128, R = 3, S = 3;
    HostTensorGenerator<> gen;
    using ReformatKey = ReformatManager::ReformatKey;
    ReformatKey key{TensorFormats::GKCRS, TensorFormats::GKCRSk4c4};
    auto reformat = ReformatManager::instance().get(key);
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;

    // reference implementation: split axes 1 and 2 by 4, move both inner
    // parts to the back, then reshape to the final 7-d shape
    auto reference = [](VarNode* inp) {
        SymbolVar var{inp};
        auto shape_of = opr::GetVarShape::make(var);
        auto scalar = [&var](int v) { return var.make_scalar(v); };
        auto dim = [&shape_of, &scalar](int idx) {
            return opr::IndexAt::make(shape_of, {{0, scalar(idx)}});
        };
        auto split_shape =
                opr::Concat::make({dim(0), dim(1) / 4, scalar(4), dim(2) / 4,
                                   scalar(4), dim(3), dim(4)},
                                  0);
        auto final_shape =
                opr::Concat::make({dim(0), dim(1) / 4, dim(2) / 4, dim(3),
                                   dim(4), scalar(4), scalar(4)},
                                  0);
        auto split = opr::Reshape::make(var, split_shape);
        auto permuted = opr::Dimshuffle::make(split, {0, 1, 3, 5, 6, 2, 4});
        return opr::Reshape::make(permuted, final_shape);
    };
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp)).rename(name);
    };

    auto w = mkvar("w", {G, K / G, C / G, R, S});
    auto y1 = SymbolVar(reformat({w.node()}));
    auto y2 = reference(w.node());

    size_t nr_shapeof = 0;
    size_t nr_reshape = 0;
    cg::DepOprIter{[&nr_shapeof, &nr_reshape](cg::OperatorNodeBase* o) {
        if (o->same_type<opr::GetVarShape>())
            ++nr_shapeof;
        if (o->same_type<opr::Reshape>())
            ++nr_reshape;
    }}
            .add(y1.node()->owner_opr());
    ASSERT_EQ(nr_shapeof, 1);
    ASSERT_EQ(nr_reshape, 1);

    HostTensorND t1, t2;
    // the first function is executed before the second one is compiled
    auto func1 = graph->compile({make_callback_copy(y1, t1)});
    func1->execute();
    auto func2 = graph->compile({make_callback_copy(y2, t2)});
    func2->execute();
    MGB_ASSERT_TENSOR_EQ(t1, t2);
}
// Requesting GKCRS -> GKCRSk4c4 with the IMAGE2D attribute is expected to
// raise AssertionError from ReformatManager::get().
TEST(TestReformatManager, InvalidKey) {
    using ReformatKey = ReformatManager::ReformatKey;
    using Attribute = ReformatKey::Attribute;
    ReformatKey key{TensorFormats::GKCRS, TensorFormats::GKCRSk4c4,
                    Attribute::IMAGE2D};
    ASSERT_THROW(ReformatManager::instance().get(key), AssertionError);
}
// Checks that the NCHW -> NCHWc4 builder registered with the IC_SMALL
// attribute matches a direct RelayoutFormat(NCHW_NCHW4_IC_SMALL) operator on a
// 3-channel input placed on the "cpux" comp node.
TEST(TestReformatManager, InputChannelSmall) {
    constexpr size_t N = 16, C = 3, H = 224, W = 224;
    auto cn = CompNode::load("cpux");
    HostTensorGenerator<> gen;
    using ReformatKey = ReformatManager::ReformatKey;
    using Attribute = ReformatKey::Attribute;
    ReformatKey key{TensorFormats::NCHW, TensorFormats::NCHWc4,
                    Attribute::IC_SMALL};
    auto reformat = ReformatManager::instance().get(key);
    auto graph = ComputingGraph::make();
    graph->options().graph_opt_level = 0;

    // reference implementation: the relayout operator invoked directly
    auto reference = [](VarNode* inp) {
        SymbolVar var{inp};
        return opr::RelayoutFormat::make(
                var, megdnn::param::RelayoutFormat::Mode::NCHW_NCHW4_IC_SMALL);
    };
    auto mkvar = [&](const char* name, const TensorShape& shp) {
        return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
    };

    auto x = mkvar("x", {N, C, H, W});
    auto y1 = SymbolVar(reformat({x.node()}));
    auto y2 = reference(x.node());

    HostTensorND t1, t2;
    // the first function is executed before the second one is compiled
    auto func1 = graph->compile({make_callback_copy(y1, t1)});
    func1->execute();
    auto func2 = graph->compile({make_callback_copy(y2, t2)});
    func2->execute();
    MGB_ASSERT_TENSOR_EQ(t1, t2);
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册