Commit a3cd3fc7 authored by Megvii Engine Team

test(mgb/gopt): add testcase for global layout transform

GitOrigin-RevId: f9669e1ba0d4c46ca8aab3161870d17d7762bf8b
Parent af576e9a
......@@ -28,7 +28,10 @@ public:
private:
using TensorFormatsBitSet = uint32_t;
using State = SmallVector<TensorFormatsBitSet>;
/// each bit represents one kind of tensor format
static constexpr uint32_t BITS_PER_BYTE = 8;
static constexpr uint32_t MAX_TENSOR_FORMATS =
sizeof(TensorFormatsBitSet) * BITS_PER_BYTE;
TensorFormatsBitSet add(TensorFormatsBitSet& set, TensorFormats fmt) {
mgb_assert(static_cast<uint32_t>(fmt) < MAX_TENSOR_FORMATS);
set |= (1 << static_cast<uint32_t>(fmt));
......
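A minimal standalone sketch of the bitset bookkeeping above, with hypothetical enumerator values standing in for mgb's TensorFormats; it shows why the capacity must be sizeof(...) * BITS_PER_BYTE rather than sizeof(...) alone:

#include <cassert>
#include <cstdint>

// Hypothetical stand-ins for mgb's TensorFormats enumerators.
enum class TensorFormats : uint32_t { NCHW = 0, NHWC = 1, NCHWc4 = 2, NCHWc64 = 3 };

using TensorFormatsBitSet = uint32_t;
constexpr uint32_t BITS_PER_BYTE = 8;
// sizeof() counts bytes; the bitset holds one format per *bit*.
constexpr uint32_t MAX_TENSOR_FORMATS = sizeof(TensorFormatsBitSet) * BITS_PER_BYTE;

TensorFormatsBitSet add(TensorFormatsBitSet& set, TensorFormats fmt) {
    assert(static_cast<uint32_t>(fmt) < MAX_TENSOR_FORMATS);
    set |= (1u << static_cast<uint32_t>(fmt));
    return set;
}

bool contains(TensorFormatsBitSet set, TensorFormats fmt) {
    assert(static_cast<uint32_t>(fmt) < MAX_TENSOR_FORMATS);
    return set & (1u << static_cast<uint32_t>(fmt));
}

int main() {
    TensorFormatsBitSet set = 0;
    add(set, TensorFormats::NHWC);
    assert(contains(set, TensorFormats::NHWC));
    assert(!contains(set, TensorFormats::NCHW));
}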
......@@ -111,8 +111,6 @@ void LayoutTransformPass::apply(OptState& opt) const {
}
new_var = reformat({new_var});
}
new_inp[i] = new_var;
}
VarNode* new_out;
......@@ -164,7 +162,9 @@ void LayoutTransformPass::apply(OptState& opt) const {
}
} else {
auto new_opr = rewriter.auto_replace_outputs(opr);
for (auto&& ov : new_opr->usable_output()) {
var2fmts[ov] = base_fmt;
}
}
};
opt.graph().iter(on_opr);
......
......@@ -245,19 +245,26 @@ struct ConvTensorFormatsDispatcherImpl<Opr, OprFormat::NHWC> {
if (i == 2)
available &= opr->input(i)->dtype().enumv() ==
DTypeEnum::QuantizedS32;
else {
bool i4_config = opr->input(i)->dtype().enumv() ==
DTypeEnum::Quantized4Asymm ||
opr->input(i)->dtype().enumv() ==
DTypeEnum::QuantizedS4;
bool i8_config = opr->input(i)->dtype().enumv() ==
DTypeEnum::QuantizedS8;
available &= (i4_config || i8_config);
}
config.input_dtypes.emplace_back(opr->input(i)->dtype().enumv());
TensorType tensor_type =
i == 1 ? TensorType::WEIGHT : TensorType::FEATURE;
config.input_tensor_types.emplace_back(tensor_type);
}
bool i4_config =
opr->output(0)->dtype().enumv() == DTypeEnum::Quantized4Asymm ||
opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS4;
bool i8_config =
opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8;
available &= (i4_config || i8_config);
config.output_dtypes.emplace_back(opr->output(0)->dtype().enumv());
available &= conv.param().sparse == Opr::Param::Sparse::DENSE;
config.input_tensor_formats = {TensorFormats::NHWC, TensorFormats::NHWC,
......@@ -496,6 +503,38 @@ struct ConvTensorFormatsDispatcherImpl<opr::ConvolutionBackwardData,
}
};
template <>
struct ConvTensorFormatsDispatcherImpl<opr::ConvolutionBackwardData,
OprFormat::NHWC> {
using Opr = opr::ConvolutionBackwardData;
static Maybe<OprTensorFormatsConfiguration> dispatch(
const OperatorNodeBase* opr) {
const auto& conv = opr->cast_final_safe<Opr>();
OprTensorFormatsConfiguration config;
config.typeinfo = opr->dyn_typeinfo();
config.opr_format = OprFormat::NHWC;
bool available = true;
for (size_t i = 0; i < opr->input().size(); ++i) {
available &=
opr->input(i)->dtype().enumv() == DTypeEnum::QuantizedS8;
config.input_dtypes.emplace_back(opr->input(i)->dtype().enumv());
TensorType tensor_type =
i == 0 ? TensorType::WEIGHT : TensorType::FEATURE;
config.input_tensor_types.emplace_back(tensor_type);
}
available &= opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8;
config.output_dtypes.emplace_back(opr->output(0)->dtype().enumv());
available &= conv.param().sparse == Opr::Param::Sparse::DENSE;
config.input_tensor_formats = {TensorFormats::NHWC, TensorFormats::NHWC,
TensorFormats::NHWC,
TensorFormats::NHWC};
config.output_tensor_formats = {TensorFormats::NHWC};
if (available)
return config;
return None;
}
};
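The dispatchers above all follow the same contract: accumulate an `available` predicate over the input/output dtypes and the sparsity mode, and return a configuration only when every check passes. A simplified standalone sketch of that contract, with std::optional standing in for mgb's Maybe and plain enums for the dtype tags:

#include <cstddef>
#include <optional>
#include <vector>

enum class DTypeEnum { QuantizedS4, Quantized4Asymm, QuantizedS8, QuantizedS32 };

struct Config {
    std::vector<DTypeEnum> input_dtypes;
    DTypeEnum output_dtype;
};

// NHWC conv-bias rule from the hunk above: the bias (input 2) must be
// QuantizedS32; every other input and the output must be 4-bit
// (QuantizedS4/Quantized4Asymm) or 8-bit (QuantizedS8) quantized.
std::optional<Config> dispatch_nhwc(const std::vector<DTypeEnum>& inputs,
                                    DTypeEnum output) {
    auto is_i4 = [](DTypeEnum dt) {
        return dt == DTypeEnum::QuantizedS4 || dt == DTypeEnum::Quantized4Asymm;
    };
    auto is_i8 = [](DTypeEnum dt) { return dt == DTypeEnum::QuantizedS8; };
    bool available = true;
    Config config;
    for (size_t i = 0; i < inputs.size(); ++i) {
        if (i == 2)
            available &= inputs[i] == DTypeEnum::QuantizedS32;
        else
            available &= is_i4(inputs[i]) || is_i8(inputs[i]);
        config.input_dtypes.push_back(inputs[i]);
    }
    available &= is_i4(output) || is_i8(output);
    config.output_dtype = output;
    if (available)
        return config;
    return std::nullopt;  // mirrors `return None`
}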
struct StaticData {
struct KeyHash {
size_t operator()(const std::pair<Typeinfo*, OprFormat>& val) const {
......@@ -543,6 +582,7 @@ StaticData::StaticData() {
OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionForward, NCHW4);
OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionBackwardData, NCHW);
OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionBackwardData, NHWC);
OPR_TENSOR_FORMATS_CONFIG_REG(ConvolutionBackwardData, NCHW4);
OPR_SINGLE_IN_OUT_TENSOR_FORMATS_CONFIG_REG(WarpPerspectiveForward, NCHW);
......
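A hypothetical sketch of the table those OPR_TENSOR_FORMATS_CONFIG_REG lines populate: an unordered_map keyed by (operator typeinfo, OprFormat), using the pair hash shown in StaticData::KeyHash. The names below are illustrative, not mgb's actual macro expansion:

#include <functional>
#include <unordered_map>
#include <utility>

enum class OprFormat { NCHW, NHWC, NCHW4 };
struct Typeinfo {};  // stand-in for mgb's per-operator Typeinfo

struct KeyHash {
    size_t operator()(const std::pair<Typeinfo*, OprFormat>& val) const {
        size_t h1 = std::hash<const void*>{}(val.first);
        size_t h2 = std::hash<int>{}(static_cast<int>(val.second));
        return h1 ^ (h2 << 1);  // simple hash combine
    }
};

// Each registration maps (operator type, format) to its dispatch function.
using DispatchFn = std::function<bool()>;
using Registry =
        std::unordered_map<std::pair<Typeinfo*, OprFormat>, DispatchFn, KeyHash>;

void reg(Registry& table, Typeinfo* type, OprFormat fmt, DispatchFn fn) {
    table[{type, fmt}] = std::move(fn);
}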
......@@ -17,7 +17,6 @@
#include "megbrain/graph/event.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/plugin/base.h"
......@@ -167,11 +166,12 @@ private:
static constexpr float PROFILE_TIME_OUT = 1e7;
using ReformatAttribute = ReformatKey::Attribute;
/*!
* \brief profile opr format agnostic operators (like elemwise, elemwise
* multi type, typecvt etc.)
*
* \param opr pointer to the operator node to be profiled
* \param base_format the original tensor format of the operator node.
* \param available_tensor_formats the available tensor formats
* \return the operator node record
*/
OperatorNodeRecord profile_operator(
......@@ -220,7 +220,7 @@ private:
ReformatAttribute::DEFAULT) const;
float profile_var_node(const VarNode* var, TensorFormats base_format,
const ReformatKey& key) const;
int m_runs; /// sample times of the profiler
};
ProfilerImpl::OperatorNodeRecord ProfilerImpl::profile_operator(
......@@ -281,10 +281,6 @@ ProfilerImpl::OperatorNodeRecord ProfilerImpl::profile_operator(
record.opr = opr;
auto& costs = record.costs;
for (auto&& i : available_configs) {
costs[i.opr_format] =
profile_operator(opr, base_config, i, extra_attribute);
}
......@@ -403,8 +399,8 @@ float ProfilerImpl::profile_var_node(const VarNode* var,
auto builder = ReformatManager::instance().auto_aligned_reformat_featrue(
var, base_format, key);
auto y = builder({aligned_var.node()});
if (!m_var_node_filter(var, aligned_tensor_shape, y->shape(), key))
return PROFILE_TIME_OUT;
ThinHashSet<OperatorNodeBase*> set;
DepOprIter iter([&set](OperatorNodeBase* opr) { set.insert(opr); });
......@@ -533,6 +529,17 @@ ProfilerBase::ProfilerBase(float opr_threshold, float var_node_threshold)
m_var_node_threshold{var_node_threshold} {
m_opr_filter = [this](const OperatorNodeBase* opr,
OperatorNodeBase* new_opr) {
        /// \note: for performance considerations, we skip the nchw (naive)
        /// kernels for conv bias on the CUDA platform. To be removed later.
if (auto conv = try_cast_as_op<opr::ConvBiasForward>(new_opr)) {
if (conv->output(0)->comp_node().device_type() ==
CompNode::DeviceType::CUDA &&
conv->input(0)->dtype().category() ==
DTypeCategory::QUANTIZED &&
conv->param().format == OprFormat::NCHW) {
return false;
}
}
float comp1 = m_opr_footprint.get_computation(
const_cast<OperatorNodeBase*>(opr));
float comp2 = m_opr_footprint.get_computation(new_opr);
......@@ -541,18 +548,27 @@ ProfilerBase::ProfilerBase(float opr_threshold, float var_node_threshold)
return true;
};
m_var_node_filter = [this](const VarNode* var, TensorShape from,
TensorShape to, ReformatKey key) {
        /// \note: due to the alignment requirements of low-bit tensors, we
        /// skip some layout transforms for them. The skipped layout
        /// transforms have no corresponding dnn kernels and cannot be
        /// implemented with tensor manipulation operators (like reshape,
        /// dimshuffle, subtensor, etc.).
if (var->dtype().enumv() == DTypeEnum::QuantizedS4 ||
var->dtype().enumv() == DTypeEnum::Quantized4Asymm) {
if (key.input_format == TensorFormats::NCHW &&
key.output_format != TensorFormats::NHWC &&
key.output_format != TensorFormats::NCHWc64) {
return false;
}
if (key.output_format == TensorFormats::NCHW &&
key.input_format != TensorFormats::NHWC &&
key.input_format != TensorFormats::NCHWc64) {
return false;
}
}
TensorLayout orig_ly = {var->shape(), var->dtype()},
from_ly = {from, var->dtype()}, to_ly = {to, var->dtype()};
float orig_memory = orig_ly.span().dist_byte() * 2.f;
float reformat_memory =
from_ly.span().dist_byte() + to_ly.span().dist_byte();
......
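The new rule reads as a standalone predicate: for 4-bit tensors, a transform that touches NCHW is profiled only when the other side is NHWC or NCHWc64. A minimal sketch with hypothetical enumerators:

#include <cassert>

enum class TensorFormats { NCHW, NHWC, NCHWc4, NCHWc64 };

// Skip rule from the var_node_filter above: low-bit transforms into or out
// of NCHW are only supported against NHWC and NCHWc64; everything else has
// no dnn kernel and cannot be emulated with reshape/dimshuffle/subtensor.
bool low_bit_transform_allowed(TensorFormats in, TensorFormats out) {
    if (in == TensorFormats::NCHW && out != TensorFormats::NHWC &&
        out != TensorFormats::NCHWc64)
        return false;
    if (out == TensorFormats::NCHW && in != TensorFormats::NHWC &&
        in != TensorFormats::NCHWc64)
        return false;
    return true;
}

int main() {
    assert(low_bit_transform_allowed(TensorFormats::NCHW, TensorFormats::NHWC));
    assert(low_bit_transform_allowed(TensorFormats::NCHWc64, TensorFormats::NCHW));
    assert(!low_bit_transform_allowed(TensorFormats::NCHW, TensorFormats::NCHWc4));
    assert(!low_bit_transform_allowed(TensorFormats::NCHWc4, TensorFormats::NCHW));
}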
......@@ -329,10 +329,21 @@ ReformatManager::ReformatImpl ReformatManager::get(
const ReformatKey& key) const {
using Attribute = ReformatKey::Attribute;
MGB_TRY {
{
auto find = m_cache.find(key);
if (find != m_cache.end()) {
auto rst = find->second;
return rst;
}
}
if (key.attribute == Attribute::AUTO_PADDING_NHWC) {
auto key_ = key;
key_.attribute = Attribute::DEFAULT;
auto find = m_cache.find(key_);
if (find != m_cache.end()) {
auto rst = find->second;
return rst;
}
}
mgb_assert(!(key.attribute & Attribute::IMAGE2D) &&
!(key.attribute & Attribute::IC_SMALL));
......
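The lookup above gains a fallback: if the exact key misses and it carries AUTO_PADDING_NHWC, retry with the attribute relaxed to DEFAULT, since a reformat recorded without the padding attribute is still usable. A generic sketch of the pattern (simplified key type, std::map instead of the real cache):

#include <map>
#include <optional>
#include <string>
#include <tuple>

enum class Attribute { DEFAULT, AUTO_PADDING_NHWC };

struct Key {
    std::string formats;  // simplified stand-in for the real ReformatKey fields
    Attribute attribute;
    bool operator<(const Key& rhs) const {
        return std::tie(formats, attribute) <
               std::tie(rhs.formats, rhs.attribute);
    }
};

using Cache = std::map<Key, std::string>;

std::optional<std::string> lookup(const Cache& cache, Key key) {
    if (auto it = cache.find(key); it != cache.end())
        return it->second;  // exact hit
    if (key.attribute == Attribute::AUTO_PADDING_NHWC) {
        key.attribute = Attribute::DEFAULT;  // relax and retry
        if (auto it = cache.find(key); it != cache.end())
            return it->second;
    }
    return std::nullopt;
}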
......@@ -222,8 +222,9 @@ public:
};
using OprFilter = thin_function<bool(const cg::OperatorNodeBase*,
cg::OperatorNodeBase*)>;
using VarNodeFilter = thin_function<bool(const VarNode*, TensorShape,
TensorShape, TensorFormat)>;
using VarNodeFilter =
thin_function<bool(const VarNode*, TensorShape, TensorShape,
ReformatManager::ReformatKey)>;
ProfilerBase(float opr_threshold = 2.f, float var_node_threshold = 2.f);
ProfilerBase(OprFilter opr_filter, VarNodeFilter var_node_filter = {})
......
......@@ -146,18 +146,6 @@ private:
};
MGB_DEF_ENUM_CLASS_BIT_OPR(ReformatManager::ReformatKey::Attribute);
} // namespace gopt
} // namespace mgb
......
......@@ -4104,6 +4104,79 @@ TEST(TestGoptInference, PreProcessCaseAutopadNCHW64) {
opr::RelayoutFormat::Param::Mode::NCHW_NCHW4);
}
TEST(TestGoptInference, PreProcessCaseAutopadNHWC) {
REQUIRE_GPU(1);
HostTensorGenerator<dtype::Uint8, RandomDistribution::UNIFORM> gen(0, 255);
auto cn = CompNode::load("gpu0");
auto&& prop = CompNodeEnv::from_comp_node(cn).cuda_env().device_prop;
auto sm_ver = prop.major * 10 + prop.minor;
if (sm_ver < 75) {
printf("This testcast ignored due to insufficient cuda cap(got: %d, "
"expected: %d)\n",
sm_ver, 75);
return;
}
auto graph = ComputingGraph::make();
graph->options().graph_opt_level = 0;
auto mkcvar = [&](const char* name, const TensorShape& shp,
const DType& dtype) {
return opr::TypeCvt::make(
opr::SharedDeviceTensor::make(*graph, *gen(shp, cn))
.rename(name),
dtype);
};
size_t n = 2;
size_t c = 3;
size_t h = 32;
size_t w = 32;
auto host_x1 = gen({n, c, h, w}, cn);
auto x = opr::Host2DeviceCopy::make(*graph, host_x1);
auto x_u8_fp32 = opr::TypeCvt::make(x, dtype::Float32(), cn);
auto x_s8_fp32 = x_u8_fp32 - 128;
auto x_s8 = opr::TypeCvt::make(x_s8_fp32, dtype::QuantizedS8(2.5f), cn);
auto host_val =
std::make_shared<HostTensorND>(cn, dtype::QuantizedS8(2.5f));
TensorShape scalar{1, 1, 1, 1};
host_val->resize(scalar);
auto ptr = host_val->raw_ptr();
size_t size_bytes =
TensorLayout{scalar, dtype::QuantizedS8(2.5f)}.span().dist_byte();
std::memset(ptr, 0, size_bytes);
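    // Zero-pad the 3-channel input to 4 channels and transpose to NHWC;
    // the fuse-preprocess pass is expected to fold this pattern into a
    // single RelayoutFormat operator (checked at the end of the test).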
auto padding = opr::ImmutableTensor::make(*graph, *host_val);
padding = opr::Broadcast::make(padding, {n, 1, h, w});
auto padded_x = opr::Concat::make({x_s8, padding}, 1);
auto nhwc_x = opr::Dimshuffle::make(padded_x, {0, 2, 3, 1});
auto weight = mkcvar("weight", {16, 3, 3, 4}, dtype::QuantizedS8(2.5f)),
bias = mkcvar("bias", {1, 1, 1, 16}, dtype::QuantizedS32(6.25f));
opr::ConvBias::Param param;
param.format = opr::ConvBias::Param::Format::NHWC;
param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
param.stride_h = param.stride_w = 2;
param.pad_h = param.pad_w = 1;
auto result =
opr::ConvBias::make(nhwc_x, weight, bias, param, {},
OperatorNodeConfig{dtype::QuantizedS8(2.5f)});
auto y = opr::TypeCvt::make(result, dtype::Float32());
SymbolVar y_opt;
auto options = gopt::OptimizeForInferenceOptions{};
options.enable_fuse_preprocess();
unpack_vector(gopt::optimize_for_inference({y}, options), y_opt);
graph->compile({{y_opt, {}}})
->to_json()
->writeto_fpath(output_file(
"TestGoptInference.PreProcessCaseAutopadNHWC.json"));
HostTensorND host_y_opt, host_y;
auto func = graph->compile({make_callback_copy(y, host_y),
make_callback_copy(y_opt, host_y_opt)});
func->execute();
MGB_ASSERT_TENSOR_NEAR(host_y, host_y_opt, 1e-5);
ASSERT_TRUE(find_opr<opr::RelayoutFormat>(y_opt).param().mode ==
opr::RelayoutFormat::Param::Mode::NCHW_NCHW4);
}
TEST(TestGoptInference, WarpAndPreProcessCase1) {
REQUIRE_GPU(1);
HostTensorGenerator<dtype::Uint8, RandomDistribution::UNIFORM> gen(0, 255);
......
/**
* \file src/gopt/test/network.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "./network.h"
using namespace mgb;
SymbolVar Network::add_conv(SymbolVar f, size_t output_channels,
KernSize kern_size, DType out_dtype, bool has_relu,
Stride stride, Padding padding) {
static int weight_idx = 0;
static int bias_idx = 0;
size_t input_channels = f.node()->shape()[1];
auto weight = add_cvar(
ssprintf("w%d", weight_idx).c_str(),
{output_channels, input_channels, kern_size[0], kern_size[1]});
auto bias = add_cvar(ssprintf("b%d", bias_idx).c_str(),
{1, output_channels, 1, 1});
if (out_dtype.category() == DTypeCategory::QUANTIZED) {
weight = add_type_cvt(weight, out_dtype);
bias = add_type_cvt(bias, dtype::QuantizedS32{1.f});
}
opr::ConvBias::Param param;
param.stride_h = stride[0], param.stride_w = stride[1];
param.pad_h = padding[0], param.pad_w = padding[1];
if (has_relu) {
param.nonlineMode = opr::ConvBias::Param::NonlineMode::RELU;
} else {
param.nonlineMode = opr::ConvBias::Param::NonlineMode::IDENTITY;
}
auto conv = opr::ConvBias::make(f, weight, bias, param, {},
OperatorNodeConfig{out_dtype});
weight_idx++;
bias_idx++;
return conv;
}
SymbolVar Network::add_deconv(SymbolVar f, size_t ratio, size_t output_channels,
DType out_dtype) {
static int weight_idx = 0;
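    // stride = ratio, kernel = 2*ratio - ratio%2, pad = ratio/2 makes the
    // deconv upsample by exactly ratio: (in - 1)*stride - 2*pad + kernel == in*ratio.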
size_t kernel = ratio * 2 - ratio % 2;
size_t pad = ratio / 2;
size_t input_channels = f.node()->shape()[1];
auto weight = add_cvar(ssprintf("w%d", weight_idx).c_str(),
{input_channels, output_channels, kernel, kernel});
if (out_dtype.category() == DTypeCategory::QUANTIZED) {
weight = add_type_cvt(weight, out_dtype);
}
opr::ConvolutionBackwardData::Param param;
param.stride_h = param.stride_w = ratio;
param.pad_h = param.pad_w = pad;
auto deconv = opr::ConvolutionBackwardData::make(
weight, f, param, {}, OperatorNodeConfig{out_dtype});
weight_idx++;
return deconv;
}
SymbolVar Network::add_elemwise(const SymbolVarArray inps, DType out_dtype,
opr::Elemwise::Param::Mode mode) {
using ElemMode = opr::Elemwise::Param::Mode;
using MultiMode = opr::ElemwiseMultiType::Param::Mode;
static const ThinHashMap<ElemMode, MultiMode> map = {
{ElemMode::ADD, MultiMode::QADD},
{ElemMode::FUSE_ADD_RELU, MultiMode::QFUSE_ADD_RELU}};
if (out_dtype.category() == DTypeCategory::QUANTIZED) {
MultiMode alter_mode = map.at(mode);
return opr::ElemwiseMultiType::make(inps, {alter_mode},
OperatorNodeConfig{out_dtype});
} else {
return opr::Elemwise::make(inps, mode);
}
}
SymbolVar Network::add_pooling(SymbolVar f, Window window, Stride stride,
Padding padding,
opr::Pooling::Param::Mode mode) {
opr::Pooling::Param param;
param.window_h = window[0], param.window_w = window[1];
param.stride_h = stride[0], param.stride_w = stride[1];
param.pad_h = padding[0], param.pad_w = padding[1];
param.mode = mode;
return opr::Pooling::make(f, param);
}
SymbolVar Network::add_type_cvt(SymbolVar f, DType out_dtype) {
return opr::TypeCvt::make(f, out_dtype);
}
SymbolVar mgb::create_block(Network& network, SymbolVar f_in, size_t stride,
size_t num_outputs1, bool has_proj,
DType out_dtype) {
auto proj = f_in;
if (has_proj) {
proj = network.add_conv(f_in, num_outputs1, {1, 1}, out_dtype, false,
{stride, stride});
}
auto f = network.add_conv(f_in, num_outputs1, {3, 3}, out_dtype, true,
{stride, stride}, {1, 1});
f = network.add_conv(f, num_outputs1, {3, 3}, out_dtype, true, {1, 1},
{1, 1});
f = network.add_elemwise({f, proj}, out_dtype,
opr::Elemwise::Mode::FUSE_ADD_RELU);
return f;
}
SymbolVar mgb::make_resnet18(Network& network, size_t batch, DType out_dtype) {
auto data = network.add_var("data", {batch, 4, 224, 224});
if (out_dtype.category() == DTypeCategory::QUANTIZED)
data = network.add_type_cvt(data, dtype::QuantizedS8{1.f});
auto first = out_dtype;
if (out_dtype.category() == DTypeCategory::QUANTIZED)
first = dtype::QuantizedS8{1.f};
auto f = network.add_conv(data, 64, {7, 7}, first, true, {2, 2}, {3, 3});
if (out_dtype.enumv() == DTypeEnum::QuantizedS4 ||
out_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
f = network.add_type_cvt(f, out_dtype);
}
f = network.add_pooling(f, {3, 3}, {2, 2}, {1, 1});
using Vector = SmallVector<size_t, 4>;
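    // ResNet-18: four stages of two basic blocks; the first block of each
    // stage uses a projection shortcut, and stages 2-4 downsample by stride 2.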
Vector stages = {2, 2, 2, 2};
Vector mid_outputs = {64, 128, 256, 512};
Vector enable_stride = {0, 1, 1, 1};
for (size_t i = 0; i < 4; ++i) {
auto s = stages[i];
auto o = mid_outputs[i];
auto es = enable_stride[i];
for (size_t j = 0; j < s; ++j) {
size_t stride = !es || j > 0 ? 1 : 2;
            bool has_proj = j == 0;
f = create_block(network, f, stride, o, has_proj, out_dtype);
}
}
f = network.add_pooling(f, {7, 7}, {7, 7}, {0, 0},
opr::Pooling::Param::Mode::AVERAGE);
f = network.add_type_cvt(f, dtype::Float32());
return f;
}
namespace {
SymbolVarArray make_pyramids(Network& network, size_t batch, DType out_dtype) {
SymbolVarArray pyramids;
auto data = network.add_var("data", {batch, 3, 256, 256});
data = data + (-128.f);
if (out_dtype.category() == DTypeCategory::QUANTIZED)
data = network.add_type_cvt(data, dtype::QuantizedS8{1.f});
auto first = out_dtype;
if (out_dtype.category() == DTypeCategory::QUANTIZED)
first = dtype::QuantizedS8{1.f};
auto f = network.add_conv(data, 16, {3, 3}, first, true, {2, 2}, {1, 1});
f = network.add_conv(f, 16, {3, 3}, first, true, {1, 1}, {1, 1});
f = network.add_conv(f, 32, {3, 3}, first, true, {2, 2}, {1, 1});
if (out_dtype.enumv() == DTypeEnum::QuantizedS4 ||
out_dtype.enumv() == DTypeEnum::Quantized4Asymm) {
f = network.add_type_cvt(f, out_dtype);
}
using Vector = SmallVector<size_t, 4>;
Vector stages = {3, 6, 6, 3};
Vector mid_outputs = {32, 64, 128, 256};
Vector enable_stride = {0, 1, 1, 1};
for (size_t i = 0; i < 4; ++i) {
auto s = stages[i];
auto o = mid_outputs[i];
auto es = enable_stride[i];
for (size_t j = 0; j < s; ++j) {
size_t stride = !es || j > 0 ? 1 : 2;
            bool has_proj = j == 0;
f = create_block(network, f, stride, o, has_proj, out_dtype);
}
pyramids.push_back(f);
}
for (size_t i = 0; i < pyramids.size(); ++i) {
pyramids[i] = network.add_type_cvt(pyramids[i], first);
}
return pyramids;
}
SymbolVarArray fusion_pyramids_feature(Network& network,
SymbolVarArray pyramids,
size_t fpn_conv_channels) {
bool touch = false;
SymbolVar x;
SymbolVarArray fpn;
for (int i = 5; i >= 3; --i) {
auto f = network.add_conv(pyramids[i - 2], fpn_conv_channels, {1, 1},
dtype::QuantizedS8{1.f}, false, {1, 1},
{0, 0});
        if (!touch) {
            x = f;
            touch = true;
        } else {
x = network.add_deconv(x, 2, 16, dtype::QuantizedS8{1.f});
x = network.add_elemwise({x, f}, dtype::QuantizedS8{1.f},
opr::Elemwise::Mode::ADD);
}
fpn.push_back(x);
}
x = fpn[0];
for (int i = 6; i < 8; ++i) {
x = network.add_conv(x, fpn_conv_channels, {3, 3},
dtype::QuantizedS8{1.f}, true, {2, 2}, {1, 1});
}
return fpn;
}
} // namespace
SymbolVarArray mgb::make_det(Network& network, size_t batch, DType out_dtype) {
SymbolVarArray outputs;
auto pyramids = make_pyramids(network, batch, out_dtype);
auto fpn_hv = fusion_pyramids_feature(network, pyramids, 16);
auto fpn_plate = fusion_pyramids_feature(network, pyramids, 16);
outputs.insert(outputs.end(), fpn_hv.begin(), fpn_hv.end());
outputs.insert(outputs.end(), fpn_plate.begin(), fpn_plate.end());
return outputs;
}
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
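A short usage sketch of these helpers, assuming a CUDA device is available as "gpu0" (the batch size and dtype below are illustrative):

#include "./network.h"
using namespace mgb;

void run_example() {
    Network network(CompNode::load("gpu0"));
    // Build an int8-quantized ResNet-18 with batch size 16 ...
    SymbolVar out = make_resnet18(network, 16, dtype::QuantizedS8{1.f});
    // ... compile it, and fetch the float output on the host.
    HostTensorND host_out;
    auto func = network.graph->compile({make_callback_copy(out, host_out)});
    func->execute();
}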
/**
* \file src/gopt/test/network.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include "megbrain/test/helper.h"
#include "megbrain/gopt/framework.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/blas.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/opr/dnn/pooling.h"
#include "megbrain/opr/imgproc.h"
#include "megbrain/opr/nn_int.h"
#include "megbrain/opr/tensor_gen.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"
namespace mgb {
class Network {
private:
HostTensorGenerator<> gen;
CompNode cn;
public:
std::shared_ptr<ComputingGraph> graph = ComputingGraph::make();
Network(CompNode cn_) : cn{cn_} {}
~Network() noexcept = default;
using KernSize = SmallVector<size_t, 2>;
using Stride = SmallVector<size_t, 2>;
using Padding = SmallVector<size_t, 2>;
SymbolVar add_var(const char* name, const TensorShape& shp = {1}) {
return opr::Host2DeviceCopy::make(*graph, gen(shp), cn).rename(name);
}
SymbolVar add_cvar(const char* name, const TensorShape& shp = {1}) {
return opr::SharedDeviceTensor::make(*graph, *gen(shp), cn)
.rename(name);
}
SymbolVar add_conv(SymbolVar f, size_t output_channels, KernSize kern_size,
DType out_dtype = dtype::Float32(), bool has_relu = true,
Stride stride = {1, 1}, Padding padding = {0, 0});
SymbolVar add_deconv(SymbolVar f, size_t ratio, size_t output_channels,
DType out_dtype);
SymbolVar add_elemwise(
const SymbolVarArray inps, DType out_dtype = dtype::Float32(),
opr::Elemwise::Param::Mode mode = opr::Elemwise::Param::Mode::ADD);
using Window = SmallVector<size_t, 2>;
SymbolVar add_pooling(
SymbolVar f, Window window, Stride stride = {1, 1},
Padding padding = {0, 0},
opr::Pooling::Param::Mode mode = opr::Pooling::Param::Mode::MAX);
SymbolVar add_type_cvt(SymbolVar f, DType out_dtype = dtype::Float32());
};
SymbolVar create_block(Network& network, SymbolVar f, size_t stride,
size_t num_outputs1, bool has_proj = false,
DType out_dtype = dtype::Float32());
SymbolVar make_resnet18(Network& network, size_t batch = 16,
DType out_dtype = dtype::Float32());
SymbolVarArray make_det(Network& network, size_t batch = 16,
DType out_dtype = dtype::Float32());
} // namespace mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}