Commit 6ce212d2 authored by Megvii Engine Team

refactor(mgb): refactor group conv

GitOrigin-RevId: 7afd3126900938661927c047aac87cb4aeaa9f13
Parent: febd0b17
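This commit replaces the per-dense-algorithm grouped wrappers (the `gconv`/`gconv_refhold` vectors and `algo2gconv` maps) with a single `AlgoGroupConvGeneral` per operator that builds a dense sub-operator and iterates over the groups. As a reading aid before the hunks, here is a minimal, self-contained C++ sketch of that pattern; the types and names below are illustrative stand-ins, not the MegDNN API, and the sketch assumes N == 1 with contiguous NCHW data (the real code instead keeps the original strides in the per-group layouts):

```cpp
// Sketch of the pattern this commit converges on: one generic group algorithm
// that shrinks the channel dimension by `group`, then runs an independently
// chosen dense convolution once per group, advancing raw pointers by
// per-group strides (cf. the incr_voidp loops in the diff below).
#include <cstddef>
#include <functional>

struct Layout {
    std::size_t n, c, h, w;  // NCHW
};

// Stand-in for the dense sub-operator selected by the heuristic/profiler.
using DenseConv = std::function<void(const float*, const float*, float*,
                                     const Layout&, const Layout&)>;

void group_conv(const float* src, const float* flt, float* dst,
                Layout src_layout, Layout dst_layout, std::size_t group,
                std::size_t fh, std::size_t fw, const DenseConv& dense) {
    Layout src_pg = src_layout, dst_pg = dst_layout;
    src_pg.c /= group;  // icpg: input channels per group
    dst_pg.c /= group;  // ocpg: output channels per group
    const std::size_t strd_src = src_pg.c * src_layout.h * src_layout.w;
    const std::size_t strd_dst = dst_pg.c * dst_layout.h * dst_layout.w;
    const std::size_t strd_flt = src_pg.c * dst_pg.c * fh * fw;
    for (std::size_t g = 0; g < group; ++g) {
        dense(src, flt, dst, src_pg, dst_pg);  // dense conv on one group slice
        src += strd_src;  // advance to the next group's channel slice
        dst += strd_dst;
        flt += strd_flt;
    }
}
```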
@@ -42,24 +42,11 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
     conv_algos.push_back(&matmul);
     conv_algos.push_back(&matmul8x8x32);
     conv_algos.push_back(&batched_matmul);
+    conv_algos.push_back(&group);
-    conv_algos.reserve(conv_algos.size() * 2);
-    //! add gconv algos by AlgoGroupConvGeneral
-    size_t algo_size = conv_algos.size();
-    for (size_t i = 3; i < algo_size; ++i) {
-        gconv_refhold.emplace_back(new AlgoGroupConvGeneral(conv_algos[i]));
-        algo2gconv[conv_algos[i]] = gconv_refhold.back().get();
-        conv_algos.push_back(gconv_refhold.back().get());
-    }
     for (auto&& algo : conv_algos) {
         all_algos.push_back(algo);
     }
-    non_cudnn_algos.push_back(all_algos.rbegin()[4]); // group inplace_matmul
-    non_cudnn_algos.push_back(all_algos.rbegin()[3]); // group matmul
-    non_cudnn_algos.push_back(all_algos.rbegin()[2]); // group matmul_8x8x32
-    non_cudnn_algos.push_back(all_algos.rbegin()[1]); // group batched_matmul
-    non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group 1x1
     all_algos.push_back(&bfloat16);
     bfloat16_algos.push_back(&bfloat16);
@@ -118,7 +105,7 @@ ConvBiasForwardImpl::AlgoPack ConvBiasForwardImpl::sm_algo_pack;
 MEGDNN_DEF_GET_ALGO_FROM_DESC(ConvBiasForwardImpl)
 
 ConvBiasForwardImpl::AlgoBase::SizeArgs::SizeArgs(
-        ConvBiasForwardImpl* o, const TensorLayout& src,
+        const ConvBiasForwardImpl* o, const TensorLayout& src,
         const TensorLayout& filter, const TensorLayout& bias,
         const TensorLayout& z, const TensorLayout& dst,
         const PreprocessedFilter* preprocessed_filter)
@@ -127,7 +114,7 @@ ConvBiasForwardImpl::AlgoBase::SizeArgs::SizeArgs(
                    dst, preprocessed_filter) {}
 
 ConvBiasForwardImpl::AlgoBase::SizeArgs::SizeArgs(
-        ConvBiasForwardImpl* o, const TensorLayout& src,
+        const ConvBiasForwardImpl* o, const TensorLayout& src,
         const TensorLayout& filter, const CanonizedFilterMeta& filter_meta,
         const TensorLayout& bias, const TensorLayout& z,
         const TensorLayout& dst, const PreprocessedFilter* preprocessed_filter)
......
@@ -78,15 +78,15 @@ public:
     AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; }
     struct SizeArgs : public conv_bias::BiasForwardSizeArgs {
-        ConvBiasForwardImpl* opr;
+        const ConvBiasForwardImpl* opr;
         const PreprocessedFilter* preprocessed_filter;
 
         std::string to_string() const;
-        SizeArgs(ConvBiasForwardImpl* opr, const TensorLayout& src,
+        SizeArgs(const ConvBiasForwardImpl* opr, const TensorLayout& src,
                  const TensorLayout& filter, const TensorLayout& bias,
                  const TensorLayout& z, const TensorLayout& dst,
                  const PreprocessedFilter* preprocessed_filter = nullptr);
-        SizeArgs(ConvBiasForwardImpl* opr, const TensorLayout& src,
+        SizeArgs(const ConvBiasForwardImpl* opr, const TensorLayout& src,
                  const TensorLayout& filter,
                  const CanonizedFilterMeta& filter_meta,
                  const TensorLayout& bias, const TensorLayout& z,
@@ -434,27 +434,24 @@ private:
 //! implement group conv by another algo
 class ConvBiasForwardImpl::AlgoGroupConvGeneral final : public AlgoBase {
 public:
-    AlgoGroupConvGeneral(AlgoBase* impl);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    const char* name() const override { return m_name.c_str(); }
+    const char* name() const override {
+        if (m_name.empty()) {
+            m_name = ConvBiasForward::algo_name<DirectParam>("CUDA:GROUP_CONV",
+                                                             {});
+        }
+        return m_name.c_str();
+    }
 
     AlgoAttribute attribute() const override {
-        auto ret = AlgoAttribute::DEFAULT;
-#define cb(attr)                               \
-    if (m_impl->contain_attribute_all(attr)) { \
-        ret |= attr;                           \
-    }
-        MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
-#undef cb
-        if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
-            ret |= AlgoAttribute::REPRODUCIBLE;
-        }
-        return ret;
+        return AlgoAttribute::REPRODUCIBLE;
     }
 
     static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
@@ -463,8 +460,7 @@ public:
 private:
     WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
-    AlgoBase* m_impl;
-    std::string m_name;
+    mutable std::string m_name;
 };
 
 #if CUDA_VERSION >= 10000
@@ -1087,9 +1083,8 @@ public:
     std::vector<AlgoInt4Int4NHWCIMMAImplicitGemm> int4_int4_nhwc_imma;
     std::vector<AlgoUInt4Int4NHWCIMMAImplicitGemm> uint4_int4_nhwc_imma;
 #endif
-    std::vector<std::unique_ptr<AlgoGroupConvGeneral>> gconv_refhold;
+    AlgoGroupConvGeneral group;
     AlgoBFloat16 bfloat16;
-    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
 
     AlgoBase* cudnn_conv_bias_act_from_enum(cudnnConvolutionFwdAlgo_t algo);
......
@@ -9,6 +9,7 @@
  * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  */
 
+#include <utility>
 #include "src/common/conv_bias.h"
 #include "src/cuda/conv_bias/algo.h"
@@ -16,36 +17,80 @@ using namespace megdnn;
 using namespace cuda;
 using namespace conv_bias;
 
-void ConvBiasForwardImpl::AlgoGroupConvGeneral::modify_size_args(
-        ConvBiasForwardImpl::AlgoBase::SizeArgs& args, TensorLayout& src_pg,
-        TensorLayout& dst_pg, TensorLayout& bias_pg) {
-    src_pg = *args.src_layout;
-    dst_pg = *args.dst_layout;
-    bias_pg = *args.bias_layout;
+namespace {
+std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
+        const ConvBiasForwardImpl::AlgoBase::SizeArgs& args) {
+    TensorLayout src_pg = *args.src_layout;
+
+    SmallVector<size_t> flt_shape(0);
+    std::vector<ptrdiff_t> flt_stride(0);
+    size_t idx = 0;
+    // check if the first dim is group
+    if (args.filter_layout->ndim > args.src_layout->ndim)
+        ++idx;
+    for (; idx < args.filter_layout->ndim; ++idx) {
+        flt_shape.push_back(args.filter_layout->shape[idx]);
+        flt_stride.push_back(args.filter_layout->stride[idx]);
+    }
+    TensorLayout filter_pg(flt_shape, flt_stride, args.filter_layout->dtype,
+                           args.filter_layout->format);
+    TensorLayout bias_pg = *args.bias_layout;
+    TensorLayout z_pg = *args.z_layout;
+    TensorLayout dst_pg = *args.dst_layout;
+
     auto nr_grp = args.filter_meta.group;
-    args.filter_meta.group = 1;
     size_t c_pos;
-    if (args.filter_meta.format == Param::Format::NCHW ||
-        args.filter_meta.format == Param::Format::NCHW4) {
+    if (args.filter_meta.format == megdnn::param::ConvBias::Format::NCHW ||
+        args.filter_meta.format == megdnn::param::ConvBias::Format::NCHW4) {
         c_pos = 1;
     } else {
-        megdnn_assert(args.filter_meta.format == Param::Format::NHWC,
+        megdnn_assert(args.filter_meta.format ==
+                              megdnn::param::ConvBias::Format::NHWC,
                       "invalid conv format");
         c_pos = 3;
     }
     src_pg.shape[c_pos] /= nr_grp;
-    dst_pg.shape[c_pos] /= nr_grp;
     bias_pg.ndim = 0;
-    args.src_layout = &src_pg;
-    args.dst_layout = &dst_pg;
-    args.bias_layout = &bias_pg;
-    args.nonlinear_mode = Param::NonlineMode::IDENTITY;
+    dst_pg.shape[c_pos] /= nr_grp;
+
+    megdnn::param::ConvBias param = args.opr->param();
+    param.sparse = megdnn::param::ConvBias::Sparse::DENSE;
+    param.nonlineMode = megdnn::param::ConvBias::NonlineMode::IDENTITY;
+    std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> ret;
+    ret.first = {src_pg, filter_pg, bias_pg, z_pg, dst_pg};
+    ret.second = param;
+    return ret;
 }
 
-ConvBiasForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(AlgoBase* impl)
-        : m_impl{impl} {
-    m_name = ConvBiasForward::algo_name<DirectParam>(
-            ssprintf("%s:%s", "CUDA:GROUP_CONV", impl->name()), {});
+std::pair<TensorLayoutArray, std::unique_ptr<ConvBiasForward>> prepare_sub_opr(
+        const ConvBiasForwardImpl::AlgoBase::SizeArgs& args) {
+    auto convbias_opr = args.handle->create_operator<ConvBias>();
+    set_execution_policy<ConvBiasForward, ConvBiasForward*>(
+            args.opr, convbias_opr.get());
+
+    auto&& config = sub_opr_config(args);
+    convbias_opr->param() = config.second;
+
+    return {config.first, std::move(convbias_opr)};
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvBiasForwardImpl::AlgoGroupConvGeneral::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    AlgoBase::SizeArgs args{static_cast<const ConvBiasForwardImpl*>(opr),
+                            layouts[0], layouts[1], layouts[2],
+                            layouts[3], layouts[4]};
+    auto&& config = sub_opr_config(args);
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str, config.first}};
 }
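The `get_subopr_list` hook above identifies the desired sub-operator by serializing its param into a byte string that keys the profiling cache. Below is a hedged sketch of that serialization idea with a hypothetical stand-in param struct; the helper names mirror the diff but are not guaranteed to match the real MegDNN signatures:

```cpp
// Byte-copy a trivially copyable param into a string key and read it back.
#include <cassert>
#include <cstddef>
#include <cstring>
#include <string>
#include <type_traits>

struct ConvParam {  // hypothetical stand-in for megdnn::param::ConvBias
    int pad_h, pad_w, stride_h, stride_w;
    bool dense;
};

template <typename POD>
void serialize_write_pod(const POD& val, std::string& out) {
    static_assert(std::is_trivially_copyable<POD>::value,
                  "only byte-copyable params may be serialized");
    out.append(reinterpret_cast<const char*>(&val), sizeof(POD));
}

template <typename POD>
POD deserialize_read_pod(const std::string& in, std::size_t offset = 0) {
    POD ret;
    assert(in.size() >= offset + sizeof(POD));
    std::memcpy(&ret, in.data() + offset, sizeof(POD));
    return ret;
}

int main() {
    ConvParam p{1, 1, 2, 2, true};
    std::string key;
    serialize_write_pod(p, key);  // `key` now identifies this sub-opr config
    assert(deserialize_read_pod<ConvParam>(key).stride_h == 2);
}
```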
 bool ConvBiasForwardImpl::AlgoGroupConvGeneral::is_available(
@@ -62,10 +107,10 @@ bool ConvBiasForwardImpl::AlgoGroupConvGeneral::is_available(
         param.format == param::ConvBias::Format::NCHW32)
         return false;
 
-    auto sub_args = args;
-    TensorLayout src_pg, dst_pg, bias_pg;
-    modify_size_args(sub_args, src_pg, dst_pg, bias_pg);
-    return m_impl->is_available(sub_args);
+    auto config = prepare_sub_opr(args);
+    return get_algorithm(static_cast<ConvBiasForwardImpl*>(config.second.get()),
+                         config.first[0], config.first[1], config.first[2],
+                         config.first[3], config.first[4]);
 }
 WorkspaceBundle ConvBiasForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
@@ -80,12 +125,12 @@ WorkspaceBundle ConvBiasForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
         sizes.push_back(dst_layout.span().dist_byte());
     }
 
-    auto sub_args = args;
-    sub_args.dst_layout = &dst_layout;
-    TensorLayout src_pg, dst_pg, bias_pg;
-    modify_size_args(sub_args, src_pg, dst_pg, bias_pg);
-    sizes.insert(sizes.begin(),
-                 m_impl->get_workspace_in_bytes(sub_args));
+    auto config = prepare_sub_opr(args);
+    size_t mm_ws = config.second->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2],
+            config.first[3], config.first[4], nullptr);
+
+    sizes.insert(sizes.begin(), mm_ws);
     return {ptr, std::move(sizes)};
 }
 void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
@@ -109,28 +154,13 @@ void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
         auto sub_args = args;
         sub_args.dst_tensor = &conv_dst_tensor;
         sub_args.dst_layout = &conv_dst_tensor.layout;
-        TensorND tsrc{*args.src_tensor}, tdst{conv_dst_tensor},
-                tbias{*args.bias_tensor};
-        SmallVector<size_t> flt_shape(0);
-        std::vector<ptrdiff_t> flt_stride(0);
-        size_t idx = 0;
-        // check if the first dim is group
-        if (args.filter_tensor->layout.ndim > args.src_layout->ndim)
-            ++idx;
-        for (; idx < args.filter_tensor->layout.ndim; ++idx) {
-            flt_shape.push_back(args.filter_tensor->layout[idx]);
-            flt_stride.push_back(args.filter_tensor->layout.stride[idx]);
-        }
-        TensorND tflt{args.filter_tensor->raw_ptr,
-                      TensorLayout{flt_shape, flt_stride,
-                                   args.filter_tensor->layout.dtype,
-                                   args.filter_tensor->layout.format}};
-        modify_size_args(sub_args, tsrc.layout, tdst.layout, tbias.layout);
-        sub_args.src_tensor = &tsrc;
-        sub_args.dst_tensor = &tdst;
-        sub_args.filter_tensor = &tflt;
-        sub_args.bias_tensor = &tbias;
+        auto config = prepare_sub_opr(sub_args);
+        TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
+        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]};
+        TensorND tbias{args.bias_tensor->raw_ptr, config.first[2]};
+        TensorND tz{args.z_tensor->raw_ptr, config.first[3]};
+        TensorND tdst{conv_dst_tensor.raw_ptr, config.first[4]};
 
         size_t c_pos;
         if (args.filter_meta.format == Param::Format::NCHW ||
@@ -150,16 +180,17 @@ void ConvBiasForwardImpl::AlgoGroupConvGeneral::exec(
              strd_dst = tdst.layout.stride[c_pos] * fm.ocpg *
                         tdst.layout.dtype.size(),
              strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
-                        tflt.layout.dtype.size();
+                        tfilter.layout.dtype.size();
         if (args.filter_meta.format == Param::Format::NCHW4) {
             strd_src >>= 2;
             strd_dst >>= 2;
         }
         for (uint32_t g = 0; g < grp; ++g) {
-            m_impl->exec(sub_args);
+            config.second->exec(tsrc, tfilter, tbias, tz, tdst, nullptr,
+                                bundle.get_workspace(0));
             incr_voidp(tsrc.raw_ptr, strd_src);
             incr_voidp(tdst.raw_ptr, strd_dst);
-            incr_voidp(tflt.raw_ptr, strd_flt);
+            incr_voidp(tfilter.raw_ptr, strd_flt);
         }
     }
     handle_bias_and_nonlinear(args.handle, args.nonlinear_mode,
......
@@ -193,25 +193,17 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
             return algo;
     }
 
-    if (args.filter_meta.group > 1) {
-        auto orig_args = conv_args;
-        TensorLayout src, dst, bias;
-        AlgoGroupConvGeneral::modify_size_args(conv_args, src, dst, bias);
-
-        if (auto algo = get_1x1_algo(conv_args)) {
-            return sm_algo_pack.algo2gconv.at(algo);
-        }
-
-        if (is_cudnn_supported(conv_args)) {
-            if (auto algo = get_cudnn_algo(cudnn_conv_from_enum_wrapper)) {
-                return sm_algo_pack.algo2gconv.at(algo);
-            }
-        }
-        conv_args = orig_args;
-    }
-
     if (auto algo = get_1x1_algo(args)) {
         return algo;
     }
 
+    if (args.filter_meta.group > 1) {
+        if (auto algo = megdnn::get_algo_match_attribute<ConvBiasForwardImpl>(
+                    &sm_algo_pack.group, positive_attr, negative_attr)) {
+            return algo;
+        }
+    }
+
     if (sm_algo_pack.fallback_nchw_qs8.is_available_attribute(
                 args, positive_attr, negative_attr, workspace_limit_in_bytes)) {
         return &sm_algo_pack.fallback_nchw_qs8;
......
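The heuristic above now returns the single group algorithm when its attributes match, instead of looking up a pre-built grouped twin of the chosen dense algorithm in `algo2gconv`. A toy, self-contained sketch of that control flow (stub types and names, not the real MegDNN signatures):

```cpp
// Simplified selection flow after this commit: for grouped problems, hand
// back the one group wrapper; it will pick its own dense sub-algorithm later.
#include <cstdio>

struct Algo {
    const char* name;
    bool reproducible;
};

struct AlgoPack {
    Algo group{"CUDA:GROUP_CONV", true};
    Algo fallback{"MATMUL", true};
};

// The 1x1 fast path and workspace-limit checks are omitted for brevity.
Algo* heuristic(unsigned group, bool need_reproducible, AlgoPack& pack) {
    if (group > 1 && (!need_reproducible || pack.group.reproducible))
        return &pack.group;  // the wrapper picks its own dense sub-algorithm
    return &pack.fallback;
}

int main() {
    AlgoPack pack;
    std::printf("%s\n", heuristic(4, true, pack)->name);  // CUDA:GROUP_CONV
}
```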
@@ -39,25 +39,9 @@ ConvolutionBackwardDataImpl::AlgoPack::AlgoPack() {
     int8_algos.push_back(&int8_nchw_dotprod);
     all_algos.push_back(&int8_nchw_dotprod);
 
-    all_algos.reserve(all_algos.size() * 2);
-    // add gconv algos by AlgoGroupConvGeneral
-    auto all_algos_data = all_algos.data();
-    size_t group_algo_start = 2;
-    for (size_t i = group_algo_start; i < all_algos.size(); ++i) {
-        gconv.push_back({all_algos[i]});
-    }
-    for (size_t i = group_algo_start; i < all_algos.size(); ++i) {
-        algo2gconv[all_algos[i]] = &gconv[i - group_algo_start];
-    }
-    for (auto&& i : gconv) {
-        all_algos.push_back(&i);
-    }
-    megdnn_assert(all_algos_data == all_algos.data());
-    non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group matmul
-
     all_algos.push_back(&bfloat16);
     bfloat16_algos.push_back(&bfloat16);
+    all_algos.push_back(&group);
 
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
@@ -80,13 +64,13 @@ ConvolutionBackwardDataImpl::AlgoPack::cudnn_from_enum(
 ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;
 
 ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
-        ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
+        const ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
         const TensorLayout& diff, const TensorLayout& grad)
         : SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter),
                    diff, grad) {}
 
 ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
-        ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
+        const ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
         const CanonizedFilterMeta& filter_meta, const TensorLayout& diff,
         const TensorLayout& grad)
         : handle{concrete_handle(o->handle())},
@@ -97,7 +81,7 @@ ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
           opr{o} {}
 
 ConvolutionBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs(
-        ConvolutionBackwardDataImpl* opr, _megdnn_tensor_in filter,
+        const ConvolutionBackwardDataImpl* opr, _megdnn_tensor_in filter,
         _megdnn_tensor_in diff, _megdnn_tensor_out grad,
         _megdnn_workspace workspace)
         : SizeArgs(opr, filter.layout, diff.layout, grad.layout),
......
@@ -49,15 +49,17 @@ public:
         HandleImpl* handle;
         CanonizedFilterMeta filter_meta;
         const TensorLayout *diff_layout, *grad_layout, *filter_layout;
-        ConvolutionBackwardDataImpl* opr;
+        const ConvolutionBackwardDataImpl* opr;
 
         std::string to_string() const;
         void init_desc(convolution::CUDNNBwdDataDescs& desc) const {
             desc.set(filter_meta, *diff_layout, *grad_layout, opr->param());
         }
-        SizeArgs(ConvolutionBackwardDataImpl* opr, const TensorLayout& filter,
-                 const TensorLayout& diff, const TensorLayout& grad);
-        SizeArgs(ConvolutionBackwardDataImpl* opr, const TensorLayout& filter,
+        SizeArgs(const ConvolutionBackwardDataImpl* opr,
+                 const TensorLayout& filter, const TensorLayout& diff,
+                 const TensorLayout& grad);
+        SizeArgs(const ConvolutionBackwardDataImpl* opr,
+                 const TensorLayout& filter,
                  const CanonizedFilterMeta& filter_meta,
                  const TensorLayout& diff, const TensorLayout& grad);
@@ -70,7 +72,7 @@ public:
         const TensorND *filter_tensor, *diff_tensor, *grad_tensor;
         Workspace workspace;
 
-        ExecArgs(ConvolutionBackwardDataImpl* opr, _megdnn_tensor_in filter,
+        ExecArgs(const ConvolutionBackwardDataImpl* opr, _megdnn_tensor_in filter,
                  _megdnn_tensor_in diff, _megdnn_tensor_out grad,
                  _megdnn_workspace workspace);
     };
@@ -219,35 +221,26 @@ private:
 //! implement group conv by another algo
 class ConvolutionBackwardDataImpl::AlgoGroupConvGeneral final
         : public AlgoBase {
-    AlgoBase* m_impl;
-    std::string m_name;
-
 public:
-    AlgoGroupConvGeneral(AlgoBase* impl);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    const char* name() const override { return m_name.c_str(); }
-
-    static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg,
-                                 TensorLayout& grad_pg);
+    const char* name() const override {
+        return "CUDA:GROUP_CONV_BACKWARD_DATA";
+    }
 
     MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
 
     AlgoAttribute attribute() const override {
-        auto ret = AlgoAttribute::DEFAULT;
-#define cb(attr)                               \
-    if (m_impl->contain_attribute_all(attr)) { \
-        ret |= attr;                           \
-    }
-        MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
-#undef cb
-        if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
-            ret |= AlgoAttribute::REPRODUCIBLE;
-        }
-        return ret;
+        return AlgoAttribute::REPRODUCIBLE;
     }
+
+private:
+    WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
 class ConvolutionBackwardDataImpl::AlgoInt8NCHW4DotProdImplicitGemm final
@@ -319,9 +312,8 @@ public:
     AlgoMatmul matmul;
     AlgoChanwise chanwise;
     AlgoChanwiseSmall chanwise_small;
-    std::vector<AlgoGroupConvGeneral> gconv;
-    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
     AlgoBFloat16 bfloat16;
+    AlgoGroupConvGeneral group;
     std::vector<AlgoInt8NCHW4DotProdImplicitGemm> int8_nchw4_dotprod;
     AlgoInt8NCHWDotProdImplicitGemm int8_nchw_dotprod;
......
@@ -16,24 +16,63 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution;
 
-void ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args(
-        ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args,
-        TensorLayout& diff_pg, TensorLayout& grad_pg) {
-    diff_pg = *args.diff_layout;
-    grad_pg = *args.grad_layout;
+namespace {
+std::pair<TensorLayoutArray, Convolution::Param> sub_opr_config(
+        const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args) {
+    SmallVector<size_t> flt_shape(0);
+    std::vector<ptrdiff_t> flt_stride(0);
+    size_t idx = 0;
+    // check if the first dim is group
+    if (args.filter_layout->ndim > args.diff_layout->ndim)
+        ++idx;
+    for (; idx < args.filter_layout->ndim; ++idx) {
+        flt_shape.push_back(args.filter_layout->shape[idx]);
+        flt_stride.push_back(args.filter_layout->stride[idx]);
+    }
+    TensorLayout filter_pg(flt_shape, flt_stride, args.filter_layout->dtype,
+                           args.filter_layout->format);
+    TensorLayout diff_pg = *args.diff_layout;
+    TensorLayout grad_pg = *args.grad_layout;
+
     auto nr_grp = args.filter_meta.group;
-    args.filter_meta.group = 1;
-    diff_pg.shape[1] /= nr_grp;
-    grad_pg.shape[1] /= nr_grp;
-    args.diff_layout = &diff_pg;
-    args.grad_layout = &grad_pg;
+    size_t c_pos = 1;
+    diff_pg.shape[c_pos] /= nr_grp;
+    grad_pg.shape[c_pos] /= nr_grp;
+
+    megdnn::param::Convolution param = args.opr->param();
+    param.sparse = megdnn::param::ConvBias::Sparse::DENSE;
+    std::pair<TensorLayoutArray, ConvolutionBackwardDataImpl::Param> ret;
+    ret.first = {filter_pg, diff_pg, grad_pg};
+    ret.second = param;
+    return ret;
 }
 
-ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
-        AlgoBase* impl)
-        : m_impl{impl} {
-    m_name = "group_conv:";
-    m_name += impl->name();
+std::pair<TensorLayoutArray, std::unique_ptr<ConvolutionBackwardData>>
+prepare_sub_opr(const ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args) {
+    auto conv_bwd_data_opr =
+            args.handle->create_operator<ConvolutionBackwardData>();
+    set_execution_policy<ConvolutionBackwardData, ConvolutionBackwardData*>(
+            args.opr, conv_bwd_data_opr.get());
+    auto&& config = sub_opr_config(args);
+    conv_bwd_data_opr->param() = config.second;
+
+    return {config.first, std::move(conv_bwd_data_opr)};
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    AlgoBase::SizeArgs args{
+            static_cast<const ConvolutionBackwardDataImpl*>(opr), layouts[0],
+            layouts[1], layouts[2]};
+    auto&& config = sub_opr_config(args);
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION_BACKWARD_DATA, param_str,
+             config.first}};
 }
 bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
@@ -46,44 +85,60 @@ bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
     }
     if (args.filter_meta.group <= 1)
         return false;
-    auto sub_args = args;
-    TensorLayout diff_pg, grad_pg;
-    modify_size_args(sub_args, diff_pg, grad_pg);
-    return m_impl->is_available(sub_args);
+    if (args.filter_meta.format !=
+        megdnn::param::Convolution::Format::NCHW) {
+        return false;
+    }
+
+    auto config = prepare_sub_opr(args);
+    return get_algorithm(
+            static_cast<ConvolutionBackwardDataImpl*>(config.second.get()),
+            config.first[0], config.first[1], config.first[2]);
+}
+
+WorkspaceBundle
+ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle(
+        void* ptr, const SizeArgs& args) const {
+    auto config = prepare_sub_opr(args);
+    size_t sizes = config.second->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]);
+    return {ptr, {sizes}};
 }
 
 size_t
 ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
         const SizeArgs& args) const {
-    auto sub_args = args;
-    TensorLayout diff_pg, grad_pg;
-    modify_size_args(sub_args, diff_pg, grad_pg);
-    return m_impl->get_workspace_in_bytes(sub_args);
+    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
 }
 
 void ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::exec(
         const ExecArgs& args) const {
-    auto sub_args = args;
-    TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor},
-            tgrad{*args.grad_tensor};
-    modify_size_args(sub_args, tdiff.layout, tgrad.layout);
-    sub_args.filter_tensor = &tflt;
-    sub_args.diff_tensor = &tdiff;
-    sub_args.grad_tensor = &tgrad;
-    auto grp = args.filter_meta.group;
-
-    auto&& fm = args.filter_meta;
-    auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
-                     tflt.layout.dtype.size()),
-         strd_diff =
-                 (tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
-         strd_grad =
-                 (tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size());
-    for (uint32_t g = 0; g < grp; ++g) {
-        m_impl->exec(sub_args);
-        incr_voidp(tflt.raw_ptr, strd_flt);
-        incr_voidp(tdiff.raw_ptr, strd_diff);
-        incr_voidp(tgrad.raw_ptr, strd_grad);
+    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
+    {
+        auto config = prepare_sub_opr(args);
+        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]};
+        TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
+        TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
+
+        size_t c_pos = 1;
+        auto&& fm = args.filter_meta;
+
+        auto strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
+                        tfilter.layout.dtype.size(),
+             strd_diff = tdiff.layout.stride[c_pos] * fm.ocpg *
+                         tdiff.layout.dtype.size(),
+             strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg *
+                          tgrad.layout.dtype.size());
+
+        auto grp = args.filter_meta.group;
+        for (uint32_t g = 0; g < grp; ++g) {
+            config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0));
+            incr_voidp(tfilter.raw_ptr, strd_flt);
+            incr_voidp(tdiff.raw_ptr, strd_diff);
+            incr_voidp(tgrad.raw_ptr, strd_grad);
+        }
     }
 }
......
@@ -26,23 +26,8 @@ ConvolutionBackwardFilterImpl::AlgoPack::AlgoPack() {
         all_algos.push_back(&i);
     }
     all_algos.push_back(&matmul);
+    all_algos.push_back(&group);
 
-    all_algos.reserve(all_algos.size() * 2);
-    // add gconv algos by AlgoGroupConvGeneral
-    auto all_algos_data = all_algos.data();
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        gconv.push_back({all_algos[i]});
-    }
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        algo2gconv[all_algos[i]] = &gconv[i - 1];
-    }
-    for (auto &&i: gconv) {
-        all_algos.push_back(&i);
-    }
-    megdnn_assert(all_algos_data == all_algos.data());
-    non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group matmul
     all_algos.push_back(&bfloat16);
     bfloat16_algos.push_back(&bfloat16);
@@ -68,7 +53,7 @@ ConvolutionBackwardFilterImpl::AlgoPack
 ConvolutionBackwardFilterImpl::sm_algo_pack;
 
 ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
-        ConvolutionBackwardFilterImpl *o,
+        const ConvolutionBackwardFilterImpl *o,
         const TensorLayout &src, const TensorLayout &diff,
         const TensorLayout &grad):
     SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad))
@@ -76,7 +61,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
 }
 
 ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
-        ConvolutionBackwardFilterImpl* o, const TensorLayout& src,
+        const ConvolutionBackwardFilterImpl* o, const TensorLayout& src,
         const TensorLayout& diff, const TensorLayout& grad,
         const CanonizedFilterMeta& grad_meta)
         : handle{concrete_handle(o->handle())},
@@ -87,7 +72,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
           opr{o} {}
 
 ConvolutionBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs(
-        ConvolutionBackwardFilterImpl *opr,
+        const ConvolutionBackwardFilterImpl *opr,
         _megdnn_tensor_in src,
         _megdnn_tensor_in diff,
         _megdnn_tensor_out grad,
......
@@ -45,16 +45,18 @@ public:
         HandleImpl* handle;
         const TensorLayout *src_layout, *diff_layout, *grad_layout;
         CanonizedFilterMeta grad_filter_meta;
-        ConvolutionBackwardFilterImpl* opr;
+        const ConvolutionBackwardFilterImpl* opr;
 
         std::string to_string() const;
         void init_desc(convolution::CUDNNBwdFilterDescs& desc) const {
             desc.set(*src_layout, *diff_layout, grad_filter_meta, opr->param());
         }
-        SizeArgs(ConvolutionBackwardFilterImpl* opr, const TensorLayout& src,
-                 const TensorLayout& diff, const TensorLayout& grad);
-        SizeArgs(ConvolutionBackwardFilterImpl* opr, const TensorLayout& src,
-                 const TensorLayout& diff, const TensorLayout& grad,
+        SizeArgs(const ConvolutionBackwardFilterImpl* opr,
+                 const TensorLayout& src, const TensorLayout& diff,
+                 const TensorLayout& grad);
+        SizeArgs(const ConvolutionBackwardFilterImpl* opr,
+                 const TensorLayout& src, const TensorLayout& diff,
+                 const TensorLayout& grad,
                  const CanonizedFilterMeta& grad_meta);
 
         convolution::ForwardSizeArgs as_fwd_args() const {
@@ -66,9 +68,9 @@ public:
         const TensorND *src_tensor, *diff_tensor, *grad_tensor;
         Workspace workspace;
 
-        ExecArgs(ConvolutionBackwardFilterImpl* opr, _megdnn_tensor_in src,
-                 _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-                 _megdnn_workspace workspace);
+        ExecArgs(const ConvolutionBackwardFilterImpl* opr,
+                 _megdnn_tensor_in src, _megdnn_tensor_in diff,
+                 _megdnn_tensor_out grad, _megdnn_workspace workspace);
     };
 
     virtual bool is_available(const SizeArgs& args) const = 0;
     virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -203,29 +205,25 @@ private:
 //! implement group conv by another algo
 class ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral final
         : public AlgoBase {
-    AlgoBase* m_impl;
-    std::string m_name;
-
 public:
-    AlgoGroupConvGeneral(AlgoBase* impl);
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
+    std::vector<SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    const char* name() const override { return m_name.c_str(); }
-
-    static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
-                                 TensorLayout& diff_pg);
+    const char* name() const override {
+        return "CUDA:GROUP_CONV_BACKWARD_FILTER";
+    }
 
     MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
 
     AlgoAttribute attribute() const override {
-        auto ret = static_cast<AlgoAttribute>(0);
-        if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
-            ret |= AlgoAttribute::REPRODUCIBLE;
-        }
-        return ret;
+        return AlgoAttribute::REPRODUCIBLE;
     }
+
+private:
+    WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
 
 class ConvolutionBackwardFilterImpl::AlgoPack : NonCopyableObj {
@@ -240,8 +238,7 @@ public:
     std::vector<AlgoCUDNN> cudnn;
     AlgoMatmul matmul;
     AlgoChanwise chanwise;
-    std::vector<AlgoGroupConvGeneral> gconv;
-    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
+    AlgoGroupConvGeneral group;
     AlgoBFloat16 bfloat16;
 
     std::vector<AlgoBase*>
......
@@ -15,25 +15,63 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution;
 
-void ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::modify_size_args(
-        ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs &args,
-        TensorLayout &src_pg, TensorLayout &diff_pg) {
-    src_pg = *args.src_layout;
-    diff_pg = *args.diff_layout;
+namespace {
+std::pair<TensorLayoutArray, Convolution::Param> sub_opr_config(
+        const ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs& args) {
+    SmallVector<size_t> flt_shape(0);
+    std::vector<ptrdiff_t> flt_stride(0);
+    size_t idx = 0;
+    // check if the first dim is group
+    if (args.grad_layout->ndim > args.diff_layout->ndim)
+        ++idx;
+    for (; idx < args.grad_layout->ndim; ++idx) {
+        flt_shape.push_back(args.grad_layout->shape[idx]);
+        flt_stride.push_back(args.grad_layout->stride[idx]);
+    }
+    TensorLayout filter_pg(flt_shape, flt_stride, args.grad_layout->dtype,
+                           args.grad_layout->format);
+    TensorLayout src_pg = *args.src_layout;
+    TensorLayout diff_pg = *args.diff_layout;
+
     auto nr_grp = args.grad_filter_meta.group;
-    args.grad_filter_meta.group = 1;
-    src_pg.shape[1] /= nr_grp;
-    diff_pg.shape[1] /= nr_grp;
-    args.src_layout = &src_pg;
-    args.diff_layout = &diff_pg;
+    size_t c_pos = 1;
+    src_pg.shape[c_pos] /= nr_grp;
+    diff_pg.shape[c_pos] /= nr_grp;
+
+    megdnn::param::Convolution param = args.opr->param();
+    param.sparse = megdnn::param::ConvBias::Sparse::DENSE;
+    std::pair<TensorLayoutArray, ConvolutionBackwardFilterImpl::Param> ret;
+    ret.first = {src_pg, diff_pg, filter_pg};
+    ret.second = param;
+    return ret;
 }
 
-ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
-        AlgoBase *impl):
-    m_impl{impl}
-{
-    m_name = "group_conv:";
-    m_name += impl->name();
+std::pair<TensorLayoutArray, std::unique_ptr<ConvolutionBackwardFilter>>
+prepare_sub_opr(const ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs& args) {
+    auto conv_bwd_filter_opr =
+            args.handle->create_operator<ConvolutionBackwardFilter>();
+    set_execution_policy<ConvolutionBackwardFilter, ConvolutionBackwardFilter*>(
+            args.opr, conv_bwd_filter_opr.get());
+    auto&& config = sub_opr_config(args);
+    conv_bwd_filter_opr->param() = config.second;
+
+    return {config.first, std::move(conv_bwd_filter_opr)};
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    AlgoBase::SizeArgs args{
+            static_cast<const ConvolutionBackwardFilterImpl*>(opr), layouts[0],
+            layouts[1], layouts[2]};
+    auto&& config = sub_opr_config(args);
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION_BACKWARD_FILTER, param_str,
+             config.first}};
 }
 bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
@@ -44,44 +82,60 @@ bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
     }
     if (args.grad_filter_meta.group <= 1)
         return false;
-    auto sub_args = args;
-    TensorLayout src_pg, diff_pg;
-    modify_size_args(sub_args, src_pg, diff_pg);
-    return m_impl->is_available(sub_args);
+    if (args.grad_filter_meta.format !=
+        megdnn::param::Convolution::Format::NCHW) {
+        return false;
+    }
+
+    auto config = prepare_sub_opr(args);
+    return get_algorithm(
+            static_cast<ConvolutionBackwardFilterImpl*>(config.second.get()),
+            config.first[0], config.first[1], config.first[2]);
 }
 
-size_t ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::
-        get_workspace_in_bytes(const SizeArgs &args) const {
-    auto sub_args = args;
-    TensorLayout src_pg, diff_pg;
-    modify_size_args(sub_args, src_pg, diff_pg);
-    return m_impl->get_workspace_in_bytes(sub_args);
+WorkspaceBundle
+ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_bundle(
+        void* ptr, const SizeArgs& args) const {
+    auto config = prepare_sub_opr(args);
+    size_t sizes = config.second->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]);
+    return {ptr, {sizes}};
+}
+
+size_t
+ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
+        const SizeArgs& args) const {
+    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
 }
 
 void ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::exec(
-        const ExecArgs &args) const {
-    auto sub_args = args;
-    TensorND tsrc{*args.src_tensor}, tdiff{*args.diff_tensor},
-            tgrad{*args.grad_tensor};
-    modify_size_args(sub_args, tsrc.layout, tdiff.layout);
-    sub_args.src_tensor = &tsrc;
-    sub_args.diff_tensor = &tdiff;
-    sub_args.grad_tensor = &tgrad;
-
-    auto &&fm = args.grad_filter_meta;
-    auto grp = fm.group;
-
-    auto strd_src = (
-            tsrc.layout.stride[1] * fm.icpg * tsrc.layout.dtype.size()),
-         strd_diff = (
-            tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
-         strd_grad = (fm.icpg * fm.ocpg *
-            fm.spatial[0] * fm.spatial[1] * tgrad.layout.dtype.size());
-    for (uint32_t g = 0; g < grp; ++ g) {
-        m_impl->exec(sub_args);
-        incr_voidp(tsrc.raw_ptr, strd_src);
-        incr_voidp(tdiff.raw_ptr, strd_diff);
-        incr_voidp(tgrad.raw_ptr, strd_grad);
+        const ExecArgs& args) const {
+    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
+    {
+        auto config = prepare_sub_opr(args);
+        TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
+        TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
+        TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
+
+        size_t c_pos = 1;
+        auto&& fm = args.grad_filter_meta;
+
+        auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg *
+                        tsrc.layout.dtype.size(),
+             strd_diff = tdiff.layout.stride[c_pos] * fm.ocpg *
+                         tdiff.layout.dtype.size(),
+             strd_grad = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
+                         tgrad.layout.dtype.size();
+
+        auto grp = fm.group;
+        for (uint32_t g = 0; g < grp; ++g) {
+            config.second->exec(tsrc, tdiff, tgrad, bundle.get_workspace(0));
+            incr_voidp(tsrc.raw_ptr, strd_src);
+            incr_voidp(tdiff.raw_ptr, strd_diff);
+            incr_voidp(tgrad.raw_ptr, strd_grad);
+        }
    }
 }
......
@@ -104,19 +104,7 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
         const TensorLayout& grad, size_t workspace_limit_in_bytes,
         const AlgoAttribute& positive_attr,
         const AlgoAttribute& negative_attr) {
-    auto fm = check_layout_fwd(grad, filter, diff);
-    return get_algorithm_heuristic(filter, fm, diff, grad,
-                                   workspace_limit_in_bytes, positive_attr,
-                                   negative_attr);
-}
-
-ConvolutionBackwardDataImpl::Algorithm*
-ConvolutionBackwardDataImpl::get_algorithm_heuristic(const TensorLayout& filter,
-        const CanonizedFilterMeta& filter_meta, const TensorLayout& diff,
-        const TensorLayout& grad, size_t workspace_limit_in_bytes,
-        const AlgoAttribute& positive_attr,
-        const AlgoAttribute& negative_attr) {
-    AlgoBase::SizeArgs args(this, filter, filter_meta, diff, grad);
+    AlgoBase::SizeArgs args(this, filter, diff, grad);
 
     if (args.filter_meta.group > 1 &&
         sm_algo_pack.chanwise.is_available_attribute(
@@ -186,14 +174,11 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(const TensorLayout& filter,
     }
 
     if (args.filter_meta.group > 1) {
-        auto orig_args = args;
-        TensorLayout a, b;
-        AlgoGroupConvGeneral::modify_size_args(args, a, b);
-        if (is_cudnn_supported(args.as_fwd_args())) {
-            if (auto algo = get_cudnn_algo())
-                return sm_algo_pack.algo2gconv.at(algo);
+        if (auto algo = megdnn::get_algo_match_attribute<
+                    ConvolutionBackwardDataImpl>(
+                    &sm_algo_pack.group, positive_attr, negative_attr)) {
+            return algo;
         }
-        args = orig_args;
     }
 
     if (args.filter_layout->dtype.enumv() !=
@@ -212,7 +197,7 @@ size_t ConvolutionBackwardDataImpl::get_workspace_in_bytes(
         const TensorLayout& filter, const TensorLayout& diff,
         const TensorLayout& grad) {
     AlgoBase::SizeArgs args(this, filter, diff, grad);
-    return get_algorithm(this, filter, args.filter_meta, diff, grad)
+    return get_algorithm(this, filter, diff, grad)
             ->get_workspace_in_bytes(args);
 }
@@ -227,8 +212,7 @@ void ConvolutionBackwardFilterImpl::exec(_megdnn_tensor_in src,
                                          _megdnn_tensor_out grad,
                                          _megdnn_workspace workspace) {
     AlgoBase::ExecArgs args(this, src, diff, grad, workspace);
-    auto algo = get_algorithm(this, src.layout, diff.layout, grad.layout,
-                              args.grad_filter_meta);
+    auto algo = get_algorithm(this, src.layout, diff.layout, grad.layout);
     algo->check_workspace(args, workspace).exec(args);
 }
 
@@ -246,20 +230,7 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
         const TensorLayout& grad, size_t workspace_limit_in_bytes,
         const AlgoAttribute& positive_attr,
         const AlgoAttribute& negative_attr) {
-    auto fm = check_layout_fwd(src, grad, diff);
-    return get_algorithm_heuristic(src, diff, grad, fm,
-                                   workspace_limit_in_bytes, positive_attr,
-                                   negative_attr);
-}
-
-ConvolutionBackwardFilterImpl::Algorithm*
-ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
-        const TensorLayout& src, const TensorLayout& diff,
-        const TensorLayout& grad, const CanonizedFilterMeta& grad_meta,
-        size_t workspace_limit_in_bytes,
-        const AlgoAttribute& positive_attr,
-        const AlgoAttribute& negative_attr) {
-    AlgoBase::SizeArgs args(this, src, diff, grad, grad_meta);
+    AlgoBase::SizeArgs args(this, src, diff, grad);
 
     if (args.grad_filter_meta.group > 1 &&
         sm_algo_pack.chanwise.is_available_attribute(
@@ -332,14 +303,11 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
     }
 
     if (args.grad_filter_meta.group > 1) {
-        auto orig_args = args;
-        TensorLayout a, b;
-        AlgoGroupConvGeneral::modify_size_args(args, a, b);
-        if (is_cudnn_supported(args.as_fwd_args())) {
-            if (auto algo = get_cudnn_algo())
-                return sm_algo_pack.algo2gconv.at(algo);
+        if (auto algo = megdnn::get_algo_match_attribute<
+                    ConvolutionBackwardFilterImpl>(
+                    &sm_algo_pack.group, positive_attr, negative_attr)) {
+            return algo;
         }
-        args = orig_args;
     }
 
     if (args.src_layout->dtype.enumv() != DTypeTrait<dtype::BFloat16>::enumv) {
@@ -357,7 +325,7 @@ size_t ConvolutionBackwardFilterImpl::get_workspace_in_bytes(
         const TensorLayout& src, const TensorLayout& diff,
         const TensorLayout& grad) {
     AlgoBase::SizeArgs args(this, src, diff, grad);
-    return get_algorithm(this, src, diff, grad, args.grad_filter_meta)
+    return get_algorithm(this, src, diff, grad)
             ->get_workspace_in_bytes(args);
 }
......
@@ -74,17 +74,6 @@ public:
     using ConvolutionBackwardData::ConvolutionBackwardData;
     void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff,
               _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
-    AlgorithmInfo get_algorithm_info_heuristic(
-            const TensorLayout& filter, const CanonizedFilterMeta& filter_meta,
-            const TensorLayout& diff, const TensorLayout& grad,
-            size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
-            const AlgoAttribute& negative_attr) {
-        return get_algorithm_heuristic(filter, filter_meta, diff, grad,
-                                       workspace_limit_in_bytes, positive_attr,
-                                       negative_attr)
-                ->info();
-    }
-
     AlgorithmInfo get_algorithm_info_heuristic(
             const TensorLayout& filter, const TensorLayout& diff,
             const TensorLayout& grad, size_t workspace_limit_in_bytes,
@@ -128,14 +117,6 @@ protected:
             const AlgoAttribute& negative_attr) override;
 
 private:
-    Algorithm* get_algorithm_heuristic(const TensorLayout& filter,
-                                       const CanonizedFilterMeta& filter_meta,
-                                       const TensorLayout& diff,
-                                       const TensorLayout& grad,
-                                       size_t workspace_limit_in_bytes,
-                                       const AlgoAttribute& positive_attr,
-                                       const AlgoAttribute& negative_attr);
-
     static AlgoPack sm_algo_pack;
 };
 
@@ -147,17 +128,6 @@ public:
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override;
-    AlgorithmInfo get_algorithm_info_heuristic(
-            const TensorLayout& src, const TensorLayout& diff,
-            const TensorLayout& grad, const CanonizedFilterMeta& grad_meta,
-            size_t workspace_limit_in_bytes, const AlgoAttribute& positive_attr,
-            const AlgoAttribute& negative_attr) {
-        return get_algorithm_heuristic(src, diff, grad, grad_meta,
-                                       workspace_limit_in_bytes, positive_attr,
-                                       negative_attr)
-                ->info();
-    }
-
     AlgorithmInfo get_algorithm_info_heuristic(
             const TensorLayout& filter, const TensorLayout& diff,
             const TensorLayout& grad, size_t workspace_limit_in_bytes,
@@ -195,14 +165,6 @@ protected:
             const AlgoAttribute& negative_attr) override;
 
 private:
-    Algorithm* get_algorithm_heuristic(const TensorLayout& src,
-                                       const TensorLayout& diff,
-                                       const TensorLayout& grad,
-                                       const CanonizedFilterMeta& grad_meta,
-                                       size_t workspace_limit_in_bytes,
-                                       const AlgoAttribute& positive_attr,
-                                       const AlgoAttribute& negative_attr);
-
     static AlgoPack sm_algo_pack;
 };
......
@@ -1034,10 +1034,11 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_GROUP) {
         // float case
         Checker<ConvBiasForward> checker(handle_cuda());
         checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<
-                                         ConvBias>(
+                                         ConvBias>(ExecutionPolicyAlgoName{
                 ConvBiasForward::algo_name<ConvBiasForward::DirectParam>(
                         "CUDA:GROUP_CONV", {})
-                        .c_str()));
+                        .c_str(),
+                {{"CUDNN", {}}}}));
         ConvBias::Param param;
         param.sparse = ConvBias::Param::Sparse::GROUP;
         param.nonlineMode = mode;
......
@@ -108,39 +108,33 @@ TEST_F(CUDA, GROUP_CONV_FORWARD)
 }
 
 TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) {
-    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
-                   size_t FH, size_t FW,
-                   size_t OC, size_t group) {
+    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH,
+                   size_t FW, size_t OC, size_t group) {
         Checker<Convolution> checker(handle_cuda());
-#if CUDNN_MAJOR <= 6
         std::string conv1x1_name =
-                ConvBiasForward::algo_name<ConvBiasForward::MatmulParam>(
-                        "BATCHEDMATMUL", {});
-        checker.set_before_exec_callback(
-                AlgoChecker<ConvolutionForward>(ExecutionPolicyAlgoName{
-                        "DEFAULT",
-                        {{ConvBiasForward::algo_name<
-                                  ConvBiasForward::DirectParam>(
-                                  ssprintf("%s:%s", "CUDA:GROUP_CONV",
-                                           conv1x1_name.c_str())
-                                          .c_str(),
-                                  {})
-                                  .c_str(),
-                          {}}}}));
-#endif
+                ConvBiasForward::algo_name<ConvBias::MatmulParam>(
+                        "INPLACE_MATMUL", {});
+        checker.set_before_exec_callback(AlgoChecker<ConvolutionForward>(
+                ExecutionPolicyAlgoName{"DEFAULT",
+                                        {{ConvBiasForward::algo_name<
+                                                  ConvBiasForward::DirectParam>(
+                                                  "CUDA:GROUP_CONV", {})
+                                                  .c_str(),
+                                          {{conv1x1_name.c_str(), {}}}}}}));
         Convolution::Param param;
         param.sparse = Convolution::Param::Sparse::GROUP;
         auto ICg = IC / group;
         auto OCg = OC / group;
-        checker.set_param(param).exec({{N, IC, IH, IW},
-                                       {group, OCg, ICg, FH, FW}, {}});
+        checker.set_param(param).exec(
+                {{N, IC, IH, IW}, {group, OCg, ICg, FH, FW}, {}});
     };
     size_t ic = 192;
     for (size_t g = 2; g <= 3; g += 1) {
         for (size_t ih = 8; ih <= 128; ih *= 4) {
             size_t iw = ih;
             run(2, ic, ih, iw, 1, 1, ic / g, g);
-            run(2, ic, ih+1, iw+1, 1, 1, ic / g, g);
+            run(2, ic, ih + 1, iw + 1, 1, 1, ic / g, g);
         }
     }
 }
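The updated tests pin nested algorithms through `ExecutionPolicyAlgoName`: a top-level algorithm name plus the names chosen for its sub-operators, recursively. A rough standalone sketch of that shape (stub type with assumed field names, not the megdnn test header):

```cpp
// A policy names the algorithm to force at each level of the sub-operator
// tree, e.g. the group wrapper at the top and CUDNN inside it.
#include <cstdio>
#include <string>
#include <vector>

struct PolicyName {
    std::string name;                    // algorithm to force at this level
    std::vector<PolicyName> sub_policy;  // names for its sub-operators
};

int main() {
    PolicyName p{"CUDA:GROUP_CONV_BACKWARD_DATA", {{"CUDNN", {}}}};
    std::printf("%s -> %s\n", p.name.c_str(), p.sub_policy[0].name.c_str());
}
```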
@@ -189,6 +183,54 @@ TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA)
         8);
 }
 
+TEST_F(CUDA, GROUP_CONV_BACKWARD_DATA_CUDNN)
+{
+    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
+                   size_t FH, size_t FW,
+                   size_t OC, size_t OH, size_t OW,
+                   size_t PH, size_t PW,
+                   size_t SH, size_t SW,
+                   size_t group)
+    {
+        Checker<ConvolutionBackwardData> checker(handle_cuda());
+        checker.set_before_exec_callback(
+                AlgoChecker<ConvolutionBackwardData>(ExecutionPolicyAlgoName{
+                        "CUDA:GROUP_CONV_BACKWARD_DATA", {{"CUDNN", {}}}}));
+        ConvolutionBackwardData::Param param;
+        param.sparse = Convolution::Param::Sparse::GROUP;
+        param.pad_h = PH;
+        param.pad_w = PW;
+        param.stride_h = SH;
+        param.stride_w = SW;
+        auto ICg = IC / group;
+        auto OCg = OC / group;
+        checker.set_param(param).exec({{group, OCg, ICg, FH, FW},
+                                       {N, OC, OH, OW}, {N, IC, IH, IW}});
+    };
+    // normal case
+    run(2, 64, 7, 7,
+        3, 3,
+        32, 5, 5,
+        0, 0,
+        1, 1,
+        2);
+    // padded case
+    run(2, 32, 7, 7,
+        3, 3,
+        64, 7, 7,
+        1, 1,
+        1, 1,
+        4);
+    // strided case
+    run(2, 32, 7, 7,
+        3, 3,
+        64, 3, 3,
+        0, 0,
+        2, 2,
+        8);
+}
+
 TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER)
 {
     auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
@@ -233,6 +275,52 @@ TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER)
         8);
 }
 
+TEST_F(CUDA, GROUP_CONV_BACKWARD_FILTER_CUDNN)
+{
+    auto run = [&](size_t N, size_t IC, size_t IH, size_t IW,
+                   size_t FH, size_t FW,
+                   size_t OC, size_t OH, size_t OW,
+                   size_t PH, size_t PW,
+                   size_t SH, size_t SW,
+                   size_t group)
+    {
+        Checker<ConvolutionBackwardFilter> checker(handle_cuda());
+        checker.set_before_exec_callback(
+                AlgoChecker<ConvolutionBackwardFilter>(ExecutionPolicyAlgoName{
+                        "CUDA:GROUP_CONV_BACKWARD_FILTER", {{"CUDNN", {}}}}));
+        ConvolutionBackwardFilter::Param param;
+        param.sparse = Convolution::Param::Sparse::GROUP;
+        param.pad_h = PH;
+        param.pad_w = PW;
+        param.stride_h = SH;
+        param.stride_w = SW;
+        auto ICg = IC / group;
+        auto OCg = OC / group;
+        checker.set_param(param).exec({{N, IC, IH, IW},
+                                       {N, OC, OH, OW}, {group, OCg, ICg, FH, FW}});
+    };
+    // normal case
+    run(2, 64, 7, 7,
+        3, 3,
+        32, 5, 5,
+        0, 0,
+        1, 1,
+        2);
+    // padded case
+    run(2, 32, 7, 7,
+        3, 3,
+        64, 7, 7,
+        1, 1,
+        1, 1,
+        4);
+    // strided case
+    run(2, 32, 7, 7,
+        3, 3,
+        64, 3, 3,
+        0, 0,
+        2, 2,
+        8);
+}
+
 } // namespace test
 } // namespace megdnn
......