diff --git a/dnn/src/cuda/convolution3d/backward_data/algo.cpp b/dnn/src/cuda/convolution3d/backward_data/algo.cpp
index 2ecc5a6a87379123bd4d4fd0464432eac7096418..62107f0f36567e68b64bc0631213f3e6a7734e2e 100644
--- a/dnn/src/cuda/convolution3d/backward_data/algo.cpp
+++ b/dnn/src/cuda/convolution3d/backward_data/algo.cpp
@@ -24,21 +24,7 @@ Convolution3DBackwardDataImpl::AlgoPack::AlgoPack() {
     for (auto &&i: cudnn) {
         all_algos.push_back(&i);
     }
-
-    all_algos.reserve(all_algos.size() * 2);
-
-    // add gconv algos by AlgoGroupConvGeneral
-    auto all_algos_data = all_algos.data();
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        gconv.push_back({all_algos[i]});
-    }
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        algo2gconv[all_algos[i]] = &gconv[i - 1];
-    }
-    for (auto &&i: gconv) {
-        all_algos.push_back(&i);
-    }
-    megdnn_assert(all_algos_data == all_algos.data());
+    all_algos.push_back(&group);
 
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
@@ -61,27 +47,26 @@ Convolution3DBackwardDataImpl::AlgoPack::cudnn_from_enum(
 Convolution3DBackwardDataImpl::AlgoPack Convolution3DBackwardDataImpl::sm_algo_pack;
 
 Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
-        Convolution3DBackwardDataImpl *o,
-        const TensorLayout &filter, const TensorLayout &diff,
-        const TensorLayout &grad):
-    SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad)
-{
-}
+        const Convolution3DBackwardDataImpl* o, const TensorLayout& filter,
+        const TensorLayout& diff, const TensorLayout& grad)
+        : SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter),
+                   diff, grad) {}
 
 Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
-        Convolution3DBackwardDataImpl *o,
-        const CanonizedFilterMeta &filter, const TensorLayout &diff,
+        const Convolution3DBackwardDataImpl *o, const TensorLayout& filter,
+        const CanonizedFilterMeta &filter_meta, const TensorLayout &diff,
         const TensorLayout &grad):
     handle{concrete_handle(o->handle())},
-    filter_meta{filter},
+    filter_meta{filter_meta},
     diff_layout{&diff},
     grad_layout{&grad},
+    filter_layout{&filter},
     opr{o}
 {
 }
 
 Convolution3DBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs(
-        Convolution3DBackwardDataImpl *opr,
+        const Convolution3DBackwardDataImpl *opr,
         _megdnn_tensor_in filter, _megdnn_tensor_in diff,
         _megdnn_tensor_out grad,
diff --git a/dnn/src/cuda/convolution3d/backward_data/algo.h b/dnn/src/cuda/convolution3d/backward_data/algo.h
index 2cdcf87329463a58fb3bb2492b3564f882ac0a0f..41eb05daf305b3aceeafee600618e7396aeef0c7 100644
--- a/dnn/src/cuda/convolution3d/backward_data/algo.h
+++ b/dnn/src/cuda/convolution3d/backward_data/algo.h
@@ -42,31 +42,33 @@ public:
     struct SizeArgs {
         HandleImpl* handle;
         CanonizedFilterMeta filter_meta;
-        const TensorLayout *diff_layout, *grad_layout;
-        Convolution3DBackwardDataImpl* opr;
+        const TensorLayout *diff_layout, *grad_layout, *filter_layout;
+        const Convolution3DBackwardDataImpl* opr;
 
         std::string to_string() const;
         void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const {
             desc.set(filter_meta, *diff_layout, *grad_layout, opr->param());
         }
-        SizeArgs(Convolution3DBackwardDataImpl* opr, const TensorLayout& filter,
-                 const TensorLayout& diff, const TensorLayout& grad);
-        SizeArgs(Convolution3DBackwardDataImpl* opr,
-                 const CanonizedFilterMeta& filter, const TensorLayout& diff,
+        SizeArgs(const Convolution3DBackwardDataImpl* opr,
+                 const TensorLayout& filter, const TensorLayout& diff,
                  const TensorLayout& grad);
+        SizeArgs(const Convolution3DBackwardDataImpl* opr,
+                 const TensorLayout& filter,
+                 const CanonizedFilterMeta& filter_meta,
+                 const TensorLayout& diff, const TensorLayout& grad);
 
         convolution3d::ForwardSizeArgs as_fwd_args() const {
-            return {handle, grad_layout, filter_meta, diff_layout,
-                    opr->param().data_type};
+            return {handle, grad_layout, filter_layout,
+                    filter_meta, diff_layout, opr->param().data_type};
         }
     };
     struct ExecArgs : public SizeArgs {
         const TensorND *filter_tensor, *diff_tensor, *grad_tensor;
         Workspace workspace;
 
-        ExecArgs(Convolution3DBackwardDataImpl* opr, _megdnn_tensor_in filter,
-                 _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-                 _megdnn_workspace workspace);
+        ExecArgs(const Convolution3DBackwardDataImpl* opr,
+                 _megdnn_tensor_in filter, _megdnn_tensor_in diff,
+                 _megdnn_tensor_out grad, _megdnn_workspace workspace);
     };
     virtual bool is_available(const SizeArgs& args) const = 0;
     virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -154,29 +156,25 @@ public:
 //! implement group conv by another algo
 class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final
         : public AlgoBase {
-    AlgoBase* m_impl;
-    std::string m_name;
-
 public:
-    AlgoGroupConvGeneral(AlgoBase* impl);
-
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
+    std::vector<Algorithm::SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    const char* name() const override { return m_name.c_str(); }
+    const char* name() const override {
+        return "CUDA:GROUP_CONV3D_BACKWARD_DATA";
+    }
 
-    static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg,
-                                 TensorLayout& grad_pg);
     AlgoAttribute attribute() const override {
-        auto ret = static_cast<AlgoAttribute>(0);
-        if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
-            ret |= AlgoAttribute::REPRODUCIBLE;
-        }
-        return ret;
+        return AlgoAttribute::REPRODUCIBLE;
     }
 
     MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
+private:
+    WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
 
 class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj {
@@ -190,8 +188,7 @@ public:
     std::vector<AlgoCUDNN> cudnn;
     AlgoChanwise chanwise;
-    std::vector<AlgoGroupConvGeneral> gconv;
-    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
+    AlgoGroupConvGeneral group;
 
     std::vector<AlgoBase*>
             //! all algorithms
diff --git a/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp b/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
index 9e6d90b59facc8c835258fa0f9552d924db73c35..c4181b1368006bdf97893d8706b01b9b49fbadee 100644
--- a/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
+++ b/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp
@@ -15,68 +15,121 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution3d;
 
-void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args(
-        Convolution3DBackwardDataImpl::AlgoBase::SizeArgs &args,
-        TensorLayout &diff_pg, TensorLayout &grad_pg) {
-    diff_pg = *args.diff_layout;
-    grad_pg = *args.grad_layout;
+namespace {
+std::pair<TensorLayoutArray, megdnn::param::Convolution3D>
+sub_opr_config(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) {
+    SmallVector<size_t> flt_shape(0);
+    std::vector<ptrdiff_t> flt_stride(0);
+    size_t idx = 0;
+    // check if the first dim is group
+    if (args.filter_layout->ndim > args.grad_layout->ndim)
+        ++idx;
+    for (; idx < args.filter_layout->ndim; ++idx) {
+        flt_shape.push_back(args.filter_layout->shape[idx]);
+        flt_stride.push_back(args.filter_layout->stride[idx]);
+    }
+    TensorLayout filter_pg(flt_shape, flt_stride,
+                           args.filter_layout->dtype,
+                           args.filter_layout->format);
+    TensorLayout diff_pg = *args.diff_layout;
+    TensorLayout grad_pg = *args.grad_layout;
+
     auto nr_grp = args.filter_meta.group;
-    args.filter_meta.group = 1;
-    diff_pg.shape[1] /= nr_grp;
-    grad_pg.shape[1] /= nr_grp;
-    args.diff_layout = &diff_pg;
-    args.grad_layout = &grad_pg;
+    size_t c_pos = 1;
+    diff_pg.shape[c_pos] /= nr_grp;
+    grad_pg.shape[c_pos] /= nr_grp;
+
+    megdnn::param::Convolution3D param = args.opr->param();
+    param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
+    std::pair<TensorLayoutArray, megdnn::param::Convolution3D> ret;
+    ret.first = {filter_pg, diff_pg, grad_pg};
+    ret.second = param;
+
+    return ret;
 }
 
-Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
-        AlgoBase *impl):
-    m_impl{impl}
-{
-    m_name = "group_conv3d:";
-    m_name += impl->name();
+std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardData>>
+prepare_sub_opr(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) {
+    auto conv3d_backdata_opr =
+            args.handle->create_operator<Convolution3DBackwardData>();
+    set_execution_policy<Convolution3DBackwardData, Convolution3DBackwardData*>(
+            args.opr, conv3d_backdata_opr.get());
+    auto&& config = sub_opr_config(args);
+    conv3d_backdata_opr->param() = config.second;
+
+    return {config.first, std::move(conv3d_backdata_opr)};
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    AlgoBase::SizeArgs args{
+            static_cast<const Convolution3DBackwardDataImpl*>(opr), layouts[0],
+            layouts[1], layouts[2]};
+    auto&& config = sub_opr_config(args);
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA, param_str,
+             config.first}};
 }
 
 bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available(
         const SizeArgs &args) const {
     if (args.filter_meta.group <= 1)
         return false;
-    auto sub_args = args;
-    TensorLayout diff_pg, grad_pg;
-    modify_size_args(sub_args, diff_pg, grad_pg);
-    return m_impl->is_available(sub_args);
+    if (args.filter_meta.format != Param::Format::NCDHW) {
+        return false;
+    }
+
+    auto config = prepare_sub_opr(args);
+    return get_algorithm(
+            static_cast<Convolution3DBackwardDataImpl*>(config.second.get()),
+            config.first[0], config.first[1], config.first[2]);
+}
+
+WorkspaceBundle
+Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle(
+        void* ptr, const SizeArgs& args) const {
+    auto config = prepare_sub_opr(args);
+    size_t sizes = config.second->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]);
+    return {ptr, {sizes}};
 }
 
-size_t Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::
-get_workspace_in_bytes(const SizeArgs &args) const {
-    auto sub_args = args;
-    TensorLayout diff_pg, grad_pg;
-    modify_size_args(sub_args, diff_pg, grad_pg);
-    return m_impl->get_workspace_in_bytes(sub_args);
+size_t
+Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
+        const SizeArgs& args) const {
+    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
 }
 
 void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec(
-        const ExecArgs &args) const {
-    auto sub_args = args;
-    TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor},
-             tgrad{*args.grad_tensor};
-    modify_size_args(sub_args, tdiff.layout, tgrad.layout);
-    sub_args.filter_tensor = &tflt;
-    sub_args.diff_tensor = &tdiff;
-    sub_args.grad_tensor = &tgrad;
-    auto grp = args.filter_meta.group;
-
-    auto &&fm = args.filter_meta;
-    auto strd_flt = (fm.icpg * fm.ocpg *
-            fm.spatial[0] * fm.spatial[1] * fm.spatial[2] *
-            tflt.layout.dtype.size()),
-         strd_diff = (
-                 tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
-         strd_grad = (
-                 tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size());
-    for (uint32_t g = 0; g < grp; ++ g) {
-        m_impl->exec(sub_args);
-        incr_voidp(tflt.raw_ptr, strd_flt);
-        incr_voidp(tdiff.raw_ptr, strd_diff);
-        incr_voidp(tgrad.raw_ptr, strd_grad);
+        const ExecArgs& args) const {
+    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
+    {
+        auto config = prepare_sub_opr(args);
+        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]};
+        TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
+        TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
+
+        size_t c_pos = 1;
+        auto grp = args.filter_meta.group;
+
+        auto&& fm = args.filter_meta;
+        auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
+                         fm.spatial[2] * tfilter.layout.dtype.size()),
+             strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg *
+                          tdiff.layout.dtype.size()),
+             strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg *
+                          tgrad.layout.dtype.size());
+
+        for (uint32_t g = 0; g < grp; ++g) {
+            config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0));
+            incr_voidp(tfilter.raw_ptr, strd_flt);
+            incr_voidp(tdiff.raw_ptr, strd_diff);
+            incr_voidp(tgrad.raw_ptr, strd_grad);
+        }
     }
 }
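
// ---------------------------------------------------------------------------
// The rewritten exec() above is the pattern all three passes of this patch
// share: prepare one DENSE sub-operator, then advance the tensor pointers by
// fixed per-group byte strides and run it `group` times. A minimal
// self-contained sketch of that loop (toy types; the hypothetical names
// GroupPtrs/run_dense stand in for TensorND and config.second->exec()):

#include <cstddef>
#include <cstdint>
#include <functional>

struct GroupPtrs {
    void *filter, *diff, *grad;  // base pointers of the full grouped tensors
    std::size_t strd_filter, strd_diff, strd_grad;  // per-group strides, bytes
};

inline void exec_per_group(
        GroupPtrs p, std::uint32_t group,
        const std::function<void(void*, void*, void*)>& run_dense) {
    for (std::uint32_t g = 0; g < group; ++g) {
        // run the dense sub-operator on the current group's slice
        run_dense(p.filter, p.diff, p.grad);
        // step to the next group's slice, exactly what incr_voidp() does above
        p.filter = static_cast<std::uint8_t*>(p.filter) + p.strd_filter;
        p.diff = static_cast<std::uint8_t*>(p.diff) + p.strd_diff;
        p.grad = static_cast<std::uint8_t*>(p.grad) + p.strd_grad;
    }
}
// ---------------------------------------------------------------------------
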
diff --git a/dnn/src/cuda/convolution3d/backward_filter/algo.cpp b/dnn/src/cuda/convolution3d/backward_filter/algo.cpp
index ad9568a30dee334f63348a08bf3384f56551757b..9c07655ad4bbb3dc28f41759905e1ee27d958de3 100644
--- a/dnn/src/cuda/convolution3d/backward_filter/algo.cpp
+++ b/dnn/src/cuda/convolution3d/backward_filter/algo.cpp
@@ -26,21 +26,7 @@ Convolution3DBackwardFilterImpl::AlgoPack::AlgoPack() {
     }
     all_algos.push_back(&inplace_matmul);
 
-    all_algos.reserve(all_algos.size() * 2);
-
-    // add gconv algos by AlgoGroupConvGeneral
-    auto all_algos_data = all_algos.data();
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        gconv.push_back({all_algos[i]});
-    }
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        algo2gconv[all_algos[i]] = &gconv[i - 1];
-    }
-    for (auto &&i: gconv) {
-        all_algos.push_back(&i);
-    }
-    megdnn_assert(all_algos_data == all_algos.data());
-    non_cudnn_algos.push_back(all_algos.rbegin()[0]); //group inplace_matmul
+    all_algos.push_back(&group);
 
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
@@ -64,27 +50,26 @@ Convolution3DBackwardFilterImpl::AlgoPack
 Convolution3DBackwardFilterImpl::sm_algo_pack;
 
 Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
-        Convolution3DBackwardFilterImpl *o,
+        const Convolution3DBackwardFilterImpl *o,
         const TensorLayout &src, const TensorLayout &diff,
         const TensorLayout &grad):
-    SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad))
+    SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad))
 {
 }
 
 Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
-        Convolution3DBackwardFilterImpl *o,
-        const TensorLayout &src, const TensorLayout &diff,
-        const CanonizedFilterMeta &grad):
-    handle{concrete_handle(o->handle())},
-    src_layout{&src},
-    diff_layout{&diff},
-    grad_filter_meta{grad},
-    opr{o}
-{
-}
+        const Convolution3DBackwardFilterImpl* o, const TensorLayout& src,
+        const TensorLayout& diff, const TensorLayout& grad,
+        const CanonizedFilterMeta& grad_meta)
+        : handle{concrete_handle(o->handle())},
+          src_layout{&src},
+          diff_layout{&diff},
+          grad_layout{&grad},
+          grad_filter_meta{grad_meta},
+          opr{o} {}
 
 Convolution3DBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs(
-        Convolution3DBackwardFilterImpl *opr,
+        const Convolution3DBackwardFilterImpl *opr,
         _megdnn_tensor_in src, _megdnn_tensor_in diff,
         _megdnn_tensor_out grad,
diff --git a/dnn/src/cuda/convolution3d/backward_filter/algo.h b/dnn/src/cuda/convolution3d/backward_filter/algo.h
index d3c5c398402886a4ae7ab3bd43a4916876b09239..e1f5b3bafb359402cf1ec061e73f578ee86c961e 100644
--- a/dnn/src/cuda/convolution3d/backward_filter/algo.h
+++ b/dnn/src/cuda/convolution3d/backward_filter/algo.h
@@ -36,31 +36,34 @@ public:
 
     struct SizeArgs {
         HandleImpl* handle;
-        const TensorLayout *src_layout, *diff_layout;
+        const TensorLayout *src_layout, *diff_layout, *grad_layout;
         CanonizedFilterMeta grad_filter_meta;
-        Convolution3DBackwardFilterImpl* opr;
+        const Convolution3DBackwardFilterImpl* opr;
 
         std::string to_string() const;
         void init_desc(convolution3d::CUDNNBwdFilterDescs& desc) const {
             desc.set(*src_layout, *diff_layout, grad_filter_meta,
                      opr->param());
         }
-        SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src,
-                 const TensorLayout& diff, const TensorLayout& grad);
-        SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src,
-                 const TensorLayout& diff, const CanonizedFilterMeta& grad);
+        SizeArgs(const Convolution3DBackwardFilterImpl* opr,
+                 const TensorLayout& src, const TensorLayout& diff,
+                 const TensorLayout& grad);
+        SizeArgs(const Convolution3DBackwardFilterImpl* opr,
+                 const TensorLayout& src, const TensorLayout& diff,
+                 const TensorLayout& grad,
+                 const CanonizedFilterMeta& grad_meta);
 
         convolution3d::ForwardSizeArgs as_fwd_args() const {
-            return {handle, src_layout, grad_filter_meta, diff_layout,
-                    opr->param().data_type};
+            return {handle, src_layout, grad_layout,
+                    grad_filter_meta, diff_layout, opr->param().data_type};
         }
     };
     struct ExecArgs : public SizeArgs {
         const TensorND *src_tensor, *diff_tensor, *grad_tensor;
         Workspace workspace;
 
-        ExecArgs(Convolution3DBackwardFilterImpl* opr, _megdnn_tensor_in src,
-                 _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-                 _megdnn_workspace workspace);
+        ExecArgs(const Convolution3DBackwardFilterImpl* opr,
+                 _megdnn_tensor_in src, _megdnn_tensor_in diff,
+                 _megdnn_tensor_out grad, _megdnn_workspace workspace);
     };
     virtual bool is_available(const SizeArgs& args) const = 0;
     virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
@@ -162,30 +165,25 @@ public:
 //! implement group conv by another algo
 class Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral final
         : public AlgoBase {
-    AlgoBase* m_impl;
-    std::string m_name;
-
 public:
-    AlgoGroupConvGeneral(AlgoBase* impl);
-
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
+    std::vector<Algorithm::SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    const char* name() const override { return m_name.c_str(); }
+    const char* name() const override {
+        return "CUDA:GROUP_CONV3D_BACKWARD_FILTER";
+    }
 
     AlgoAttribute attribute() const override {
-        auto ret = static_cast<AlgoAttribute>(0);
-        if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
-            ret |= AlgoAttribute::REPRODUCIBLE;
-        }
-        return ret;
+        return AlgoAttribute::REPRODUCIBLE;
     }
 
-    static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
-                                 TensorLayout& diff_pg);
-
     MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
+private:
+    WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
 
 class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj {
@@ -200,8 +198,7 @@ public:
     std::vector<AlgoCUDNN> cudnn;
     AlgoInplaceMatmul inplace_matmul;
     AlgoChanwise chanwise;
-    std::vector<AlgoGroupConvGeneral> gconv;
-    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
+    AlgoGroupConvGeneral group;
 
     std::vector<AlgoBase*>
             //! all algorithms
diff --git a/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp b/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
index eaf4ea92f142b4f31142140114cd02ab0d000a97..d9564b4a4625d6d992264fb7587085b258a989e0 100644
--- a/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
+++ b/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp
@@ -15,69 +15,123 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution3d;
 
-void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::modify_size_args(
-        Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs &args,
-        TensorLayout &src_pg, TensorLayout &diff_pg) {
-    src_pg = *args.src_layout;
-    diff_pg = *args.diff_layout;
+namespace {
+std::pair<TensorLayoutArray, megdnn::param::Convolution3D>
+sub_opr_config(
+        const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) {
+    SmallVector<size_t> flt_shape(0);
+    std::vector<ptrdiff_t> flt_stride(0);
+    size_t idx = 0;
+    // check if the first dim is group
+    if (args.grad_layout->ndim > args.src_layout->ndim)
+        ++idx;
+    for (; idx < args.grad_layout->ndim; ++idx) {
+        flt_shape.push_back(args.grad_layout->shape[idx]);
+        flt_stride.push_back(args.grad_layout->stride[idx]);
+    }
+    TensorLayout grad_pg(flt_shape, flt_stride, args.grad_layout->dtype,
+                         args.grad_layout->format);
+    TensorLayout src_pg = *args.src_layout;
+    TensorLayout diff_pg = *args.diff_layout;
+
     auto nr_grp = args.grad_filter_meta.group;
-    args.grad_filter_meta.group = 1;
-    src_pg.shape[1] /= nr_grp;
-    diff_pg.shape[1] /= nr_grp;
-    args.src_layout = &src_pg;
-    args.diff_layout = &diff_pg;
+    size_t c_pos = 1;
+    src_pg.shape[c_pos] /= nr_grp;
+    diff_pg.shape[c_pos] /= nr_grp;
+
+    megdnn::param::Convolution3D param = args.opr->param();
+    param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
+    std::pair<TensorLayoutArray, megdnn::param::Convolution3D> ret;
+    ret.first = {src_pg, diff_pg, grad_pg};
+    ret.second = param;
+
+    return ret;
 }
 
-Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
-        AlgoBase *impl):
-    m_impl{impl}
-{
-    m_name = "group_conv3d:";
-    m_name += impl->name();
+std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardFilter>>
+prepare_sub_opr(
+        const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) {
+    auto conv3d_backfilter_opr =
+            args.handle->create_operator<Convolution3DBackwardFilter>();
+    set_execution_policy<Convolution3DBackwardFilter,
+                         Convolution3DBackwardFilter*>(
+            args.opr, conv3d_backfilter_opr.get());
+    auto&& config = sub_opr_config(args);
+    conv3d_backfilter_opr->param() = config.second;
+
+    return {config.first, std::move(conv3d_backfilter_opr)};
+}
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    AlgoBase::SizeArgs args{
+            static_cast<const Convolution3DBackwardFilterImpl*>(opr),
+            layouts[0], layouts[1], layouts[2]};
+    auto&& config = sub_opr_config(args);
+
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_FILTER, param_str,
+             config.first}};
 }
 
 bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
-        const SizeArgs &args) const {
+        const SizeArgs& args) const {
     if (args.grad_filter_meta.group <= 1)
         return false;
-    auto sub_args = args;
-    TensorLayout src_pg, diff_pg;
-    modify_size_args(sub_args, src_pg, diff_pg);
-    return m_impl->is_available(sub_args);
+    if (args.grad_filter_meta.format != Param::Format::NCDHW) {
+        return false;
+    }
+
+    auto config = prepare_sub_opr(args);
+    return get_algorithm(
+            static_cast<Convolution3DBackwardFilterImpl*>(config.second.get()),
+            config.first[0], config.first[1], config.first[2]);
+}
+
+WorkspaceBundle
+Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_bundle(
+        void* ptr, const SizeArgs& args) const {
+    auto config = prepare_sub_opr(args);
+    size_t sizes = config.second->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]);
+    return {ptr, {sizes}};
 }
 
-size_t Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::
-get_workspace_in_bytes(const SizeArgs &args) const {
-    auto sub_args = args;
-    TensorLayout src_pg, diff_pg;
-    modify_size_args(sub_args, src_pg, diff_pg);
-    return m_impl->get_workspace_in_bytes(sub_args);
+size_t
+Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
+        const SizeArgs& args) const {
+    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
 }
 
 void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::exec(
-        const ExecArgs &args) const {
-    auto sub_args = args;
-    TensorND tsrc{*args.src_tensor}, tdiff{*args.diff_tensor},
-             tgrad{*args.grad_tensor};
-    modify_size_args(sub_args, tsrc.layout, tdiff.layout);
-    sub_args.src_tensor = &tsrc;
-    sub_args.diff_tensor = &tdiff;
-    sub_args.grad_tensor = &tgrad;
-
-    auto &&fm = args.grad_filter_meta;
-    auto grp = fm.group;
-
-    auto strd_src = (
-            tsrc.layout.stride[1] * fm.icpg * tsrc.layout.dtype.size()),
-         strd_diff = (
-            tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
-         strd_grad = (fm.icpg * fm.ocpg *
-            fm.spatial[0] * fm.spatial[1] * fm.spatial[2] *
-            tgrad.layout.dtype.size());
-    for (uint32_t g = 0; g < grp; ++ g) {
-        m_impl->exec(sub_args);
-        incr_voidp(tsrc.raw_ptr, strd_src);
-        incr_voidp(tdiff.raw_ptr, strd_diff);
-        incr_voidp(tgrad.raw_ptr, strd_grad);
+        const ExecArgs& args) const {
+    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
+    {
+        auto config = prepare_sub_opr(args);
+        TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
+        TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
+        TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
+
+        size_t c_pos = 1;
+        auto grp = args.grad_filter_meta.group;
+
+        auto&& fm = args.grad_filter_meta;
+        auto strd_src = (tsrc.layout.stride[c_pos] * fm.icpg *
+                         tsrc.layout.dtype.size()),
+             strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg *
+                          tdiff.layout.dtype.size()),
+             strd_grad = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
+                          fm.spatial[2] * tgrad.layout.dtype.size());
+
+        for (uint32_t g = 0; g < grp; ++g) {
+            config.second->exec(tsrc, tdiff, tgrad, bundle.get_workspace(0));
+            incr_voidp(tsrc.raw_ptr, strd_src);
+            incr_voidp(tdiff.raw_ptr, strd_diff);
+            incr_voidp(tgrad.raw_ptr, strd_grad);
+        }
     }
 }
diff --git a/dnn/src/cuda/convolution3d/forward/algo.cpp b/dnn/src/cuda/convolution3d/forward/algo.cpp
index e8639f82dd196f3fe49ca8a03dc07065339ec79a..3c3f749850df3bba8c224bcc1205c0809748895b 100644
--- a/dnn/src/cuda/convolution3d/forward/algo.cpp
+++ b/dnn/src/cuda/convolution3d/forward/algo.cpp
@@ -28,22 +28,7 @@ Convolution3DForwardImpl::AlgoPack::AlgoPack() {
     }
     all_algos.push_back(&inplace_matmul);
     all_algos.push_back(&a1x1x1);
-    all_algos.reserve(all_algos.size() * 2);
-
-    // add gconv algos by AlgoGroupConvGeneral
-    auto all_algos_data = all_algos.data();
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        gconv.push_back({all_algos[i]});
-    }
-    for (size_t i = 1; i < all_algos.size(); ++ i) {
-        algo2gconv[all_algos[i]] = &gconv[i - 1];
-    }
-    for (auto &&i: gconv) {
-        all_algos.push_back(&i);
-    }
-    megdnn_assert(all_algos_data == all_algos.data());
-    non_cudnn_algos.push_back(all_algos.rbegin()[1]);  // group inplace_matmul
-    non_cudnn_algos.push_back(all_algos.rbegin()[0]);  // group 1x1x1
+    all_algos.push_back(&group);
 
     for (auto&& algo : all_algos) {
         m_all_algos_map.emplace(algo->info().desc, algo);
@@ -66,28 +51,25 @@ Convolution3DForwardImpl::AlgoPack::cudnn_from_enum(
 Convolution3DForwardImpl::AlgoPack Convolution3DForwardImpl::sm_algo_pack;
 
 Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs(
-        Convolution3DForwardImpl *o,
-        const TensorLayout &src, const TensorLayout &filter,
-        const TensorLayout &dst):
-    SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst)
-{
-}
+        const Convolution3DForwardImpl* o, const TensorLayout& src,
+        const TensorLayout& filter, const TensorLayout& dst)
+        : SizeArgs(o, src, filter,
+                   o->make_canonized_filter_meta(src.ndim, filter), dst) {}
 
 Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs(
-        Convolution3DForwardImpl *o,
-        const TensorLayout &src, const CanonizedFilterMeta &filter,
-        const TensorLayout &dst):
-    ForwardSizeArgs{
-        concrete_handle(o->handle()),
-        &src, filter, &dst,
-        o->param().data_type
-    },
-    opr{o}
-{
-}
+        const Convolution3DForwardImpl* o, const TensorLayout& src,
+        const TensorLayout& filter, const CanonizedFilterMeta& filter_meta,
+        const TensorLayout& dst)
+        : ForwardSizeArgs{concrete_handle(o->handle()),
+                          &src,
+                          &filter,
+                          filter_meta,
+                          &dst,
+                          o->param().data_type},
+          opr{o} {}
 
 Convolution3DForwardImpl::AlgoBase::ExecArgs::ExecArgs(
-        Convolution3DForwardImpl *opr,
+        const Convolution3DForwardImpl *opr,
         _megdnn_tensor_in src, _megdnn_tensor_in filter,
         _megdnn_tensor_out dst,
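
// ---------------------------------------------------------------------------
// Each sub_opr_config() in this patch derives DENSE per-group layouts from
// the GROUP problem: the leading group dimension of the filter is dropped and
// the channel axis of the data tensors is divided by the group count, while
// strides are copied unchanged, so every per-group layout is a strided view
// into the original tensor rather than a packed copy. A worked example
// (illustrative values only; NCDHW, group = 4, icpg = 8, ocpg = 16):
//
//   filter {4, 16, 8, 3, 3, 3}  ->  filter_pg {16, 8, 3, 3, 3}
//   src    {2, 32, 7, 7, 7}     ->  src_pg    {2,  8, 7, 7, 7}   (32 / 4)
//   dst    {2, 64, 5, 5, 5}     ->  dst_pg    {2, 16, 5, 5, 5}   (64 / 4)
// ---------------------------------------------------------------------------
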
diff --git a/dnn/src/cuda/convolution3d/forward/algo.h b/dnn/src/cuda/convolution3d/forward/algo.h
index 96048e98af08896bdbfed23ba1b41eab7a423ba5..2fdfcbc0949b1ad95071b0e48d9d6449b0e0652b 100644
--- a/dnn/src/cuda/convolution3d/forward/algo.h
+++ b/dnn/src/cuda/convolution3d/forward/algo.h
@@ -48,22 +48,24 @@ public:
     AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; }
     struct SizeArgs : public convolution3d::ForwardSizeArgs {
-        Convolution3DForwardImpl* opr;
+        const Convolution3DForwardImpl* opr;
 
         std::string to_string() const;
         void init_desc(convolution3d::CUDNNForwardDescs& desc) const {
             desc.set(*src_layout, filter_meta, *dst_layout, opr->param());
         }
-        SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src,
+        SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src,
                  const TensorLayout& filter, const TensorLayout& dst);
-        SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src,
-                 const CanonizedFilterMeta& filter, const TensorLayout& dst);
+        SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src,
+                 const TensorLayout& filter,
+                 const CanonizedFilterMeta& filter_meta,
+                 const TensorLayout& dst);
     };
     struct ExecArgs : public SizeArgs {
         const TensorND *src_tensor, *filter_tensor, *dst_tensor;
         Workspace workspace;
 
-        ExecArgs(Convolution3DForwardImpl* opr, _megdnn_tensor_in src,
+        ExecArgs(const Convolution3DForwardImpl* opr, _megdnn_tensor_in src,
                  _megdnn_tensor_in filter, _megdnn_tensor_out dst,
                  _megdnn_workspace workspace);
     };
@@ -114,35 +116,22 @@ public:
 //! implement group conv by another algo
 class Convolution3DForwardImpl::AlgoGroupConvGeneral final : public AlgoBase {
-    AlgoBase* m_impl;
-    std::string m_name;
-
 public:
-    AlgoGroupConvGeneral(AlgoBase* impl);
-
     bool is_available(const SizeArgs& args) const override;
     size_t get_workspace_in_bytes(const SizeArgs& args) const override;
     void exec(const ExecArgs& args) const override;
+    std::vector<Algorithm::SearchItem> get_subopr_list(
+            const TensorLayoutArray& layouts,
+            const OperatorBase* opr) const override;
 
-    const char* name() const override { return m_name.c_str(); }
+    const char* name() const override { return "CUDA:GROUP_CONV3D_FORWARD"; }
 
     AlgoAttribute attribute() const override {
-        auto ret = AlgoAttribute::DEFAULT;
-        if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
-            ret |= AlgoAttribute::REPRODUCIBLE;
-        }
-#define cb(attr)                               \
-    if (m_impl->contain_attribute_all(attr)) { \
-        ret |= attr;                           \
-    }
-        MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
-#undef cb
-
-        return ret;
+        return AlgoAttribute::REPRODUCIBLE;
     }
 
-    static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
-                                 TensorLayout& dst_pg);
     MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
+private:
+    WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
 };
 
 class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase {
@@ -226,8 +215,7 @@ public:
     Algo1x1x1 a1x1x1;
     AlgoInplaceMatmul inplace_matmul;
     AlgoChanwise chanwise;
-    std::vector<AlgoGroupConvGeneral> gconv;
-    std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
+    AlgoGroupConvGeneral group;
 
     std::vector<AlgoBase*>
             //! all algorithms
diff --git a/dnn/src/cuda/convolution3d/forward/group_conv.cpp b/dnn/src/cuda/convolution3d/forward/group_conv.cpp
index c20cb37e55bd95b41403d8bd3df10a77b898a9cc..b614b6880405c445f3382db37dfeec0e63e6d497 100644
--- a/dnn/src/cuda/convolution3d/forward/group_conv.cpp
+++ b/dnn/src/cuda/convolution3d/forward/group_conv.cpp
@@ -15,84 +15,136 @@ using namespace megdnn;
 using namespace cuda;
 using namespace convolution3d;
 
-void Convolution3DForwardImpl::AlgoGroupConvGeneral::modify_size_args(
-        Convolution3DForwardImpl::AlgoBase::SizeArgs &args,
-        TensorLayout &src_pg, TensorLayout &dst_pg) {
-    src_pg = *args.src_layout;
-    dst_pg = *args.dst_layout;
+namespace {
+std::pair<TensorLayoutArray, megdnn::param::Convolution3D> sub_opr_config(
+        const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) {
+    TensorLayout src_pg = *args.src_layout;
+
+    SmallVector<size_t> flt_shape(0);
+    std::vector<ptrdiff_t> flt_stride(0);
+    size_t idx = 0;
+    // check if the first dim is group
+    if (args.filter_layout->ndim > args.src_layout->ndim)
+        ++idx;
+    for (; idx < args.filter_layout->ndim; ++idx) {
+        flt_shape.push_back(args.filter_layout->shape[idx]);
+        flt_stride.push_back(args.filter_layout->stride[idx]);
+    }
+    TensorLayout filter_pg(flt_shape, flt_stride,
+                           args.filter_layout->dtype,
+                           args.filter_layout->format);
+    TensorLayout dst_pg = *args.dst_layout;
+
     auto nr_grp = args.filter_meta.group;
-    args.filter_meta.group = 1;
     size_t c_pos;
-    if (args.filter_meta.format == Param::Format::NCDHW) {
+    if (args.filter_meta.format == param::Convolution3D::Format::NCDHW) {
         c_pos = 1;
     } else {
-        megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
+        megdnn_assert(
+                args.filter_meta.format == param::Convolution3D::Format::NDHWC,
                 "invalid conv format");
         c_pos = 4;
     }
     src_pg.shape[c_pos] /= nr_grp;
     dst_pg.shape[c_pos] /= nr_grp;
-    args.src_layout = &src_pg;
-    args.dst_layout = &dst_pg;
+
+    megdnn::param::Convolution3D param = args.opr->param();
+    param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
+    std::pair<TensorLayoutArray, megdnn::param::Convolution3D> ret;
+    ret.first = {src_pg, filter_pg, dst_pg};
+    ret.second = param;
+
+    return ret;
+}
+
+std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DForward>>
+prepare_sub_opr(const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) {
+    auto conv3d_opr = args.handle->create_operator<Convolution3DForward>();
+    set_execution_policy<Convolution3DForward, Convolution3DForward*>(
+            args.opr, conv3d_opr.get());
+    auto&& config = sub_opr_config(args);
+    conv3d_opr->param() = config.second;
+
+    return {config.first, std::move(conv3d_opr)};
 }
+}  // namespace
+
+std::vector<Algorithm::SearchItem>
+Convolution3DForwardImpl::AlgoGroupConvGeneral::get_subopr_list(
+        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
+    AlgoBase::SizeArgs args{static_cast<const Convolution3DForwardImpl*>(opr),
+                            layouts[0], layouts[1], layouts[2]};
+    auto&& config = sub_opr_config(args);
 
-Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
-        AlgoBase *impl):
-    m_impl{impl} {
-    m_name = "group_conv3d:";
-    m_name += impl->name();
+    std::string param_str;
+    Algorithm::serialize_write_pod(config.second, param_str);
+    return {{Algorithm::OprType::CONVOLUTION3D_FORWARD, param_str,
+             config.first}};
 }
 
 bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available(
         const SizeArgs &args) const {
     if (args.filter_meta.group <= 1)
         return false;
-    auto sub_args = args;
-    TensorLayout src_pg, dst_pg;
-    modify_size_args(sub_args, src_pg, dst_pg);
-    return m_impl->is_available(sub_args);
+    if (args.filter_meta.format != Param::Format::NCDHW &&
+        args.filter_meta.format != Param::Format::NDHWC) {
+        return false;
+    }
+
+    auto config = prepare_sub_opr(args);
+    return get_algorithm(
+            static_cast<Convolution3DForwardImpl*>(config.second.get()),
+            config.first[0], config.first[1], config.first[2]);
+}
+
+WorkspaceBundle
+Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
+        void* ptr, const SizeArgs& args) const {
+    auto config = prepare_sub_opr(args);
+    size_t sizes = config.second->get_workspace_in_bytes(
+            config.first[0], config.first[1], config.first[2]);
+    return {ptr, {sizes}};
 }
 
 size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
-        const SizeArgs &args) const {
-    auto sub_args = args;
-    TensorLayout src_pg, dst_pg;
-    modify_size_args(sub_args, src_pg, dst_pg);
-    return m_impl->get_workspace_in_bytes(sub_args);
+        const SizeArgs& args) const {
+    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
 }
 
 void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec(
-        const ExecArgs &args) const {
-    auto sub_args = args;
-    TensorND tsrc{*args.src_tensor}, tdst{*args.dst_tensor},
-             tflt{*args.filter_tensor};
-    modify_size_args(sub_args, tsrc.layout, tdst.layout);
-    sub_args.src_tensor = &tsrc;
-    sub_args.dst_tensor = &tdst;
-    sub_args.filter_tensor = &tflt;
+        const ExecArgs& args) const {
+    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
+    {
+        auto config = prepare_sub_opr(args);
+        TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
+        TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]};
+        TensorND tdst{args.dst_tensor->raw_ptr, config.first[2]};
 
-    size_t c_pos;
-    if (args.filter_meta.format == Param::Format::NCDHW) {
-        c_pos = 1;
-    } else {
-        megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
-                "invalid conv format");
-        c_pos = 4;
-    }
+        size_t c_pos;
+        if (args.filter_meta.format == Param::Format::NCDHW) {
+            c_pos = 1;
+        } else {
+            megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
+                          "invalid conv format");
+            c_pos = 4;
+        }
+
+        auto grp = args.filter_meta.group;
+
+        auto&& fm = args.filter_meta;
+        auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg *
+                        tsrc.layout.dtype.size(),
+             strd_dst = tdst.layout.stride[c_pos] * fm.ocpg *
+                        tdst.layout.dtype.size(),
+             strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
+                        fm.spatial[2] * tfilter.layout.dtype.size();
 
-    auto grp = args.filter_meta.group;
-
-    auto &&fm = args.filter_meta;
-    auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * tsrc.layout.dtype.size(),
-         strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * tdst.layout.dtype.size(),
-         strd_flt = fm.icpg * fm.ocpg *
-            fm.spatial[0] * fm.spatial[1] * fm.spatial[2] *
-            tflt.layout.dtype.size();
-    for (uint32_t g = 0; g < grp; ++ g) {
-        m_impl->exec(sub_args);
-        incr_voidp(tsrc.raw_ptr, strd_src);
-        incr_voidp(tdst.raw_ptr, strd_dst);
-        incr_voidp(tflt.raw_ptr, strd_flt);
+        for (uint32_t g = 0; g < grp; ++g) {
+            config.second->exec(tsrc, tfilter, tdst, bundle.get_workspace(0));
+            incr_voidp(tsrc.raw_ptr, strd_src);
+            incr_voidp(tdst.raw_ptr, strd_dst);
+            incr_voidp(tfilter.raw_ptr, strd_flt);
+        }
     }
 }
diff --git a/dnn/src/cuda/convolution3d/helper.h b/dnn/src/cuda/convolution3d/helper.h
index e8f68ccf32ff986b3a534fc8e495101b2e584ade..7a7567be52d8cb88ebccc3aa28f3e7d0a6424303 100644
--- a/dnn/src/cuda/convolution3d/helper.h
+++ b/dnn/src/cuda/convolution3d/helper.h
@@ -26,6 +26,7 @@ namespace convolution3d {
     struct ForwardSizeArgs {
         HandleImpl *handle;
         const TensorLayout *src_layout;
+        const TensorLayout *filter_layout;
         CanonizedFilterMeta filter_meta;
         const TensorLayout *dst_layout;
         param::Convolution3D::DataType data_type;
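
// ---------------------------------------------------------------------------
// get_subopr_list() keys each sub-operator search item on the serialized
// dense Param. The serializer is assumed here to be a plain byte-append of a
// trivially copyable struct; the following is a sketch of what
// Algorithm::serialize_write_pod is expected to do, not the library code:

#include <string>
#include <type_traits>

template <typename T>
void serialize_write_pod_sketch(const T& val, std::string& out) {
    // params are raw-byte serializable, so the bytes themselves form the key
    static_assert(std::is_trivially_copyable<T>::value,
                  "only trivially copyable params can be serialized this way");
    out.append(reinterpret_cast<const char*>(&val), sizeof(T));
}
// ---------------------------------------------------------------------------
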
diff --git a/dnn/src/cuda/convolution3d/opr_impl.cpp b/dnn/src/cuda/convolution3d/opr_impl.cpp
index c7719d6e19758999d40486bdac0409af27fcc569..cc883fb1e0085bc9cb1de650598837230f71c0e2 100644
--- a/dnn/src/cuda/convolution3d/opr_impl.cpp
+++ b/dnn/src/cuda/convolution3d/opr_impl.cpp
@@ -35,16 +35,6 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
         const TensorLayout& dst, size_t workspace_limit_in_bytes,
         const AlgoAttribute& positive_attr,
         const AlgoAttribute& negative_attr) {
-    auto fm = check_layout_fwd(src, filter, dst);
-    return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes,
-                                   positive_attr, negative_attr);
-}
-Convolution3DForwardImpl::Algorithm*
-Convolution3DForwardImpl::get_algorithm_heuristic(
-        const TensorLayout& src, const CanonizedFilterMeta& filter,
-        const TensorLayout& dst, size_t workspace_limit_in_bytes,
-        const AlgoAttribute& positive_attr,
-        const AlgoAttribute& negative_attr) {
     AlgoBase::SizeArgs args(this, src, filter, dst);
 
 #if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5)
@@ -98,18 +88,14 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
         if (auto algo = get_cudnn_algo())
             return algo;
     }
+
     if (args.filter_meta.group > 1) {
-        auto orig_args = args;
-        TensorLayout a, b;
-        AlgoGroupConvGeneral::modify_size_args(args, a, b);
-        if (prefer_1x1x1()) {
-            return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1);
-        }
-        if (is_cudnn_supported(args)) {
-            if (auto algo = get_cudnn_algo())
-                return sm_algo_pack.algo2gconv.at(algo);
+        if (auto algo =
+                    megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
+                            &sm_algo_pack.group, positive_attr,
+                            negative_attr)) {
+            return algo;
         }
-        args = orig_args;
     }
 
     return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
@@ -129,7 +115,7 @@ size_t Convolution3DForwardImpl::get_workspace_in_bytes(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& dst) {
     AlgoBase::SizeArgs args(this, src, filter, dst);
-    return get_algorithm(this, src, args.filter_meta, dst)
+    return get_algorithm(this, src, filter, dst)
             ->get_workspace_in_bytes(args);
 }
 
@@ -138,7 +124,7 @@ void Convolution3DForwardImpl::exec(_megdnn_tensor_in src,
                                     _megdnn_tensor_out dst,
                                     _megdnn_workspace workspace) {
     AlgoBase::ExecArgs args(this, src, filter, dst, workspace);
-    auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout);
+    auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout);
     algo->check_workspace(args, workspace).exec(args);
 }
 
@@ -151,7 +137,7 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter,
                                          _megdnn_tensor_out grad,
                                          _megdnn_workspace workspace) {
     AlgoBase::ExecArgs args(this, filter, diff, grad, workspace);
-    auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout);
+    auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout);
     algo->check_workspace(args, workspace).exec(args);
 }
 
@@ -169,17 +155,6 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
         const TensorLayout& grad, size_t workspace_limit_in_bytes,
         const AlgoAttribute& positive_attr,
         const AlgoAttribute& negative_attr) {
-    auto fm = check_layout_fwd(grad, filter, diff);
-    return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes,
-                                   positive_attr, negative_attr);
-}
-
-Convolution3DBackwardDataImpl::Algorithm*
-Convolution3DBackwardDataImpl::get_algorithm_heuristic(
-        const CanonizedFilterMeta& filter, const TensorLayout& diff,
-        const TensorLayout& grad, size_t workspace_limit_in_bytes,
-        const AlgoAttribute& positive_attr,
-        const AlgoAttribute& negative_attr) {
     AlgoBase::SizeArgs args(this, filter, diff, grad);
 
     if (args.filter_meta.group > 1 &&
@@ -215,14 +190,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
     }
 
     if (args.filter_meta.group > 1) {
-        auto orig_args = args;
-        TensorLayout a, b;
-        AlgoGroupConvGeneral::modify_size_args(args, a, b);
-        if (is_cudnn_supported(args.as_fwd_args())) {
-            if (auto algo = get_cudnn_algo())
-                return sm_algo_pack.algo2gconv.at(algo);
+        if (auto algo = megdnn::get_algo_match_attribute<
+                    Convolution3DBackwardDataImpl>(
+                    &sm_algo_pack.group, positive_attr, negative_attr)) {
+            return algo;
         }
-        args = orig_args;
     }
 
     return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>(
@@ -234,7 +206,7 @@ size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes(
         const TensorLayout& filter, const TensorLayout& diff,
         const TensorLayout& grad) {
     AlgoBase::SizeArgs args(this, filter, diff, grad);
-    return get_algorithm(this, args.filter_meta, diff, grad)
+    return get_algorithm(this, filter, diff, grad)
             ->get_workspace_in_bytes(args);
 }
 
@@ -248,7 +220,7 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src,
                                            _megdnn_workspace workspace) {
     AlgoBase::ExecArgs args(this, src, diff, grad, workspace);
     auto algo =
-            get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta);
+            get_algorithm(this, src.layout, diff.layout, grad.layout);
     algo->check_workspace(args, workspace).exec(args);
 }
 
@@ -266,17 +238,6 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
         const TensorLayout& grad, size_t workspace_limit_in_bytes,
         const AlgoAttribute& positive_attr,
         const AlgoAttribute& negative_attr) {
-    auto fm = check_layout_fwd(src, grad, diff);
-    return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes,
-                                   positive_attr, negative_attr);
-}
-
-Convolution3DBackwardFilterImpl::Algorithm*
-Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
-        const TensorLayout& src, const TensorLayout& diff,
-        const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
-        const AlgoAttribute& positive_attr,
-        const AlgoAttribute& negative_attr) {
     AlgoBase::SizeArgs args(this, src, diff, grad);
 
     if (args.grad_filter_meta.group > 1 &&
@@ -310,15 +271,13 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
         if (auto algo = get_cudnn_algo())
             return algo;
     }
+
     if (args.grad_filter_meta.group > 1) {
-        auto orig_args = args;
-        TensorLayout a, b;
-        AlgoGroupConvGeneral::modify_size_args(args, a, b);
-        if (is_cudnn_supported(args.as_fwd_args())) {
-            if (auto algo = get_cudnn_algo())
-                return sm_algo_pack.algo2gconv.at(algo);
+        if (auto algo = megdnn::get_algo_match_attribute<
+                    Convolution3DBackwardFilterImpl>(
+                    &sm_algo_pack.group, positive_attr, negative_attr)) {
+            return algo;
         }
-        args = orig_args;
     }
 
     return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>(
@@ -330,7 +289,7 @@ size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes(
         const TensorLayout& src, const TensorLayout& diff,
         const TensorLayout& grad) {
     AlgoBase::SizeArgs args(this, src, diff, grad);
-    return get_algorithm(this, src, diff, args.grad_filter_meta)
+    return get_algorithm(this, src, diff, grad)
             ->get_workspace_in_bytes(args);
 }
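
// ---------------------------------------------------------------------------
// All three heuristics in this file now share one shape (schematic summary of
// the surrounding code; the first branch is abbreviated in the hunks above
// and assumed here to be the chanwise special case):
//
//   if (group > 1 && special-case layout matches)  -> dedicated algo
//   if (is_cudnn_supported(...))                   -> cuDNN algo within the
//                                                     workspace limit
//   if (group > 1)                                 -> sm_algo_pack.group
//   otherwise                                      -> attribute-matched
//                                                     fallback list
//
// The grouped branch no longer rewrites args and looks a wrapped algorithm up
// in algo2gconv; choosing the inner algorithm is deferred to the sub-operator
// machinery advertised through get_subopr_list().
// ---------------------------------------------------------------------------
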
diff --git a/dnn/src/cuda/convolution3d/opr_impl.h b/dnn/src/cuda/convolution3d/opr_impl.h
index 815e7b2cf1ae3c071bca5bf0edf3c3000d64d47a..f240ae5d74793bb07d69e3fbf2ba30c3cfb92066 100644
--- a/dnn/src/cuda/convolution3d/opr_impl.h
+++ b/dnn/src/cuda/convolution3d/opr_impl.h
@@ -21,17 +21,6 @@ public:
     using Convolution3DForward::Convolution3DForward;
     void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
               _megdnn_tensor_out dst, _megdnn_workspace workspace) override;
-    AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& src,
-                                               const CanonizedFilterMeta& filter,
-                                               const TensorLayout& dst,
-                                               size_t workspace_limit_in_bytes,
-                                               const AlgoAttribute& positive_attr,
-                                               const AlgoAttribute& negative_attr) {
-        return get_algorithm_heuristic(src, filter, dst,
-                                       workspace_limit_in_bytes, positive_attr,
-                                       negative_attr)
-                ->info();
-    }
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& filter,
                                   const TensorLayout& dst) override;
@@ -57,13 +46,6 @@ protected:
             const AlgoAttribute& negative_attr) override;
 
 private:
-    Algorithm* get_algorithm_heuristic(const TensorLayout& src,
-                                       const CanonizedFilterMeta& filter,
-                                       const TensorLayout& dst,
-                                       size_t workspace_limit_in_bytes,
-                                       const AlgoAttribute& positive_attr,
-                                       const AlgoAttribute& negative_attr);
-
     static AlgoPack sm_algo_pack;
 };
 
@@ -72,16 +54,6 @@ public:
     using Convolution3DBackwardData::Convolution3DBackwardData;
     void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff,
               _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
-    AlgorithmInfo get_algorithm_info_heuristic(
-            const CanonizedFilterMeta& filter, const TensorLayout& diff,
-            const TensorLayout& grad, size_t workspace_limit_in_bytes,
-            const AlgoAttribute& positive_attr,
-            const AlgoAttribute& negative_attr) {
-        return get_algorithm_heuristic(filter, diff, grad,
-                                       workspace_limit_in_bytes, positive_attr,
-                                       negative_attr)
-                ->info();
-    }
     size_t get_workspace_in_bytes(const TensorLayout& filter,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override;
@@ -109,13 +81,6 @@ protected:
             const AlgoAttribute& negative_attr) override;
 
 private:
-    Algorithm* get_algorithm_heuristic(const CanonizedFilterMeta& filter,
-                                       const TensorLayout& diff,
-                                       const TensorLayout& grad,
-                                       size_t workspace_limit_in_bytes,
-                                       const AlgoAttribute& positive_attr,
-                                       const AlgoAttribute& negative_attr);
-
     static AlgoPack sm_algo_pack;
 };
 
@@ -127,17 +92,6 @@ public:
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override;
-    AlgorithmInfo get_algorithm_info_heuristic(
-            const TensorLayout& src, const TensorLayout& diff,
-            const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
-            const AlgoAttribute& positive_attr,
-            const AlgoAttribute& negative_attr) {
-        return get_algorithm_heuristic(src, diff, grad,
-                                       workspace_limit_in_bytes, positive_attr,
-                                       negative_attr)
-                ->info();
-    }
-
     const char* get_algorithm_set_name() const override;
 
     class AlgoBase;
@@ -162,13 +116,6 @@ protected:
             const AlgoAttribute& negative_attr) override;
 
 private:
-    Algorithm* get_algorithm_heuristic(const TensorLayout& src,
-                                       const TensorLayout& diff,
-                                       const CanonizedFilterMeta& grad,
-                                       size_t workspace_limit_in_bytes,
-                                       const AlgoAttribute& positive_attr,
-                                       const AlgoAttribute& negative_attr);
-
     static AlgoPack sm_algo_pack;
 };
 } // namespace cuda
diff --git a/dnn/test/cuda/group_conv3d.cpp b/dnn/test/cuda/group_conv3d.cpp
index 402d0f61463495afdc94b93ffe225631cfd37477..611293c425e37264617e009cd8a79d3e0d59058e 100644
--- a/dnn/test/cuda/group_conv3d.cpp
+++ b/dnn/test/cuda/group_conv3d.cpp
@@ -66,12 +66,10 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD_1x1x1) {
     auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                    size_t FD, size_t FH, size_t FW, size_t OC, size_t group) {
         Checker<Convolution3DForward> checker(handle_cuda());
-#if CUDNN_MAJOR <= 6
-        bool require_algo = true;
-        checker.set_before_exec_callback(
-                AlgoChecker<Convolution3DForward>{
-                        "group_conv3d:1x1x1", &require_algo});
-#endif
+        checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>(
+                ExecutionPolicyAlgoName{"CUDA:GROUP_CONV3D_FORWARD",
{{"1x1x1", + {}}}})); Convolution3D::Param param; param.sparse = Convolution3D::Param::Sparse::GROUP; auto ICg = IC / group; @@ -125,6 +123,45 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA) { run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); } +TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA_CUDNN) { + auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, + size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, + size_t OH, size_t OW, size_t PD, size_t PH, size_t PW, + size_t SD, size_t SH, size_t SW, size_t group) { + Checker checker(handle_cuda()); + checker.set_before_exec_callback( + AlgoChecker(ExecutionPolicyAlgoName{ + "CUDA:GROUP_CONV3D_BACKWARD_DATA", {{"CUDNN", {}}}})); + Convolution3DBackwardData::Param param; + param.sparse = Convolution3D::Param::Sparse::GROUP; + param.pad_d = PD; + param.pad_h = PH; + param.pad_w = PW; + param.stride_d = SD; + param.stride_h = SH; + param.stride_w = SW; + auto ICg = IC / group; + auto OCg = OC / group; + checker.set_param(param).exec({{group, OCg, ICg, FD, FH, FW}, + {N, OC, OD, OH, OW}, + {N, IC, ID, IH, IW}}); + }; + // bug case in prev ver + + run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2); + run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 2, 2); + run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 2); + run(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 2); + // normal case + run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2); + // padded case + run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4); + // strided case + run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); + // bigger case + run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4); +} + TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, @@ -153,6 +190,39 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) { run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); } +TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER_CUDNN) { + auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, + size_t FD, size_t FH, size_t FW, size_t OC, size_t OD, + size_t OH, size_t OW, size_t PD, size_t PH, size_t PW, + size_t SD, size_t SH, size_t SW, size_t group) { + Checker checker(handle_cuda()); + checker.set_before_exec_callback( + AlgoChecker( + ExecutionPolicyAlgoName{ + "CUDA:GROUP_CONV3D_BACKWARD_FILTER", + {{"CUDNN", {}}}})); + Convolution3DBackwardFilter::Param param; + param.sparse = Convolution3D::Param::Sparse::GROUP; + param.pad_d = PD; + param.pad_h = PH; + param.pad_w = PW; + param.stride_d = SD; + param.stride_h = SH; + param.stride_w = SW; + auto ICg = IC / group; + auto OCg = OC / group; + checker.set_param(param).exec({{N, IC, ID, IH, IW}, + {N, OC, OD, OH, OW}, + {group, OCg, ICg, FD, FH, FW}}); + }; + // normal case + run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2); + // padded case + run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4); + // strided case + run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8); +} + } // namespace test } // namespace megdnn