From d04cd67fafcb105278345ff924320ac61c70aa8f Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Fri, 11 Jun 2021 19:17:20 +0800
Subject: [PATCH] refactor(mgb): make conv-backward-filter handle noncontiguous tensors

GitOrigin-RevId: 44c586f912a235f5610f7ef40a815181726aa153
---
 dnn/src/common/convolution.cpp                |  7 +++-
 .../convolution/backward_data/chanwise.cpp    |  4 +++
 .../backward_data/chanwise_small.cpp          |  4 +++
 .../cuda/convolution/backward_filter/algo.cpp |  2 +-
 .../convolution/backward_filter/chanwise.cpp  |  4 +++
 .../convolution/backward_filter/cudnn.cpp     |  7 ++++
 .../convolution/backward_filter/matmul.cpp    |  5 +--
 dnn/test/cuda/convolution.cpp                 | 32 +++++++++++++++++++
 8 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/dnn/src/common/convolution.cpp b/dnn/src/common/convolution.cpp
index 1474f7c7..fc81aab7 100644
--- a/dnn/src/common/convolution.cpp
+++ b/dnn/src/common/convolution.cpp
@@ -1189,7 +1189,12 @@ ConvolutionBackwardFilter::check_exec(const TensorLayout& src,
                           diff.dtype.category() == DTypeCategory::FLOAT &&
                           grad.dtype.category() == DTypeCategory::FLOAT,
                   "only float type is supported for conv backward filter");
-    auto ret = check_layout_fwd(src, grad, diff);
+    auto src_fwd = src;
+    auto diff_fwd = diff;
+
+    src_fwd.init_contiguous_stride();
+    diff_fwd.init_contiguous_stride();
+    auto ret = check_layout_fwd(src_fwd, grad, diff_fwd);
     auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
     return ret;
diff --git a/dnn/src/cuda/convolution/backward_data/chanwise.cpp b/dnn/src/cuda/convolution/backward_data/chanwise.cpp
index 89562f4d..a5a7f510 100644
--- a/dnn/src/cuda/convolution/backward_data/chanwise.cpp
+++ b/dnn/src/cuda/convolution/backward_data/chanwise.cpp
@@ -20,6 +20,10 @@ using namespace convolution;
 
 bool ConvolutionBackwardDataImpl::AlgoChanwise::is_available(
         const SizeArgs& args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     if ((args.diff_layout->dtype == args.filter_layout->dtype &&
          args.diff_layout->dtype == dtype::BFloat16()) ||
         (args.diff_layout->dtype == args.filter_layout->dtype &&
diff --git a/dnn/src/cuda/convolution/backward_data/chanwise_small.cpp b/dnn/src/cuda/convolution/backward_data/chanwise_small.cpp
index bb8ba182..755f5359 100644
--- a/dnn/src/cuda/convolution/backward_data/chanwise_small.cpp
+++ b/dnn/src/cuda/convolution/backward_data/chanwise_small.cpp
@@ -30,6 +30,10 @@ inline bool is_available_small(const chanwise::Param& param) {
 
 bool ConvolutionBackwardDataImpl::AlgoChanwiseSmall::is_available(
         const SizeArgs& args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     if ((args.diff_layout->dtype == args.filter_layout->dtype &&
          args.diff_layout->dtype == dtype::BFloat16()) ||
         (args.diff_layout->dtype == args.filter_layout->dtype &&
diff --git a/dnn/src/cuda/convolution/backward_filter/algo.cpp b/dnn/src/cuda/convolution/backward_filter/algo.cpp
index bd367f80..7560e438 100644
--- a/dnn/src/cuda/convolution/backward_filter/algo.cpp
+++ b/dnn/src/cuda/convolution/backward_filter/algo.cpp
@@ -71,7 +71,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
         ConvolutionBackwardFilterImpl *o, const TensorLayout &src,
         const TensorLayout &diff, const TensorLayout &grad):
-    SizeArgs(o, src, diff, grad, o->check_layout_fwd(src, grad, diff))
+    SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad))
 {
 }
 
diff --git a/dnn/src/cuda/convolution/backward_filter/chanwise.cpp b/dnn/src/cuda/convolution/backward_filter/chanwise.cpp
index c3b3f731..ad91fed9 100644
--- a/dnn/src/cuda/convolution/backward_filter/chanwise.cpp
+++ b/dnn/src/cuda/convolution/backward_filter/chanwise.cpp
@@ -19,6 +19,10 @@ using namespace convolution;
 
 bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available(
         const SizeArgs &args) const {
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
     if (args.src_layout->dtype == args.src_layout->dtype &&
         args.diff_layout->dtype == dtype::BFloat16()) {
         return false;
diff --git a/dnn/src/cuda/convolution/backward_filter/cudnn.cpp b/dnn/src/cuda/convolution/backward_filter/cudnn.cpp
index acf39d75..07addd6c 100644
--- a/dnn/src/cuda/convolution/backward_filter/cudnn.cpp
+++ b/dnn/src/cuda/convolution/backward_filter/cudnn.cpp
@@ -21,6 +21,13 @@ using namespace convolution;
 
 bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available(
         const SizeArgs &args) const {
+    if (args.grad_filter_meta.format != Param::Format::NCHW &&
+        args.grad_filter_meta.format != Param::Format::NHWC) {
+        if (!args.grad_layout->is_contiguous() ||
+            !args.diff_layout->is_contiguous()) {
+            return false;
+        }
+    }
     auto& cudnn = args.handle->cudnn();
 
     CUDNNBwdFilterDescs D;
diff --git a/dnn/src/cuda/convolution/backward_filter/matmul.cpp b/dnn/src/cuda/convolution/backward_filter/matmul.cpp
index b5977e7d..7d0bbeec 100644
--- a/dnn/src/cuda/convolution/backward_filter/matmul.cpp
+++ b/dnn/src/cuda/convolution/backward_filter/matmul.cpp
@@ -64,8 +64,9 @@ ConvolutionBackwardFilterImpl::AlgoMatmul::get_subopr_list(
         const TensorLayoutArray& layouts, const OperatorBase* opr) const {
     const ConvolutionBackwardFilterImpl* conv_backward_filter_opr =
             static_cast<const ConvolutionBackwardFilterImpl*>(opr);
-    CanonizedFilterMeta fm = conv_backward_filter_opr->check_layout_fwd(
-            layouts[0], layouts[2], layouts[1]);
+    CanonizedFilterMeta fm =
+            conv_backward_filter_opr->make_canonized_filter_meta(
+                    layouts[0].ndim, layouts[2]);
     auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[2],
                                    conv_backward_filter_opr);
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index 75945326..43eae93a 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -519,6 +519,38 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) {
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{src, dst, filter});
     }
+    //! noncontiguous case
+    {
+        NormalRNG default_rng;
+        param::Convolution param;
+        param.pad_h = param.pad_w = 1;
+        checker.set_rng(0, &default_rng)
+                .set_rng(1, &default_rng)
+                .set_param(param)
+                .execl(TensorLayoutArray{
+                        {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                        {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                        {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}});
+    }
+}
+
+TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) {
+    if (cuda::is_compute_capability_required(7, 0))
+        return;
+    using namespace convolution;
+    Checker<ConvolutionBackwardFilter> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
+            "CUDNN_CONVOLUTION"));
+    //! noncontiguous case
+    {
+        param::Convolution param;
+        param.pad_h = param.pad_w = 1;
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}
+        });
+    }
 }
 
 TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
-- 
GitLab
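
Note (not part of the patch): the sketch below illustrates why the stride set {1568, 49, 7, 1} used in the new tests is noncontiguous for a {2, 16, 7, 7} NCHW tensor, and how the contiguity checks added above would treat it. The include paths and the `example` function are assumptions for illustration; `TensorLayout`, `init_contiguous_stride()`, and `is_contiguous()` are the MegDNN APIs already used in the patch.

```cpp
// Illustrative sketch only; assumes these MegDNN headers are reachable when
// building against the dnn/include directory of the repository.
#include "megdnn/basic_types.h"  // TensorLayout, TensorShape
#include "megdnn/dtype.h"        // dtype::Float32

using namespace megdnn;

void example() {
    // Contiguous NCHW layout for shape {2, 16, 7, 7}:
    // strides become {16 * 7 * 7, 7 * 7, 7, 1} = {784, 49, 7, 1}.
    TensorLayout contig{{2, 16, 7, 7}, dtype::Float32()};
    contig.init_contiguous_stride();

    // Layout used by the new tests: same shape, but the batch stride is
    // 1568 = 2 * 784, i.e. the view skips every other batch of a larger
    // buffer, so the layout is noncontiguous.
    TensorLayout strided{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()};

    bool ok = contig.is_contiguous();    // true
    bool bad = strided.is_contiguous();  // false: chanwise algorithms now
                                         // reject this early, while the cuDNN
                                         // and matmul paths handle it, which
                                         // is what the new tests exercise.
    (void)ok;
    (void)bad;
}
```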