From 44376f702a055dc46658e1baa0d92babb84c7327 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Fri, 11 Jun 2021 18:24:29 +0800
Subject: [PATCH] refactor(mgb): make conv-backward-data handle noncontiguous
 tensors

GitOrigin-RevId: 0a8f66f9d378b6466bc383a94c57ec80bcc5cb74
---
 dnn/src/cuda/conv_bias/cudnn_conv.cpp         |  8 ++++
 .../conv_bias/cudnn_conv_bias_activation.cpp  |  9 +++--
 .../cuda/convolution/backward_data/algo.cpp   |  4 +-
 .../cuda/convolution/backward_data/cudnn.cpp  |  8 ++++
 .../implicit_gemm_int8_nchw4_dp4a.cpp         |  5 +++
 .../implicit_gemm_int8_nchw_dp4a.cpp          |  5 +++
 .../cuda/convolution/backward_data/matmul.cpp |  4 +-
 dnn/src/naive/convolution/helper.h            |  2 -
 dnn/test/cuda/convolution.cpp                 | 39 +++++++++++++++++++
 9 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/dnn/src/cuda/conv_bias/cudnn_conv.cpp b/dnn/src/cuda/conv_bias/cudnn_conv.cpp
index dbc3f4a9..1be71d81 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv.cpp
@@ -23,6 +23,14 @@ bool ConvBiasForwardImpl::AlgoCUDNNConv::is_available(
     if (args.z_layout->ndim > 0)
         return false;
 
+    if (args.filter_meta.format != Param::Format::NCHW &&
+        args.filter_meta.format != Param::Format::NHWC) {
+        if (!args.src_layout->is_contiguous() ||
+            !args.dst_layout->is_contiguous()) {
+            return false;
+        }
+    }
+
     auto dst_layout = *args.dst_layout;
     if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) {
         dst_layout.dtype = DType();
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
index 590bc8c3..9839dcbe 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
@@ -24,9 +24,12 @@ using namespace conv_bias;
 
 bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
         const SizeArgs& args) const {
-    if (!args.src_layout->is_contiguous() ||
-        !args.dst_layout->is_contiguous()) {
-        return false;
+    if (args.filter_meta.format != Param::Format::NCHW &&
+        args.filter_meta.format != Param::Format::NHWC) {
+        if (!args.src_layout->is_contiguous() ||
+            !args.dst_layout->is_contiguous()) {
+            return false;
+        }
     }
     if ((args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS4 ||
          args.src_layout->dtype.enumv() == DTypeEnum::Quantized4Asymm) &&
diff --git a/dnn/src/cuda/convolution/backward_data/algo.cpp b/dnn/src/cuda/convolution/backward_data/algo.cpp
index f178240b..bc91fa3f 100644
--- a/dnn/src/cuda/convolution/backward_data/algo.cpp
+++ b/dnn/src/cuda/convolution/backward_data/algo.cpp
@@ -82,8 +82,8 @@ ConvolutionBackwardDataImpl::AlgoPack ConvolutionBackwardDataImpl::sm_algo_pack;
 ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
         ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
         const TensorLayout& diff, const TensorLayout& grad)
-        : SizeArgs(o, filter, o->check_layout_fwd(grad, filter, diff), diff,
-                   grad) {}
+        : SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter),
+                   diff, grad) {}
 
 ConvolutionBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
         ConvolutionBackwardDataImpl* o, const TensorLayout& filter,
diff --git a/dnn/src/cuda/convolution/backward_data/cudnn.cpp b/dnn/src/cuda/convolution/backward_data/cudnn.cpp
index 0d2d4be5..821a8426 100644
--- a/dnn/src/cuda/convolution/backward_data/cudnn.cpp
+++ b/dnn/src/cuda/convolution/backward_data/cudnn.cpp
@@ -21,6 +21,14 @@ using namespace convolution;
 
 bool ConvolutionBackwardDataImpl::AlgoCUDNN::is_available(
         const SizeArgs &args) const {
+    if (args.filter_meta.format != Param::Format::NCHW &&
+        args.filter_meta.format != Param::Format::NHWC) {
+        if (!args.grad_layout->is_contiguous() ||
+            !args.diff_layout->is_contiguous()) {
+            return false;
+        }
+    }
+
     CUDNNBwdDataDescs D;
 
     if (!is_cudnn_supported(args.as_fwd_args()))
diff --git a/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw4_dp4a.cpp b/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw4_dp4a.cpp
index 4ea73378..10f0c70d 100644
--- a/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw4_dp4a.cpp
+++ b/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw4_dp4a.cpp
@@ -25,6 +25,11 @@ bool ConvolutionBackwardDataImpl::AlgoInt8NCHW4DotProdImplicitGemm::
     if (fm.format != Param::Format::NCHW4)
         return false;
 
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
+
     bool available = true;
 
     auto src_dtype = args.diff_layout->dtype,
diff --git a/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw_dp4a.cpp b/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw_dp4a.cpp
index a3755b8c..a8f2fd9d 100644
--- a/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw_dp4a.cpp
+++ b/dnn/src/cuda/convolution/backward_data/implicit_gemm_int8_nchw_dp4a.cpp
@@ -25,6 +25,11 @@ bool ConvolutionBackwardDataImpl::AlgoInt8NCHWDotProdImplicitGemm::
     if (fm.format != Param::Format::NCHW)
         return false;
 
+    if (!args.grad_layout->is_contiguous() ||
+        !args.diff_layout->is_contiguous()) {
+        return false;
+    }
+
     bool available = true;
 
     auto src_dtype = args.diff_layout->dtype,
diff --git a/dnn/src/cuda/convolution/backward_data/matmul.cpp b/dnn/src/cuda/convolution/backward_data/matmul.cpp
index e4059d42..165e60f4 100644
--- a/dnn/src/cuda/convolution/backward_data/matmul.cpp
+++ b/dnn/src/cuda/convolution/backward_data/matmul.cpp
@@ -64,8 +64,8 @@ ConvolutionBackwardDataImpl::AlgoMatmul::get_subopr_list(
         const TensorLayoutArray& layouts, const OperatorBase* opr) const {
     const ConvolutionBackwardDataImpl* conv_backward_data_opr =
             static_cast<const ConvolutionBackwardDataImpl*>(opr);
-    CanonizedFilterMeta fm = conv_backward_data_opr->check_layout_fwd(
-            layouts[2], layouts[0], layouts[1]);
+    CanonizedFilterMeta fm = conv_backward_data_opr->make_canonized_filter_meta(
+            layouts[2].ndim, layouts[0]);
     auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[2],
                                    conv_backward_data_opr);
 
diff --git a/dnn/src/naive/convolution/helper.h b/dnn/src/naive/convolution/helper.h
index d498252a..0eb38de9 100644
--- a/dnn/src/naive/convolution/helper.h
+++ b/dnn/src/naive/convolution/helper.h
@@ -661,7 +661,6 @@ template
 void backward_data(_megdnn_tensor_in filter, _megdnn_tensor_in diff,
                    _megdnn_tensor_out grad,
                    const Convolution::CanonizedFilterMeta& filter_meta) {
-    megdnn_assert(grad.layout.is_contiguous());
     memset(grad.raw_ptr, 0, grad.layout.span().dist_byte());
     megdnn_assert(filter_meta.spatial_ndim == 2);
     if (filter_meta.format == param::Convolution::Format::NHWCD4) {
@@ -676,7 +675,6 @@ template
 void backward_filter(_megdnn_tensor_in src, _megdnn_tensor_in diff,
                      _megdnn_tensor_out grad,
                      const Convolution::CanonizedFilterMeta& filter_meta) {
-    megdnn_assert(grad.layout.is_contiguous());
     memset(grad.raw_ptr, 0, grad.layout.span().dist_byte());
     megdnn_assert(filter_meta.spatial_ndim == 2);
     compute2d(
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index 7bde5223..75945326 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -238,6 +238,25 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA) {
     }
 }
 
+TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_CUDNN) {
+    if (cuda::is_compute_capability_required(7, 0))
+        return;
+    using namespace convolution;
+    Checker<ConvolutionBackwardData> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
+            "CUDNN_CONVOLUTION"));
+    //! noncontiguous case
+    {
+        param::Convolution param;
+        param.pad_h = param.pad_w = 1;
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()},
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+        });
+    }
+}
+
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_MATMUL) {
     using namespace convolution;
     std::vector<TestArg> args = get_args_cuda_conv_bwd_data();
@@ -265,6 +284,16 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_MATMUL) {
                     .set_param(arg.param)
                     .exec(TensorLayoutArray{filter, dst, src});
     }
+    //! noncontiguous case
+    {
+        param::Convolution param;
+        param.pad_h = param.pad_w = 1;
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()},
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
+        });
+    }
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
@@ -355,6 +384,16 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
         }
         checker.set_rng(0, &rng).set_rng(1, &rng).set_param(arg.param).exec(
                 TensorLayoutArray{filter, dst, src});
+        //! noncontiguous case
+        {
+            param::Convolution param;
+            param.pad_h = param.pad_w = 1;
+            checker.set_param(param).execl(TensorLayoutArray{
+                    {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::QuantizedS8{1.3f}},
+                    {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}},
+                    {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}}
+            });
+        }
     }
 }
-- 
GitLab
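
A note on why the relaxation is gated on NCHW/NHWC in the cuDNN-backed
algorithms: cuDNN tensor descriptors carry explicit per-dimension strides, so
a noncontiguous NCHW view can be described to the library directly, while the
blocked formats (e.g. NCHW4) have no such stride escape hatch and keep the
is_contiguous() early-out. Below is a minimal standalone sketch of such a
descriptor, assuming a cuDNN installation; error handling is elided, and this
is illustrative only (the patch itself builds descriptors through the
existing CUDNNBwdDataDescs helper).

    #include <cudnn.h>

    // Describe the noncontiguous layout used by the new tests:
    // shape {2, 16, 7, 7} with a doubled batch stride of 1568
    // (a 16-channel slice of a contiguous {2, 32, 7, 7} tensor).
    cudnnTensorDescriptor_t make_strided_nchw_desc() {
        cudnnTensorDescriptor_t desc;
        cudnnCreateTensorDescriptor(&desc);
        cudnnSetTensor4dDescriptorEx(desc, CUDNN_DATA_FLOAT,
                                     /*n=*/2, /*c=*/16, /*h=*/7, /*w=*/7,
                                     /*nStride=*/1568, /*cStride=*/49,
                                     /*hStride=*/7, /*wStride=*/1);
        return desc;
    }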
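
On replacing check_layout_fwd with make_canonized_filter_meta in
SizeArgs and AlgoMatmul::get_subopr_list: the former validates the whole
forward layout triple, which is where noncontiguous diff/grad layouts used to
be rejected wholesale; the latter only derives the canonical filter
description from the filter layout itself, leaving each algorithm's
is_available() to accept or reject strides on its own. A much-simplified
sketch of that idea, with hypothetical names and a group == 1, dense-NCHW
assumption (megdnn's real CanonizedFilterMeta and signatures differ):

    #include <array>
    #include <cstddef>

    // Hypothetical, trimmed-down filter description.
    struct FilterMeta {
        std::size_t ocpg, icpg;              // out/in channels per group
        std::array<std::size_t, 2> spatial;  // {FH, FW}
    };

    // Reads only the filter layout {OC, IC, FH, FW}; it never touches the
    // diff/grad layouts, so their strides cannot cause a rejection here.
    FilterMeta make_filter_meta(const std::array<std::size_t, 4>& filter) {
        return {filter[0], filter[1], {filter[2], filter[3]}};
    }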
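
Why dropping megdnn_assert(grad.layout.is_contiguous()) from the naive
helpers looks safe: the memset that follows clears
grad.layout.span().dist_byte(), i.e. the full strided extent of the layout
rather than just shape-product-many elements. A sketch of that span
computation for layouts with non-negative strides (hypothetical helper, not
megdnn's actual TensorLayout::span()):

    #include <cstddef>
    #include <vector>

    // Number of elements from the first to one past the last addressed
    // element of a strided layout (non-negative strides assumed).
    std::size_t span_elems(const std::vector<std::size_t>& shape,
                           const std::vector<std::size_t>& stride) {
        std::size_t last = 0;
        for (std::size_t i = 0; i < shape.size(); ++i)
            last += (shape[i] - 1) * stride[i];
        return last + 1;
    }
    // For the test layout: span_elems({2, 16, 7, 7}, {1568, 49, 7, 1})
    // == 1568 + 735 + 42 + 6 + 1 == 2352, which exceeds the 1568 elements
    // a contiguous {2, 16, 7, 7} tensor would occupy.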
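
Finally, the stride choice in the new tests: for shape {2, 16, 7, 7} the
contiguous strides would be {784, 49, 7, 1}, whereas the tests use
{1568, 49, 7, 1} — exactly what slicing the first 16 channels out of a
contiguous {2, 32, 7, 7} tensor yields (the filter layout
{16, 16, 3, 3}/{144, 9, 3, 1} stays contiguous). A standalone check with a
simplified contiguity predicate (megdnn's real TensorLayout::is_contiguous()
also special-cases dimensions of extent 1):

    #include <cstdio>
    #include <vector>

    // A layout is contiguous iff each stride equals the product of all
    // extents to its right (simplified; extent-1 dims not special-cased).
    bool is_contiguous(const std::vector<long>& shape,
                       const std::vector<long>& stride) {
        long expect = 1;
        for (int i = (int)shape.size() - 1; i >= 0; --i) {
            if (stride[i] != expect)
                return false;
            expect *= shape[i];
        }
        return true;
    }

    int main() {
        // diff/grad layout from the tests: batch stride 1568 == 32 * 49.
        std::printf("diff/grad contiguous: %d\n",
                    is_contiguous({2, 16, 7, 7}, {1568, 49, 7, 1}));  // 0
        // filter layout from the tests: fully dense.
        std::printf("filter contiguous:    %d\n",
                    is_contiguous({16, 16, 3, 3}, {144, 9, 3, 1}));   // 1
    }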