From d915c5a3fdb48e2a1f2790bb6f5dfb5cf44731c4 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 24 Jun 2021 14:18:02 +0800 Subject: [PATCH] refactor(mgb): make convolution3D handle noncontiguous tensors GitOrigin-RevId: 3d3c31b02161532637948ba9aec42d161ec05e92 --- dnn/src/common/convolution3d.cpp | 25 +++- .../convolution/backward_data/group_conv.cpp | 2 + .../convolution/backward_filter/chanwise.cpp | 2 +- .../backward_filter/group_conv.cpp | 2 + .../cuda/convolution3d/backward_data/algo.cpp | 2 +- .../convolution3d/backward_data/chanwise.cpp | 4 + .../backward_data/group_conv.cpp | 2 + .../convolution3d/backward_filter/algo.cpp | 2 +- .../backward_filter/chanwise.cpp | 4 + .../backward_filter/group_conv.cpp | 2 + .../backward_filter/inplace_matmul.cpp | 4 + dnn/src/cuda/convolution3d/forward/algo.cpp | 2 +- .../cuda/convolution3d/forward/chanwise.cpp | 4 + .../cuda/convolution3d/forward/group_conv.cpp | 2 + dnn/src/naive/convolution3d/helper.h | 2 - dnn/test/cuda/convolution.cpp | 10 -- dnn/test/cuda/convolution3d.cpp | 125 ++++++++++++++++++ 17 files changed, 175 insertions(+), 21 deletions(-) diff --git a/dnn/src/common/convolution3d.cpp b/dnn/src/common/convolution3d.cpp index 98aee7298..5f2be1f7e 100644 --- a/dnn/src/common/convolution3d.cpp +++ b/dnn/src/common/convolution3d.cpp @@ -122,8 +122,6 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBase::deduce_layout_fwd( TensorLayout& dst) const { auto errmsg = [&]() { return get_errmsg(src, filter, dst, param()); }; MEGDNN_MARK_USED_VAR(errmsg); - megdnn_assert_contiguous(src); - megdnn_assert_contiguous(filter); megdnn_assert(src.ndim >= 5_z, "%s", errmsg().c_str()); megdnn_assert(src.dtype == filter.dtype, "%s", errmsg().c_str()); if (param().data_type == Param::DataType::FLOAT) { @@ -170,6 +168,8 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBase::deduce_layout_fwd( Convolution3DBase::CanonizedFilterMeta Convolution3DBase::check_layout_fwd( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst) const { + megdnn_assert_contiguous(src); + megdnn_assert_contiguous(filter); TensorLayout dst_expected; auto ret = deduce_layout_fwd(src, filter, dst_expected); megdnn_assert_eq_layout(dst_expected, dst); @@ -185,7 +185,12 @@ void Convolution3DForward::deduce_layout(const TensorLayout& src, Convolution3DBase::CanonizedFilterMeta Convolution3DForward::check_exec( const TensorLayout& src, const TensorLayout& filter, const TensorLayout& dst, size_t workspace_in_bytes) { - auto ret = check_layout_fwd(src, filter, dst); + auto src_fwd = src; + auto dst_fwd = dst; + src_fwd.init_contiguous_stride(); + dst_fwd.init_contiguous_stride(); + + auto ret = check_layout_fwd(src_fwd, filter, dst_fwd); auto required_workspace_in_bytes = get_workspace_in_bytes(src, filter, dst); megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); return ret; @@ -196,7 +201,12 @@ Convolution3DBase::CanonizedFilterMeta Convolution3DBackwardData::check_exec( const TensorLayout& grad, size_t workspace_in_bytes) { megdnn_assert(param().data_type == Param::DataType::FLOAT, "only float type is supported for conv backward"); - auto ret = check_layout_fwd(grad, filter, diff); + auto diff_fwd = diff; + auto grad_fwd = grad; + diff_fwd.init_contiguous_stride(); + grad_fwd.init_contiguous_stride(); + + auto ret = check_layout_fwd(grad_fwd, filter, diff_fwd); auto required_workspace_in_bytes = get_workspace_in_bytes(filter, diff, grad); megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); @@ -244,7 +254,12 @@ 
Convolution3DBase::CanonizedFilterMeta Convolution3DBackwardFilter::check_exec( const TensorLayout& grad, size_t workspace_in_bytes) { megdnn_assert(param().data_type == Param::DataType::FLOAT, "only float type is supported for conv backward"); - auto ret = check_layout_fwd(src, grad, diff); + auto src_fwd = src; + auto diff_fwd = diff; + src_fwd.init_contiguous_stride(); + diff_fwd.init_contiguous_stride(); + + auto ret = check_layout_fwd(src_fwd, grad, diff_fwd); auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad); megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes); return ret; diff --git a/dnn/src/cuda/convolution/backward_data/group_conv.cpp b/dnn/src/cuda/convolution/backward_data/group_conv.cpp index c2e3b9f45..d32a407fe 100644 --- a/dnn/src/cuda/convolution/backward_data/group_conv.cpp +++ b/dnn/src/cuda/convolution/backward_data/group_conv.cpp @@ -44,6 +44,8 @@ bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available( args.diff_layout->dtype == dtype::QuantizedS8())) { return false; } + if (args.filter_meta.group <= 1) + return false; auto sub_args = args; TensorLayout diff_pg, grad_pg; modify_size_args(sub_args, diff_pg, grad_pg); diff --git a/dnn/src/cuda/convolution/backward_filter/chanwise.cpp b/dnn/src/cuda/convolution/backward_filter/chanwise.cpp index ad91fed9f..e6d6893df 100644 --- a/dnn/src/cuda/convolution/backward_filter/chanwise.cpp +++ b/dnn/src/cuda/convolution/backward_filter/chanwise.cpp @@ -19,7 +19,7 @@ using namespace convolution; bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available( const SizeArgs &args) const { - if (!args.grad_layout->is_contiguous() || + if (!args.src_layout->is_contiguous() || !args.diff_layout->is_contiguous()) { return false; } diff --git a/dnn/src/cuda/convolution/backward_filter/group_conv.cpp b/dnn/src/cuda/convolution/backward_filter/group_conv.cpp index 797469f3d..0e0a8e1bd 100644 --- a/dnn/src/cuda/convolution/backward_filter/group_conv.cpp +++ b/dnn/src/cuda/convolution/backward_filter/group_conv.cpp @@ -42,6 +42,8 @@ bool ConvolutionBackwardFilterImpl::AlgoGroupConvGeneral::is_available( args.diff_layout->dtype == dtype::BFloat16()) { return false; } + if (args.grad_filter_meta.group <= 1) + return false; auto sub_args = args; TensorLayout src_pg, diff_pg; modify_size_args(sub_args, src_pg, diff_pg); diff --git a/dnn/src/cuda/convolution3d/backward_data/algo.cpp b/dnn/src/cuda/convolution3d/backward_data/algo.cpp index b019d0b09..2ecc5a6a8 100644 --- a/dnn/src/cuda/convolution3d/backward_data/algo.cpp +++ b/dnn/src/cuda/convolution3d/backward_data/algo.cpp @@ -64,7 +64,7 @@ Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs( Convolution3DBackwardDataImpl *o, const TensorLayout &filter, const TensorLayout &diff, const TensorLayout &grad): - SizeArgs(o, o->check_layout_fwd(grad, filter, diff), diff, grad) + SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad) { } diff --git a/dnn/src/cuda/convolution3d/backward_data/chanwise.cpp b/dnn/src/cuda/convolution3d/backward_data/chanwise.cpp index 7c08aef14..c4d0c5848 100644 --- a/dnn/src/cuda/convolution3d/backward_data/chanwise.cpp +++ b/dnn/src/cuda/convolution3d/backward_data/chanwise.cpp @@ -19,6 +19,10 @@ using namespace convolution3d; bool Convolution3DBackwardDataImpl::AlgoChanwise::is_available( const SizeArgs &args) const { + if (!args.grad_layout->is_contiguous() || + !args.diff_layout->is_contiguous()) { + return false; + } auto &&fm = args.filter_meta; return args.filter_meta.format == 
Param::Format::NCDHW && args.diff_layout->dtype.category() == DTypeCategory::FLOAT && diff --git a/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp b/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp index a134e1708..9e6d90b59 100644 --- a/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp +++ b/dnn/src/cuda/convolution3d/backward_data/group_conv.cpp @@ -38,6 +38,8 @@ Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available( const SizeArgs &args) const { + if (args.filter_meta.group <= 1) + return false; auto sub_args = args; TensorLayout diff_pg, grad_pg; modify_size_args(sub_args, diff_pg, grad_pg); diff --git a/dnn/src/cuda/convolution3d/backward_filter/algo.cpp b/dnn/src/cuda/convolution3d/backward_filter/algo.cpp index 4ae4aa94e..ad9568a30 100644 --- a/dnn/src/cuda/convolution3d/backward_filter/algo.cpp +++ b/dnn/src/cuda/convolution3d/backward_filter/algo.cpp @@ -67,7 +67,7 @@ Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs( Convolution3DBackwardFilterImpl *o, const TensorLayout &src, const TensorLayout &diff, const TensorLayout &grad): - SizeArgs(o, src, diff, o->check_layout_fwd(src, grad, diff)) + SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad)) { } diff --git a/dnn/src/cuda/convolution3d/backward_filter/chanwise.cpp b/dnn/src/cuda/convolution3d/backward_filter/chanwise.cpp index 7f738f093..06831344a 100644 --- a/dnn/src/cuda/convolution3d/backward_filter/chanwise.cpp +++ b/dnn/src/cuda/convolution3d/backward_filter/chanwise.cpp @@ -19,6 +19,10 @@ using namespace convolution3d; bool Convolution3DBackwardFilterImpl::AlgoChanwise::is_available( const SizeArgs &args) const { + if (!args.src_layout->is_contiguous() || + !args.diff_layout->is_contiguous()) { + return false; + } auto &&fm = args.grad_filter_meta; return fm.format == Param::Format::NCDHW && args.diff_layout->dtype.category() == DTypeCategory::FLOAT && diff --git a/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp b/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp index 0f2c29adb..eaf4ea92f 100644 --- a/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp +++ b/dnn/src/cuda/convolution3d/backward_filter/group_conv.cpp @@ -38,6 +38,8 @@ Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available( const SizeArgs &args) const { + if (args.grad_filter_meta.group <= 1) + return false; auto sub_args = args; TensorLayout src_pg, diff_pg; modify_size_args(sub_args, src_pg, diff_pg); diff --git a/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul.cpp b/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul.cpp index d27680ae6..63add1d7c 100644 --- a/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul.cpp +++ b/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul.cpp @@ -17,6 +17,10 @@ using namespace cuda; bool Convolution3DBackwardFilterImpl::AlgoInplaceMatmul::is_available( const SizeArgs &args) const { + if (!args.src_layout->is_contiguous() || + !args.diff_layout->is_contiguous()) { + return false; + } auto &&fm = args.grad_filter_meta; return args.grad_filter_meta.format == Param::Format::NCDHW && args.src_layout->dtype == dtype::Float32() && diff --git a/dnn/src/cuda/convolution3d/forward/algo.cpp b/dnn/src/cuda/convolution3d/forward/algo.cpp index 872ec8f9a..e8639f82d 100644 --- a/dnn/src/cuda/convolution3d/forward/algo.cpp +++ 
b/dnn/src/cuda/convolution3d/forward/algo.cpp @@ -69,7 +69,7 @@ Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs( Convolution3DForwardImpl *o, const TensorLayout &src, const TensorLayout &filter, const TensorLayout &dst): - SizeArgs(o, src, o->check_layout_fwd(src, filter, dst), dst) + SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst) { } diff --git a/dnn/src/cuda/convolution3d/forward/chanwise.cpp b/dnn/src/cuda/convolution3d/forward/chanwise.cpp index ce30a8962..c3a28044d 100644 --- a/dnn/src/cuda/convolution3d/forward/chanwise.cpp +++ b/dnn/src/cuda/convolution3d/forward/chanwise.cpp @@ -19,6 +19,10 @@ using namespace convolution3d; bool Convolution3DForwardImpl::AlgoChanwise::is_available( const SizeArgs &args) const { + if (!args.src_layout->is_contiguous() || + !args.dst_layout->is_contiguous()) { + return false; + } auto &&fm = args.filter_meta; return args.filter_meta.format == Param::Format::NCDHW && args.src_layout->dtype.category() == DTypeCategory::FLOAT && diff --git a/dnn/src/cuda/convolution3d/forward/group_conv.cpp b/dnn/src/cuda/convolution3d/forward/group_conv.cpp index 39da7fe37..c20cb37e5 100644 --- a/dnn/src/cuda/convolution3d/forward/group_conv.cpp +++ b/dnn/src/cuda/convolution3d/forward/group_conv.cpp @@ -45,6 +45,8 @@ Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral( bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available( const SizeArgs &args) const { + if (args.filter_meta.group <= 1) + return false; auto sub_args = args; TensorLayout src_pg, dst_pg; modify_size_args(sub_args, src_pg, dst_pg); diff --git a/dnn/src/naive/convolution3d/helper.h b/dnn/src/naive/convolution3d/helper.h index 478eda5d9..867cf618f 100644 --- a/dnn/src/naive/convolution3d/helper.h +++ b/dnn/src/naive/convolution3d/helper.h @@ -215,7 +215,6 @@ void backward_data(_megdnn_tensor_in filter, _megdnn_tensor_in diff, _megdnn_tensor_out grad, const Convolution3D::CanonizedFilterMeta &filter_meta) { - megdnn_assert(grad.layout.is_contiguous()); memset(grad.raw_ptr, 0, grad.layout.span().dist_byte()); megdnn_assert(filter_meta.spatial_ndim == 3); compute3d( @@ -227,7 +226,6 @@ void backward_filter(_megdnn_tensor_in src, _megdnn_tensor_in diff, _megdnn_tensor_out grad, const Convolution3D::CanonizedFilterMeta &filter_meta) { - megdnn_assert(grad.layout.is_contiguous()); memset(grad.raw_ptr, 0, grad.layout.span().dist_byte()); megdnn_assert(filter_meta.spatial_ndim == 3); compute3d( diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp index 43eae93af..bef4bfd5a 100644 --- a/dnn/test/cuda/convolution.cpp +++ b/dnn/test/cuda/convolution.cpp @@ -384,16 +384,6 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) { } checker.set_rng(0, &rng).set_rng(1, &rng).set_param(arg.param).exec( TensorLayoutArray{filter, dst, src}); - //! 
noncontiguous case
-        {
-            param::Convolution param;
-            param.pad_h = param.pad_w = 1;
-            checker.set_param(param).execl(TensorLayoutArray{
-                {{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::QuantizedS8{1.3f}},
-                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}},
-                {{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::QuantizedS8{1.2f}}
-            });
-        }
     }
 }
 
diff --git a/dnn/test/cuda/convolution3d.cpp b/dnn/test/cuda/convolution3d.cpp
index ab21e733a..5b614909d 100644
--- a/dnn/test/cuda/convolution3d.cpp
+++ b/dnn/test/cuda/convolution3d.cpp
@@ -150,6 +150,77 @@ TEST_F(CUDA, CONVOLUTION3D_MATMUL_FORWARD) {
     }
 }
 
+TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_CUDNN) {
+    using namespace convolution3d;
+    Checker<Convolution3DForward> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>(
+            "CUDNN"));
+    param::Convolution3D param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_epsilon(1e-3);
+
+    //! noncontiguous case
+    {
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()},
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()}});
+    }
+}
+
+TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_INPLACE_MATMUL) {
+    using namespace convolution3d;
+    Checker<Convolution3DForward> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>(
+            "INPLACE_MATMUL"));
+    param::Convolution3D param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_epsilon(1e-3);
+
+    //! noncontiguous case
+    {
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()},
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()}});
+    }
+}
+
+TEST_F(CUDA, CONVOLUTION3D_FORWARD_NONCONTIG_1x1x1) {
+    using namespace convolution3d;
+    Checker<Convolution3DForward> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>(
+            "1x1x1"));
+    param::Convolution3D param;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_epsilon(1e-3);
+
+    //! noncontiguous case
+    {
+        checker.set_param(param).execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()},
+                {{5, 5, 1, 1, 1}, {5, 1, 1, 1, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()}});
+    }
+}
+
 #if MEGDNN_WITH_BENCHMARK
 TEST_F(CUDA, BENCHMARK_CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
     using namespace convolution3d;
@@ -343,6 +414,60 @@ TEST_F(CUDA, CONVOLUTION3D_MATMUL_BACKWARD_FILTER) {
     }
 }
 
+TEST_F(CUDA, CONVOLUTION3D_BACKWARD_DATA_NONCONTIG_CUDNN) {
+    using namespace convolution3d;
+    Checker<Convolution3DBackwardData> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardData>(
+            "CUDNN"));
+    Convolution3DBackwardData::Param param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng)
+            .set_epsilon(1e-3)
+            .set_param(param);
+    //! noncontiguous case
+    {
+        checker.execl(TensorLayoutArray{
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()},
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()},
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()}});
+    }
+}
+
+TEST_F(CUDA, CONVOLUTION3D_BACKWARD_FILTER_NONCONTIG_CUDNN) {
+    using namespace convolution3d;
+    Checker<Convolution3DBackwardFilter> checker(handle_cuda());
+    checker.set_before_exec_callback(AlgoChecker<Convolution3DBackwardFilter>(
+            "CUDNN"));
+    Convolution3DBackwardFilter::Param param;
+    param.pad_d = param.pad_h = param.pad_w = 1;
+    NormalRNG default_rng;
+    checker.set_dtype(0, dtype::Float32())
+            .set_dtype(1, dtype::Float32())
+            .set_rng(0, &default_rng)
+            .set_rng(1, &default_rng)
+            .set_epsilon(1e-3)
+            .set_param(param);
+    //! noncontiguous case
+    {
+        checker.execl(TensorLayoutArray{
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()},
+                {{4, 5, 16, 16, 16},
+                 {40960, 4096, 256, 16, 1},
+                 dtype::Float32()},
+                {{5, 5, 3, 3, 3}, {135, 27, 9, 3, 1}, dtype::Float32()}});
+    }
+}
+
 /*
 TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
     auto eps_getter = [](bool f16, int stage, const char *name) -> float {
-- 
GitLab
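
Appendix (not part of the patch): why the layouts in the new tests are noncontiguous. The src/dst layouts use shape {4, 5, 16, 16, 16} with strides {40960, 4096, 256, 16, 1}; a contiguous NCDHW layout of that shape would have a batch stride of 5*16*16*16 = 20480, so only the outermost stride deviates (for example, each sample being a 5-channel slice of a 10-channel buffer). The sketch below is a standalone C++ illustration of that arithmetic; it does not use megdnn APIs, and the helper name contiguous_strides is ours, not part of the library.

// Standalone sketch: compare the strides used in the noncontiguous tests
// against freshly computed row-major (contiguous) strides.
#include <cstddef>
#include <cstdio>
#include <vector>

// Row-major strides for a shape: the innermost dimension has stride 1.
static std::vector<size_t> contiguous_strides(const std::vector<size_t>& shape) {
    std::vector<size_t> strides(shape.size());
    size_t acc = 1;
    for (size_t i = shape.size(); i-- > 0;) {
        strides[i] = acc;
        acc *= shape[i];
    }
    return strides;
}

int main() {
    // src/dst layout from the new convolution3d tests.
    const std::vector<size_t> shape = {4, 5, 16, 16, 16};
    const std::vector<size_t> test_strides = {40960, 4096, 256, 16, 1};
    const auto contig = contiguous_strides(shape);  // {20480, 4096, 256, 16, 1}
    for (size_t i = 0; i < shape.size(); ++i) {
        std::printf("dim %zu: size %2zu  test stride %5zu  contiguous stride %5zu%s\n",
                    i, shape[i], test_strides[i], contig[i],
                    test_strides[i] == contig[i] ? "" : "  <- noncontiguous");
    }
    return 0;
}

Before this patch, check_exec() passed such layouts straight to check_layout_fwd(), whose contiguity asserts on src rejected them; after it, the shape check runs on stride-canonized copies (init_contiguous_stride()) and each CUDA algo's is_available() decides for itself: the chanwise algos and the backward-filter inplace-matmul kernel now refuse noncontiguous layouts, while the paths covered by the new tests (cuDNN in all three directions, plus forward INPLACE_MATMUL and 1x1x1) accept them.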