From 3b452d8c166ae52b11f347f893c46c305a16c3dc Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 28 Jul 2021 19:14:26 +0800 Subject: [PATCH] feat(mgb): cuda conv support nhwc format and fp16 dtype GitOrigin-RevId: b8ddcd108a4370a0b093c51bd90ebde0e007cb24 --- .../conv_bias/cudnn_conv_bias_activation.cpp | 6 ++++ dnn/src/cuda/conv_bias/helper.cpp | 8 ++--- dnn/test/cuda/conv_bias.cpp | 35 +++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp index 9b6d6d677..515435d4a 100644 --- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp +++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp @@ -69,6 +69,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available( return false; } + if (args.src_layout->dtype.enumv() == DTypeEnum::Float16 && + args.dst_layout->dtype.enumv() == DTypeEnum::Float16 && + param.format == param::ConvBias::Format::NHWC) { + return false; + } + //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal //! memory access errors, so we have to disable this kernel here. if (param.format == param::ConvBias::Format::NCHW4_NCHW || diff --git a/dnn/src/cuda/conv_bias/helper.cpp b/dnn/src/cuda/conv_bias/helper.cpp index b98520e27..d4cd291bc 100644 --- a/dnn/src/cuda/conv_bias/helper.cpp +++ b/dnn/src/cuda/conv_bias/helper.cpp @@ -151,14 +151,14 @@ bool is_cudnn_supported(const BiasForwardSizeArgs& args) { if (args.handle->is_tegra_k1()) return false; - // TODO: We only support NCHW format now. It seems cuDNN provides support - // for NHWC as well. - if (args.filter_meta.format == param::Convolution::Format::NCHW4) { + if (args.filter_meta.format == param::Convolution::Format::NCHW4 || + args.filter_meta.format == param::Convolution::Format::NCHW32) { if (args.dst_layout->dtype.enumv() != DTypeEnum::Int8 && args.dst_layout->dtype.enumv() != DTypeEnum::QuantizedS8) { return false; } - } else if (args.filter_meta.format != param::Convolution::Format::NCHW) { + } else if (args.filter_meta.format != param::Convolution::Format::NCHW && + args.filter_meta.format != param::Convolution::Format::NHWC) { return false; } auto& fm = args.filter_meta; diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp index d1fdf8816..3435e106c 100644 --- a/dnn/test/cuda/conv_bias.cpp +++ b/dnn/test/cuda/conv_bias.cpp @@ -216,6 +216,41 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) { } } +TEST_F(CUDA, CONV_BIAS_FORWARD_FLOAT16) { + require_compute_capability(6, 1); + + Checker checker(handle_cuda()); + ConvBias::Param param; + param.format = ConvBias::Param::Format::NHWC; + param.nonlineMode = ConvBias::Param::NonlineMode::IDENTITY; + + checker.set_epsilon(2e-2) + .set_dtype(0, dtype::Float16()) + .set_dtype(1, dtype::Float16()) + .set_dtype(2, dtype::Float16()) + .set_dtype(3, dtype::Float16()) + .set_dtype(4, dtype::Float16()); + { + auto src_shape = TensorShape{20, 224, 224, 4}; + auto filter_shape = TensorShape{24, 1, 1, 4}; + auto bias_shape = TensorShape{1, 1, 1, 24}; + checker.set_param(param).execs( + {src_shape, filter_shape, bias_shape, {}, {}}); + param.compute_mode = ConvBias::Param::ComputeMode::FLOAT32; + checker.set_param(param).execs( + {src_shape, filter_shape, bias_shape, {}, {}}); + } + + { + param.sparse = ConvBias::Param::Sparse::GROUP; + auto src_shape = TensorShape{20, 224, 224, 16}; + auto filter_shape = TensorShape{4, 4, 1, 1, 4}; + auto bias_shape = TensorShape{1, 1, 1, 16}; + checker.set_param(param).execs( + {src_shape, filter_shape, bias_shape, {}, {}}); + } +} + TEST_F(CUDA, CONV_BIAS_NCHW_QS8) { //! not support NonlineMode::SIGMOID and NonlineMode::H_SWISH require_compute_capability(6, 1); -- GitLab