From 4e8cd4629cdc6f1dc51d913b25417950f67c984a Mon Sep 17 00:00:00 2001 From: YashasSamaga Date: Mon, 23 Mar 2020 19:47:00 +0530 Subject: [PATCH] fix CUDNN_STATUS_NOT_SUPPORTED, remove redundant fusion checks --- modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp | 12 ++++++++-- .../src/cuda4dnn/primitives/convolution.hpp | 22 ++++++++++++++++--- modules/dnn/src/dnn.cpp | 10 --------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp b/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp index 19b46a9b36..13ecc1a0e7 100644 --- a/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp +++ b/modules/dnn/src/cuda4dnn/csl/cudnn/cudnn.hpp @@ -27,13 +27,21 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu /** @brief exception class for errors thrown by the cuDNN API */ class cuDNNException : public CUDAException { public: - using CUDAException::CUDAException; + cuDNNException(cudnnStatus_t code, const std::string& msg, const std::string& func, const std::string& file, int line) + : CUDAException(Error::GpuApiCallError, msg, func, file, line), cudnnError{code} + { + } + + cudnnStatus_t getCUDNNStatus() const noexcept { return cudnnError; } + + private: + cudnnStatus_t cudnnError; }; namespace detail { inline void check(cudnnStatus_t status, const char* func, const char* file, int line) { if (status != CUDNN_STATUS_SUCCESS) - throw cuDNNException(Error::GpuApiCallError, cudnnGetErrorString(status), func, file, line); + throw cuDNNException(status, cudnnGetErrorString(status), func, file, line); } /** get_data_type returns the equivalent cudnn enumeration constant for type T */ diff --git a/modules/dnn/src/cuda4dnn/primitives/convolution.hpp b/modules/dnn/src/cuda4dnn/primitives/convolution.hpp index b0039525ae..282ae7cf77 100644 --- a/modules/dnn/src/cuda4dnn/primitives/convolution.hpp +++ b/modules/dnn/src/cuda4dnn/primitives/convolution.hpp @@ -261,16 +261,32 @@ namespace cv { namespace dnn { namespace cuda4dnn { input = transformed_input; } + auto conv_scratchpad = allocator.get_instance(); + auto output_wrapper = outputs[0].dynamicCast(); auto output = output_wrapper->getSpan(); if (fusion_location == InternalFusionLocation::CUDNN) { - convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, allocator.get_instance()); + try + { + convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, conv_scratchpad); + } + catch(const csl::cudnn::cuDNNException& ex) + { + if (ex.getCUDNNStatus() == CUDNN_STATUS_NOT_SUPPORTED) + { + /* drop cuDNN fusion and use the native fusion path */ + fusion_location = InternalFusionLocation::NATIVE; + } + else + throw; + } } - else + + if (fusion_location == InternalFusionLocation::NATIVE) { - convoluter.convolve(output, input, filtersTensor, allocator.get_instance()); + convoluter.convolve(output, input, filtersTensor, conv_scratchpad); if (!biasTensor.empty()) { std::size_t inner_size = output.size_range(2, output.rank()); diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 542744176d..53b316940c 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -2580,16 +2580,6 @@ struct Net::Impl nextData->type != "Power") break; - if (IS_DNN_CUDA_TARGET(preferableTarget) && - nextData->type != "ReLU" && - nextData->type != "ReLU6" && - nextData->type != "Power" && - nextData->type != "TanH" && - nextData->type != "Sigmoid" && - nextData->type != "Swish" && - nextData->type != "Mish") - break; - Ptr nextActivLayer = nextData->layerInstance.dynamicCast(); if (nextActivLayer.empty()) break; -- GitLab