Commit ea34b2fe authored by Alexander Alekhin

Merge pull request #16888 from YashasSamaga:cuda4dnn-redundant-act-fusion-check

@@ -27,13 +27,21 @@ namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cu
     /** @brief exception class for errors thrown by the cuDNN API */
     class cuDNNException : public CUDAException {
     public:
-        using CUDAException::CUDAException;
+        cuDNNException(cudnnStatus_t code, const std::string& msg, const std::string& func, const std::string& file, int line)
+            : CUDAException(Error::GpuApiCallError, msg, func, file, line), cudnnError{code}
+        {
+        }
+
+        cudnnStatus_t getCUDNNStatus() const noexcept { return cudnnError; }
+
+    private:
+        cudnnStatus_t cudnnError;
     };
 
     namespace detail {
         inline void check(cudnnStatus_t status, const char* func, const char* file, int line) {
             if (status != CUDNN_STATUS_SUCCESS)
-                throw cuDNNException(Error::GpuApiCallError, cudnnGetErrorString(status), func, file, line);
+                throw cuDNNException(status, cudnnGetErrorString(status), func, file, line);
         }
 
         /** get_data_type<T> returns the equivalent cudnn enumeration constant for type T */
......
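For illustration, a minimal sketch of what the richer exception enables: callers can branch on the stored cudnnStatus_t instead of parsing the error message. The stand-in type cuDNNExceptionSketch and the helpers try_fused_path/native_fallback below are hypothetical, not part of the patch:

#include <cudnn.h>
#include <stdexcept>
#include <string>

// Simplified stand-in for cv::dnn::cuda4dnn::csl::cudnn::cuDNNException.
struct cuDNNExceptionSketch : std::runtime_error {
    cuDNNExceptionSketch(cudnnStatus_t code, const std::string& msg)
        : std::runtime_error(msg), cudnnError{code} { }
    cudnnStatus_t getCUDNNStatus() const noexcept { return cudnnError; }
private:
    cudnnStatus_t cudnnError;
};

// Hypothetical code path that fails the way a fused cuDNN convolution might.
void try_fused_path() { throw cuDNNExceptionSketch(CUDNN_STATUS_NOT_SUPPORTED, "fusion unsupported"); }
void native_fallback() { /* run the unfused convolution + activation instead */ }

int main()
{
    try {
        try_fused_path();
    } catch (const cuDNNExceptionSketch& ex) {
        if (ex.getCUDNNStatus() == CUDNN_STATUS_NOT_SUPPORTED)
            native_fallback();   // recoverable: this configuration just isn't fused
        else
            throw;               // genuine failure: propagate unchanged
    }
}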
@@ -261,16 +261,32 @@ namespace cv { namespace dnn { namespace cuda4dnn {
                 input = transformed_input;
             }
 
+            auto conv_scratchpad = allocator.get_instance();
+
             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
             auto output = output_wrapper->getSpan();
 
             if (fusion_location == InternalFusionLocation::CUDNN)
             {
-                convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, allocator.get_instance());
+                try
+                {
+                    convoluter.convolve_with_bias_activation(output, input, filtersTensor, biasTensor, conv_scratchpad);
+                }
+                catch(const csl::cudnn::cuDNNException& ex)
+                {
+                    if (ex.getCUDNNStatus() == CUDNN_STATUS_NOT_SUPPORTED)
+                    {
+                        /* drop cuDNN fusion and use the native fusion path */
+                        fusion_location = InternalFusionLocation::NATIVE;
+                    }
+                    else
+                        throw;
+                }
             }
-            else
+
+            if (fusion_location == InternalFusionLocation::NATIVE)
             {
-                convoluter.convolve(output, input, filtersTensor, allocator.get_instance());
+                convoluter.convolve(output, input, filtersTensor, conv_scratchpad);
 
                 if (!biasTensor.empty())
                 {
                     std::size_t inner_size = output.size_range(2, output.rank());
......
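The essential control-flow change in this hunk: the former if/else becomes two independent if statements, so when the catch handler demotes fusion_location to NATIVE, the same forward pass falls through into the native branch and still produces an output. A minimal, self-contained sketch of the pattern, with hypothetical names (FusionLocation, fused_kernel, native_kernel) standing in for the cuda4dnn types:

#include <stdexcept>

enum class FusionLocation { CUDNN, NATIVE };

struct NotSupported : std::runtime_error {
    using std::runtime_error::runtime_error;
};

void fused_kernel()  { throw NotSupported{"fusion not supported for this config"}; }
void native_kernel() { /* unfused convolution, then bias + activation */ }

void forward(FusionLocation fusion_location)
{
    if (fusion_location == FusionLocation::CUDNN)
    {
        try {
            fused_kernel();
        } catch (const NotSupported&) {
            // Demote to the native path. Because the next block is an
            // independent `if` (not an `else`), this same call still runs it.
            fusion_location = FusionLocation::NATIVE;
        }
    }

    if (fusion_location == FusionLocation::NATIVE)
        native_kernel();
}

int main() { forward(FusionLocation::CUDNN); }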
@@ -2580,16 +2580,6 @@ struct Net::Impl
                 nextData->type != "Power")
                 break;
 
-            if (IS_DNN_CUDA_TARGET(preferableTarget) &&
-                nextData->type != "ReLU" &&
-                nextData->type != "ReLU6" &&
-                nextData->type != "Power" &&
-                nextData->type != "TanH" &&
-                nextData->type != "Sigmoid" &&
-                nextData->type != "Swish" &&
-                nextData->type != "Mish")
-                break;
-
             Ptr<ActivationLayer> nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
             if (nextActivLayer.empty())
                 break;
......
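With the runtime fallback above in place, this ahead-of-time activation whitelist for the CUDA target becomes redundant: unsupported fusions are now detected when cuDNN reports CUDNN_STATUS_NOT_SUPPORTED, rather than predicted from layer type names. A hypothetical before/after sketch of the two approaches (cuda_can_fuse_static is illustrative, not OpenCV API):

#include <set>
#include <string>

// Before: fusion eligibility for the CUDA target was decided up front from a
// hard-coded activation whitelist (illustrative reconstruction of the removed check).
static bool cuda_can_fuse_static(const std::string& type)
{
    static const std::set<std::string> whitelist{
        "ReLU", "ReLU6", "Power", "TanH", "Sigmoid", "Swish", "Mish"};
    return whitelist.count(type) != 0;
}

// After: no list to maintain -- the backend simply attempts the fused kernel and
// falls back to the native path when cuDNN rejects the configuration, so newly
// fusable activations no longer require updating this check in lockstep.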