From 70b0274c8e73991748a3cbbefd78363fa0042def Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.a.alekhin@gmail.com>
Date: Wed, 26 Jan 2022 05:00:47 +0000
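Subject: [PATCH] dnn: apply hint to ignore denormals processing

Add an FPDenormalsIgnoreHintScope member to the Caffe, Darknet, ONNX,
TensorFlow and Torch importers, and a local FPDenormalsIgnoreHintScope to
Net::readFromModelOptimizer(), Net::forward(), Net::forwardAsync() and
Net::setInput().

Remove the SSE3-only save/set/restore of the flush-to-zero and
denormals-are-zero modes from the convolution layer's forward pass; the
scope-based hint is applied at the Net level instead.
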
---
 modules/dnn/src/caffe/caffe_importer.cpp     |  4 ++++
 modules/dnn/src/darknet/darknet_importer.cpp |  3 +++
 modules/dnn/src/dnn.cpp                      | 13 +++++++++++++
 modules/dnn/src/layers/convolution_layer.cpp | 11 -----------
 modules/dnn/src/onnx/onnx_importer.cpp       |  4 ++++
 modules/dnn/src/tensorflow/tf_importer.cpp   |  3 +++
 modules/dnn/src/torch/torch_importer.cpp     |  5 +++++
 7 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/modules/dnn/src/caffe/caffe_importer.cpp b/modules/dnn/src/caffe/caffe_importer.cpp
index 0b6c0a6e38..03dff96464 100644
--- a/modules/dnn/src/caffe/caffe_importer.cpp
+++ b/modules/dnn/src/caffe/caffe_importer.cpp
@@ -53,6 +53,8 @@
 #include "caffe_io.hpp"
 #endif
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 namespace cv {
 namespace dnn {
 CV__DNN_EXPERIMENTAL_NS_BEGIN
@@ -88,6 +90,8 @@ MatShape parseBlobShape(const caffe::BlobShape& _input_shape)
 
 class CaffeImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     caffe::NetParameter net;
     caffe::NetParameter netBinary;
 
diff --git a/modules/dnn/src/darknet/darknet_importer.cpp b/modules/dnn/src/darknet/darknet_importer.cpp
index 282b37277c..5d28dbd2e2 100644
--- a/modules/dnn/src/darknet/darknet_importer.cpp
+++ b/modules/dnn/src/darknet/darknet_importer.cpp
@@ -51,6 +51,7 @@
 
 #include "darknet_io.hpp"
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
 
 namespace cv {
 namespace dnn {
@@ -61,6 +62,8 @@ namespace
 
 class DarknetImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     darknet::NetParameter net;
 
 public:
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
index 28bcde4827..52428a9898 100644
--- a/modules/dnn/src/dnn.cpp
+++ b/modules/dnn/src/dnn.cpp
@@ -55,6 +55,8 @@
 #include <opencv2/dnn/shape_utils.hpp>
 #include <opencv2/imgproc.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/configuration.private.hpp>
 #include <opencv2/core/utils/logger.hpp>
 
@@ -3504,6 +3506,9 @@ Net Net::readFromModelOptimizer(const String& xml, const String& bin)
     CV_UNUSED(xml); CV_UNUSED(bin);
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
+
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
     InferenceEngine::CNNNetReader reader;
     reader.ReadNetwork(xml);
@@ -3540,6 +3545,8 @@ Net Net::readFromModelOptimizer(
     CV_Error(Error::StsError, "Build OpenCV with Inference Engine to enable loading models from Model Optimizer.");
 #else
 
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
 #if INF_ENGINE_VER_MAJOR_LE(INF_ENGINE_RELEASE_2019R3)
     InferenceEngine::CNNNetReader reader;
 
@@ -3639,6 +3646,7 @@ Mat Net::forward(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     String layerName = outputName;
 
@@ -3660,6 +3668,7 @@ AsyncArray Net::forwardAsync(const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
 #ifdef CV_CXX11
     String layerName = outputName;
@@ -3691,6 +3700,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName)
 {
     CV_TRACE_FUNCTION();
     CV_Assert(!empty());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     String layerName = outputName;
 
@@ -3769,6 +3779,7 @@ void Net::forward(OutputArrayOfArrays outputBlobs,
                   const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -3796,6 +3807,7 @@ void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
                      const std::vector<String>& outBlobNames)
 {
     CV_TRACE_FUNCTION();
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     std::vector<LayerPin> pins;
     for (int i = 0; i < outBlobNames.size(); i++)
@@ -3886,6 +3898,7 @@ void Net::setInput(InputArray blob, const String& name, double scalefactor, cons
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 
     LayerPin pin;
     pin.lid = 0;
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index f5c158453d..e160cfeebe 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -1629,13 +1629,6 @@ public:
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-#if CV_SSE3
-        uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE();
-        uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE();
-        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
-        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
-#endif
-
         CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget),
                    forward_ocl(inputs_arr, outputs_arr, internals_arr))
 
@@ -1748,10 +1741,6 @@ public:
             ParallelConv::run(inputs[0], outputs[0], weightsMat, biasvec, reluslope,
                             kernel_size, strides, pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes);
         }
-#if CV_SSE3
-        _MM_SET_FLUSH_ZERO_MODE(ftzMode);
-        _MM_SET_DENORMALS_ZERO_MODE(dazMode);
-#endif
     }
 
     virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp
index 47ec830313..f68b1ccf2f 100644
--- a/modules/dnn/src/onnx/onnx_importer.cpp
+++ b/modules/dnn/src/onnx/onnx_importer.cpp
@@ -8,6 +8,8 @@
 #include "../precomp.hpp"
 #include <opencv2/dnn/shape_utils.hpp>
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/logger.defines.hpp>
 #undef CV_LOG_STRIP_LEVEL
 #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_VERBOSE + 1
@@ -40,6 +42,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
 
 class ONNXImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     opencv_onnx::ModelProto model_proto;
     struct LayerInfo {
         int layerId;
diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp
index 8cbe1c4b23..43ab1a93ac 100644
--- a/modules/dnn/src/tensorflow/tf_importer.cpp
+++ b/modules/dnn/src/tensorflow/tf_importer.cpp
@@ -11,6 +11,8 @@ Implementation of Tensorflow models parser
 
 #include "../precomp.hpp"
 
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <opencv2/core/utils/logger.defines.hpp>
 #include <opencv2/dnn/shape_utils.hpp>
 #undef CV_LOG_STRIP_LEVEL
@@ -509,6 +511,7 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in
 
 class TFImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
 public:
     TFImporter(Net& net, const char *model, const char *config = NULL);
     TFImporter(Net& net, const char *dataModel, size_t lenModel,
diff --git a/modules/dnn/src/torch/torch_importer.cpp b/modules/dnn/src/torch/torch_importer.cpp
index 5dd9e3e290..f158bc12c2 100644
--- a/modules/dnn/src/torch/torch_importer.cpp
+++ b/modules/dnn/src/torch/torch_importer.cpp
@@ -40,6 +40,9 @@
 //M*/
 
 #include "../precomp.hpp"
+
+#include <opencv2/core/utils/fp_control_utils.hpp>
+
 #include <limits>
 #include <set>
 #include <map>
@@ -106,6 +109,8 @@ static inline bool endsWith(const String &str, const char *substr)
 
 struct TorchImporter
 {
+    FPDenormalsIgnoreHintScope fp_denormals_ignore_scope;
+
     typedef std::map<String, std::pair<int, Mat> > TensorsMap;
     Net net;
 
-- 
GitLab