Commit 98c5fc6a authored by Alexander Alekhin

Merge pull request #20410 from alalek:fix_dnn_dldt_output_layout

@@ -1944,7 +1944,10 @@ struct Net::Impl : public detail::NetImplBase
 Ptr<InfEngineNgraphNode> ieNode = node.dynamicCast<InfEngineNgraphNode>();
 CV_Assert(!ieNode.empty());
-ieNode->net->reset();
+CV_Assert(ieNode->net);
+InfEngineNgraphNet& ienet = *ieNode->net;
+ienet.reset();
 for (it = layers.begin(); it != layers.end(); ++it)
 {
@@ -1961,16 +1964,26 @@ struct Net::Impl : public detail::NetImplBase
 {
 for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i)
 {
-InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
-dataPtr->setName(ld.name);
+auto it = ienet.outputsDesc.find(ld.name);
+if (it != ienet.outputsDesc.end())
+{
+const InferenceEngine::TensorDesc& descriptor = it->second;
+InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name);
+dataPtr->setName(ld.name);
+}
+else
+{
+InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]);
+dataPtr->setName(ld.name);
+}
 }
 }
-ieNode->net->addBlobs(ld.inputBlobsWrappers);
-ieNode->net->addBlobs(ld.outputBlobsWrappers);
+ienet.addBlobs(ld.inputBlobsWrappers);
+ienet.addBlobs(ld.outputBlobsWrappers);
 ld.skip = true;
 }
 layers[lastLayerId].skip = false;
-ieNode->net->init((Target)preferableTarget);
+ienet.init((Target)preferableTarget);
 return;
 }
@@ -3719,8 +3732,8 @@ void Net::forward(OutputArrayOfArrays outputBlobs,
 matvec.push_back(impl->getBlob(pins[i]));
 }
-std::vector<Mat> & outputvec = *(std::vector<Mat> *)outputBlobs.getObj();
-outputvec = matvec;
+outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1); // allocate vector
+outputBlobs.assign(matvec);
 }
 void Net::forward(std::vector<std::vector<Mat> >& outputBlobs,
......
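Note on the `Net::forward` hunk above: the old code reinterpreted `outputBlobs.getObj()` as `std::vector<Mat>*` without checking the output kind; the new `create()`/`assign()` pair goes through the `_OutputArray` API, which validates the container kind (and can copy into `UMat` vectors as well). A minimal caller-side sketch; the model paths are placeholders, not part of this commit:

```cpp
#include <opencv2/dnn.hpp>
#include <vector>

int main()
{
    cv::dnn::Net net = cv::dnn::readNet("model.xml", "model.bin"); // placeholder paths
    cv::Mat blob({1, 3, 224, 224}, CV_32F, cv::Scalar(0));         // dummy NCHW input
    net.setInput(blob);
    std::vector<cv::Mat> outs;
    net.forward(outs, net.getUnconnectedOutLayersNames()); // results copied in via assign()
    return 0;
}
```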
@@ -789,21 +789,32 @@ void NgraphBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays
 }
-static InferenceEngine::Layout estimateLayout(const Mat& m)
+static InferenceEngine::Layout estimateLayout(int dims)
 {
-if (m.dims == 4)
+if (dims == 4)
 return InferenceEngine::Layout::NCHW;
-else if (m.dims == 3)
+else if (dims == 3)
 return InferenceEngine::Layout::CHW;
-else if (m.dims == 2)
+else if (dims == 2)
 return InferenceEngine::Layout::NC;
-else if (m.dims == 1)
+else if (dims == 1)
 return InferenceEngine::Layout::C;
-else if (m.dims == 5)
+else if (dims == 5)
 return InferenceEngine::Layout::NCDHW;
 else
 return InferenceEngine::Layout::ANY;
 }
+static inline
+InferenceEngine::Layout estimateLayout(size_t dims)
+{
+return estimateLayout((int)dims);
+}
+static inline
+InferenceEngine::Layout estimateLayout(const Mat& m)
+{
+return estimateLayout(m.dims);
+}
 static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std::string& name = "")
 {
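The `estimateLayout()` rework above separates the rank-to-layout mapping from `Mat` so the same logic can serve `TensorDesc` dimension counts via the `size_t` overload. A quick sanity sketch of the mapping, assuming the same InferenceEngine enums:

```cpp
CV_Assert(estimateLayout(4) == InferenceEngine::Layout::NCHW);
CV_Assert(estimateLayout(3) == InferenceEngine::Layout::CHW);
CV_Assert(estimateLayout((size_t)2) == InferenceEngine::Layout::NC);             // size_t overload
CV_Assert(estimateLayout(cv::Mat(5, 4, CV_32F)) == InferenceEngine::Layout::NC); // Mat overload, 2-D
CV_Assert(estimateLayout(6) == InferenceEngine::Layout::ANY);                    // fallback
```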
@@ -839,6 +850,7 @@ InferenceEngine::Blob::Ptr wrapToNgraphBlob(const Mat& m, InferenceEngine::Layou
 NgraphBackendWrapper::NgraphBackendWrapper(int targetId, const cv::Mat& m)
 : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, targetId)
+, host((Mat*)&m)
 {
 dataPtr = wrapToInfEngineDataNode(m);
 blob = wrapToNgraphBlob(m, estimateLayout(m));
@@ -890,7 +902,11 @@ InferenceEngine::Blob::Ptr copyBlob(const InferenceEngine::Blob::Ptr& blob)
 copy = InferenceEngine::make_shared_blob<uint8_t>(description);
 }
 else
-CV_Error(Error::StsNotImplemented, "Unsupported blob precision");
+{
+std::ostringstream msg;
+msg << precision;
+CV_Error_(Error::StsNotImplemented, ("Unsupported blob precision: %s", msg.str().c_str()));
+}
 copy->allocate();
 return copy;
 }
@@ -903,6 +919,66 @@ InferenceEngine::DataPtr ngraphDataNode(const Ptr<BackendWrapper>& ptr)
 return p->dataPtr;
 }
+static
+InferenceEngine::Blob::Ptr reallocateBlob(Mat &m, const InferenceEngine::TensorDesc& description)
+{
+auto dims = description.getDims();
+auto layout = estimateLayout(dims.size());
+MatShape matShape(dims.begin(), dims.end());
+if (description.getPrecision() == InferenceEngine::Precision::FP32)
+{
+m.create(matShape, CV_32FC1);
+return InferenceEngine::make_shared_blob<float>(
+{description.getPrecision(), dims, layout}, (float*)m.data);
+}
+else if (description.getPrecision() == InferenceEngine::Precision::I32)
+{
+m.create(matShape, CV_32SC1);
+return InferenceEngine::make_shared_blob<int>(
+{description.getPrecision(), dims, layout}, (int*)m.data);
+}
+else if (description.getPrecision() == InferenceEngine::Precision::U8)
+{
+m.create(matShape, CV_8UC1);
+return InferenceEngine::make_shared_blob<uchar>(
+{description.getPrecision(), dims, layout}, (uchar*)m.data);
+}
+std::ostringstream msg;
+msg << "Unsupported IE precision: " << description.getPrecision();
+CV_Error(Error::StsNotImplemented, msg.str());
+}
+InferenceEngine::DataPtr ngraphDataOutputNode(
+const Ptr<BackendWrapper>& ptr,
+const InferenceEngine::TensorDesc& description,
+const std::string name)
+{
+CV_Assert(!ptr.empty());
+Ptr<NgraphBackendWrapper> p = ptr.dynamicCast<NgraphBackendWrapper>();
+CV_Assert(!p.empty());
+NgraphBackendWrapper& w = *p;
+const InferenceEngine::TensorDesc& blobDesc = w.blob.get()->getTensorDesc();
+auto dims = description.getDims();
+bool reallocate = false;
+if (blobDesc.getPrecision() != description.getPrecision())
+{
+reallocate = true;
+CV_LOG_WARNING(NULL, "Reallocate output '" << name << "' blob due to wrong precision: " << blobDesc.getPrecision() << " => " << description.getPrecision() << " ndims=" << dims.size());
+}
+if (dims.size() != blobDesc.getDims().size())
+{
+reallocate = true;
+CV_LOG_WARNING(NULL, "Reallocate output '" << name << "' blob due to wrong dims: " << blobDesc.getDims().size() << " => " << dims.size());
+}
+if (reallocate)
+{
+auto layout = estimateLayout(dims.size());
+w.dataPtr = InferenceEngine::DataPtr(new InferenceEngine::Data(name,
+{description.getPrecision(), dims, layout}));
+w.blob = reallocateBlob(*w.host, description);
+}
+return w.dataPtr;
+}
 void forwardNgraph(const std::vector<Ptr<BackendWrapper> >& outBlobsWrappers,
 Ptr<BackendNode>& node, bool isAsync)
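`ngraphDataOutputNode()` above compares the descriptor cached from `getOutputsInfo()` against the wrapper's current blob: if the precision or the rank disagrees (for example, an I32 detection output where OpenCV pre-allocated FP32), it rebuilds the IE `Data` node and lets `reallocateBlob()` re-create the host `Mat` over matching storage. For illustration only, the precision-to-depth mapping that `reallocateBlob()` applies inline could be factored as this hypothetical helper (not in the commit):

```cpp
static int cvDepthForIEPrecision(const InferenceEngine::Precision& p)
{
    if (p == InferenceEngine::Precision::FP32) return CV_32F;  // -> CV_32FC1
    if (p == InferenceEngine::Precision::I32)  return CV_32S;  // -> CV_32SC1
    if (p == InferenceEngine::Precision::U8)   return CV_8U;   // -> CV_8UC1
    CV_Error(cv::Error::StsNotImplemented, "Unsupported IE precision");
}
```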
@@ -918,6 +994,13 @@ void InfEngineNgraphNet::reset()
 allBlobs.clear();
 infRequests.clear();
 isInit = false;
+outputsDesc.clear();
+for (const auto& it : cnn.getOutputsInfo())
+{
+const std::string& name = it.first;
+outputsDesc.insert({name, it.second->getTensorDesc()});
+}
 }
 void InfEngineNgraphNet::addBlobs(const std::vector<cv::Ptr<BackendWrapper> >& ptrs)
......
@@ -54,7 +54,8 @@ public:
 void setNodePtr(std::shared_ptr<ngraph::Node>* ptr);
 void reset();
-private:
+//private:
 detail::NetImplBase& netImpl_;
 void release();
@@ -89,6 +90,8 @@ private:
 bool hasNetOwner;
 std::vector<std::string> requestedOutputs;
 std::unordered_set<std::shared_ptr<ngraph::Node>> unconnectedNodes;
+std::map<std::string, InferenceEngine::TensorDesc> outputsDesc;
 };
 class InfEngineNgraphNode : public BackendNode
@@ -121,12 +124,17 @@ public:
 virtual void copyToHost() CV_OVERRIDE;
 virtual void setHostDirty() CV_OVERRIDE;
+Mat* host;
 InferenceEngine::DataPtr dataPtr;
 InferenceEngine::Blob::Ptr blob;
 AsyncArray futureMat;
 };
 InferenceEngine::DataPtr ngraphDataNode(const Ptr<BackendWrapper>& ptr);
+InferenceEngine::DataPtr ngraphDataOutputNode(
+const Ptr<BackendWrapper>& ptr,
+const InferenceEngine::TensorDesc& description,
+const std::string name);
 // This is a fake class to run networks from Model Optimizer. Objects of that
 // class simulate responses of layers are imported by OpenCV and supported by
......
@@ -103,11 +103,34 @@ static const std::map<std::string, OpenVINOModelTestCaseInfo>& getOpenVINOTestMo
 #if INF_ENGINE_RELEASE >= 2020010000
 // Downloaded using these parameters for Open Model Zoo downloader (2020.1):
 // ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \
-// --name person-detection-retail-0013
+// --name person-detection-retail-0013,age-gender-recognition-retail-0013
 { "person-detection-retail-0013", { // IRv10
 "intel/person-detection-retail-0013/FP32/person-detection-retail-0013",
 "intel/person-detection-retail-0013/FP16/person-detection-retail-0013"
 }},
+{ "age-gender-recognition-retail-0013", {
+"intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013",
+"intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013"
+}},
 #endif
+#if INF_ENGINE_RELEASE >= 2021020000
+// OMZ: 2020.2
+{ "face-detection-0105", {
+"intel/face-detection-0105/FP32/face-detection-0105",
+"intel/face-detection-0105/FP16/face-detection-0105"
+}},
+{ "face-detection-0106", {
+"intel/face-detection-0106/FP32/face-detection-0106",
+"intel/face-detection-0106/FP16/face-detection-0106"
+}},
+#endif
+#if INF_ENGINE_RELEASE >= 2021040000
+// OMZ: 2021.4
+{ "person-vehicle-bike-detection-2004", {
+"intel/person-vehicle-bike-detection-2004/FP32/person-vehicle-bike-detection-2004",
+"intel/person-vehicle-bike-detection-2004/FP16/person-vehicle-bike-detection-2004"
+//"intel/person-vehicle-bike-detection-2004/FP16-INT8/person-vehicle-bike-detection-2004"
+}},
+#endif
 };
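Each table entry above is an FP32/FP16 path pair. A sketch of the struct they populate, inferred from the null checks in `getOpenVINOModel()` below (a missing path would disable that precision):

```cpp
struct OpenVINOModelTestCaseInfo
{
    const char* modelPathFP32;  // path under omz_intel_models, without extension
    const char* modelPathFP16;
};
```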
@@ -123,13 +146,40 @@ static const std::vector<std::string> getOpenVINOTestModelsList()
 return result;
 }
+inline static std::string getOpenVINOModel(const std::string &modelName, bool isFP16)
+{
+const std::map<std::string, OpenVINOModelTestCaseInfo>& models = getOpenVINOTestModels();
+const auto it = models.find(modelName);
+if (it != models.end())
+{
+OpenVINOModelTestCaseInfo modelInfo = it->second;
+if (isFP16 && modelInfo.modelPathFP16)
+return std::string(modelInfo.modelPathFP16);
+else if (!isFP16 && modelInfo.modelPathFP32)
+return std::string(modelInfo.modelPathFP32);
+}
+return std::string();
+}
 static inline void genData(const InferenceEngine::TensorDesc& desc, Mat& m, Blob::Ptr& dataPtr)
 {
 const std::vector<size_t>& dims = desc.getDims();
-m.create(std::vector<int>(dims.begin(), dims.end()), CV_32F);
-randu(m, -1, 1);
-dataPtr = make_shared_blob<float>(desc, (float*)m.data);
+if (desc.getPrecision() == InferenceEngine::Precision::FP32)
+{
+m.create(std::vector<int>(dims.begin(), dims.end()), CV_32F);
+randu(m, -1, 1);
+dataPtr = make_shared_blob<float>(desc, (float*)m.data);
+}
+else if (desc.getPrecision() == InferenceEngine::Precision::I32)
+{
+m.create(std::vector<int>(dims.begin(), dims.end()), CV_32S);
+randu(m, -100, 100);
+dataPtr = make_shared_blob<int>(desc, (int*)m.data);
+}
+else
+{
+FAIL() << "Unsupported precision: " << desc.getPrecision();
+}
+}
 void runIE(Target target, const std::string& xmlPath, const std::string& binPath,
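`genData()` now dispatches on the tensor precision, since some of the newly added models expose I32 blobs. A hedged usage sketch; the `TensorDesc` constructor is the public InferenceEngine one, and the shape is illustrative:

```cpp
InferenceEngine::TensorDesc desc(InferenceEngine::Precision::I32,
                                 {1, 1, 200, 7},  // illustrative detection-style shape
                                 InferenceEngine::Layout::NCHW);
cv::Mat m;
InferenceEngine::Blob::Ptr blob;
genData(desc, m, blob);  // allocates m as CV_32S and fills it via randu(m, -100, 100)
```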
@@ -235,7 +285,16 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
 BlobMap inputBlobs;
 for (auto& it : net.getInputsInfo())
 {
-genData(it.second->getTensorDesc(), inputsMap[it.first], inputBlobs[it.first]);
+const InferenceEngine::TensorDesc& desc = it.second->getTensorDesc();
+genData(desc, inputsMap[it.first], inputBlobs[it.first]);
+if (cvtest::debugLevel > 0)
+{
+const std::vector<size_t>& dims = desc.getDims();
+std::cout << "Input: '" << it.first << "' precision=" << desc.getPrecision() << " dims=" << dims.size() << " [";
+for (auto d : dims)
+std::cout << " " << d;
+std::cout << "] ocv_mat=" << inputsMap[it.first].size << " of " << typeToString(inputsMap[it.first].type()) << std::endl;
+}
 }
 infRequest.SetInput(inputBlobs);
@@ -244,7 +303,16 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath
 BlobMap outputBlobs;
 for (auto& it : net.getOutputsInfo())
 {
-genData(it.second->getTensorDesc(), outputsMap[it.first], outputBlobs[it.first]);
+const InferenceEngine::TensorDesc& desc = it.second->getTensorDesc();
+genData(desc, outputsMap[it.first], outputBlobs[it.first]);
+if (cvtest::debugLevel > 0)
+{
+const std::vector<size_t>& dims = desc.getDims();
+std::cout << "Output: '" << it.first << "' precision=" << desc.getPrecision() << " dims=" << dims.size() << " [";
+for (auto d : dims)
+std::cout << " " << d;
+std::cout << "] ocv_mat=" << outputsMap[it.first].size << " of " << typeToString(outputsMap[it.first].type()) << std::endl;
+}
 }
 infRequest.SetOutput(outputBlobs);
@@ -265,6 +333,12 @@ void runCV(Backend backendId, Target targetId, const std::string& xmlPath, const
 net.setPreferableTarget(targetId);
 std::vector<String> outNames = net.getUnconnectedOutLayersNames();
+if (cvtest::debugLevel > 0)
+{
+std::cout << "OpenCV output names: " << outNames.size() << std::endl;
+for (auto name : outNames)
+std::cout << "- " << name << std::endl;
+}
 std::vector<Mat> outs;
 net.forward(outs, outNames);
@@ -288,13 +362,26 @@ TEST_P(DNNTestOpenVINO, models)
 ASSERT_FALSE(backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) <<
 "Inference Engine backend is required";
-#if INF_ENGINE_VER_MAJOR_EQ(2021040000)
-if (targetId == DNN_TARGET_MYRIAD && (
-modelName == "person-detection-retail-0013" || // ncDeviceOpen:1013 Failed to find booted device after boot
-modelName == "age-gender-recognition-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot
+#if INF_ENGINE_VER_MAJOR_GE(2021030000)
+if (targetId == DNN_TARGET_MYRIAD && (false
+|| modelName == "person-detection-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot
+|| modelName == "age-gender-recognition-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot
+|| modelName == "face-detection-0105" // get_element_type() must be called on a node with exactly one output
+|| modelName == "face-detection-0106" // get_element_type() must be called on a node with exactly one output
+|| modelName == "person-vehicle-bike-detection-2004" // 2021.4+: ncDeviceOpen:1013 Failed to find booted device after boot
 )
 )
 applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+if (targetId == DNN_TARGET_OPENCL && (false
+|| modelName == "face-detection-0106" // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported
+)
+)
+applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
+if (targetId == DNN_TARGET_OPENCL_FP16 && (false
+|| modelName == "face-detection-0106" // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported
+)
+)
+applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION);
 #endif
 #if INF_ENGINE_VER_MAJOR_GE(2020020000)
@@ -319,11 +406,8 @@ TEST_P(DNNTestOpenVINO, models)
 bool isFP16 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD);
-const std::map<std::string, OpenVINOModelTestCaseInfo>& models = getOpenVINOTestModels();
-const auto it = models.find(modelName);
-ASSERT_TRUE(it != models.end()) << modelName;
-OpenVINOModelTestCaseInfo modelInfo = it->second;
-std::string modelPath = isFP16 ? modelInfo.modelPathFP16 : modelInfo.modelPathFP32;
+const std::string modelPath = getOpenVINOModel(modelName, isFP16);
+ASSERT_FALSE(modelPath.empty()) << modelName;
 std::string xmlPath = findDataFile(modelPath + ".xml", false);
 std::string binPath = findDataFile(modelPath + ".bin", false);
@@ -334,6 +418,8 @@ TEST_P(DNNTestOpenVINO, models)
 if (targetId == DNN_TARGET_MYRIAD)
 resetMyriadDevice();
 EXPECT_NO_THROW(runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap)) << "runIE";
+if (targetId == DNN_TARGET_MYRIAD)
+resetMyriadDevice();
 EXPECT_NO_THROW(runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap)) << "runCV";
 double eps = 0;
@@ -341,6 +427,14 @@ TEST_P(DNNTestOpenVINO, models)
 if (targetId == DNN_TARGET_CPU && checkHardwareSupport(CV_CPU_AVX_512F))
 eps = 1e-5;
 #endif
+#if INF_ENGINE_VER_MAJOR_GE(2021030000)
+if (targetId == DNN_TARGET_CPU && modelName == "face-detection-0105")
+eps = 2e-4;
+#endif
+#if INF_ENGINE_VER_MAJOR_GE(2021040000)
+if (targetId == DNN_TARGET_CPU && modelName == "person-vehicle-bike-detection-2004")
+eps = 1e-6;
+#endif
 EXPECT_EQ(ieOutputsMap.size(), cvOutputsMap.size());
 for (auto& srcIt : ieOutputsMap)
......