Commit 7fe97376 authored by Dmitry Kurtaev

MobileNet-SSD from TensorFlow 1.3 and Inception-V2-SSD using Inference Engine backend

Parent 090ee46f
......@@ -15,23 +15,25 @@ macro(ie_fail)
return()
endmacro()
if(NOT INF_ENGINE_ROOT_DIR OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/inference_engine/include/inference_engine.hpp")
if(NOT INF_ENGINE_ROOT_DIR OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}/include/inference_engine.hpp")
set(ie_root_paths "${INF_ENGINE_ROOT_DIR}")
if(DEFINED ENV{INTEL_CVSDK_DIR})
list(APPEND ie_root_paths "$ENV{INTEL_CVSDK_DIR}")
list(APPEND ie_root_paths "$ENV{INTEL_CVSDK_DIR}/inference_engine")
endif()
if(DEFINED INTEL_CVSDK_DIR)
list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}")
list(APPEND ie_root_paths "${INTEL_CVSDK_DIR}/inference_engine")
endif()
if(WITH_INF_ENGINE AND NOT ie_root_paths)
list(APPEND ie_root_paths "/opt/intel/deeplearning_deploymenttoolkit/deployment_tools")
list(APPEND ie_root_paths "/opt/intel/deeplearning_deploymenttoolkit/deployment_tools/inference_engine")
endif()
find_path(INF_ENGINE_ROOT_DIR inference_engine/include/inference_engine.hpp PATHS ${ie_root_paths})
find_path(INF_ENGINE_ROOT_DIR include/inference_engine.hpp PATHS ${ie_root_paths})
endif()
set(INF_ENGINE_INCLUDE_DIRS "${INF_ENGINE_ROOT_DIR}/inference_engine/include" CACHE PATH "Path to Inference Engine include directory")
set(INF_ENGINE_INCLUDE_DIRS "${INF_ENGINE_ROOT_DIR}/include" CACHE PATH "Path to Inference Engine include directory")
if(NOT INF_ENGINE_ROOT_DIR
OR NOT EXISTS "${INF_ENGINE_ROOT_DIR}"
......@@ -42,12 +44,21 @@ if(NOT INF_ENGINE_ROOT_DIR
endif()
set(INF_ENGINE_LIBRARIES "")
foreach(lib inference_engine mklml_intel iomp5)
set(ie_lib_list inference_engine)
if(UNIX)
list(APPEND ie_lib_list mklml_intel iomp5)
endif()
foreach(lib ${ie_lib_list})
find_library(${lib}
NAMES ${lib}
# For inference_engine
HINTS ${IE_PLUGINS_PATH}
HINTS "$ENV{IE_PLUGINS_PATH}"
HINTS ${INF_ENGINE_ROOT_DIR}/external/mklml_lnx/lib
# For mklml_intel, iomp5
HINTS ${INTEL_CVSDK_DIR}/external/mklml_lnx/lib
HINTS ${INTEL_CVSDK_DIR}/inference_engine/external/mklml_lnx/lib
)
if(NOT ${lib})
ie_fail()
......
......@@ -157,13 +157,16 @@ PERF_TEST_P_(DNNTestNetwork, OpenFace)
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_Caffe)
{
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", "",
Mat(cv::Size(300, 300), CV_32FC3), "detection_out", "caffe");
}
PERF_TEST_P_(DNNTestNetwork, MobileNet_SSD_TensorFlow)
{
if (backend == DNN_BACKEND_INFERENCE_ENGINE) throw SkipTestException("");
if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
processNet("dnn/ssd_mobilenet_v1_coco.pb", "ssd_mobilenet_v1_coco.pbtxt", "",
Mat(cv::Size(300, 300), CV_32FC3), "", "tensorflow");
}
......@@ -207,6 +210,13 @@ PERF_TEST_P_(DNNTestNetwork, opencv_face_detector)
Mat(cv::Size(300, 300), CV_32FC3), "", "caffe");
}
PERF_TEST_P_(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
{
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "ssd_inception_v2_coco_2017_11_17.pbtxt", "",
Mat(cv::Size(300, 300), CV_32FC3), "", "tensorflow");
}
const tuple<DNNBackend, DNNTarget> testCases[] = {
#ifdef HAVE_HALIDE
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
......
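For context, a minimal usage sketch (not part of this patch) of how the TensorFlow SSD model exercised by these perf tests can be run through the new Inference Engine backend. The model, config, and image file names follow the test data used in this commit and are assumed to be available locally.

```cpp
#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

int main()
{
    using namespace cv;
    using namespace cv::dnn;

    Net net = readNetFromTensorflow("ssd_mobilenet_v1_coco.pb",
                                    "ssd_mobilenet_v1_coco.pbtxt");
    net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);

    Mat img = imread("street.png");
    // Same preprocessing as in the tests: 300x300 input, values scaled to [-1, 1].
    Mat blob = blobFromImage(img, 1.0f / 127.5, Size(300, 300),
                             Scalar(127.5, 127.5, 127.5), /*swapRB=*/false);
    net.setInput(blob);
    Mat detections = net.forward();  // 1x1xNx7: [imgId, classId, conf, x1, y1, x2, y2]
    return 0;
}
```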
......@@ -298,15 +298,16 @@ public:
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&)
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >& inputs)
{
#ifdef HAVE_INF_ENGINE
InferenceEngine::DataPtr input = infEngineDataNode(inputs[0]);
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "Concat";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::ConcatLayer> ieLayer(new InferenceEngine::ConcatLayer(lp));
ieLayer->_axis = axis;
ieLayer->_axis = clamp(axis, input->dims.size());
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
......
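For reference, the added clamp(axis, input->dims.size()) call normalizes the concatenation axis against the input rank before handing it to Inference Engine. A standalone sketch of the assumed behaviour (negative axes count from the end, mirroring the clamp() helper from opencv2/dnn/shape_utils.hpp):

```cpp
#include <cassert>

// axis = -1 on a 4-D blob becomes 3; non-negative axes pass through unchanged.
static int normalizeAxis(int axis, int numDims)
{
    int a = axis < 0 ? axis + numDims : axis;
    assert(0 <= a && a < numDims);
    return a;
}
```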
......@@ -193,7 +193,7 @@ public:
virtual bool supportBackend(int backendId)
{
return backendId == DNN_BACKEND_DEFAULT ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && !_locPredTransposed;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......
......@@ -114,7 +114,7 @@ public:
{
return backendId == DNN_BACKEND_DEFAULT ||
backendId == DNN_BACKEND_HALIDE && haveHalide() ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() && this->type != "Sigmoid";
}
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node)
......@@ -397,8 +397,11 @@ struct ReLU6Functor
#ifdef HAVE_INF_ENGINE
InferenceEngine::CNNLayerPtr initInfEngine(InferenceEngine::LayerParams& lp)
{
CV_Error(Error::StsNotImplemented, "ReLU6");
return InferenceEngine::CNNLayerPtr();
lp.type = "Clamp";
std::shared_ptr<InferenceEngine::ClampLayer> ieLayer(new InferenceEngine::ClampLayer(lp));
ieLayer->min_value = minValue;
ieLayer->max_value = maxValue;
return ieLayer;
}
#endif // HAVE_INF_ENGINE
......
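The replacement works because ReLU6 is exactly an element-wise clamp of the input to [0, 6], which is what the Inference Engine "Clamp" layer computes with the min/max values set above. An illustrative sketch:

```cpp
#include <algorithm>

static float relu6(float x)
{
    const float minValue = 0.f, maxValue = 6.f;        // defaults of ReLU6Functor
    return std::min(std::max(x, minValue), maxValue);  // same as Clamp(min=0, max=6)
}
```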
......@@ -239,7 +239,7 @@ public:
ieLayer->_stride_y = stride.height;
ieLayer->_padding_x = pad.width;
ieLayer->_padding_y = pad.height;
ieLayer->_exclude_pad = false;
ieLayer->_exclude_pad = type == AVE && padMode == "SAME";
ieLayer->params["rounding-type"] = ceilMode ? "ceil" : "floor";
if (type == MAX)
ieLayer->_type = InferenceEngine::PoolingLayer::PoolType::MAX;
......
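The new _exclude_pad expression reflects TensorFlow-style average pooling with SAME padding, where the divisor counts only in-bounds samples rather than the full kernel area. A simplified 1-D sketch of the assumed semantics (illustrative only):

```cpp
#include <vector>

static float avgPoolAt(const std::vector<float>& row, int width,
                       int x0, int kernel, bool excludePad)
{
    float sum = 0.f;
    int valid = 0;
    for (int i = 0; i < kernel; ++i)
    {
        int x = x0 + i;                // may fall into the padded border
        if (x >= 0 && x < width)
        {
            sum += row[x];
            ++valid;
        }
    }
    // exclude_pad changes only the denominator: valid samples vs. full kernel size.
    int denom = excludePad ? valid : kernel;
    return denom > 0 ? sum / denom : 0.f;
}
```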
......@@ -252,7 +252,8 @@ public:
virtual bool supportBackend(int backendId)
{
return backendId == DNN_BACKEND_DEFAULT ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine() &&
_scales.empty() && !_explicitSizes;
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
......
......@@ -10,6 +10,7 @@ Implementation of shift layer, which adds up const values to blob.
*/
#include "../precomp.hpp"
#include "op_inf_engine.hpp"
#include <opencv2/dnn/shape_utils.hpp>
namespace cv
......@@ -26,6 +27,12 @@ public:
CV_Assert(blobs.size() == 1);
}
virtual bool supportBackend(int backendId)
{
return backendId == DNN_BACKEND_DEFAULT ||
backendId == DNN_BACKEND_INFERENCE_ENGINE && haveInfEngine();
}
bool getMemoryShapes(const std::vector<MatShape> &inputs,
const int requiredOutputs,
std::vector<MatShape> &outputs,
......@@ -83,6 +90,52 @@ public:
}
}
virtual Ptr<BackendNode> tryAttach(const Ptr<BackendNode>& node)
{
switch (node->backendId)
{
case DNN_BACKEND_INFERENCE_ENGINE:
{
#ifdef HAVE_INF_ENGINE
auto base = node.dynamicCast<InfEngineBackendNode>();
auto conv = std::dynamic_pointer_cast<InferenceEngine::ConvolutionLayer>(base->layer);
if (conv)
{
fuseConvWeights(conv, Mat(), blobs[0]);
return base;
}
#endif // HAVE_INF_ENGINE
break;
}
}
return Ptr<BackendNode>();
}
virtual Ptr<BackendNode> initInfEngine(const std::vector<Ptr<BackendWrapper> >&)
{
#ifdef HAVE_INF_ENGINE
// Inference Engine has no layer just for biases. Create a linear
// transformation layer with ones weights.
InferenceEngine::LayerParams lp;
lp.name = name;
lp.type = "ScaleShift";
lp.precision = InferenceEngine::Precision::FP32;
std::shared_ptr<InferenceEngine::ScaleShiftLayer> ieLayer(new InferenceEngine::ScaleShiftLayer(lp));
auto weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32,
{blobs[0].total()});
weights->allocate();
std::vector<float> ones(blobs[0].total(), 1);
weights->set(ones);
ieLayer->_weights = weights;
ieLayer->_biases = wrapToInfEngineBlob(blobs[0]);
return Ptr<BackendNode>(new InfEngineBackendNode(ieLayer));
#endif // HAVE_INF_ENGINE
return Ptr<BackendNode>();
}
virtual int64 getFLOPS(const std::vector<MatShape> &inputs,
const std::vector<MatShape> &outputs) const
{
......
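The ScaleShift construction above encodes a pure bias addition as an affine transform with the weights fixed to one. A per-channel sketch of the idea (illustrative only, not the blob-based implementation):

```cpp
#include <vector>

static void shiftChannel(std::vector<float>& data, float bias)
{
    const float weight = 1.f;      // the "ones weights" set in initInfEngine()
    for (float& v : data)
        v = weight * v + bias;     // ScaleShift with w == 1 is a plain bias add
}
```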
......@@ -54,7 +54,8 @@ static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std:
std::vector<size_t> reversedShape(&m.size[0], &m.size[0] + m.dims);
std::reverse(reversedShape.begin(), reversedShape.end());
return InferenceEngine::DataPtr(
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32)
new InferenceEngine::Data(name, reversedShape, InferenceEngine::Precision::FP32,
InferenceEngine::Layout::ANY)
);
}
......@@ -122,37 +123,6 @@ void InfEngineBackendNet::getOutputsInfo(InferenceEngine::OutputsDataMap &output
// Returns input references that aren't connected to internal outputs.
void InfEngineBackendNet::getInputsInfo(InferenceEngine::InputsDataMap &inputs_) noexcept
{
if (inputs.empty())
{
std::map<std::string, InferenceEngine::DataPtr> internalOutputs;
for (const auto& l : layers)
{
for (const InferenceEngine::DataWeakPtr& ptr : l->insData)
{
InferenceEngine::DataPtr inp(ptr);
if (internalOutputs.find(inp->name) == internalOutputs.end())
{
InferenceEngine::InputInfo::Ptr inpInfo(new InferenceEngine::InputInfo());
inpInfo->setInputData(inp);
if (inputs.find(inp->name) == inputs.end())
inputs[inp->name] = inpInfo;
}
}
for (const InferenceEngine::DataPtr& out : l->outData)
{
// TODO: Replace with a uniqueness assertion.
if (internalOutputs.find(out->name) == internalOutputs.end())
internalOutputs[out->name] = out;
}
}
CV_Assert(layers.empty() || !inputs.empty());
}
inpBlobs.clear();
for (const auto& it : inputs)
{
CV_Assert(allBlobs.find(it.first) != allBlobs.end());
inpBlobs[it.first] = allBlobs[it.first];
}
inputs_ = inputs;
}
......@@ -239,7 +209,31 @@ size_t InfEngineBackendNet::getBatchSize() const noexcept
void InfEngineBackendNet::initEngine()
{
CV_Assert(!isInitialized());
CV_Assert(!isInitialized(), !layers.empty());
// Collect all external input blobs.
std::map<std::string, InferenceEngine::DataPtr> internalOutputs;
for (const auto& l : layers)
{
for (const InferenceEngine::DataWeakPtr& ptr : l->insData)
{
InferenceEngine::DataPtr inp(ptr);
if (internalOutputs.find(inp->name) == internalOutputs.end())
{
InferenceEngine::InputInfo::Ptr inpInfo(new InferenceEngine::InputInfo());
inpInfo->setInputData(inp);
if (inputs.find(inp->name) == inputs.end())
inputs[inp->name] = inpInfo;
}
}
for (const InferenceEngine::DataPtr& out : l->outData)
{
// TODO: Replace with a uniqueness assertion.
if (internalOutputs.find(out->name) == internalOutputs.end())
internalOutputs[out->name] = out;
}
}
CV_Assert(!inputs.empty());
// Add all unconnected blobs to output blobs.
InferenceEngine::OutputsDataMap unconnectedOuts;
......@@ -258,13 +252,21 @@ void InfEngineBackendNet::initEngine()
unconnectedOuts.erase(InferenceEngine::DataPtr(inp)->name);
}
}
CV_Assert(layers.empty() || !unconnectedOuts.empty());
CV_Assert(!unconnectedOuts.empty());
for (auto it = unconnectedOuts.begin(); it != unconnectedOuts.end(); ++it)
{
outputs[it->first] = it->second;
}
// Set up input blobs.
inpBlobs.clear();
for (const auto& it : inputs)
{
CV_Assert(allBlobs.find(it.first) != allBlobs.end());
inpBlobs[it.first] = allBlobs[it.first];
}
// Set up output blobs.
outBlobs.clear();
for (const auto& it : outputs)
......@@ -273,7 +275,11 @@ void InfEngineBackendNet::initEngine()
outBlobs[it.first] = allBlobs[it.first];
}
#ifdef _WIN32
engine = InferenceEngine::InferenceEnginePluginPtr("MKLDNNPlugin.dll");
#else
engine = InferenceEngine::InferenceEnginePluginPtr("libMKLDNNPlugin.so");
#endif // _WIN32
InferenceEngine::ResponseDesc resp;
InferenceEngine::StatusCode status = engine->LoadNetwork(*this, &resp);
if (status != InferenceEngine::StatusCode::OK)
......@@ -305,7 +311,8 @@ void InfEngineBackendNet::forward()
static inline Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
{
// NOTE: Inference Engine sizes are reversed.
std::vector<int> size(blob->dims().begin(), blob->dims().end());
std::vector<size_t> dims = blob->dims();
std::vector<int> size(dims.begin(), dims.end());
std::reverse(size.begin(), size.end());
return Mat(size, CV_32F, (void*)blob->buffer());
}
......@@ -313,28 +320,32 @@ static inline Mat infEngineBlobToMat(const InferenceEngine::Blob::Ptr& blob)
void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& conv,
const Mat& w, const Mat& b)
{
// Get convolution's weights. Clone the data because Inference Engine can host it
// and conv->_weights->allocate() below will deallocate it.
Mat originWeights = infEngineBlobToMat(conv->_weights).clone();
// Create new weights blob.
conv->_weights = InferenceEngine::make_shared_blob<float>(
InferenceEngine::Precision::FP32, conv->_weights->dims());
conv->_weights->allocate();
// Convolution weights have OIHW data layout.
// (conv(I) + b1 ) * w + b2
// w*conv(I) + b1 * w + b2
Mat fusedWeights = infEngineBlobToMat(conv->_weights);
const int numChannels = fusedWeights.size[0];
// Mat weights = blobs[0].reshape(1, 1);
// Mat bias = hasBias ? blobs[1].reshape(1, 1) : Mat();
CV_Assert(numChannels == w.total());
CV_Assert(b.empty() || numChannels == b.total());
for (int i = 0; i < numChannels; ++i)
CV_Assert(!w.empty() || !b.empty());
if (!w.empty())
{
cv::multiply(slice(originWeights, i), w.at<float>(i), slice(fusedWeights, i));
// Get convolution's weights. Clone the data because Inference Engine can host it
// and conv->_weights->allocate() below will deallocate it.
Mat originWeights = infEngineBlobToMat(conv->_weights).clone();
// Create new weights blob.
conv->_weights = InferenceEngine::make_shared_blob<float>(
InferenceEngine::Precision::FP32, conv->_weights->dims());
conv->_weights->allocate();
// Convolution weights have OIHW data layout.
// (conv(I) + b1 ) * w + b2
// w*conv(I) + b1 * w + b2
Mat fusedWeights = infEngineBlobToMat(conv->_weights);
const int numChannels = fusedWeights.size[0];
// Mat weights = blobs[0].reshape(1, 1);
// Mat bias = hasBias ? blobs[1].reshape(1, 1) : Mat();
CV_Assert(numChannels == w.total());
CV_Assert(b.empty() || numChannels == b.total());
for (int i = 0; i < numChannels; ++i)
{
cv::multiply(slice(originWeights, i), w.at<float>(i), slice(fusedWeights, i));
}
}
if (conv->_biases)
{
......@@ -345,8 +356,10 @@ void fuseConvWeights(const std::shared_ptr<InferenceEngine::ConvolutionLayer>& c
InferenceEngine::Precision::FP32, conv->_biases->dims());
conv->_biases->allocate();
Mat fusedBiases = infEngineBlobToMat(conv->_biases);
originBiases.copyTo(fusedBiases);
cv::multiply(w.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), originBiases, fusedBiases);
if (!w.empty())
cv::multiply(w.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), fusedBiases, fusedBiases);
if (!b.empty())
cv::add(fusedBiases, b.reshape(1, fusedBiases.dims, &fusedBiases.size[0]), fusedBiases);
}
......
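The fusion updated above follows the identity noted in the code comments: (conv(I) + b1) * w + b = conv'(I) + b1', where conv' scales the original weights by w per output channel and b1' = b1 * w + b. A simplified sketch with 1-D per-channel parameters (an assumption-laden illustration, not the actual Mat-based implementation):

```cpp
#include <vector>

struct ConvParams
{
    std::vector<float> weights;  // one value per output channel (simplified)
    std::vector<float> biases;   // one value per output channel
};

static void fuseScaleShift(ConvParams& conv,
                           const std::vector<float>& w,  // per-channel scale (may be empty)
                           const std::vector<float>& b)  // per-channel shift (may be empty)
{
    const size_t numChannels = conv.weights.size();
    for (size_t c = 0; c < numChannels; ++c)
    {
        float scale = w.empty() ? 1.f : w[c];
        float shift = b.empty() ? 0.f : b[c];
        conv.weights[c] *= scale;                           // W' = W * w
        conv.biases[c]  = conv.biases[c] * scale + shift;   // b1' = b1 * w + b
    }
}
```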
......@@ -162,8 +162,9 @@ TEST_P(DNNTestNetwork, ENet)
2e-5, 0.15);
}
TEST_P(DNNTestNetwork, MobileNetSSD)
TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe)
{
if (backend == DNN_BACKEND_HALIDE) throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
......@@ -171,6 +172,17 @@ TEST_P(DNNTestNetwork, MobileNetSSD)
inp, "detection_out", "caffe");
}
TEST_P(DNNTestNetwork, MobileNet_SSD_TensorFlow)
{
if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
processNet("dnn/ssd_mobilenet_v1_coco.pb", "dnn/ssd_mobilenet_v1_coco.pbtxt",
inp, "detection_out", "tensorflow");
}
TEST_P(DNNTestNetwork, SSD_VGG16)
{
if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
......@@ -221,6 +233,17 @@ TEST_P(DNNTestNetwork, opencv_face_detector)
inp, "detection_out", "caffe");
}
TEST_P(DNNTestNetwork, Inception_v2_SSD_TensorFlow)
{
if (backend == DNN_BACKEND_DEFAULT && target == DNN_TARGET_OPENCL ||
backend == DNN_BACKEND_HALIDE)
throw SkipTestException("");
Mat sample = imread(findDataFile("dnn/street.png", false));
Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false);
processNet("dnn/ssd_inception_v2_coco_2017_11_17.pb", "dnn/ssd_inception_v2_coco_2017_11_17.pbtxt",
inp, "detection_out", "tensorflow");
}
const tuple<DNNBackend, DNNTarget> testCases[] = {
#ifdef HAVE_HALIDE
tuple<DNNBackend, DNNTarget>(DNN_BACKEND_HALIDE, DNN_TARGET_CPU),
......