Support for Nvidia NVCaffe (#1169)

4cfad62a · Hannes Badertscher · Gines · fcf9ab8e · 4cfad62a · 4cfad62a
4 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -185,6 +185,12 @@ if (${DL_FRAMEWORK} MATCHES "CAFFE")
  add_definitions(-DUSE_CAFFE)
 endif (${DL_FRAMEWORK} MATCHES "CAFFE")

+OPTION (NV_CAFFE "Use NVCaffe instead of normal Caffe." OFF)
+if (NV_CAFFE)
+    MESSAGE("-- Using NVIDIA NVCaffe")
+    add_definitions(-DNV_CAFFE)
+endif (NV_CAFFE)
+
 # Set the acceleration library
 if (APPLE)
  set(GPU_MODE CPU_ONLY CACHE STRING "Select the acceleration GPU library or CPU otherwise.")

--- a/doc/installation.md
+++ b/doc/installation.md
@@ -22,9 +22,10 @@ OpenPose - Installation
    8. [Calibration Module](#calibration-module)
    9. [Compiling without cuDNN](#compiling-without-cudnn)
    10. [Custom Caffe](#custom-caffe)
-    11. [Custom OpenCV](#custom-opencv)
-    12. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
-    13. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
+    11. [Custom NVIDIA NVCaffe](#custom-nvidia-nvcaffe)
+    12. [Custom OpenCV](#custom-opencv)
+    13. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
+    14. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)



@@ -365,6 +366,15 @@ For Windows, simply replace the OpenCV DLLs and include folder for your custom o



+#### Custom NVIDIA NVCaffe
+This has been tested with the official NVIDIA Docker image [nvcr.io/nvidia/caffe:18.12-py2](https://ngc.nvidia.com/catalog/containers/nvidia:caffe).
+
+This functionality was added by the community, and we do not officially support it. For questions and issues, please only post on the related [Pull Request #1169](https://github.com/CMU-Perceptual-Computing-Lab/openpose/pull/1169). New GitHub issues about this topic (i.e., outside PR #1169) will be automatically closed with no answer.
+
+To use NVIDIA's NVCaffe Docker image instead of the standard Caffe, 1) set the `NV_CAFFE` variable to `ON` in CMake, 2) set the `BUILD_CAFFE` variable to `OFF`, and 3) set the correct `Caffe_INCLUDE_DIRS` and `Caffe_LIBS` paths following [Custom Caffe](#custom-caffe).
+
+
+
 #### Custom OpenCV
 If you have built OpenCV from source and OpenPose cannot find it automatically, you can set the `OPENCV_DIR` variable to the directory where you build OpenCV (Ubuntu and Mac). For Windows, simply replace the OpenCV DLLs and include folder for your custom one.


--- a/src/openpose/core/arrayCpuGpu.cpp
+++ b/src/openpose/core/arrayCpuGpu.cpp
@@ -10,8 +10,13 @@ namespace op
    struct ArrayCpuGpu<T>::ImplArrayCpuGpu
    {
        #ifdef USE_CAFFE
-            std::unique_ptr<caffe::Blob<T>> upCaffeBlobT;
-            caffe::Blob<T>* pCaffeBlobT;
+            #ifdef NV_CAFFE
+                std::unique_ptr<caffe::TBlob<T>> upCaffeBlobT;
+                caffe::TBlob<T>* pCaffeBlobT;
+            #else
+                std::unique_ptr<caffe::Blob<T>> upCaffeBlobT;
+                caffe::Blob<T>* pCaffeBlobT;
+            #endif
        #endif
    };

@@ -25,7 +30,11 @@ namespace op
            #ifdef USE_CAFFE
                // Construct spImpl
                spImpl.reset(new ImplArrayCpuGpu{});
-                spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{});
+                #ifdef NV_CAFFE
+                    spImpl->upCaffeBlobT.reset(new caffe::TBlob<T>{});
+                #else
+                    spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{});
+                #endif
                spImpl->pCaffeBlobT = spImpl->upCaffeBlobT.get();
            #else
                error(constructorErrorMessage, __LINE__, __FUNCTION__, __FILE__);
@@ -45,7 +54,11 @@ namespace op
            #ifdef USE_CAFFE
                // Construct spImpl
                spImpl.reset(new ImplArrayCpuGpu{});
-                spImpl->pCaffeBlobT = (caffe::Blob<T>*)caffeBlobTPtr;
+                #ifdef NV_CAFFE
+                    spImpl->pCaffeBlobT = (caffe::TBlob<T>*)caffeBlobTPtr;
+                #else
+                    spImpl->pCaffeBlobT = (caffe::Blob<T>*)caffeBlobTPtr;
+                #endif
            #else
                UNUSED(caffeBlobTPtr);
                error(constructorErrorMessage, __LINE__, __FUNCTION__, __FILE__);
@@ -74,7 +87,11 @@ namespace op
                    arraySize.emplace_back(sizeI);
                // Construct spImpl
                spImpl.reset(new ImplArrayCpuGpu{});
-                spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{arraySize});
+                #ifdef NV_CAFFE
+                    spImpl->upCaffeBlobT.reset(new caffe::TBlob<T>{arraySize});
+                #else
+                    spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{arraySize});
+                #endif
                spImpl->pCaffeBlobT = spImpl->upCaffeBlobT.get();
                // Copy data
                // CPU copy
@@ -107,7 +124,11 @@ namespace op
            #ifdef USE_CAFFE
                // Construct spImpl
                spImpl.reset(new ImplArrayCpuGpu{});
-                spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{num, channels, height, width});
+                #ifdef NV_CAFFE
+                    spImpl->upCaffeBlobT.reset(new caffe::TBlob<T>{num, channels, height, width});
+                #else
+                    spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{num, channels, height, width});
+                #endif
                spImpl->pCaffeBlobT = spImpl->upCaffeBlobT.get();
            #else
                UNUSED(num);

--- a/src/openpose/net/netCaffe.cpp
+++ b/src/openpose/net/netCaffe.cpp
@@ -34,8 +34,13 @@ namespace op
            const std::string mLastBlobName;
            std::vector<int> mNetInputSize4D;
            // Init with thread
-            std::unique_ptr<caffe::Net<float>> upCaffeNet;
-            boost::shared_ptr<caffe::Blob<float>> spOutputBlob;
+            #ifdef NV_CAFFE
+                std::unique_ptr<caffe::Net> upCaffeNet;
+                boost::shared_ptr<caffe::TBlob<float>> spOutputBlob;
+            #else
+                std::unique_ptr<caffe::Net<float>> upCaffeNet;
+                boost::shared_ptr<caffe::Blob<float>> spOutputBlob;
+            #endif

            ImplNetCaffe(const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId,
                         const bool enableGoogleLogging, const std::string& lastBlobName) :
@@ -93,7 +98,11 @@ namespace op
    };

    #ifdef USE_CAFFE
+        #ifdef NV_CAFFE
+        inline void reshapeNetCaffe(caffe::Net* caffeNet, const std::vector<int>& dimensions)
+        #else
        inline void reshapeNetCaffe(caffe::Net<float>* caffeNet, const std::vector<int>& dimensions)
+        #endif
        {
            try
            {
@@ -156,7 +165,11 @@ namespace op
                    #ifdef USE_CUDA
                        caffe::Caffe::set_mode(caffe::Caffe::GPU);
                        caffe::Caffe::SetDevice(upImpl->mGpuId);
-                        upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
+                        #ifdef NV_CAFFE
+                            upImpl->upCaffeNet.reset(new caffe::Net{upImpl->mCaffeProto, caffe::TEST});
+                        #else
+                            upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
+                        #endif
                    #else
                        caffe::Caffe::set_mode(caffe::Caffe::CPU);
                        #ifdef _WIN32
@@ -172,7 +185,12 @@ namespace op
                    #endif
                #endif
                // Set spOutputBlob
-                upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
+                #ifdef NV_CAFFE
+                    upImpl->spOutputBlob = boost::static_pointer_cast<caffe::TBlob<float>>(
+                        upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName));
+                #else
+                    upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
+                #endif
                // Sanity check
                if (upImpl->spOutputBlob == nullptr)
                    error("The output blob is a nullptr. Did you use the same name than the prototxt? (Used: "
@@ -207,7 +225,11 @@ namespace op
                }
                // Copy frame data to GPU memory
                #ifdef USE_CUDA
-                    auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data();
+                    #ifdef NV_CAFFE
+                        auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data<float>();
+                    #else
+                        auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data();
+                    #endif
                    cudaMemcpy(gpuImagePtr, inputData.getConstPtr(), inputData.getVolume() * sizeof(float),
                               cudaMemcpyHostToDevice);
                #elif defined USE_OPENCL