提交 4cfad62a 编写于 作者: H Hannes Badertscher 提交者: Gines

Support for Nvidia NVCaffe (#1169)

上级 fcf9ab8e
......@@ -185,6 +185,12 @@ if (${DL_FRAMEWORK} MATCHES "CAFFE")
add_definitions(-DUSE_CAFFE)
endif (${DL_FRAMEWORK} MATCHES "CAFFE")
OPTION (NV_CAFFE "Use NVCaffe instead of normal Caffe." OFF)
if (NV_CAFFE)
MESSAGE("-- Using NVIDIA NVCaffe")
add_definitions(-DNV_CAFFE)
endif (NV_CAFFE)
# Set the acceleration library
if (APPLE)
set(GPU_MODE CPU_ONLY CACHE STRING "Select the acceleration GPU library or CPU otherwise.")
......
......@@ -22,9 +22,10 @@ OpenPose - Installation
8. [Calibration Module](#calibration-module)
9. [Compiling without cuDNN](#compiling-without-cudnn)
10. [Custom Caffe](#custom-caffe)
11. [Custom OpenCV](#custom-opencv)
12. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
13. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
11. [Custom NVIDIA NVCaffe](#custom-nvidia-nvcaffe)
12. [Custom OpenCV](#custom-opencv)
13. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
14. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
......@@ -365,6 +366,15 @@ For Windows, simply replace the OpenCV DLLs and include folder for your custom o
#### Custom NVIDIA NVCaffe
This has been tested with the official NVIDIA Docker image [nvcr.io/nvidia/caffe:18.12-py2](https://ngc.nvidia.com/catalog/containers/nvidia:caffe).
This functionality was added by the community, and we do not officially support it. For questions and issues, please only post on the related [Pull Request #1169](https://github.com/CMU-Perceptual-Computing-Lab/openpose/pull/1169). New GitHub issues about this topic (i.e., outside PR #1169) will be automatically closed with no answer.
To use NVIDIA's NVCaffe Docker image instead of the standard Caffe, 1) set the `NV_CAFFE` variable to `ON` in CMake, 2) set the `BUILD_CAFFE` variable to `OFF`, and 3) set the correct `Caffe_INCLUDE_DIRS` and `Caffe_LIBS` paths following [Custom Caffe](#custom-caffe).
#### Custom OpenCV
If you have built OpenCV from source and OpenPose cannot find it automatically, you can set the `OPENCV_DIR` variable to the directory where you built OpenCV (Ubuntu and Mac). For Windows, simply replace the OpenCV DLLs and include folder with your custom ones.
......
......@@ -10,8 +10,13 @@ namespace op
struct ArrayCpuGpu<T>::ImplArrayCpuGpu
{
#ifdef USE_CAFFE
std::unique_ptr<caffe::Blob<T>> upCaffeBlobT;
caffe::Blob<T>* pCaffeBlobT;
#ifdef NV_CAFFE
std::unique_ptr<caffe::TBlob<T>> upCaffeBlobT;
caffe::TBlob<T>* pCaffeBlobT;
#else
std::unique_ptr<caffe::Blob<T>> upCaffeBlobT;
caffe::Blob<T>* pCaffeBlobT;
#endif
#endif
};
......@@ -25,7 +30,11 @@ namespace op
#ifdef USE_CAFFE
// Construct spImpl
spImpl.reset(new ImplArrayCpuGpu{});
spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{});
#ifdef NV_CAFFE
spImpl->upCaffeBlobT.reset(new caffe::TBlob<T>{});
#else
spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{});
#endif
spImpl->pCaffeBlobT = spImpl->upCaffeBlobT.get();
#else
error(constructorErrorMessage, __LINE__, __FUNCTION__, __FILE__);
......@@ -45,7 +54,11 @@ namespace op
#ifdef USE_CAFFE
// Construct spImpl
spImpl.reset(new ImplArrayCpuGpu{});
spImpl->pCaffeBlobT = (caffe::Blob<T>*)caffeBlobTPtr;
#ifdef NV_CAFFE
spImpl->pCaffeBlobT = (caffe::TBlob<T>*)caffeBlobTPtr;
#else
spImpl->pCaffeBlobT = (caffe::Blob<T>*)caffeBlobTPtr;
#endif
#else
UNUSED(caffeBlobTPtr);
error(constructorErrorMessage, __LINE__, __FUNCTION__, __FILE__);
......@@ -74,7 +87,11 @@ namespace op
arraySize.emplace_back(sizeI);
// Construct spImpl
spImpl.reset(new ImplArrayCpuGpu{});
spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{arraySize});
#ifdef NV_CAFFE
spImpl->upCaffeBlobT.reset(new caffe::TBlob<T>{arraySize});
#else
spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{arraySize});
#endif
spImpl->pCaffeBlobT = spImpl->upCaffeBlobT.get();
// Copy data
// CPU copy
......@@ -107,7 +124,11 @@ namespace op
#ifdef USE_CAFFE
// Construct spImpl
spImpl.reset(new ImplArrayCpuGpu{});
spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{num, channels, height, width});
#ifdef NV_CAFFE
spImpl->upCaffeBlobT.reset(new caffe::TBlob<T>{num, channels, height, width});
#else
spImpl->upCaffeBlobT.reset(new caffe::Blob<T>{num, channels, height, width});
#endif
spImpl->pCaffeBlobT = spImpl->upCaffeBlobT.get();
#else
UNUSED(num);
......
......@@ -34,8 +34,13 @@ namespace op
const std::string mLastBlobName;
std::vector<int> mNetInputSize4D;
// Init with thread
std::unique_ptr<caffe::Net<float>> upCaffeNet;
boost::shared_ptr<caffe::Blob<float>> spOutputBlob;
#ifdef NV_CAFFE
std::unique_ptr<caffe::Net> upCaffeNet;
boost::shared_ptr<caffe::TBlob<float>> spOutputBlob;
#else
std::unique_ptr<caffe::Net<float>> upCaffeNet;
boost::shared_ptr<caffe::Blob<float>> spOutputBlob;
#endif
ImplNetCaffe(const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId,
const bool enableGoogleLogging, const std::string& lastBlobName) :
......@@ -93,7 +98,11 @@ namespace op
};
#ifdef USE_CAFFE
#ifdef NV_CAFFE
inline void reshapeNetCaffe(caffe::Net* caffeNet, const std::vector<int>& dimensions)
#else
inline void reshapeNetCaffe(caffe::Net<float>* caffeNet, const std::vector<int>& dimensions)
#endif
{
try
{
......@@ -156,7 +165,11 @@ namespace op
#ifdef USE_CUDA
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(upImpl->mGpuId);
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
#ifdef NV_CAFFE
upImpl->upCaffeNet.reset(new caffe::Net{upImpl->mCaffeProto, caffe::TEST});
#else
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
#endif
#else
caffe::Caffe::set_mode(caffe::Caffe::CPU);
#ifdef _WIN32
......@@ -172,7 +185,12 @@ namespace op
#endif
#endif
// Set spOutputBlob
upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
#ifdef NV_CAFFE
upImpl->spOutputBlob = boost::static_pointer_cast<caffe::TBlob<float>>(
upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName));
#else
upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
#endif
// Sanity check
if (upImpl->spOutputBlob == nullptr)
error("The output blob is a nullptr. Did you use the same name than the prototxt? (Used: "
......@@ -207,7 +225,11 @@ namespace op
}
// Copy frame data to GPU memory
#ifdef USE_CUDA
auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data();
#ifdef NV_CAFFE
auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data<float>();
#else
auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data();
#endif
cudaMemcpy(gpuImagePtr, inputData.getConstPtr(), inputData.getVolume() * sizeof(float),
cudaMemcpyHostToDevice);
#elif defined USE_OPENCL
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册